# Programação Python Avançado 🐥

---



## Data classes

As *data classes* foram introduzidas no Python 3.7. Uma *data class* é uma classe usada principalmente para armazenar dados, embora não haja restrições quanto ao que ela pode conter (métodos também são permitidos).

A vantagem das *data classes* é que várias funcionalidades já vêm implementadas e são fáceis de parametrizar:

- Construtor para inicializar todos os parâmetros
- Sobrescrita do operador de comparação `__eq__`
- Geração automática do método `__repr__` (também usado por `str()` e `print`, já que `__str__` não é gerado)
- Representação de dados imutáveis
- Suporte a herança


In [None]:
from dataclasses import dataclass

@dataclass
class Carta:
    """Playing card described by its rank (`valor`) and suit (`naipe`)."""

    valor: str
    naipe: str

In [None]:
# Fields can be passed positionally or by keyword.
carta1 = Carta('4', 'copas')
carta2 = Carta('K', 'paus')
carta3 = Carta(valor='Q', naipe='ouros')

# One call, one card per line; print shows the generated __repr__.
print(carta1, carta2, carta3, sep='\n')

Carta(valor='4', naipe='copas')
Carta(valor='K', naipe='paus')
Carta(valor='Q', naipe='ouros')


In [None]:
# A plain @dataclass is mutable: fields can be reassigned after creation.
carta4 = Carta(valor='J', naipe='espadinhas')
print(carta4)

carta4.valor, carta4.naipe = '3', 'copas'
print(carta4)

Carta(valor='J', naipe='espadinhas')
Carta(valor='3', naipe='copas')


In [None]:
@dataclass(frozen=True)
class Ponto:
    """Immutable point with integer coordinates; both default to 0."""

    x: int = 0
    y: int = 0

p1 = Ponto()          # no arguments: both fields take their defaults
p2 = Ponto(3, 4)      # positional arguments
p3 = Ponto(x=1, y=1)  # keyword arguments

print(p1, p2, p3, sep='\n')

Ponto(x=0, y=0)
Ponto(x=3, y=4)
Ponto(x=1, y=1)


In [None]:
p3 = Ponto(3, 5)
print(p3)

# Ponto is declared with frozen=True, so assigning to a field raises
# dataclasses.FrozenInstanceError and the print below is never reached.
p3.x = 10 # ERROR!
print(p3)

Ponto(x=3, y=5)


FrozenInstanceError: ignored

In [None]:
from dataclasses import dataclass, field
from typing import List

@dataclass
class Baralho:
    """Deck holding a list of `Carta` objects; starts empty by default."""

    # default_factory=list builds a new list for each instance.
    cartas: List[Carta] = field(default_factory=list)

baralho = Baralho()
# Add the three cards created earlier, in the same order, with one call.
baralho.cartas.extend([carta1, carta2, carta3])

print(baralho)

Baralho(cartas=[Carta(valor='4', naipe='copas'), Carta(valor='K', naipe='paus'), Carta(valor='Q', naipe='ouros')])


## Numpy

NumPy é uma biblioteca para computação científica. Ela oferece vetores e matrizes multidimensionais de alto desempenho.

In [None]:
import numpy as np

# Build a one-dimensional array from a Python list.
a = np.array([1, 2, 3])
print(type(a))
print(a.shape)
print(*a)

# Arrays are mutable: overwrite the first element in place.
a[0] = 5
print(a)

<class 'numpy.ndarray'>
(3,)
1 2 3
[5 2 3]


In [None]:
# Two-dimensional array: two rows of three columns.
b = np.array([[1, 2, 3], [4, 5, 6]])
print(b.shape)
print(b[0][0], b[0][1], b[1][0])

(2, 3)
1 2 4


In [None]:
# Create a 2x2 matrix filled with zeros
a = np.zeros(shape=(2, 2))
print(a)

[[0. 0.]
 [0. 0.]]


In [None]:
# Create a 2x2 matrix filled with ones
b = np.ones(shape=(2, 2))
print(b)

[[1. 1.]
 [1. 1.]]


In [None]:
# Create a 2x2 matrix where every entry is 7
c = np.full(shape=(2, 2), fill_value=7)
print(c)

[[7 7]
 [7 7]]


In [None]:
# Create a 2x2 identity matrix
d = np.eye(N=2)
print(d)

[[1. 0.]
 [0. 1.]]


In [None]:
# Create a 2x2 matrix of random values (different on every run)
e = np.random.random(size=(2, 2))
print(e)

[[0.66966834 0.18415736]
 [0.00982995 0.96856404]]


In [None]:
# Select a slice of the matrix: rows 0-1, columns 1-2
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
b = a[0:2, 1:3]

# Same start:stop notation used to slice a list, e.g. lista[2:3]

print('A =\n', a)
print('B =\n', b)

A =
 [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
B =
 [[2 3]
 [6 7]]


In [None]:
# b was obtained by slicing a, so it is a view of the same data: writing
# through b also changes the corresponding entry of a (a[0, 1]).
b[0, 0] = 77

print('A =\n', a)

A =
 [[ 1 77  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [None]:
# Boolean indexing
a = np.array([[1, 2], [3, 4], [5, 6]])

# The element-wise comparison yields a boolean array shaped like a.
bool_idx = np.greater(a, 2)
print(bool_idx)

# Indexing with the mask returns the selected elements as a 1-D array.
print(a[bool_idx])
print(a[np.greater(a, 2)])

[[False False]
 [ True  True]
 [ True  True]]
[3 4 5 6]
[3 4 5 6]


In [None]:
# Data types
x = np.array([1, 2])  # dtype inferred from the integer literals
print(x.dtype)

x = np.array([1.0, 2.0])  # dtype inferred from the float literals
print(x.dtype)

x = np.array([1, 2], dtype='int64')  # dtype requested explicitly
print(x.dtype)

int64
float64
int64


### Operações

In [None]:
x = np.array([[1, 2], [3, 4]], dtype=np.float64)
y = np.array([[5, 6], [7, 8]], dtype=np.float64)

# The arithmetic operators act element-wise; the equivalent function
# call is noted next to each one.
print(x + y)  # np.add(x, y)
print(x - y)  # np.subtract(x, y)
print(x * y)  # np.multiply(x, y)
print(x / y)  # np.divide(x, y)

# Element-wise square root.
print(np.sqrt(x))

[[ 6.  8.]
 [10. 12.]]
[[-4. -4.]
 [-4. -4.]]
[[ 5. 12.]
 [21. 32.]]
[[0.2        0.33333333]
 [0.42857143 0.5       ]]
[[1.         1.41421356]
 [1.73205081 2.        ]]


In [None]:
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])

# Matrix product of the two 2-D arrays (same result as x.dot(y)).
print(np.dot(x, y))

v = np.array([9, 10])
w = np.array([11, 12])

# Inner product of the two 1-D arrays (same result as v.dot(w)).
print(np.dot(v, w))


[[19 22]
 [43 50]]
219


In [None]:
# Transpose of a matrix
x = np.array([[1, 2], [3, 4]])

print(x)
print(x.transpose())

[[1 2]
 [3 4]]
[[1 3]
 [2 4]]


## Pandas

In [None]:
import numpy as np
import pandas as pd

# Creating a Series; np.nan stands in for a missing value
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

# Creating a DataFrame from a 6x4 array of random values
df = pd.DataFrame(data=np.random.randn(6, 4))
print(df)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
          0         1         2         3
0  0.769319  0.359988 -0.848426 -1.027449
1  0.042838  0.907680 -0.151509 -0.015764
2  0.060718  1.454284 -0.504820  0.494322
3 -0.078760 -0.784879 -0.202529 -0.323693
4  0.343759 -0.124571 -0.374346 -0.168120
5  0.489918  0.093358  0.422607  2.426590


In [None]:
# Passing the row and column labels
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=list('ABCDEF'),
    columns=list('ABCD'),
)
print(df)


          A         B         C         D
A -1.718753  0.100824  0.105931  0.174252
B -0.161030 -0.674524 -0.874370  1.502434
C  1.438019 -0.022273 -2.232227  0.560568
D  1.453960  0.007467 -0.769017 -0.562516
E  0.279268  0.782655 -1.583777 -0.687524
F -0.955058 -0.312042  0.222821  2.727108


In [None]:
# Creating a DataFrame from a dict: each key becomes a column.
# 'A', 'B' and 'F' are single scalar values, while 'C', 'D' and 'E' each
# supply four values.
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["teste", "treino", "teste", "treino"]),
                    'F': 'foo'})

In [None]:
# Viewing the data: build a frame indexed by ten consecutive days
dates = pd.date_range(start='20200901', periods=10)
df = pd.DataFrame(
    data=np.random.randn(10, 4),
    index=dates,
    columns=list('ABCD'),
)

In [None]:
# A bare expression on the last line of a cell is rendered as the cell output.
df

Unnamed: 0,A,B,C,D
2020-09-01,-2.104635,-0.136122,-0.310485,-1.404145
2020-09-02,0.66668,-0.160861,-0.041246,-0.709716
2020-09-03,0.816202,0.171767,-1.501401,-0.02587
2020-09-04,0.263428,0.149646,0.003573,0.005116
2020-09-05,0.529207,-0.283964,0.477704,0.123173
2020-09-06,2.3596,-0.155144,1.738644,1.732793
2020-09-07,1.258376,0.16359,1.210463,1.849546
2020-09-08,-1.051489,-0.259404,0.104084,0.09346
2020-09-09,-0.302262,-0.621633,0.570294,-0.694591
2020-09-10,0.740744,-0.442086,0.574647,-1.163627


In [None]:
# First rows of the frame (five when no count is given).
df.head()

Unnamed: 0,A,B,C,D
2020-09-01,-2.104635,-0.136122,-0.310485,-1.404145
2020-09-02,0.66668,-0.160861,-0.041246,-0.709716
2020-09-03,0.816202,0.171767,-1.501401,-0.02587
2020-09-04,0.263428,0.149646,0.003573,0.005116
2020-09-05,0.529207,-0.283964,0.477704,0.123173


In [None]:
# Last rows of the frame (five when no count is given).
df.tail()

Unnamed: 0,A,B,C,D
2020-09-06,2.3596,-0.155144,1.738644,1.732793
2020-09-07,1.258376,0.16359,1.210463,1.849546
2020-09-08,-1.051489,-0.259404,0.104084,0.09346
2020-09-09,-0.302262,-0.621633,0.570294,-0.694591
2020-09-10,0.740744,-0.442086,0.574647,-1.163627


In [None]:
# Row labels: here the daily DatetimeIndex built with pd.date_range.
df.index

DatetimeIndex(['2020-09-01', '2020-09-02', '2020-09-03', '2020-09-04',
               '2020-09-05', '2020-09-06', '2020-09-07', '2020-09-08',
               '2020-09-09', '2020-09-10'],
              dtype='datetime64[ns]', freq='D')

In [None]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [None]:
# The values as a plain NumPy array, without the index or column labels.
df.to_numpy()

array([[-2.10463521, -0.13612155, -0.31048476, -1.40414537],
       [ 0.66668014, -0.16086088, -0.04124568, -0.70971649],
       [ 0.81620236,  0.17176669, -1.50140145, -0.02586959],
       [ 0.26342782,  0.1496458 ,  0.00357257,  0.00511645],
       [ 0.52920666, -0.28396388,  0.47770427,  0.12317285],
       [ 2.35960042, -0.15514376,  1.73864417,  1.73279267],
       [ 1.2583756 ,  0.16359018,  1.21046282,  1.8495458 ],
       [-1.05148852, -0.25940409,  0.10408384,  0.09346012],
       [-0.30226193, -0.62163252,  0.57029401, -0.69459056],
       [ 0.74074447, -0.44208613,  0.57464688, -1.16362684]])

In [None]:
# Sort by the column labels (axis=1) in descending order: D, C, B, A.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2020-09-01,-1.404145,-0.310485,-0.136122,-2.104635
2020-09-02,-0.709716,-0.041246,-0.160861,0.66668
2020-09-03,-0.02587,-1.501401,0.171767,0.816202
2020-09-04,0.005116,0.003573,0.149646,0.263428
2020-09-05,0.123173,0.477704,-0.283964,0.529207
2020-09-06,1.732793,1.738644,-0.155144,2.3596
2020-09-07,1.849546,1.210463,0.16359,1.258376
2020-09-08,0.09346,0.104084,-0.259404,-1.051489
2020-09-09,-0.694591,0.570294,-0.621633,-0.302262
2020-09-10,-1.163627,0.574647,-0.442086,0.740744


In [None]:
# Sort the rows by the values of column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2020-09-09,-0.302262,-0.621633,0.570294,-0.694591
2020-09-10,0.740744,-0.442086,0.574647,-1.163627
2020-09-05,0.529207,-0.283964,0.477704,0.123173
2020-09-08,-1.051489,-0.259404,0.104084,0.09346
2020-09-02,0.66668,-0.160861,-0.041246,-0.709716
2020-09-06,2.3596,-0.155144,1.738644,1.732793
2020-09-01,-2.104635,-0.136122,-0.310485,-1.404145
2020-09-04,0.263428,0.149646,0.003573,0.005116
2020-09-07,1.258376,0.16359,1.210463,1.849546
2020-09-03,0.816202,0.171767,-1.501401,-0.02587


In [None]:
# Selecting a single column returns a Series.
df['A']

2020-09-01   -2.104635
2020-09-02    0.666680
2020-09-03    0.816202
2020-09-04    0.263428
2020-09-05    0.529207
2020-09-06    2.359600
2020-09-07    1.258376
2020-09-08   -1.051489
2020-09-09   -0.302262
2020-09-10    0.740744
Freq: D, Name: A, dtype: float64

In [None]:
# Slicing with [] selects rows by position: here the first three.
df[0:3]

Unnamed: 0,A,B,C,D
2020-09-01,-2.104635,-0.136122,-0.310485,-1.404145
2020-09-02,0.66668,-0.160861,-0.041246,-0.709716
2020-09-03,0.816202,0.171767,-1.501401,-0.02587


In [None]:
# Label-based access: row 2020-09-02, column A (a single value).
df.loc['20200902', 'A']

0.6666801429018127

In [None]:
# Label-based access with a list of columns: returns a Series for that row.
df.loc['20200902', ['A', 'B']]

A    0.666680
B   -0.160861
Name: 2020-09-02 00:00:00, dtype: float64

In [None]:
# Position-based access: the fourth row (position 3).
df.iloc[3]

A    0.263428
B    0.149646
C    0.003573
D    0.005116
Name: 2020-09-04 00:00:00, dtype: float64

In [None]:
# Both lines read row 1, column 1 by position; only the value of the last
# expression is shown as the cell output.
df.iloc[1, 1]
df.iat[1, 1]

-0.1608608834139378

In [None]:
# Boolean mask on column A: keep only the rows where A is positive.
df[df['A'] > 0]

Unnamed: 0,A,B,C,D
2020-09-02,0.66668,-0.160861,-0.041246,-0.709716
2020-09-03,0.816202,0.171767,-1.501401,-0.02587
2020-09-04,0.263428,0.149646,0.003573,0.005116
2020-09-05,0.529207,-0.283964,0.477704,0.123173
2020-09-06,2.3596,-0.155144,1.738644,1.732793
2020-09-07,1.258376,0.16359,1.210463,1.849546
2020-09-10,0.740744,-0.442086,0.574647,-1.163627


In [None]:
# Element-wise mask: the shape is kept and entries that are not positive
# become missing values (NaN).
df[df > 0]

Unnamed: 0,A,B,C,D
2020-09-01,,,,
2020-09-02,0.66668,,,
2020-09-03,0.816202,0.171767,,
2020-09-04,0.263428,0.149646,0.003573,0.005116
2020-09-05,0.529207,,0.477704,0.123173
2020-09-06,2.3596,,1.738644,1.732793
2020-09-07,1.258376,0.16359,1.210463,1.849546
2020-09-08,,,0.104084,0.09346
2020-09-09,,,0.570294,
2020-09-10,0.740744,,0.574647,


In [None]:
#df = pd.read_csv('foo.csv')
#df.to_csv('foo.csv')

In [None]:
from google.colab import files
import io

# files.upload() opens the browser file picker and returns a dict that maps
# each uploaded file name to its contents as bytes.
arquivo = files.upload()

# Nothing to read if the upload was cancelled.
if not arquivo:
    raise ValueError('Nenhum arquivo foi enviado.')

# Take the name from the returned dict instead of hard-coding 'teste.csv':
# the fixed key raised KeyError whenever the uploaded file had another name,
# or when Colab stored a repeated upload under a new name such as
# 'teste (1).csv'.
nome_arquivo = next(iter(arquivo))
df = pd.read_csv(io.BytesIO(arquivo[nome_arquivo]))

df

Saving teste.csv to teste.csv


Unnamed: 0,A,B,C
0,D,E,F


## Referências bibliográficas

- [Numpy](https://numpy.org/doc/stable/reference/)
- [Pandas](https://pandas.pydata.org/docs/)
