In [1]:
import numpy as np
import pandas as pd

## Series

In [2]:
labels = ['a', 'b', 'c']

In [4]:
# The same three values held in three containers: list, NumPy array, dict.
my_list = [10, 20, 30]
arr = np.array(my_list)
d = dict(a=10, b=20, c=30)

In [5]:
# Series built from the list values, indexed by the string labels.
pd.Series(my_list, index=labels)

a    10
b    20
c    30
dtype: int64

## DataFrame

In [6]:
# Seed NumPy's global random generator so the np.random.randn draws in the
# following cells are reproducible.
np.random.seed(101)

In [8]:
# 5x4 frame of standard-normal draws: rows labelled A-E, columns W-Z.
df = pd.DataFrame(
    np.random.randn(5, 4),
    index='A B C D E'.split(),
    columns='W X Y Z'.split(),
)

In [10]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [12]:
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [13]:
# Add a derived column: the element-wise sum of columns W and Y.
df['new'] = df['W'] + df['Y']

In [14]:
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [15]:
# Remove the derived column again. Rebinding the result (rather than using
# inplace=True) keeps the operation explicit and chain-friendly.
df = df.drop(columns='new')

In [16]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [17]:
df.loc['A', 'W']

2.706849839399938

In [18]:
df.iloc[0,0]

2.706849839399938

In [19]:
# Boolean frame with the same shape as df: True where the value is positive.
bol = df.gt(0)

In [20]:
df[bol]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [21]:
df[df['W']>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [22]:
# Column Y for the rows where W is positive, selected in a single .loc call
# instead of chained indexing (df[mask]['Y']).
df.loc[df['W'] > 0, 'Y']

A    0.907969
B   -0.848077
D   -0.933237
E    2.605967
Name: Y, dtype: float64

In [24]:
# Rows where W is positive and Y is greater than 1.
df[df['W'].gt(0) & df['Y'].gt(1)]

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


In [25]:
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [26]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [27]:
# Add an 'Estado' column with one label per row (five labels for five rows).
df['Estado'] = 'RS RJ SP AM SC'.split()

In [28]:
df

Unnamed: 0,W,X,Y,Z,Estado
A,2.70685,0.628133,0.907969,0.503826,RS
B,0.651118,-0.319318,-0.848077,0.605965,RJ
C,-2.018168,0.740122,0.528813,-0.589001,SP
D,0.188695,-0.758872,-0.933237,0.955057,AM
E,0.190794,1.978757,2.605967,0.683509,SC


In [29]:
# Use the 'Estado' column as the row index. Rebinding the result (rather
# than using inplace=True) keeps the operation explicit and chain-friendly.
df = df.set_index('Estado')

In [30]:
df

Unnamed: 0_level_0,W,X,Y,Z
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RS,2.70685,0.628133,0.907969,0.503826
RJ,0.651118,-0.319318,-0.848077,0.605965
SP,-2.018168,0.740122,0.528813,-0.589001
AM,0.188695,-0.758872,-0.933237,0.955057
SC,0.190794,1.978757,2.605967,0.683509


In [31]:
outside = 'G1 G1 G1 G2 G2 G2'.split()

In [32]:
inside = [1, 2, 3, 1, 2, 3]

In [33]:
hier_index = list(zip(outside, inside))

In [34]:
# Build the two-level index directly from the level arrays, so this cell does
# not depend on (and overwrite) its own previous value of hier_index.
hier_index = pd.MultiIndex.from_arrays([outside, inside])

In [36]:
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [38]:
# 6x2 frame of standard-normal draws, indexed by the two-level hier_index.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [39]:
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [40]:
df.loc['G1']

Unnamed: 0,A,B
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [43]:
df.loc['G1'].loc[1]

A    0.302665
B    1.693723
Name: 1, dtype: float64

In [46]:
# Name the two index levels so they can be referred to by name
# (a later cell uses level='Numero').
df.index.names = ['Grupo', 'Numero']

In [47]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grupo,Numero,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [51]:
# Cross-section: the rows whose 'Numero' level equals 1, across every 'Grupo'.
df.xs(1, level='Numero')

Unnamed: 0_level_0,A,B
Grupo,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.302665,1.693723
G2,0.166905,0.184502


## Dados ausentes

In [52]:
# Columns A and B contain missing values (NaN); column C is complete.
d = {
    'A': [1, 2, np.nan],
    'B': [5, np.nan, np.nan],
    'C': [1, 2, 3],
}

In [53]:
df = pd.DataFrame(d)

In [55]:
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [56]:
# Keep only the rows that have at least 2 non-missing values.
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [58]:
df.fillna(value='OI')

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,OI,2
2,OI,OI,3


In [59]:
# Forward-fill: each missing value in A takes the last valid value above it.
# Series.ffill() replaces fillna(method='ffill'), whose `method` argument is
# deprecated in recent pandas releases.
df['A'].ffill()

0    1.0
1    2.0
2    2.0
Name: A, dtype: float64

In [60]:
# Set operations can be used, as in SQL.

## Operações

In [14]:
import pandas as pd

# Small example frame for the operations section; col2 repeats the value 444.
df = pd.DataFrame(
    {
        'col1': [1, 2, 3, 4],
        'col2': [444, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)

In [3]:
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [4]:
df['col2'].unique()

array([444, 555, 666])

In [5]:
df['col2'].nunique()

3

In [6]:
df['col2'].value_counts()

444    2
555    1
666    1
Name: col2, dtype: int64

In [8]:
# Rows where col1 is greater than 2 and col2 equals 444.
df[df['col1'].gt(2) & df['col2'].eq(444)]

Unnamed: 0,col1,col2,col3
3,4,444,xyz


In [9]:
def fn(x):
    """Return ``x`` multiplied by 2."""
    doubled = x * 2
    return doubled

In [10]:
df['col1'].apply(fn)

0    2
1    4
2    6
3    8
Name: col1, dtype: int64

In [11]:
del df['col2']

In [12]:
df

Unnamed: 0,col1,col3
0,1,abc
1,2,def
2,3,ghi
3,4,xyz


In [13]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [15]:
df.sort_values(by='col2')

Unnamed: 0,col1,col2,col3
0,1,444,abc
3,4,444,xyz
1,2,555,def
2,3,666,ghi
