In [1]:
import pandas as pd

# Criando um data frame

In [5]:
data = [1, 2, 3, 4, 5]
index = ['Linha' + str(i) for i in range(5)]
s = pd.Series(data = data, index = index)
s

Linha0    1
Linha1    2
Linha2    3
Linha3    4
Linha4    5
dtype: int64

## Pode ser por meio de um dicionário

In [7]:
data = {'Linha' + str(i) : i + 1 for i in range(5) }
s = pd.Series(data)
s

Linha0    1
Linha1    2
Linha2    3
Linha3    4
Linha4    5
dtype: int64

#  Data frame é um conjunto de 'series' e é possível fazer operações nos objetos do tupo 'series'

In [9]:
s + 2

Linha0    3
Linha1    4
Linha2    5
Linha3    6
Linha4    7
dtype: int64

# Outro exemplo de criar df

In [11]:
data = [[1, 2, 3], 
        [4, 5, 6], 
        [7, 8, 9]]
index = ['Linha' + str(i) for i in range(3)]
df1 = pd.DataFrame(data = data, index = index)
df1

Unnamed: 0,0,1,2
Linha0,1,2,3
Linha1,4,5,6
Linha2,7,8,9


In [12]:
columns = ['Coluna' + str(i) for i in range(3)]
df2 = pd.DataFrame(data = data, index = index, columns = columns)
df2

Unnamed: 0,Coluna0,Coluna1,Coluna2
Linha0,1,2,3
Linha1,4,5,6
Linha2,7,8,9


In [16]:
df3 = pd.DataFrame(data = data, index = index, columns = columns)
df3

Unnamed: 0,Coluna0,Coluna1,Coluna2
Linha0,1,2,3
Linha1,4,5,6
Linha2,7,8,9


# Fazendo alterações

In [18]:
df1[df1 > 0] = 'A'
df1

Unnamed: 0,0,1,2
Linha0,A,A,A
Linha1,A,A,A
Linha2,A,A,A


In [19]:
df2[df2 > 0] = 'B'
df2

Unnamed: 0,Coluna0,Coluna1,Coluna2
Linha0,B,B,B
Linha1,B,B,B
Linha2,B,B,B


# Concatenando

In [20]:
df4 = pd.concat([df1, df2, df3])
df4

  index = _union_indexes(indexes, sort=sort)


Unnamed: 0,0,1,2,Coluna0,Coluna1,Coluna2
Linha0,A,A,A,,,
Linha1,A,A,A,,,
Linha2,A,A,A,,,
Linha0,,,,B,B,B
Linha1,,,,B,B,B
Linha2,,,,B,B,B
Linha0,,,,1,2,3
Linha1,,,,4,5,6
Linha2,,,,7,8,9


In [21]:
df4 = pd.concat([df1, df2, df3], axis = 1)
df4

Unnamed: 0,0,1,2,Coluna0,Coluna1,Coluna2,Coluna0.1,Coluna1.1,Coluna2.1
Linha0,A,A,A,B,B,B,1,2,3
Linha1,A,A,A,B,B,B,4,5,6
Linha2,A,A,A,B,B,B,7,8,9


# Criando seleções 

In [22]:
numeros = [i for i in range(11)]
letras = [chr(i + 65) for i in range(11)]
nome_coluna = ['N']

df = pd.DataFrame(data = numeros, index = letras, columns = nome_coluna)
df.head()

Unnamed: 0,N
A,0
B,1
C,2
D,3
E,4


In [23]:
selecao = df['N'].isin([i for i in range(11) if i % 2 == 0])
df = df[selecao]
df

Unnamed: 0,N
A,0
C,2
E,4
G,6
I,8
K,10


# Organizando data frames

In [30]:
data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
list('321')
df = pd.DataFrame(data, list('321'), list('ZYX'))
df

Unnamed: 0,Z,Y,X
3,1,2,3
2,4,5,6
1,7,8,9


In [31]:
df.sort_index(inplace = True)
df

Unnamed: 0,Z,Y,X
1,7,8,9
2,4,5,6
3,1,2,3


In [33]:
df.sort_index(inplace = True, axis = 1)
df

Unnamed: 0,X,Y,Z
1,9,8,7
2,6,5,4
3,3,2,1


In [36]:
df.sort_values(by = 'X', inplace = True)
df

Unnamed: 0,X,Y,Z
3,3,2,1
2,6,5,4
1,9,8,7


In [39]:
df.sort_values(by = ['X','Y'], inplace = True)
df

Unnamed: 0,X,Y,Z
3,3,2,1
2,6,5,4
1,9,8,7


In [40]:
df.sort_values(by = '3', axis = 1, inplace = True)
df

Unnamed: 0,Z,Y,X
3,1,2,3
2,4,5,6
1,7,8,9


# Seleções

In [43]:
data = [(1, 2, 3, 4),
        (5, 6, 7, 8),
        (8, 10, 11, 12),
        (13, 14, 15, 16)]
df = pd.DataFrame(data, 'l1 l2 l3 l4'.split(), 'c1 c2 c3 c4'.split())
df

Unnamed: 0,c1,c2,c3,c4
l1,1,2,3,4
l2,5,6,7,8
l3,8,10,11,12
l4,13,14,15,16


In [44]:
df['c1']

l1     1
l2     5
l3     8
l4    13
Name: c1, dtype: int64

In [46]:
df[['c3','c1']]

Unnamed: 0,c3,c1
l1,3,1
l2,7,5
l3,11,8
l4,15,13


In [47]:
df[:]

Unnamed: 0,c1,c2,c3,c4
l1,1,2,3,4
l2,5,6,7,8
l3,8,10,11,12
l4,13,14,15,16


In [48]:
df[1:3]

Unnamed: 0,c1,c2,c3,c4
l2,5,6,7,8
l3,8,10,11,12


In [49]:
df[1:][['c3', 'c1']]

Unnamed: 0,c3,c1
l2,7,5
l3,11,8
l4,15,13


In [51]:
df.loc['l3']

c1     8
c2    10
c3    11
c4    12
Name: l3, dtype: int64

In [52]:
df.loc[['l3', 'l2']]

Unnamed: 0,c1,c2,c3,c4
l3,8,10,11,12
l2,5,6,7,8


In [55]:
df.iloc[0,1]

2

In [57]:
df.iloc[[2,0],[3,0]]

Unnamed: 0,c4,c1
l3,12,8
l1,4,1


# OR ( | )
# AND ( & )

In [60]:
data = [0.5, None, None, 0.52, 0.54, None, None, 0.59, 0.6, None, 0.7]
s = pd.Series(data)
s

0     0.50
1      NaN
2      NaN
3     0.52
4     0.54
5      NaN
6      NaN
7     0.59
8     0.60
9      NaN
10    0.70
dtype: float64

In [61]:
s.fillna(0)

0     0.50
1     0.00
2     0.00
3     0.52
4     0.54
5     0.00
6     0.00
7     0.59
8     0.60
9     0.00
10    0.70
dtype: float64

In [62]:
s.fillna(method = 'ffill')

0     0.50
1     0.50
2     0.50
3     0.52
4     0.54
5     0.54
6     0.54
7     0.59
8     0.60
9     0.60
10    0.70
dtype: float64

In [64]:
s.fillna(method = 'bfill')

0     0.50
1     0.52
2     0.52
3     0.52
4     0.54
5     0.59
6     0.59
7     0.59
8     0.60
9     0.70
10    0.70
dtype: float64

In [65]:
s.fillna(s.mean())

0     0.500
1     0.575
2     0.575
3     0.520
4     0.540
5     0.575
6     0.575
7     0.590
8     0.600
9     0.575
10    0.700
dtype: float64

In [66]:
s.fillna(method = 'ffill', limit = 1)

0     0.50
1     0.50
2      NaN
3     0.52
4     0.54
5     0.54
6      NaN
7     0.59
8     0.60
9     0.60
10    0.70
dtype: float64

# Contadores

In [2]:
s = pd.Series(list('asdadeadesdasesda'))
s

0     a
1     s
2     d
3     a
4     d
5     e
6     a
7     d
8     e
9     s
10    d
11    a
12    s
13    e
14    s
15    d
16    a
dtype: object

In [3]:
s.unique()

array(['a', 's', 'd', 'e'], dtype=object)

In [4]:
s.value_counts()

d    5
a    5
s    4
e    3
dtype: int64

In [15]:
m1 = 'CCcCCccCCCccCcCccCcCcCCCcCCcccCCcCcCcCcccCCcCcccCc'
m2 = 'CCCCCccCccCcCCCCccCccccCccCccCCcCccCcCcCCcCccCccCc'
m3 = 'CccCCccCcCCCCCCCCCCcccCccCCCCCCccCCCcccCCCcCCcccCC'
m4 = 'cCCccCCccCCccCCccccCcCcCcCcCcCcCCCCccccCCCcCCcCCCC'
m5 = 'CCCcCcCcCcCCCcCCcCcCCccCcCCcccCccCCcCcCcCcCcccccCc'

In [19]:
eventos = {'m1': list(m1), 
           'm2': list(m2), 
           'm3': list(m3), 
           'm4': list(m4), 
           'm5': list(m5)}
moedas = pd.DataFrame(eventos)
df = pd.DataFrame(data = ['Cara', 'Coroa'], index = ['c', 'C'], columns = ['Faces'])
print(df)
for item in moedas:
    df = pd.concat([df, moedas[item].value_counts()], axis = 1)


   Faces
c   Cara
C  Coroa


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  # Remove the CWD from sys.path while we load stuff.


In [20]:
precos = pd.DataFrame([['Feira', 'Cebola', 2.5], 
                        ['Mercado', 'Cebola', 1.99], 
                        ['Supermercado', 'Cebola', 1.69], 
                        ['Feira', 'Tomate', 4], 
                        ['Mercado', 'Tomate', 3.29], 
                        ['Supermercado', 'Tomate', 2.99], 
                        ['Feira', 'Batata', 4.2], 
                        ['Mercado', 'Batata', 3.99], 
                        ['Supermercado', 'Batata', 3.69]], 
                        columns = ['Local', 'Produto', 'Preço'])
precos

Unnamed: 0,Local,Produto,Preço
0,Feira,Cebola,2.5
1,Mercado,Cebola,1.99
2,Supermercado,Cebola,1.69
3,Feira,Tomate,4.0
4,Mercado,Tomate,3.29
5,Supermercado,Tomate,2.99
6,Feira,Batata,4.2
7,Mercado,Batata,3.99
8,Supermercado,Batata,3.69


In [21]:
precos.describe().round(2)

Unnamed: 0,Preço
count,9.0
mean,3.15
std,0.92
min,1.69
25%,2.5
50%,3.29
75%,3.99
max,4.2


In [22]:
precos['a'] = precos['Preço']+1
precos.describe()

Unnamed: 0,Preço,a
count,9.0,9.0
mean,3.148889,4.148889
std,0.919721,0.919721
min,1.69,2.69
25%,2.5,3.5
50%,3.29,4.29
75%,3.99,4.99
max,4.2,5.2
