# DataFrame

In [1]:
import pandas as pd 
import numpy as np

## Criando dataframe

### Criando dataframe com listas/dicioários

In [2]:
s1 = pd.Series([1,2,3,4], name = "C1")
s2 = pd.Series([10,20,30,40], name = "C2")

# Devemos passar como bibliotecas para o convesor funcione
pd.DataFrame({s1.name: s1 ,s2.name: s2}) 

Unnamed: 0,C1,C2
0,1,10
1,2,20
2,3,30
3,4,40


In [3]:
df = pd.DataFrame({s1.name: s1 ,s2.name: s2}) 

In [6]:
lista1 = [1000,2,1,2,21,1,1]
lista2 = [51,12,15,32,15,2,5]  

In [14]:
dic = {"a":lista1, "b":lista1 } 
dic

{'a': [10, 2, 1, 2, 21, 1, 1], 'b': [10, 2, 1, 2, 21, 1, 1]}

In [9]:
lista1[0] = 10

In [15]:
a = pd.DataFrame(dic)  # Criando df a partir de dicionário

Unnamed: 0,a,b
0,10,10
1,2,2
2,1,1
3,2,2
4,21,21
5,1,1
6,1,1


## Seleção de colunas

In [17]:
df["C1"]

0    1
1    2
2    3
3    4
Name: C1, dtype: int64

In [26]:
# Para pegar mais de uma coluna do df é necessário que se passe uma lista
df[["C2","C1"]] 

Unnamed: 0,C2,C1
0,10,1
1,20,2
2,30,3
3,40,4


### A maneira na qual é realizada a consulta no df, pode alterar o tipo da saida dos dados 

In [28]:
type (df["C1"])

pandas.core.series.Series

In [29]:
type (df[["C1"]])

pandas.core.frame.DataFrame

In [43]:
df["C2"][0:4] # sucesso -> tipo series

0    10
1    20
2    30
3    40
Name: C2, dtype: int64

In [40]:
df[["C1"]][3] # Erro -> tipo df 

## Alterando DataFrame

### Adicionando/Deletanto colunas

In [82]:
df["media"] = np.average(df["C2"])
df

Unnamed: 0,C1,C2,Soma,media
0,1,10,11,25.0
1,2,20,22,25.0
2,3,30,33,25.0
3,4,40,44,25.0


In [52]:
df["Soma"] = df["C2"] + df["C1"]
df

Unnamed: 0,C1,C2,media,Soma
0,1,10,25.0,11
1,2,20,25.0,22
2,3,30,25.0,33
3,4,40,25.0,44


In [62]:
df.insert(1,"Inserindo","Pedro Lindo")
df

Unnamed: 0,C1,Inserindo,C2,Soma
0,1,Pedro Lindo,10,11
1,2,Pedro Lindo,20,22
2,3,Pedro Lindo,30,33
3,4,Pedro Lindo,40,44


In [63]:
del df["Inserindo"] # deleta todo conteudo 
df

Unnamed: 0,C1,C2,Soma
0,1,10,11
1,2,20,22
2,3,30,33
3,4,40,44


In [83]:
df.pop("media") # Remove e mostra a chave removida


0    25.0
1    25.0
2    25.0
3    25.0
Name: media, dtype: float64

In [84]:
df

Unnamed: 0,C1,C2,Soma
0,1,10,11
1,2,20,22
2,3,30,33
3,4,40,44


### Renomeando colunas

In [66]:
import os 
from pathlib import Path

In [69]:
p = Path(os.getcwd())
base = pd.read_csv(str(p.parent) + "\\data\\houses_to_rent.csv")

In [74]:
nomes = ["indice", "cidade", "area_m2", "quartos", "banheiros", "vagas_estacionamento", "andar", "aceita_animais", "mobiliado", "hoa","valor_aluguel", "taxas", "seguro_incendio", "total"]
colunas = dict (zip(base.columns.to_list(), nomes))  # zip irá mesclar as duas listas
base.rename(columns=colunas)
base.head()

Unnamed: 0.1,Unnamed: 0,city,area,rooms,bathroom,parking spaces,floor,animal,furniture,hoa,rent amount,property tax,fire insurance,total
0,0,1,240,3,3,4,-,acept,furnished,R$0,"R$8,000","R$1,000",R$121,"R$9,121"
1,1,0,64,2,1,1,10,acept,not furnished,R$540,R$820,R$122,R$11,"R$1,493"
2,2,1,443,5,5,4,3,acept,furnished,"R$4,172","R$7,000","R$1,417",R$89,"R$12,680"
3,3,1,73,2,2,1,12,acept,not furnished,R$700,"R$1,250",R$150,R$16,"R$2,116"
4,4,1,19,1,1,0,-,not acept,not furnished,R$0,"R$1,200",R$41,R$16,"R$1,257"


In [76]:
base.columns = [ x.replace(" ","_") for x in base.columns.to_list()]
base.head()

Unnamed: 0,Unnamed:_0,city,area,rooms,bathroom,parking_spaces,floor,animal,furniture,hoa,rent_amount,property_tax,fire_insurance,total
0,0,1,240,3,3,4,-,acept,furnished,R$0,"R$8,000","R$1,000",R$121,"R$9,121"
1,1,0,64,2,1,1,10,acept,not furnished,R$540,R$820,R$122,R$11,"R$1,493"
2,2,1,443,5,5,4,3,acept,furnished,"R$4,172","R$7,000","R$1,417",R$89,"R$12,680"
3,3,1,73,2,2,1,12,acept,not furnished,R$700,"R$1,250",R$150,R$16,"R$2,116"
4,4,1,19,1,1,0,-,not acept,not furnished,R$0,"R$1,200",R$41,R$16,"R$1,257"


## Propridades do dataframe

In [18]:
df.columns # Vendo colunas

Index(['C1', 'C2'], dtype='object')

In [13]:
(df.index)  # Vendo indices

RangeIndex(start=0, stop=4, step=1)