# Manipulação de Dados com Pandas - Python

In [1]:
import pandas as pd
import numpy as np

In [2]:
# A small sequence of numbers containing one missing value (np.nan).
# Note: despite its name, this is a built-in Python list, not a pandas Series.
series = [7, 4, 2, np.nan, 6, 9]
series

[7, 4, 2, nan, 6, 9]

In [3]:
# Check the concrete Python type of `series`.
type(series)

list

In [4]:
# Build a DatetimeIndex of six consecutive days starting on 2018-01-01.
data = pd.date_range(start='20180101', periods=6, freq='D')

In [5]:
# Display the date index.
data

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Create a DataFrame of standard-normal random numbers indexed by the dates
# created earlier: one row per entry of `data` and four columns labelled A-D.
# A seeded Generator is used so the values are identical on every
# Restart & Run All; the row count is taken from the index so the two
# cannot drift apart.
df = pd.DataFrame(
    np.random.default_rng(42).standard_normal((len(data), 4)),
    index=data,
    columns=list('ABCD'),
)

In [8]:
# Display the DataFrame.
df

Unnamed: 0,A,B,C,D
2018-01-01,-1.083591,-0.849522,0.439021,-1.771952
2018-01-02,0.72209,0.645839,0.602345,-0.248874
2018-01-03,-0.505782,0.189015,0.014541,-0.841502
2018-01-04,0.950462,0.365604,-0.930167,0.470656
2018-01-05,-0.263711,0.208123,-0.258834,-0.161585
2018-01-06,0.488903,-1.00988,2.07923,-0.484692


In [10]:
# Inspect the type of `df`.

type(df)

pandas.core.frame.DataFrame

In [11]:
# Another way to create a DataFrame: from a mapping of column name -> values.
# The scalar entries (A, B, F) are repeated on every row, while C, D and E
# supply four values each.
df2 = pd.DataFrame(dict(
    A=7,
    B=pd.Timestamp('20190101'),
    C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    D=np.full(4, 3, dtype='int32'),
    E=pd.Categorical(['test', 'train'] * 2),
    F='Python',
))

In [12]:
# Display the second DataFrame.
df2

Unnamed: 0,A,B,C,D,E,F
0,7,2019-01-01,1.0,3,test,Python
1,7,2019-01-01,1.0,3,train,Python
2,7,2019-01-01,1.0,3,test,Python
3,7,2019-01-01,1.0,3,train,Python


In [14]:
# List the dtype of each column of `df2`.
df2.dtypes

A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Criando um Terceiro DataFrame

In [18]:
# Rebuild `data` and `df` for the third example: 60 consecutive days starting
# on 2019-01-01, with five columns (A-E) of standard-normal random numbers.
# NOTE: this rebinds the `data` and `df` names used by the earlier cells.
data = pd.date_range('20190101', periods=60, freq='D')

# The row count comes from the index so it cannot drift from `periods`, and the
# seeded Generator makes the values identical on every Restart & Run All.
df = pd.DataFrame(
    np.random.default_rng(42).standard_normal((len(data), 5)),
    index=data,
    columns=list('ABCDE'),
)

In [19]:
# Preview only the first rows instead of dumping all 60 rows into the output.
df.head(10)

Unnamed: 0,A,B,C,D,E
2019-01-01,1.279378,1.714901,-2.74828,0.563366,-0.781734
2019-01-02,-1.923841,-0.116341,-0.753045,0.921399,-0.685622
2019-01-03,0.819105,1.664087,0.76896,1.256874,0.197748
2019-01-04,0.092787,-0.525227,0.244183,1.970564,0.938943
2019-01-05,0.341919,-0.715365,1.363344,1.447022,1.215355
2019-01-06,1.22949,-0.653691,-1.671514,0.340238,0.98158
2019-01-07,-0.632821,1.608482,-0.648588,1.190914,0.240317
2019-01-08,-1.916005,0.089007,1.035161,1.117194,0.367738
2019-01-09,0.196397,-1.427208,-0.196197,-1.891822,0.493924
2019-01-10,0.937249,-0.662815,-1.231659,-1.109656,0.475241


In [20]:
# Inspect the dimensions of the DataFrame as (rows, columns).

df.shape

(60, 5)

## Inserindo uma Nova Coluna

In [21]:
# Add column F in place; the scalar 1 is assigned to every row.
df['F'] = 1

In [22]:
# Preview the first 10 rows to confirm the new column F.
df.head(10)

Unnamed: 0,A,B,C,D,E,F
2019-01-01,1.279378,1.714901,-2.74828,0.563366,-0.781734,1
2019-01-02,-1.923841,-0.116341,-0.753045,0.921399,-0.685622,1
2019-01-03,0.819105,1.664087,0.76896,1.256874,0.197748,1
2019-01-04,0.092787,-0.525227,0.244183,1.970564,0.938943,1
2019-01-05,0.341919,-0.715365,1.363344,1.447022,1.215355,1
2019-01-06,1.22949,-0.653691,-1.671514,0.340238,0.98158,1
2019-01-07,-0.632821,1.608482,-0.648588,1.190914,0.240317,1
2019-01-08,-1.916005,0.089007,1.035161,1.117194,0.367738,1
2019-01-09,0.196397,-1.427208,-0.196197,-1.891822,0.493924,1
2019-01-10,0.937249,-0.662815,-1.231659,-1.109656,0.475241,1


In [23]:
# Add column G holding a running sequence 0, 1, 2, ... (one value per row).
# The length is taken from the DataFrame itself, so the assignment keeps
# working if the number of rows changes (a hard-coded range(60) would raise
# a length-mismatch error in that case).
df['G'] = range(len(df))

In [24]:
# Preview the first 15 rows to check the sequence stored in column G.
df.head(15)

Unnamed: 0,A,B,C,D,E,F,G
2019-01-01,1.279378,1.714901,-2.74828,0.563366,-0.781734,1,0
2019-01-02,-1.923841,-0.116341,-0.753045,0.921399,-0.685622,1,1
2019-01-03,0.819105,1.664087,0.76896,1.256874,0.197748,1,2
2019-01-04,0.092787,-0.525227,0.244183,1.970564,0.938943,1,3
2019-01-05,0.341919,-0.715365,1.363344,1.447022,1.215355,1,4
2019-01-06,1.22949,-0.653691,-1.671514,0.340238,0.98158,1,5
2019-01-07,-0.632821,1.608482,-0.648588,1.190914,0.240317,1,6
2019-01-08,-1.916005,0.089007,1.035161,1.117194,0.367738,1,7
2019-01-09,0.196397,-1.427208,-0.196197,-1.891822,0.493924,1,8
2019-01-10,0.937249,-0.662815,-1.231659,-1.109656,0.475241,1,9


In [25]:
# Add a 'Produto' (product) column: the row-by-row product of columns A and B.
df['Produto'] = df['A'].mul(df['B'])

In [26]:
# Preview the first 10 rows, including the new 'Produto' column.
df.head(10)

Unnamed: 0,A,B,C,D,E,F,G,Produto
2019-01-01,1.279378,1.714901,-2.74828,0.563366,-0.781734,1,0,2.194007
2019-01-02,-1.923841,-0.116341,-0.753045,0.921399,-0.685622,1,1,0.223822
2019-01-03,0.819105,1.664087,0.76896,1.256874,0.197748,1,2,1.363062
2019-01-04,0.092787,-0.525227,0.244183,1.970564,0.938943,1,3,-0.048734
2019-01-05,0.341919,-0.715365,1.363344,1.447022,1.215355,1,4,-0.244597
2019-01-06,1.22949,-0.653691,-1.671514,0.340238,0.98158,1,5,-0.803707
2019-01-07,-0.632821,1.608482,-0.648588,1.190914,0.240317,1,6,-1.017881
2019-01-08,-1.916005,0.089007,1.035161,1.117194,0.367738,1,7,-0.170539
2019-01-09,0.196397,-1.427208,-0.196197,-1.891822,0.493924,1,8,-0.2803
2019-01-10,0.937249,-0.662815,-1.231659,-1.109656,0.475241,1,9,-0.621223
