In [1]:
# https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html
import numpy as np
import pandas as pd

In [21]:
# Build a Series from a plain Python list (one entry is a missing value).
s = pd.Series(
    data=[1, 3, np.nan, 6, 8],
)

s

0    1.0
1    3.0
2    NaN
3    6.0
4    8.0
dtype: float64

In [32]:
# Build a DataFrame whose index is a DatetimeIndex of six consecutive days.
dates = pd.date_range('20130101', periods=6)

# Draw the sample values from a seeded generator so that a fresh
# "Restart & Run All" rebuilds exactly the same frame. The unseeded global
# np.random state used previously gave different numbers on every run.
df = pd.DataFrame(
    np.random.default_rng(seed=42).standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)

df

Unnamed: 0,A,B,C,D
2013-01-01,-0.610157,0.783765,-0.16931,-0.194487
2013-01-02,-0.319472,0.245782,0.013333,-1.13453
2013-01-03,-1.124912,0.798506,0.066348,-0.979606
2013-01-04,0.602212,1.596993,-0.863304,-0.035543
2013-01-05,-1.041611,0.650871,-1.037133,1.030592
2013-01-06,-1.16335,0.84517,-0.662014,1.115894


In [33]:
# Build a DataFrame from a dict of column specifications; the scalar entries
# are repeated for every row (see the printed frame).
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

# Print the frame, then the dtype of each column.
print(df2, df2.dtypes, sep='\n')

     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object


In [35]:
# Inspect the DataFrame: show its first rows.
df.head()


Unnamed: 0,A,B,C,D
2013-01-01,-0.610157,0.783765,-0.16931,-0.194487
2013-01-02,-0.319472,0.245782,0.013333,-1.13453
2013-01-03,-1.124912,0.798506,0.066348,-0.979606
2013-01-04,0.602212,1.596993,-0.863304,-0.035543
2013-01-05,-1.041611,0.650871,-1.037133,1.030592


In [17]:
# Show the last rows of the DataFrame.
# NOTE(review): the saved output for this cell looks stale. It lists columns
# 0-3 rather than A-D, and its execution count (17) predates the cell that
# builds `df` (32). Re-run the notebook top to bottom to refresh it.
df.tail()

Unnamed: 0,0,1,2,3
2013-01-02,0.076058,-0.1445,1.236635,-0.09565
2013-01-03,-0.957509,-1.042603,0.254863,0.472007
2013-01-04,-0.205166,0.654048,0.963803,1.33864
2013-01-05,0.142613,0.115958,-1.534397,-2.201779
2013-01-06,0.36701,1.880381,0.75902,0.551983


In [36]:
# Row labels of the frame (the DatetimeIndex built from `dates`).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [37]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [38]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.609549,0.820181,-0.442013,-0.032947
std,0.679924,0.439194,0.473327,0.957779
min,-1.16335,0.245782,-1.037133,-1.13453
25%,-1.104087,0.684094,-0.812981,-0.783326
50%,-0.825884,0.791136,-0.415662,-0.115015
75%,-0.392144,0.833504,-0.032328,0.764058
max,0.602212,1.596993,0.066348,1.115894


In [39]:
# Transpose the DataFrame: the columns become rows and the dates become
# the column labels.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.610157,-0.319472,-1.124912,0.602212,-1.041611,-1.16335
B,0.783765,0.245782,0.798506,1.596993,0.650871,0.84517
C,-0.16931,0.013333,0.066348,-0.863304,-1.037133,-0.662014
D,-0.194487,-1.13453,-0.979606,-0.035543,1.030592,1.115894


In [40]:
# Sort by axis labels: axis=1 targets the column labels, and
# ascending=False puts them in descending order (D, C, B, A).
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.194487,-0.16931,0.783765,-0.610157
2013-01-02,-1.13453,0.013333,0.245782,-0.319472
2013-01-03,-0.979606,0.066348,0.798506,-1.124912
2013-01-04,-0.035543,-0.863304,1.596993,0.602212
2013-01-05,1.030592,-1.037133,0.650871,-1.041611
2013-01-06,1.115894,-0.662014,0.84517,-1.16335


In [42]:
# Sort the rows by the values in column 'A'.
df.sort_values(by='A')

Unnamed: 0,A,B,C,D
2013-01-06,-1.16335,0.84517,-0.662014,1.115894
2013-01-03,-1.124912,0.798506,0.066348,-0.979606
2013-01-05,-1.041611,0.650871,-1.037133,1.030592
2013-01-01,-0.610157,0.783765,-0.16931,-0.194487
2013-01-02,-0.319472,0.245782,0.013333,-1.13453
2013-01-04,0.602212,1.596993,-0.863304,-0.035543


In [49]:
# Select a single column, which yields a Series.
df['A']

2013-01-01   -0.610157
2013-01-02   -0.319472
2013-01-03   -1.124912
2013-01-04    0.602212
2013-01-05   -1.041611
2013-01-06   -1.163350
Freq: D, Name: A, dtype: float64

In [50]:
# Select a range of rows by position (here the first three rows).
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.610157,0.783765,-0.16931,-0.194487
2013-01-02,-0.319472,0.245782,0.013333,-1.13453
2013-01-03,-1.124912,0.798506,0.066348,-0.979606


In [45]:
# Slice rows by date label; as the output shows, both endpoints are included.
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-0.319472,0.245782,0.013333,-1.13453
2013-01-03,-1.124912,0.798506,0.066348,-0.979606
2013-01-04,0.602212,1.596993,-0.863304,-0.035543


In [46]:

# Select the row labelled with the first date in `dates`; the result is a
# Series indexed by the column names.
df.loc[dates[0]]

A   -0.610157
B    0.783765
C   -0.169310
D   -0.194487
Name: 2013-01-01 00:00:00, dtype: float64

In [47]:
# Select every row, restricted to columns 'A' and 'B'.
df.loc[:,['A', 'B']]

Unnamed: 0,A,B
2013-01-01,-0.610157,0.783765
2013-01-02,-0.319472,0.245782
2013-01-03,-1.124912,0.798506
2013-01-04,0.602212,1.596993
2013-01-05,-1.041611,0.650871
2013-01-06,-1.16335,0.84517


In [48]:
# Slice rows by date label (both endpoints appear in the output) and keep
# only columns 'A' and 'B'.
df.loc['20130102':'20130104', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,-0.319472,0.245782
2013-01-03,-1.124912,0.798506
2013-01-04,0.602212,1.596993


In [51]:
# A single row label plus a list of columns: the result is a Series holding
# the 'A' and 'B' values for that date.
df.loc['20130102', ['A', 'B']]

A   -0.319472
B    0.245782
Name: 2013-01-02 00:00:00, dtype: float64

In [52]:
# Scalar access: one row label and one column label return a single value.
df.loc[dates[0], 'A']

-0.610156740843961

In [54]:
# Same scalar lookup through .at; the output matches the .loc result above.
df.at[dates[0], 'A']

-0.610156740843961