# Utilizando Filtros

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Daily DatetimeIndex: 600 consecutive dates starting on 2018-01-01.
datas = pd.date_range(start='2018-01-01', periods=600, freq='D')

In [3]:
# Build a 600x5 frame of standard-normal samples, indexed by the dates in
# `datas`, with one column per letter A-E.
# A seeded Generator replaces the unseeded global np.random.randn so that
# Restart & Run All produces the same numbers on every run. Outputs saved
# before the seed was introduced will differ from a fresh run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((600, 5)), index=datas, columns=list('ABCDE'))

df

Unnamed: 0,A,B,C,D,E
2018-01-01,1.331227,0.753524,0.620353,-0.979824,-0.094760
2018-01-02,0.180673,-0.307654,-1.239826,0.789386,-1.505147
2018-01-03,0.754885,0.459966,-2.957025,-0.680839,-0.561526
2018-01-04,-0.494527,-0.530957,1.146788,-1.053123,-0.740872
2018-01-05,0.585298,-1.131819,1.442680,-0.952556,-1.460522
...,...,...,...,...,...
2019-08-19,-1.234359,1.140576,0.609088,-0.557427,0.280148
2019-08-20,-1.304491,2.406759,0.064408,-0.385004,-0.177134
2019-08-21,-0.754865,1.027406,0.293772,1.065463,0.555580
2019-08-22,0.022190,-0.403172,1.126334,0.944952,0.983673


In [4]:
# select every row of a single column: indexing the frame with one column
# label returns that column as a Series

coluna_a = df['A']
coluna_a

2018-01-01    1.331227
2018-01-02    0.180673
2018-01-03    0.754885
2018-01-04   -0.494527
2018-01-05    0.585298
                ...   
2019-08-19   -1.234359
2019-08-20   -1.304491
2019-08-21   -0.754865
2019-08-22    0.022190
2019-08-23    1.097719
Freq: D, Name: A, Length: 600, dtype: float64

In [5]:
# select specific rows by position with a slice (the stop is exclusive, so
# this returns the rows at positions 1 to 4).
# .iloc makes the positional intent explicit; plain df[1:5] gives the same
# rows but is easy to confuse with column selection such as df['A'].

df.iloc[1:5]

Unnamed: 0,A,B,C,D,E
2018-01-02,0.180673,-0.307654,-1.239826,0.789386,-1.505147
2018-01-03,0.754885,0.459966,-2.957025,-0.680839,-0.561526
2018-01-04,-0.494527,-0.530957,1.146788,-1.053123,-0.740872
2018-01-05,0.585298,-1.131819,1.44268,-0.952556,-1.460522


In [6]:
# filtering with loc: keep all rows (":") and only the listed column labels

colunas = ['B', 'C']
df.loc[:, colunas]

Unnamed: 0,B,C
2018-01-01,0.753524,0.620353
2018-01-02,-0.307654,-1.239826
2018-01-03,0.459966,-2.957025
2018-01-04,-0.530957,1.146788
2018-01-05,-1.131819,1.442680
...,...,...
2019-08-19,1.140576,0.609088
2019-08-20,2.406759,0.064408
2019-08-21,1.027406,0.293772
2019-08-22,-0.403172,1.126334


In [7]:
# slicing by date: the dates are the frame's index, so loc accepts date
# strings as row labels (label slices include both endpoints)

inicio, fim = '20180301', '20180917'
df.loc[inicio:fim, ['A', 'E']]

Unnamed: 0,A,E
2018-03-01,1.094344,0.463788
2018-03-02,0.766959,-0.223089
2018-03-03,-0.032734,-0.312982
2018-03-04,-1.230371,-0.103339
2018-03-05,-0.079912,0.635989
...,...,...
2018-09-13,-0.176102,2.038173
2018-09-14,-0.419034,-1.561741
2018-09-15,-1.947235,0.229562
2018-09-16,1.455440,0.017848


In [9]:
# the filtered data can be stored in a new variable.

In [11]:
# iloc looks rows up by integer position; a single position returns that
# row as a Series whose labels are the column names

posicao = 1
df.iloc[posicao]

A    0.180673
B   -0.307654
C   -1.239826
D    0.789386
E   -1.505147
Name: 2018-01-02 00:00:00, dtype: float64

In [12]:
# slicing with iloc: row positions 2-3 and column positions 1-2
# (the stop of each slice is exclusive)

linhas = slice(2, 4)
colunas = slice(1, 3)
df.iloc[linhas, colunas]

Unnamed: 0,B,C
2018-01-03,0.459966,-2.957025
2018-01-04,-0.530957,1.146788


In [13]:
# picking specific rows and columns: iloc also accepts lists of positions

linhas = [1, 5, 6]
colunas = [0, 3]
df.iloc[linhas, colunas]

Unnamed: 0,A,D
2018-01-02,0.180673,0.789386
2018-01-06,0.553074,0.743951
2018-01-07,0.986629,0.059572


# Lógica Booleana para Filtros

In [14]:
# boolean mask: True for the rows where column A is positive; indexing the
# frame with the mask keeps only those rows
a_positivo = df['A'] > 0
df[a_positivo]

Unnamed: 0,A,B,C,D,E
2018-01-01,1.331227,0.753524,0.620353,-0.979824,-0.094760
2018-01-02,0.180673,-0.307654,-1.239826,0.789386,-1.505147
2018-01-03,0.754885,0.459966,-2.957025,-0.680839,-0.561526
2018-01-05,0.585298,-1.131819,1.442680,-0.952556,-1.460522
2018-01-06,0.553074,-1.686624,0.426038,0.743951,1.155376
...,...,...,...,...,...
2019-08-14,0.440001,-0.068968,2.253323,1.221070,1.011409
2019-08-17,0.526685,-0.773408,1.094633,2.358872,-1.144268
2019-08-18,1.251447,1.416780,1.428368,-1.182252,-1.640414
2019-08-22,0.022190,-0.403172,1.126334,0.944952,0.983673
