# Pandas para Análise de Séries Temporais

## Lidando com Tempo

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pylab
%pylab inline
pylab.rcParams['figure.figsize'] = (10,6)

Populating the interactive namespace from numpy and matplotlib


### Date_range

In [5]:
# `freq` sets the spacing between consecutive timestamps (here: every 2 hours).
# The lowercase alias 'h' is used because the uppercase 'H' spelling is
# deprecated in recent pandas releases.
dti = pd.date_range('2020-01-20', periods=3, freq='2h')
dti

DatetimeIndex(['2020-01-20 00:00:00', '2020-01-20 02:00:00',
               '2020-01-20 04:00:00'],
              dtype='datetime64[ns]', freq='2H')

#### Outras opções da função date_range: [documentação](https://pandas.pydata.org/docs/reference/api/pandas.date_range.html)

In [17]:
# Three ranges built from differently written start strings, stepping by
# day ('D'), month end ('M') and week ('W') respectively.
# Only the last bare expression of a cell is displayed, so just dti3 is shown.
dti = pd.date_range(start='2020-01-20 16:10', freq='D', periods=10)
dti

dti2 = pd.date_range(start='2020-01-20 0810', freq='M', periods=10)
dti2

dti3 = pd.date_range(start='2020-01-20 08pm', freq='W', periods=5)
dti3

DatetimeIndex(['2020-01-26 20:00:00', '2020-02-02 20:00:00',
               '2020-02-09 20:00:00', '2020-02-16 20:00:00',
               '2020-02-23 20:00:00'],
              dtype='datetime64[ns]', freq='W-SUN')

#### Vamos manipular outras opções de date_range

In [None]:
# Bound the range on both sides: one timestamp per day, beginning at the
# start value and never going past the end value.
dti = pd.date_range('2020-01-20 16:10', '2020-02-20', freq='D')
dti

In [None]:
# When only `end` is given, the range is built backwards from it:
# `periods` timestamps spaced by `freq`, the last one falling on `end`.
# The lowercase alias 'h' replaces the deprecated uppercase 'H'.
dti = pd.date_range(end='2020-02-20 16:10', periods=10, freq='8h')
dti

### Timestamps e time spans

In [None]:
import datetime
# Build a pandas Timestamp from a standard-library datetime object.
pd.Timestamp(datetime.datetime(2012, 5, 1))

In [None]:
# A Timestamp can also be built directly from a date/time string.
pd.Timestamp('2020-02-10 10')

In [None]:
# A Timestamp exposes its calendar components as attributes;
# print the day, the quarter and the month, one per line.
t = pd.Timestamp('2020-02-10')
print(t.day, t.quarter, t.month, sep='\n')

In [None]:
# A Period spans an interval while a Timestamp is a single instant:
# check that the instant lies between the period's start and end.
p = pd.Period('02/2020')
t = pd.Timestamp('02/15/2020')
p.start_time <= t and t <= p.end_time

In [None]:
# Timestamp marking the end of the period `p`.
p.end_time

### Indexação

In [None]:
# Ten consecutive days used as the index of a simple 0..9 counter series.
rng = pd.date_range(start='2016 Jul 1', freq='D', periods=10)
ind = pd.Series(data=range(len(rng)), index=rng)
ind

In [None]:
# Show the index associated with each element of the series.
ind.index

In [None]:
# A Series can be indexed by Period objects instead of timestamps:
# three monthly periods paired with random values.
periods = [pd.Period(month) for month in ('2016-01', '2016-02', '2016-03')]
ts = pd.Series(data=np.random.randn(len(periods)), index=periods)
ts

In [None]:
# Day-of-year attribute of the second Period in the list (February 2016).
periods[1].dayofyear

In [None]:
# Display the list of Period objects built earlier.
periods

### Leitura de Dados

In [None]:
# First, naive read of the fixed-width file: no date handling is requested,
# so the leading columns are not combined into dates.
# `infer_datetime_format` was dropped from this call: it only takes effect
# together with `parse_dates` and is deprecated in pandas 2.0+.
data = pd.read_fwf(
    "http://www.cpc.ncep.noaa.gov/products/precip/CWlink/daily_ao_index/monthly.ao.index.b50.current.ascii",
    header=None,
)

In [None]:
# Preview the first 13 rows of the frame.
data.head(13)

#### Há algum parâmetro da função read_fwf que nos ajude a tratar esses dados de forma correta? [documentação](https://pandas.pydata.org/docs/reference/api/pandas.read_fwf.html)

In [None]:
# Second read: ask the reader to merge columns 0 and 1 into a single
# parsed date column, then preview the first 13 rows.
data = pd.read_fwf(
    "http://www.cpc.ncep.noaa.gov/products/precip/CWlink/daily_ao_index/monthly.ao.index.b50.current.ascii",
    header=None,
    parse_dates=[[0, 1]],
    infer_datetime_format=True,
)
data.head(13)

In [None]:
# Give the two columns readable names, promote the date column to the index
# and remove it from the columns.
data.columns = ['month', 'value']
data.index = data.month
# `axis` can no longer be passed positionally to drop() in pandas 2.0+;
# the `columns=` keyword works on old and new releases alike.
data = data.drop(columns='month')
data.head()

In [None]:
def dateparse(x, y):
    """Combine a year and a month into a datetime.

    Parameters
    ----------
    x : year value (anything that formats as a four-digit year with ``%s``).
    y : month value (anything that formats as a month number with ``%s``).

    Returns
    -------
    datetime.datetime
        The result of parsing ``"<x>-<y>"`` with the format ``%Y-%m``.

    Raises
    ------
    ValueError
        If the formatted string does not match ``%Y-%m``.
    """
    # `pd.datetime` was removed in pandas 2.0; use the standard-library class
    # (the `datetime` module is imported earlier in the notebook).
    return datetime.datetime.strptime('%s-%s' % (x, y), '%Y-%m')

In [None]:
import timeit

print("infer_datetime_format = True, no date parser")
%timeit pd.read_fwf("http://www.cpc.ncep.noaa.gov/products/precip/CWlink/daily_ao_index/monthly.ao.index.b50.current.ascii", parse_dates = [[0, 1]], infer_datetime_format = True, header = None,)

print("infer_datetime_format = False, no date parser")
%timeit pd.read_fwf("http://www.cpc.ncep.noaa.gov/products/precip/CWlink/daily_ao_index/monthly.ao.index.b50.current.ascii", parse_dates = [[0, 1]], infer_datetime_format = False, header = None,)

print("infer_datetime_format = True, date parser provided")
dateparse = lambda x, y: pd.datetime.strptime('%s-%s'%(x,y), '%Y-%m')
%timeit pd.read_fwf("http://www.cpc.ncep.noaa.gov/products/precip/CWlink/daily_ao_index/monthly.ao.index.b50.current.ascii", parse_dates = [[0, 1]], infer_datetime_format = True, date_parser = dateparse,  header = None,)

print("infer_datetime_format = False, date parser provided")
dateparse = lambda x, y: pd.datetime.strptime('%s-%s'%(x,y), '%Y-%m')
%timeit pd.read_fwf("http://www.cpc.ncep.noaa.gov/products/precip/CWlink/daily_ao_index/monthly.ao.index.b50.current.ascii", parse_dates = [[0, 1]], infer_datetime_format = False, date_parser = dateparse,  header = None,)

In [None]:
# to_datetime can assemble timestamps from a frame whose columns hold the
# individual date/time components, one row per timestamp.
df = pd.DataFrame(
    {
        'Year': [2015, 2016],
        'month': [2, 3],
        'day': [4, 5],
        'hour': [2, 3],
    }
)
print(df)
pd.to_datetime(df)

## Reamostragem

In [None]:
# 72 hourly timestamps (three days) carrying random values.
# The lowercase alias 'h' replaces the deprecated uppercase 'H'.
rng = pd.date_range('1/1/2011', periods=72, freq='h')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

In [None]:
# Re-index the series on a 45-minute grid; no fill method is given here.
converted = ts.asfreq(freq='45Min')
converted

#### Olhe a documentação do método asfreq() e veja como podemos resolver o problema de NaN: [documentação](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.asfreq.html)

In [None]:
# Same 45-minute grid, this time passing the 'pad' fill method.
converted = ts.asfreq(freq='45Min', method='pad')
converted

## Janela Móvel

### Shift

In [None]:
# Daily random series (blue) and the same series after shift() with its
# default of one period (red).
ts = pd.Series(
    np.random.randn(20),
    index=pd.date_range('02/20/2020', periods=20, freq='D'),
)
ts_lagged = ts.shift(1)
plt.plot(ts, color='blue')
plt.plot(ts_lagged, color='red')

In [None]:
# Hourly random series (blue) and the same series shifted by -5 periods (red).
# The lowercase alias 'h' replaces the deprecated uppercase 'H'.
ts = pd.Series(np.random.randn(20), pd.date_range('02/20/2020', freq='h', periods=20))
ts_lagged = ts.shift(-5)
plt.plot(ts, color='blue')
plt.plot(ts_lagged, color='red')

### Rolling

In [None]:
# 600 one-second samples for three random columns.
# The start date is written in ISO form: the previous day-first string
# ('20/2/2020') is ambiguous to the parser and inconsistent with the other
# cells. The lowercase alias 's' replaces the deprecated uppercase 'S'.
df = pd.DataFrame(np.random.randn(600, 3),
                  index = pd.date_range('2020-02-20',
                                        freq = 's', periods = 600),
                  columns = ['A', 'B', 'C'])
print(df.head(),'\n')
# Rolling window over 10 consecutive rows; printing it shows the window object itself.
r = df.rolling(window = 10)
print(r,'\n')
# Raw column A in grey with its 10-sample rolling mean on top in red.
df['A'].plot(color='grey')
r.mean()['A'].plot(color='red')

## Tendência e Sazonalidade

In [None]:
# Load the AirPassengers CSV: the file's own header row is replaced by the
# given names, and the first column is parsed as dates and used as the index.
air_passengers = pd.read_csv(
    "./bases/AirPassengers.csv",
    header=0,
    names=['Month', 'Passengers'],
    index_col=0,
    parse_dates=[0],
)

In [None]:
# Preview the first rows of the loaded frame.
air_passengers.head()

In [None]:
# Plot the frame against its index.
air_passengers.plot()

In [None]:
# Check the column dtypes after loading.
air_passengers.dtypes

In [None]:
import statsmodels.api as sm
# Decompose the series and plot the resulting components.
# The model name was misspelled as 'addictive'; 'additive' is the documented value.
decomposition = sm.tsa.seasonal_decompose(air_passengers, model='additive')
fig = decomposition.plot()