In [None]:
from modulos import eda
import pandas as pd
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose

In [14]:
# Load the Manaus text file: whitespace-delimited columns, no header row
# (names are supplied here), and lines starting with '#' are skipped.
prec = pd.read_csv(
    'Precipitação/manaus.txt',
    names=['Data', 'Lat', 'Lon', 'value'],
    comment='#',
    sep=r'\s+',
)
prec

Unnamed: 0,Data,Lat,Lon,value
0,1985-01-01,0.5178,-60.4661,0.000000
1,1985-01-02,0.5178,-60.4661,1.625000
2,1985-01-03,0.5178,-60.4661,0.000000
3,1985-01-04,0.5178,-60.4661,0.000000
4,1985-01-05,0.5178,-60.4661,0.000000
...,...,...,...,...
13144,2020-12-27,0.5178,-60.4661,4.781250
13145,2020-12-28,0.5178,-60.4661,12.757810
13146,2020-12-29,0.5178,-60.4661,44.960940
13147,2020-12-30,0.5178,-60.4661,27.679690


In [15]:
# Turn the raw table into a date-indexed single-column frame:
#   1. parse the 'Data' strings into datetimes,
#   2. promote 'Data' to the index (set_index drops the column by default),
#   3. discard the coordinate columns, which are not used further on.
# Built as one non-mutating chain instead of in-place edits, so the cell's
# effect is a single rebinding of `prec`.
prec = (
    prec
    .assign(Data=pd.to_datetime(prec['Data']))
    .set_index('Data')
    .drop(columns=['Lat', 'Lon'])
)
prec

Unnamed: 0_level_0,value
Data,Unnamed: 1_level_1
1985-01-01,0.000000
1985-01-02,1.625000
1985-01-03,0.000000
1985-01-04,0.000000
1985-01-05,0.000000
...,...
2020-12-27,4.781250
2020-12-28,12.757810
2020-12-29,44.960940
2020-12-30,27.679690


In [16]:
# Print a summary of the frame: index range, column dtypes and non-null
# counts (a quick check for missing values after the re-indexing).
prec.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 13149 entries, 1985-01-01 to 2020-12-31
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   value   13149 non-null  float64
dtypes: float64(1)
memory usage: 205.5 KB


In [18]:
# Save the date-indexed frame as a ';'-separated CSV next to the source file.
prec.to_csv('Precipitação/manaus_prec.csv', sep=';')

In [10]:
# Fetch the station 14990000 series through the project helper.
# NOTE(review): presumably returns a date-indexed frame with one column per
# station covering the given start/end dates — confirm in modulos.eda.
station_levels = eda.data_processing(
    'Cotas extraidas',
    '1985-01-01',
    '2020-12-31',
    ['dados_diarios_estacao_14990000.csv'],
)

# Attach the station column next to the 'value' column of `prec`.
prec['14990000'] = station_levels['14990000']
prec

Unnamed: 0_level_0,value,14990000
Data,Unnamed: 1_level_1,Unnamed: 2_level_1
1985-01-01,0.000000,2169.0
1985-01-02,1.625000,2178.0
1985-01-03,0.000000,2186.0
1985-01-04,0.000000,2194.0
1985-01-05,0.000000,2201.0
...,...,...
2020-12-27,4.781250,2062.0
2020-12-28,12.757810,2072.0
2020-12-29,44.960940,2080.0
2020-12-30,27.679690,2096.0


In [13]:
tgt = '14990000'
SEASONAL_PERIOD = 365  # `period` passed to seasonal_decompose (samples per cycle)
MAX_LAG = 300          # lags 0 .. MAX_LAG - 1 are scanned


def _best_spearman_lag(target, driver, max_lag):
    """Find the lag of ``driver`` that maximizes its Spearman correlation with ``target``.

    Parameters
    ----------
    target : pandas.Series
        Series the driver is compared against.
    driver : pandas.Series
        Series that is shifted forward by ``lag`` rows before correlating,
        so ``target`` at row t is paired with ``driver`` at row t - lag.
    max_lag : int
        Lags in ``range(max_lag)`` are evaluated.

    Returns
    -------
    tuple
        ``(best_lag, best_corr)``. Ties keep the smallest lag. NaN
        correlations never win the comparison; if no lag produces a
        comparable value the result is ``(None, -inf)``.
    """
    best_lag, best_corr = None, -np.inf
    for lag in range(max_lag):
        corr = target.corr(driver.shift(lag), method='spearman')
        if corr > best_corr:
            best_lag, best_corr = lag, corr
    return best_lag, best_corr


# Subtract the additive seasonal component from the target series so the
# second scan is not dominated by the shared seasonal cycle.
decomp = seasonal_decompose(prec[tgt], model='additive', period=SEASONAL_PERIOD)
deseason = prec[tgt] - decomp.seasonal

# Same scan on the raw and on the deseasonalized target.
best_lag, max_corr = _best_spearman_lag(prec[tgt], prec['value'], MAX_LAG)
d_best_lag, d_max_corr = _best_spearman_lag(deseason, prec['value'], MAX_LAG)

print(f"Melhor lag = {best_lag}; Correlação = {max_corr:.4f}")
print(f"D Melhor lag = {d_best_lag}; Correlação = {d_max_corr:.4f}")

Melhor lag = 4; Correlação = 0.3803
D Melhor lag = 51; Correlação = 0.1267
