In [1]:
# Manipulación y tratamiento de Datos
import numpy as np
import pandas as pd

# Visualización de datos
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

# Modelación Arima
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf 
from statsmodels.tsa.seasonal import seasonal_decompose 
from statsmodels.tsa.stattools import adfuller

# Métrica de Evaluación
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse
from sklearn import metrics

# No presentar advertencias
import warnings
warnings.filterwarnings("ignore")

In [6]:
df = pd.read_csv('precipitacionsbarcelonadesde1786.csv')
df.head()

Unnamed: 0,Any,Precip_Acum_Gener,Precip_Acum_Febrer,Precip_Acum_Marc,Precip_Acum_Abril,Precip_Acum_Maig,Precip_Acum_Juny,Precip_Acum_Juliol,Precip_Acum_Agost,Precip_Acum_Setembre,Precip_Acum_Octubre,Precip_Acum_Novembre,Precip_Acum_Desembre
0,1786,32.8,28.4,84.4,42.3,8.5,38.0,6.8,52.1,62.0,57.0,195.8,114.1
1,1787,136.4,27.4,44.6,76.9,0.0,29.8,34.7,14.9,205.8,12.4,42.2,0.0
2,1788,9.9,14.9,32.2,12.4,7.5,146.3,59.4,29.8,123.9,163.6,106.6,22.3
3,1789,12.4,12.4,23.6,9.9,32.2,9.9,6.3,69.4,18.7,52.1,18.7,76.9
4,1790,44.6,1.2,188.5,74.4,44.7,39.7,65.7,32.3,71.9,205.8,72.0,14.9


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237 entries, 0 to 236
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Any                   237 non-null    int64  
 1   Precip_Acum_Gener     237 non-null    float64
 2   Precip_Acum_Febrer    237 non-null    float64
 3   Precip_Acum_Marc      237 non-null    float64
 4   Precip_Acum_Abril     237 non-null    float64
 5   Precip_Acum_Maig      237 non-null    float64
 6   Precip_Acum_Juny      237 non-null    float64
 7   Precip_Acum_Juliol    237 non-null    float64
 8   Precip_Acum_Agost     237 non-null    float64
 9   Precip_Acum_Setembre  237 non-null    float64
 10  Precip_Acum_Octubre   237 non-null    float64
 11  Precip_Acum_Novembre  237 non-null    float64
 12  Precip_Acum_Desembre  237 non-null    float64
dtypes: float64(12), int64(1)
memory usage: 24.2 KB


In [8]:
#rename columns
df.rename(columns={'Precip_Acum_Gener': "1",
                   'Precip_Acum_Febrer': "2",
                   'Precip_Acum_Marc': "3",
                   'Precip_Acum_Abril': "4",
                   'Precip_Acum_Maig': "5",
                   'Precip_Acum_Juny': "6",
                   'Precip_Acum_Juliol': "7",
                   'Precip_Acum_Agost': "8",
                   'Precip_Acum_Setembre': "9",
                   'Precip_Acum_Octubre': "10",
                   'Precip_Acum_Novembre': "11",
                   'Precip_Acum_Desembre': "12"},inplace=True)

In [9]:
df.head()

Unnamed: 0,Any,1,2,3,4,5,6,7,8,9,10,11,12
0,1786,32.8,28.4,84.4,42.3,8.5,38.0,6.8,52.1,62.0,57.0,195.8,114.1
1,1787,136.4,27.4,44.6,76.9,0.0,29.8,34.7,14.9,205.8,12.4,42.2,0.0
2,1788,9.9,14.9,32.2,12.4,7.5,146.3,59.4,29.8,123.9,163.6,106.6,22.3
3,1789,12.4,12.4,23.6,9.9,32.2,9.9,6.3,69.4,18.7,52.1,18.7,76.9
4,1790,44.6,1.2,188.5,74.4,44.7,39.7,65.7,32.3,71.9,205.8,72.0,14.9


In [10]:
# Creates a pivot table dataframe
table = df.melt(id_vars=['Any'], value_vars=['1', '2', '3', '4', '5', '6', '7',
       '8', '9', '10', '11', '12'])

In [11]:
table["Data"] = pd.to_datetime(dict(year=table["Any"], month=table["variable"], day="1"))

In [12]:
table.head()

Unnamed: 0,Any,variable,value,Data
0,1786,1,32.8,1786-01-01
1,1787,1,136.4,1787-01-01
2,1788,1,9.9,1788-01-01
3,1789,1,12.4,1789-01-01
4,1790,1,44.6,1790-01-01


In [13]:
df = table.set_index("Data")
df.head()

Unnamed: 0_level_0,Any,variable,value
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1786-01-01,1786,1,32.8
1787-01-01,1787,1,136.4
1788-01-01,1788,1,9.9
1789-01-01,1789,1,12.4
1790-01-01,1790,1,44.6


In [14]:
df.drop(['Any', 'variable'], axis=1)

Unnamed: 0_level_0,value
Data,Unnamed: 1_level_1
1786-01-01,32.8
1787-01-01,136.4
1788-01-01,9.9
1789-01-01,12.4
1790-01-01,44.6
...,...
2018-12-01,4.8
2019-12-01,119.2
2020-12-01,41.5
2021-12-01,3.8


In [15]:
fig = px.line(df, x= df.index, y="value", template = "plotly_dark", title="Precipitaciones en Barcelona desde 1780")
fig.show()

In [17]:
def Prueba_Dickey_Fuller(series , column_name):
    print (f'Resultados de la prueba de Dickey-Fuller para columna: {column_name}')
    dftest = adfuller(series, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','No Lags Used','Número de observaciones utilizadas'])
    for key,value in dftest[4].items():
       dfoutput['Critical Value (%s)'%key] = value
    print (dfoutput)
    if dftest[1] <= 0.05:
        print("Conclusion:====>")
        print("Rechazar la hipótesis nula")
        print("Los datos son estacionarios")
    else:
        print("Conclusion:====>")
        print("No se puede rechazar la hipótesis nula")
        print("Los datos no son estacionarios")

In [18]:
Prueba_Dickey_Fuller(df["value"],"value")

Resultados de la prueba de Dickey-Fuller para columna: value
Test Statistic                          -4.450598
p-value                                  0.000241
No Lags Used                            27.000000
Número de observaciones utilizadas    2816.000000
Critical Value (1%)                     -3.432674
Critical Value (5%)                     -2.862567
Critical Value (10%)                    -2.567317
dtype: float64
Conclusion:====>
Rechazar la hipótesis nula
Los datos son estacionarios
