In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [2]:
wdata = pd.read_excel('../DataPreProcessing/wdata.xlsx' , index_col = 'Date' , parse_dates = True)

In [3]:
from statsmodels.tsa.stattools import adfuller

In [4]:
def ad_test(dataset):
    dftest = adfuller(dataset , autolag = 'bic')
    print('1. ADF', dftest[0])
    print('2. P- Value', dftest[1])
    print('3 Num of Lags ', dftest[2])
    print('4 No. od observations used ', dftest[3])
    print('5 . Critical Values ')
    for key, val in dftest[4].items():
        print('\t', key , ": ", val)

In [5]:
ad_test(wdata['temperature_2m (°C)'])

1. ADF -5.38255676195106
2. P- Value 3.6849750252869432e-06
3 Num of Lags  6
4 No. od observations used  8820
5 . Critical Values 
	 1% :  -3.431091633128763
	 5% :  -2.8618677528979655
	 10% :  -2.5669444578776335


In [6]:
from pmdarima import auto_arima
import warnings
warnings.filterwarnings("ignore")

In [7]:
stepwise_fit = auto_arima(wdata['temperature_2m (°C)'], trace=True , supress_warnings=True)
stepwise_fit.summary()

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0] intercept   : AIC=24776.771, Time=4.73 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=57024.008, Time=0.09 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=25078.519, Time=0.26 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=46609.238, Time=0.52 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=79062.962, Time=0.05 sec
 ARIMA(1,0,2)(0,0,0)[0] intercept   : AIC=24954.211, Time=1.71 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=25043.577, Time=3.03 sec
 ARIMA(3,0,2)(0,0,0)[0] intercept   : AIC=24754.079, Time=6.52 sec
 ARIMA(3,0,1)(0,0,0)[0] intercept   : AIC=24760.798, Time=4.02 sec
 ARIMA(4,0,2)(0,0,0)[0] intercept   : AIC=24764.761, Time=3.73 sec
 ARIMA(3,0,3)(0,0,0)[0] intercept   : AIC=24756.905, Time=6.75 sec
 ARIMA(2,0,3)(0,0,0)[0] intercept   : AIC=24764.950, Time=5.75 sec
 ARIMA(4,0,1)(0,0,0)[0] intercept   : AIC=24757.361, Time=4.66 sec
 ARIMA(4,0,3)(0,0,0)[0] intercept   : AIC=24757.634, Time=6.36 sec
 ARIMA(3,0,2)(0,0,0

0,1,2,3
Dep. Variable:,y,No. Observations:,8827.0
Model:,"SARIMAX(3, 0, 2)",Log Likelihood,-12370.04
Date:,"Thu, 14 Mar 2024",AIC,24754.079
Time:,22:07:18,BIC,24803.678
Sample:,01-01-2000,HQIC,24770.973
,- 03-01-2024,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0370,0.009,4.328,0.000,0.020,0.054
ar.L1,2.0031,0.064,31.238,0.000,1.877,2.129
ar.L2,-1.3583,0.116,-11.692,0.000,-1.586,-1.131
ar.L3,0.3534,0.054,6.553,0.000,0.248,0.459
ma.L1,-1.0066,0.066,-15.199,0.000,-1.136,-0.877
ma.L2,0.2273,0.061,3.726,0.000,0.108,0.347
sigma2,0.9651,0.011,85.043,0.000,0.943,0.987

0,1,2,3
Ljung-Box (L1) (Q):,0.04,Jarque-Bera (JB):,1346.62
Prob(Q):,0.83,Prob(JB):,0.0
Heteroskedasticity (H):,1.45,Skew:,-0.44
Prob(H) (two-sided):,0.0,Kurtosis:,4.7


In [8]:
stepwise_fit.order

(3, 0, 2)

In [9]:
 # from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima.model import ARIMA

In [10]:
train = pd.read_excel('../DataPreProcessing/ptrain.xlsx' , index_col = 'Date' , parse_dates = True)
test = pd.read_excel('../DataPreProcessing/ptest.xlsx', index_col = 'Date' , parse_dates = True)

In [11]:
arima = ARIMA(train['temperature_2m (°C)'] , order = stepwise_fit.order)
arima = arima.fit()

In [12]:
start = len(train)
end = len(wdata)+1

In [13]:
pred = arima.predict(start = start , end = end , type = 'levels')
pred

2019-03-02    13.454039
2019-03-03    13.420265
2019-03-04    13.203207
2019-03-05    13.015320
2019-03-06    12.916538
                ...    
2024-02-28    19.366866
2024-02-29    19.366869
2024-03-01    19.366872
2024-03-02    19.366875
2024-03-03    19.366878
Freq: D, Name: predicted_mean, Length: 1829, dtype: float64

In [14]:
# from sklearn.metrics import mean_squared_error
# mse_train = mean_squared_error(test['temperature_2m (°C)'], pred)
# print(f'Mean Squared Error: {mse_train}')

In [15]:
arima = ARIMA(wdata['temperature_2m (°C)'] , order = stepwise_fit.order)
arima = arima.fit()

In [16]:

forecast = arima.forecast(steps=365)
forecast

2024-03-02    16.427512
2024-03-03    15.984617
2024-03-04    15.662663
2024-03-05    15.503901
2024-03-06    15.466687
                ...    
2025-02-25    19.456501
2025-02-26    19.460174
2025-02-27    19.463828
2025-02-28    19.467462
2025-03-01    19.471077
Freq: D, Name: predicted_mean, Length: 365, dtype: float64