In [5]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Define the training data
# `train_data` should be a pandas DataFrame containing your time series and predictors
# `meantemp` is the dependent variable, and others are independent variables
train_data = pd.read_csv("/Users/gufeng/2024_Fall/dasc6510/Final report for time series/data/train_data.csv")

# Define the exogenous variables (independent variables)
exogenous_vars = train_data[['humidity', 'wind_speed', 'meanpressure', 'time', 
                             'season_Autumn', 'season_Spring', 'season_Summer']]

# add an intercept
exogenous_vars["Intercept"] = 1.0

# Fit the SARIMA model with exogenous variables
sarima_model = SARIMAX(
    train_data['meantemp'],  # Dependent variable
    exog=exogenous_vars,     # Exogenous variables
    order=(1, 0, 2),        # (p, d, q) for SARIMA
    seasonal_order=(0, 0, 0, 365),  # (P, D, Q, s) for seasonal SARIMA
    enforce_stationarity=False,  # Relax stationarity constraints if needed
    enforce_invertibility=False  # Relax invertibility constraints if needed
).fit(disp=False)

# Print the summary of the SARIMA model
print(sarima_model.summary())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  exogenous_vars["Intercept"] = 1.0


                               SARIMAX Results                                
Dep. Variable:               meantemp   No. Observations:                 1462
Model:               SARIMAX(1, 0, 2)   Log Likelihood               -2972.230
Date:                Sun, 01 Dec 2024   AIC                           5968.460
Time:                        00:48:16   BIC                           6031.886
Sample:                             0   HQIC                          5992.121
                               - 1462                                         
Covariance Type:                  opg                                         
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
humidity         -0.1270      0.006    -20.678      0.000      -0.139      -0.115
wind_speed       -0.0525      0.012     -4.286      0.000      -0.076      -0.028
meanpressure     -0.3886      0.010    -

In [2]:
train_data

Unnamed: 0.1,Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure,season,season_Autumn,season_Spring,season_Summer,time
0,0,2013-01-01,10.000000,84.500000,0.000000,1015.666667,Winter,0.0,0.0,0.0,1
1,1,2013-01-02,7.400000,92.000000,2.980000,1017.800000,Winter,0.0,0.0,0.0,2
2,2,2013-01-03,7.166667,87.000000,4.633333,1018.666667,Winter,0.0,0.0,0.0,3
3,3,2013-01-04,8.666667,71.333333,1.233333,1017.166667,Winter,0.0,0.0,0.0,4
4,4,2013-01-05,6.000000,86.833333,3.700000,1016.500000,Winter,0.0,0.0,0.0,5
...,...,...,...,...,...,...,...,...,...,...,...
1457,1457,2016-12-28,17.217391,68.043478,3.547826,1015.565217,Winter,0.0,0.0,0.0,1458
1458,1458,2016-12-29,15.238095,87.857143,6.000000,1016.904762,Winter,0.0,0.0,0.0,1459
1459,1459,2016-12-30,14.095238,89.666667,6.266667,1017.904762,Winter,0.0,0.0,0.0,1460
1460,1460,2016-12-31,15.052632,87.000000,7.325000,1016.100000,Winter,0.0,0.0,0.0,1461


In [15]:
from itertools import product

# Define parameter ranges
p = d = q = range(0, 3)
P = D = Q = range(0, 3)
s = 365

# Create all parameter combinations
param_combinations = list(product(p, d, q))
seasonal_combinations = list(product(P, D, Q))

# Iterate over combinations
best_model = None
best_aic = float("inf")

for params in param_combinations:
    for seasonal_params in seasonal_combinations:
        try:
            model = SARIMAX(
                train_data['meantemp'],
                exog=exogenous_vars,
                order=params,
                seasonal_order=(*seasonal_params, s),
                enforce_stationarity=False,
                enforce_invertibility=False
            ).fit(disp=False)
            
            if model.aic < best_aic:
                best_model = model
                best_aic = model.aic
        except:
            continue

# Print the best model's summary
print(best_model.summary())

