In [None]:
import pandas as pd
import seaborn as sns
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import numpy as np
import itertools
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import make_scorer
import time
from sklearn.preprocessing import LabelEncoder


In [3]:
# Read the train and test splits from CSV files in the working directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('train_with_seasonal_cluster.csv', 'test_with_seasonal_cluster.csv')
)

In [5]:
# Preview the first five rows of the training frame.
train.head(n=5)

Unnamed: 0,ID,cluster_id,electricity_consumption,temperature_2m_max,temperature_2m_min,sunshine_duration,daylight_duration,wind_speed_10m_max,wind_direction_10m_dominant,month,dayofweek,quarter,day,weekofyear
0,cluster_1_2014-01-01,0,358.032,10.8,4.2,53.003333,29787.533333,40.6,186.0,1,2,1,1,1
1,cluster_2_2014-01-01,1,548.247,12.2,4.3,8195.656667,30650.35,36.0,170.591118,1,2,1,1,1
2,cluster_3_2014-01-01,2,758.303,12.9,-0.8,16305.26,31547.686667,20.9,159.467752,1,2,1,1,1
3,cluster_4_2014-01-01,3,1072.077,10.8,4.7,9224.803333,30769.22,34.7,184.339753,1,2,1,1,1
4,cluster_1_2014-01-02,0,386.908,10.7,7.0,22372.0,29850.226667,34.3,210.333465,1,3,1,2,1


In [4]:
# Target series handed to SARIMAX: every row of the consumption column, in file order.
# NOTE(review): train.head() shows four cluster rows per date, so this single
# series interleaves the clusters - confirm that modelling it as one series is intended.
y_train = train.loc[:, 'electricity_consumption']

In [6]:
# Candidate values for each non-seasonal term (p, d, q): 0, 1, 2.
p = d = q = range(3)
# Candidate values for each seasonal term (P, D, Q): 0, 1.
P = D = Q = range(2)
# Seasonal period used as the last element of every seasonal order.
# NOTE(review): 4 equals the number of cluster rows per date shown in
# train.head() - confirm this is the intended period.
s = 4

In [7]:
# Every (p, d, q) combination, with the right-most term varying fastest.
pdq = [(ar, diff, ma) for ar in p for diff in d for ma in q]
# Every (P, D, Q, s) combination; the seasonal period s is the same in all of them.
seasonal_pdq = [(sar, sdiff, sma, s) for sar in P for sdiff in D for sma in Q]

In [8]:
# Initial search state: any finite score is lower than +inf, and None marks
# "no candidate selected yet" for both orders.
best_score = float("inf")
best_order = best_seasonal_order = None

In [9]:
# Grid search: fit one SARIMAX model per (order, seasonal_order) pair and keep
# the pair with the lowest AIC in best_score / best_order / best_seasonal_order.
failed_fits = 0  # combinations whose construction, fit or AIC lookup raised
for order in pdq:
    for seasonal_order in seasonal_pdq:
        try:
            model = SARIMAX(y_train,
                            order=order,
                            seasonal_order=seasonal_order,
                            enforce_stationarity=False,
                            enforce_invertibility=False)
            results = model.fit(disp=0)
            aic = results.aic
        except Exception:
            # Best effort: a combination that cannot be fitted is skipped.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit still stop this long-running search.
            failed_fits += 1
            continue
        # A NaN AIC never compares lower, so such fits are ignored here.
        if aic < best_score:
            best_score = aic
            best_order = order
            best_seasonal_order = seasonal_order

if failed_fits:
    print(f"Skipped {failed_fits} of {len(pdq) * len(seasonal_pdq)} combinations that failed to fit")
if best_order is None:
    # Fail here with a clear message rather than passing None orders onward.
    raise RuntimeError(
        "SARIMAX grid search found no usable model: every combination "
        "failed to fit or produced a non-finite AIC"
    )

print("Best order:", best_order)
print("Best seasonal order:", best_seasonal_order)



Best order: (2, 0, 2)
Best seasonal order: (1, 1, 1, 4)


In [10]:
# Rebuild the model with the orders selected by the grid search, using the
# same relaxed stationarity / invertibility settings as during the search.
model = SARIMAX(
    y_train,
    order=best_order,
    seasonal_order=best_seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)

# Fit with default optimiser settings; `results` is what the forecast uses.
results = model.fit()

In [None]:
# Forecast horizon: one step per row of the test frame.
n_steps = len(test)
# Out-of-sample forecast continuing from the end of the training series.
# NOTE(review): assumes the test rows follow the training rows in the same
# row order - TODO confirm.
forecast = results.forecast(steps=n_steps)

In [12]:
# Start from the test IDs, then attach the forecasts positionally
# (.values drops the forecast's own index so rows pair up by order).
submission = pd.DataFrame({"ID": test["ID"]})
submission["electricity_consumption"] = forecast.values

# Write the submission file without the DataFrame index column.
submission.to_csv("submission9.csv", index=False)