In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from datetime import timedelta 

In [None]:
def accu(actual, forecast):
    """Summarise forecast accuracy for one series as a one-row DataFrame.

    Parameters
    ----------
    actual : array-like
        Observed values (1-D).
    forecast : array-like
        Predicted values, in the same order and of the same length as ``actual``.

    Returns
    -------
    pandas.DataFrame
        Single row (index 0) with columns ``n``, ``mean``, ``sd``, ``CV``,
        ``FBias``, ``MAPE``, ``RMSE``, ``MAD``, ``MADP`` and ``WMAPE``.

    Raises
    ------
    ValueError
        If ``actual`` and ``forecast`` do not have the same shape.

    Notes
    -----
    ``sd`` is the population standard deviation (ddof=0). ``MAPE`` divides by
    ``actual + 1`` instead of ``actual`` so zero observations do not divide by
    zero; it is therefore not a textbook MAPE.
    """
    # Pair the two inputs by position. Subtracting two pandas Series aligns them
    # on index labels, so inputs with different indexes used to yield NaN errors
    # that the sums below silently skipped (reporting zero error).
    actual = pd.Series(np.asarray(actual, dtype=float))
    forecast = np.asarray(forecast, dtype=float)
    if forecast.shape != actual.shape:
        raise ValueError(
            f"actual and forecast must have the same shape, got {actual.shape} and {forecast.shape}"
        )

    error = actual - forecast
    n = len(actual)
    mean = np.mean(actual)
    sd = np.std(actual)
    abs_error_total = np.sum(np.abs(error))

    CV = sd / mean
    FBias = np.sum(error) / np.sum(actual)
    MAPE = np.sum(np.abs(error / (actual + 1))) / n
    RMSE = np.sqrt(np.sum(error ** 2) / n)
    MAD = abs_error_total / n
    MADP = abs_error_total / np.sum(np.abs(actual))
    WMAPE = MAD / mean

    result_dict = {
        "n": n,
        "mean": mean,
        "sd": sd,
        "CV": CV,
        "FBias": FBias,
        "MAPE": MAPE,
        "RMSE": RMSE,
        "MAD": MAD,
        "MADP": MADP,
        "WMAPE": WMAPE
    }

    return pd.DataFrame(result_dict, index=[0])

In [None]:
# Input CSV locations, given relative to the notebook's working directory.
production_data_loc = "data/production.csv"
weather_data_loc = "data/processed_weather.csv"

In [None]:
# Load production, parse dates, skip the first four rows, then de-duplicate and
# renumber the rows.
production_df = (
    pd.read_csv(production_data_loc)
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .iloc[4:]
    .drop_duplicates()
    .reset_index(drop=True)
)
# Cap production at 10.
production_df["production"] = production_df["production"].clip(upper=10)
# Remove every row dated 2024-05-12.
date = pd.to_datetime("2024-05-12")
production_df = production_df.loc[production_df["date"] != date]

production_df

In [None]:
# Load the weather table and order it by date, then hour.
weather_df = pd.read_csv(weather_data_loc)
weather_df["date"] = pd.to_datetime(weather_df["date"], format='%Y-%m-%d')
weather_df = weather_df.sort_values(by=['date', 'hour'])
weather_df.reset_index(drop=True, inplace=True) 

# Every column from position 4 onwards is treated as a weather variable
# (assumes the first four columns are date, hour, lat, lon — TODO confirm).
columns_to_pivot = weather_df.columns[4:]

# Per variable, label each row "<variable>_<lat>_<lon>"; the labels become the wide
# column names below. These helper columns stay on weather_df, and the later
# `weather_df.columns[4:-10]` slices depend on them being appended here.
for col in columns_to_pivot:
    weather_df[f'{col}_identifier'] = col + "_" + weather_df['lat'].astype(str) + "_" + weather_df['lon'].astype(str)

# One wide frame per variable: a row per (date, hour), a column per location label.
pivoted_dfs = []
for col in columns_to_pivot:
    pivoted_df = pd.pivot(
        weather_df,
        index=['date', 'hour'],
        columns=f'{col}_identifier',
        values=col
    )
    pivoted_df.columns.name = None 
    pivoted_df.reset_index(inplace=True)  
    pivoted_dfs.append(pivoted_df)

# Join the per-variable frames side by side on (date, hour).
result_df = pivoted_dfs[0]
for df in pivoted_dfs[1:]:
    result_df = result_df.merge(df, on=['date', 'hour'], how='outer')
    
# Keep the first 252 columns; this rebinds `df`, which was the loop variable above.
# NOTE(review): 252 is hard-coded — presumably 2 key columns + 10 variables x 25
# locations; confirm and derive it from the data.
df = result_df.iloc[:, :252]

df

In [None]:
# Last (date, hour) present in the production table; relies on production_df's
# final row being its latest observation.
end_date = production_df["date"].iloc[-1]
end_hour = production_df["hour"].iloc[-1]

# Keep an untruncated copy of the weather features before df is cut and merged.
predict_df = df.copy()

cut_off_index = df[(df["date"] == end_date) & (df["hour"] == end_hour)].index.min()
# An empty match yields NaN; fail with a clear message instead of an obscure
# slicing error on the next line.
if pd.isna(cut_off_index):
    raise ValueError(
        f"No weather row found for the last production timestamp ({end_date}, hour {end_hour})"
    )
# Label-based slice, so the cut-off row itself is kept.
df = df.loc[:cut_off_index]
df = pd.merge(df, production_df, on=["date", "hour"], how="inner")
# Fix: DataFrame.ffill() replaces fillna(method='ffill'), deprecated since pandas 2.1.
df = df.ffill()
df

In [None]:
# Print the average production for each hour of the day, in ascending hour order.
hours_present = sorted(df["hour"].unique())
for hour in hours_present:
    mask = df["hour"] == hour
    df_hour = df.loc[mask]
    mean_production = df_hour["production"].mean()
    print(f"Hour {hour} Mean Production = {mean_production}")

# Hour 5

In [None]:
# Rows for hour 5, split by date: before 2024 trains the model, 2024 onwards is held out.
mask = df["hour"] == 5
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 5: intercept, month dummies, linear trend and
# per-variable weather means.
# Fix: dtype=float keeps the dummies numeric. pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool and float columns is cast to object dtype,
# which sm.OLS rejects.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
)
# December is the reference month; its effect is carried by the intercept.
df_hour_month_train = df_hour_month_train.drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns.
# Fix: regex=False, because variable names contain "." and must match literally.
features_train = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_train.loc[
            :, df_hour_train.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_train.index,
)

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Fit OLS for hour 5 on the training rows with every candidate regressor and print
# the summary (presumably used to choose the drop list in the next cell).
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors removed from the hour-5 model; the same list is applied to the
# hold-out and refit matrices.
drop_lst_hour_5 = ["trend", "mean_uswrf_surface", "mean_csnow_surface", "mean_tcdc_entire.atmosphere", "mean_tcdc_middle.cloud.layer"]

df_model_hour = df_model_hour.drop(columns=drop_lst_hour_5)

# Autoregressive features built from the training production series.
# NOTE(review): ma_3 is a 3-row rolling mean that includes the current row's
# production, i.e. the value being predicted — confirm this leakage is intended.
# The hold-out and refit cells use the same definition, so change them together.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
# It fills the two leading NaNs of ma_3 with the first complete window.
df_model_hour = df_model_hour.bfill()

In [None]:
# Refit hour 5 on the reduced regressor set plus lag_2 and ma_3; kept as model_hour_5.
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_5 = lm.fit()
print(model_hour_5.summary())

In [None]:
# Accuracy on the training rows; predict() is called without new data here.
accu(df_hour_train["production"], model_hour_5.predict())

In [None]:
# Hold-out (2024) design matrix for hour 5, laid out like the training matrix.
# Fix: the old loop set month_5..month_11 to 0 unconditionally, erasing the dummy of
# any of those months that actually occurs in the hold-out rows. Reindexing to the
# training layout (month_1..month_11, December as reference) only adds the months
# that are absent. dtype=float avoids bool dummies (pandas >= 2.0), which would
# turn the matrix into object dtype.
month_cols = [f"month_{i}" for i in range(1, 12)]
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
).reindex(columns=month_cols, fill_value=0.0)

df_test = df_hour_month_test.copy()
# Fix: continue the trend where the training rows stopped (training used
# 0..len(df_hour_train)-1) instead of restarting at 0.
df_test["trend"] = np.arange(len(df_hour_train), len(df_hour_train) + len(df_test))
# has_constant="add" forces the intercept column even if some column happens to be
# constant within this short window.
df_test = sm.add_constant(df_test, has_constant="add")

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features_test = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_test.loc[
            :, df_hour_test.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_test.index,
)

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)

df_test = df_test.drop(columns=drop_lst_hour_5)

# NOTE(review): both features are computed from the hold-out rows only, and ma_3
# includes the current row's production (the value being predicted) — confirm.
df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy of the training-period model on the 2024 rows.
accu(df_hour_test["production"], model_hour_5.predict(df_test))

In [None]:
# Design matrix over all hour-5 rows (training + hold-out) for the final refit.
# Fix: dtype=float keeps the dummies numeric; pandas >= 2.0 defaults to bool, and a
# frame mixing bool with float columns becomes object dtype, which sm.OLS rejects.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# NOTE(review): this zeroes the month_5..month_11 dummies on every row and keeps
# month_12 when present, unlike the training cell (month_1..month_11 kept, month_12
# dropped). It looks copied from the hold-out cell. The layout is left unchanged
# because forecasting code outside this view may rely on it — confirm, then fix
# both together.
for i in range(5, 12):
    df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features = pd.DataFrame(
    {
        f"mean_{feature}": df_hour.loc[
            :, df_hour.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour.index,
)

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)

df_predict = df_predict.drop(columns=drop_lst_hour_5)

# NOTE(review): ma_3 includes the current row's production (the value being
# predicted) — confirm this leakage is intended.
df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit on all hour-5 rows; this overwrites the training-only model_hour_5.
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_5 = lm.fit()
print(model_hour_5.summary())

In [None]:
# Accuracy of the refit model on the same rows it was fitted on.
accu(df_hour["production"], model_hour_5.predict())

# Hour 6

In [None]:
# Rows for hour 6, split by date: before 2024 trains the model, 2024 onwards is held out.
mask = df["hour"] == 6
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 6: intercept, month dummies, linear trend and
# per-variable weather means.
# Fix: dtype=float keeps the dummies numeric. pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool and float columns is cast to object dtype,
# which sm.OLS rejects.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
)
# December is the reference month; its effect is carried by the intercept.
df_hour_month_train = df_hour_month_train.drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns.
# Fix: regex=False, because variable names contain "." and must match literally.
features_train = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_train.loc[
            :, df_hour_train.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_train.index,
)

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Fit OLS for hour 6 on the training rows with every candidate regressor and print
# the summary (presumably used to choose the drop list in the next cell).
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors removed from the hour-6 model; the same list is applied to the
# hold-out and refit matrices.
drop_lst_hour_6 = ["mean_dswrf_surface", "mean_tcdc_middle.cloud.layer", "mean_tcdc_high.cloud.layer", "mean_uswrf_surface"]

df_model_hour = df_model_hour.drop(columns=drop_lst_hour_6)

# Autoregressive features built from the training production series.
# NOTE(review): ma_3 is a 3-row rolling mean that includes the current row's
# production, i.e. the value being predicted — confirm this leakage is intended.
# The hold-out and refit cells use the same definition, so change them together.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
# It fills the two leading NaNs of ma_3 with the first complete window.
df_model_hour = df_model_hour.bfill()

In [None]:
# Refit hour 6 on the reduced regressor set plus lag_2 and ma_3; kept as model_hour_6.
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_6 = lm.fit()
print(model_hour_6.summary())

In [None]:
# Accuracy on the training rows; predict() is called without new data here.
accu(df_hour_train["production"], model_hour_6.predict())

In [None]:
# Hold-out (2024) design matrix for hour 6, laid out like the training matrix.
# Fix: the old loop set month_5..month_11 to 0 unconditionally, erasing the dummy of
# any of those months that actually occurs in the hold-out rows. Reindexing to the
# training layout (month_1..month_11, December as reference) only adds the months
# that are absent. dtype=float avoids bool dummies (pandas >= 2.0), which would
# turn the matrix into object dtype.
month_cols = [f"month_{i}" for i in range(1, 12)]
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
).reindex(columns=month_cols, fill_value=0.0)

df_test = df_hour_month_test.copy()
# Fix: continue the trend where the training rows stopped (training used
# 0..len(df_hour_train)-1) instead of restarting at 0.
df_test["trend"] = np.arange(len(df_hour_train), len(df_hour_train) + len(df_test))
# has_constant="add" forces the intercept column even if some column happens to be
# constant within this short window.
df_test = sm.add_constant(df_test, has_constant="add")

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features_test = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_test.loc[
            :, df_hour_test.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_test.index,
)

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)

df_test = df_test.drop(columns=drop_lst_hour_6)

# NOTE(review): both features are computed from the hold-out rows only, and ma_3
# includes the current row's production (the value being predicted) — confirm.
df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy of the training-period model on the 2024 rows.
accu(df_hour_test["production"], model_hour_6.predict(df_test))

In [None]:
# Design matrix over all hour-6 rows (training + hold-out) for the final refit.
# Fix: dtype=float keeps the dummies numeric; pandas >= 2.0 defaults to bool, and a
# frame mixing bool with float columns becomes object dtype, which sm.OLS rejects.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# NOTE(review): this zeroes the month_5..month_11 dummies on every row and keeps
# month_12 when present, unlike the training cell (month_1..month_11 kept, month_12
# dropped). It looks copied from the hold-out cell. The layout is left unchanged
# because forecasting code outside this view may rely on it — confirm, then fix
# both together.
for i in range(5, 12):
    df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features = pd.DataFrame(
    {
        f"mean_{feature}": df_hour.loc[
            :, df_hour.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour.index,
)

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)

df_predict = df_predict.drop(columns=drop_lst_hour_6)

# NOTE(review): ma_3 includes the current row's production (the value being
# predicted) — confirm this leakage is intended.
df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit on all hour-6 rows; this overwrites the training-only model_hour_6.
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_6 = lm.fit()
print(model_hour_6.summary())

In [None]:
# Accuracy of the refit model on the same rows it was fitted on.
accu(df_hour["production"], model_hour_6.predict())

# Hour 7

In [None]:
# Rows for hour 7, split by date: before 2024 trains the model, 2024 onwards is held out.
mask = df["hour"] == 7
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 7: intercept, month dummies, linear trend and
# per-variable weather means.
# Fix: dtype=float keeps the dummies numeric. pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool and float columns is cast to object dtype,
# which sm.OLS rejects.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
)
# December is the reference month; its effect is carried by the intercept.
df_hour_month_train = df_hour_month_train.drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns.
# Fix: regex=False, because variable names contain "." and must match literally.
features_train = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_train.loc[
            :, df_hour_train.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_train.index,
)

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Fit OLS for hour 7 on the training rows with every candidate regressor and print
# the summary (presumably used to choose the drop list in the next cell).
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors removed from the hour-7 model; the same list is applied to the
# hold-out and refit matrices.
drop_lst_hour_7 = ["mean_dswrf_surface", "mean_tcdc_high.cloud.layer", "mean_uswrf_top_of_atmosphere", "mean_tmp_surface"]

df_model_hour = df_model_hour.drop(columns=drop_lst_hour_7)

# Autoregressive features built from the training production series.
# NOTE(review): ma_3 is a 3-row rolling mean that includes the current row's
# production, i.e. the value being predicted — confirm this leakage is intended.
# The hold-out and refit cells use the same definition, so change them together.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
# It fills the two leading NaNs of ma_3 with the first complete window.
df_model_hour = df_model_hour.bfill()

In [None]:
# Refit hour 7 on the reduced regressor set plus lag_2 and ma_3; kept as model_hour_7.
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_7 = lm.fit()
print(model_hour_7.summary())

In [None]:
# Accuracy on the training rows; predict() is called without new data here.
accu(df_hour_train["production"], model_hour_7.predict())

In [None]:
# Hold-out (2024) design matrix for hour 7, laid out like the training matrix.
# Fix: the old loop set month_5..month_11 to 0 unconditionally, erasing the dummy of
# any of those months that actually occurs in the hold-out rows. Reindexing to the
# training layout (month_1..month_11, December as reference) only adds the months
# that are absent. dtype=float avoids bool dummies (pandas >= 2.0), which would
# turn the matrix into object dtype.
month_cols = [f"month_{i}" for i in range(1, 12)]
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
).reindex(columns=month_cols, fill_value=0.0)

df_test = df_hour_month_test.copy()
# Fix: continue the trend where the training rows stopped (training used
# 0..len(df_hour_train)-1) instead of restarting at 0.
df_test["trend"] = np.arange(len(df_hour_train), len(df_hour_train) + len(df_test))
# has_constant="add" forces the intercept column even if some column happens to be
# constant within this short window.
df_test = sm.add_constant(df_test, has_constant="add")

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features_test = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_test.loc[
            :, df_hour_test.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_test.index,
)

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)

df_test = df_test.drop(columns=drop_lst_hour_7)

# NOTE(review): both features are computed from the hold-out rows only, and ma_3
# includes the current row's production (the value being predicted) — confirm.
df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy of the training-period model on the 2024 rows.
accu(df_hour_test["production"], model_hour_7.predict(df_test))

In [None]:
# Design matrix over all hour-7 rows (training + hold-out) for the final refit.
# Fix: dtype=float keeps the dummies numeric; pandas >= 2.0 defaults to bool, and a
# frame mixing bool with float columns becomes object dtype, which sm.OLS rejects.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# NOTE(review): this zeroes the month_5..month_11 dummies on every row and keeps
# month_12 when present, unlike the training cell (month_1..month_11 kept, month_12
# dropped). It looks copied from the hold-out cell. The layout is left unchanged
# because forecasting code outside this view may rely on it — confirm, then fix
# both together.
for i in range(5, 12):
    df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features = pd.DataFrame(
    {
        f"mean_{feature}": df_hour.loc[
            :, df_hour.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour.index,
)

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)

df_predict = df_predict.drop(columns=drop_lst_hour_7)

# NOTE(review): ma_3 includes the current row's production (the value being
# predicted) — confirm this leakage is intended.
df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit on all hour-7 rows; this overwrites the training-only model_hour_7.
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_7 = lm.fit()
print(model_hour_7.summary())

In [None]:
# Accuracy of the refit model on the same rows it was fitted on.
accu(df_hour["production"], model_hour_7.predict())

# Hour 8

In [None]:
# Rows for hour 8, split by date: before 2024 trains the model, 2024 onwards is held out.
mask = df["hour"] == 8
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 8: intercept, month dummies, linear trend and
# per-variable weather means.
# Fix: dtype=float keeps the dummies numeric. pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool and float columns is cast to object dtype,
# which sm.OLS rejects.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
)
# December is the reference month; its effect is carried by the intercept.
df_hour_month_train = df_hour_month_train.drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns.
# Fix: regex=False, because variable names contain "." and must match literally.
features_train = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_train.loc[
            :, df_hour_train.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_train.index,
)

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Fit OLS for hour 8 on the training rows with every candidate regressor and print
# the summary (presumably used to choose the drop list in the next cell).
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors removed from the hour-8 model; the same list is applied to the
# hold-out and refit matrices.
drop_lst_hour_8 = ["mean_dlwrf_surface"]

df_model_hour = df_model_hour.drop(columns=drop_lst_hour_8)

# Autoregressive features built from the training production series.
# NOTE(review): ma_3 is a 3-row rolling mean that includes the current row's
# production, i.e. the value being predicted — confirm this leakage is intended.
# The hold-out and refit cells use the same definition, so change them together.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
# It fills the two leading NaNs of ma_3 with the first complete window.
df_model_hour = df_model_hour.bfill()

In [None]:
# Refit hour 8 on the reduced regressor set plus lag_2 and ma_3; kept as model_hour_8.
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_8 = lm.fit()
print(model_hour_8.summary())

In [None]:
# Accuracy on the training rows; predict() is called without new data here.
accu(df_hour_train["production"], model_hour_8.predict())

In [None]:
# Hold-out (2024) design matrix for hour 8, laid out like the training matrix.
# Fix: the old loop set month_5..month_11 to 0 unconditionally, erasing the dummy of
# any of those months that actually occurs in the hold-out rows. Reindexing to the
# training layout (month_1..month_11, December as reference) only adds the months
# that are absent. dtype=float avoids bool dummies (pandas >= 2.0), which would
# turn the matrix into object dtype.
month_cols = [f"month_{i}" for i in range(1, 12)]
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
).reindex(columns=month_cols, fill_value=0.0)

df_test = df_hour_month_test.copy()
# Fix: continue the trend where the training rows stopped (training used
# 0..len(df_hour_train)-1) instead of restarting at 0.
df_test["trend"] = np.arange(len(df_hour_train), len(df_hour_train) + len(df_test))
# has_constant="add" forces the intercept column even if some column happens to be
# constant within this short window.
df_test = sm.add_constant(df_test, has_constant="add")

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features_test = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_test.loc[
            :, df_hour_test.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_test.index,
)

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)

df_test = df_test.drop(columns=drop_lst_hour_8)

# NOTE(review): both features are computed from the hold-out rows only, and ma_3
# includes the current row's production (the value being predicted) — confirm.
df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy of the training-period model on the 2024 rows.
accu(df_hour_test["production"], model_hour_8.predict(df_test))

In [None]:
# Design matrix over all hour-8 rows (training + hold-out) for the final refit.
# Fix: dtype=float keeps the dummies numeric; pandas >= 2.0 defaults to bool, and a
# frame mixing bool with float columns becomes object dtype, which sm.OLS rejects.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# NOTE(review): this zeroes the month_5..month_11 dummies on every row and keeps
# month_12 when present, unlike the training cell (month_1..month_11 kept, month_12
# dropped). It looks copied from the hold-out cell. The layout is left unchanged
# because forecasting code outside this view may rely on it — confirm, then fix
# both together.
for i in range(5, 12):
    df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features = pd.DataFrame(
    {
        f"mean_{feature}": df_hour.loc[
            :, df_hour.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour.index,
)

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)

df_predict = df_predict.drop(columns=drop_lst_hour_8)

# NOTE(review): ma_3 includes the current row's production (the value being
# predicted) — confirm this leakage is intended.
df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit on all hour-8 rows; this overwrites the training-only model_hour_8.
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_8 = lm.fit()
print(model_hour_8.summary())

In [None]:
# Accuracy of the refit model on the same rows it was fitted on.
accu(df_hour["production"], model_hour_8.predict())

# Hour 9

In [None]:
# Rows for hour 9, split by date: before 2024 trains the model, 2024 onwards is held out.
mask = df["hour"] == 9
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 9: intercept, month dummies, linear trend and
# per-variable weather means.
# Fix: dtype=float keeps the dummies numeric. pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool and float columns is cast to object dtype,
# which sm.OLS rejects.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
)
# December is the reference month; its effect is carried by the intercept.
df_hour_month_train = df_hour_month_train.drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns.
# Fix: regex=False, because variable names contain "." and must match literally.
features_train = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_train.loc[
            :, df_hour_train.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_train.index,
)

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Fit OLS for hour 9 on the training rows with every candidate regressor and print
# the summary (presumably used to choose the drop list in the next cell).
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors removed from the hour-9 model; the same list is applied to the
# hold-out and refit matrices.
# NOTE(review): this list is identical to drop_lst_hour_5 — confirm it was chosen
# from the hour-9 summary and is not a copy-paste leftover.
drop_lst_hour_9 = ["trend", "mean_uswrf_surface", "mean_csnow_surface", "mean_tcdc_entire.atmosphere", "mean_tcdc_middle.cloud.layer"]

df_model_hour = df_model_hour.drop(columns=drop_lst_hour_9)

# Autoregressive features built from the training production series.
# NOTE(review): ma_3 is a 3-row rolling mean that includes the current row's
# production, i.e. the value being predicted — confirm this leakage is intended.
# The hold-out and refit cells use the same definition, so change them together.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
# It fills the two leading NaNs of ma_3 with the first complete window.
df_model_hour = df_model_hour.bfill()

In [None]:
# Refit hour 9 on the reduced regressor set plus lag_2 and ma_3; kept as model_hour_9.
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_9 = lm.fit()
print(model_hour_9.summary())

In [None]:
# Accuracy on the training rows; predict() is called without new data here.
accu(df_hour_train["production"], model_hour_9.predict())

In [None]:
# Hold-out (2024) design matrix for hour 9, laid out like the training matrix.
# Fix: the old loop set month_5..month_11 to 0 unconditionally, erasing the dummy of
# any of those months that actually occurs in the hold-out rows. Reindexing to the
# training layout (month_1..month_11, December as reference) only adds the months
# that are absent. dtype=float avoids bool dummies (pandas >= 2.0), which would
# turn the matrix into object dtype.
month_cols = [f"month_{i}" for i in range(1, 12)]
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
).reindex(columns=month_cols, fill_value=0.0)

df_test = df_hour_month_test.copy()
# Fix: continue the trend where the training rows stopped (training used
# 0..len(df_hour_train)-1) instead of restarting at 0.
df_test["trend"] = np.arange(len(df_hour_train), len(df_hour_train) + len(df_test))
# has_constant="add" forces the intercept column even if some column happens to be
# constant within this short window.
df_test = sm.add_constant(df_test, has_constant="add")

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features_test = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_test.loc[
            :, df_hour_test.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_test.index,
)

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)

df_test = df_test.drop(columns=drop_lst_hour_9)

# NOTE(review): both features are computed from the hold-out rows only, and ma_3
# includes the current row's production (the value being predicted) — confirm.
df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy of the training-period model on the 2024 rows.
accu(df_hour_test["production"], model_hour_9.predict(df_test))

In [None]:
# Design matrix over all hour-9 rows (training + hold-out) for the final refit.
# Fix: dtype=float keeps the dummies numeric; pandas >= 2.0 defaults to bool, and a
# frame mixing bool with float columns becomes object dtype, which sm.OLS rejects.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# NOTE(review): this zeroes the month_5..month_11 dummies on every row and keeps
# month_12 when present, unlike the training cell (month_1..month_11 kept, month_12
# dropped). It looks copied from the hold-out cell. The layout is left unchanged
# because forecasting code outside this view may rely on it — confirm, then fix
# both together.
for i in range(5, 12):
    df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features = pd.DataFrame(
    {
        f"mean_{feature}": df_hour.loc[
            :, df_hour.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour.index,
)

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)

df_predict = df_predict.drop(columns=drop_lst_hour_9)

# NOTE(review): ma_3 includes the current row's production (the value being
# predicted) — confirm this leakage is intended.
df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit on all hour-9 rows; this overwrites the training-only model_hour_9.
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_9 = lm.fit()
print(model_hour_9.summary())

In [None]:
# Accuracy of the refit model on the same rows it was fitted on.
accu(df_hour["production"], model_hour_9.predict())

# Hour 10

In [None]:
# Rows for hour 10, split by date: before 2024 trains the model, 2024 onwards is held out.
mask = df["hour"] == 10
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 10: intercept, month dummies, linear trend and
# per-variable weather means.
# Fix: dtype=float keeps the dummies numeric. pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool and float columns is cast to object dtype,
# which sm.OLS rejects.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
)
# December is the reference month; its effect is carried by the intercept.
df_hour_month_train = df_hour_month_train.drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns.
# Fix: regex=False, because variable names contain "." and must match literally.
features_train = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_train.loc[
            :, df_hour_train.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_train.index,
)

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Fit OLS for hour 10 on the training rows with every candidate regressor and print
# the summary (presumably used to choose the drop list in the next cell).
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors removed from the hour-10 model; the same list is applied to the
# hold-out and refit matrices.
drop_lst_hour_10 = ["mean_uswrf_top_of_atmosphere", "mean_tcdc_high.cloud.layer"]

df_model_hour = df_model_hour.drop(columns=drop_lst_hour_10)

# Autoregressive features built from the training production series.
# NOTE(review): ma_3 is a 3-row rolling mean that includes the current row's
# production, i.e. the value being predicted — confirm this leakage is intended.
# The hold-out and refit cells use the same definition, so change them together.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
# It fills the two leading NaNs of ma_3 with the first complete window.
df_model_hour = df_model_hour.bfill()

In [None]:
# Refit hour 10 on the reduced regressor set plus lag_2 and ma_3; kept as model_hour_10.
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_10 = lm.fit()
print(model_hour_10.summary())

In [None]:
# Accuracy on the training rows; predict() is called without new data here.
accu(df_hour_train["production"], model_hour_10.predict())

In [None]:
# Hold-out (2024) design matrix for hour 10, laid out like the training matrix.
# Fix: the old loop set month_5..month_11 to 0 unconditionally, erasing the dummy of
# any of those months that actually occurs in the hold-out rows. Reindexing to the
# training layout (month_1..month_11, December as reference) only adds the months
# that are absent. dtype=float avoids bool dummies (pandas >= 2.0), which would
# turn the matrix into object dtype.
month_cols = [f"month_{i}" for i in range(1, 12)]
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
).reindex(columns=month_cols, fill_value=0.0)

df_test = df_hour_month_test.copy()
# Fix: continue the trend where the training rows stopped (training used
# 0..len(df_hour_train)-1) instead of restarting at 0.
df_test["trend"] = np.arange(len(df_hour_train), len(df_hour_train) + len(df_test))
# has_constant="add" forces the intercept column even if some column happens to be
# constant within this short window.
df_test = sm.add_constant(df_test, has_constant="add")

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features_test = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_test.loc[
            :, df_hour_test.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_test.index,
)

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)

df_test = df_test.drop(columns=drop_lst_hour_10)

# NOTE(review): both features are computed from the hold-out rows only, and ma_3
# includes the current row's production (the value being predicted) — confirm.
df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy of the training-period model on the 2024 rows.
accu(df_hour_test["production"], model_hour_10.predict(df_test))

In [None]:
# Design matrix over all hour-10 rows (training + hold-out) for the final refit.
# Fix: dtype=float keeps the dummies numeric; pandas >= 2.0 defaults to bool, and a
# frame mixing bool with float columns becomes object dtype, which sm.OLS rejects.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# NOTE(review): this zeroes the month_5..month_11 dummies on every row and keeps
# month_12 when present, unlike the training cell (month_1..month_11 kept, month_12
# dropped). It looks copied from the hold-out cell. The layout is left unchanged
# because forecasting code outside this view may rely on it — confirm, then fix
# both together.
for i in range(5, 12):
    df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features = pd.DataFrame(
    {
        f"mean_{feature}": df_hour.loc[
            :, df_hour.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour.index,
)

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)

df_predict = df_predict.drop(columns=drop_lst_hour_10)

# NOTE(review): ma_3 includes the current row's production (the value being
# predicted) — confirm this leakage is intended.
df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit on all hour-10 rows; this overwrites the training-only model_hour_10.
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_10 = lm.fit()
print(model_hour_10.summary())

In [None]:
# Accuracy of the refit model on the same rows it was fitted on.
accu(df_hour["production"], model_hour_10.predict())

# Hour 11

In [None]:
# Rows for hour 11, split by date: before 2024 trains the model, 2024 onwards is held out.
mask = df["hour"] == 11
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 11: intercept, month dummies, linear trend and
# per-variable weather means.
# Fix: dtype=float keeps the dummies numeric. pandas >= 2.0 returns bool dummies by
# default, and a frame mixing bool and float columns is cast to object dtype,
# which sm.OLS rejects.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
)
# December is the reference month; its effect is carried by the intercept.
df_hour_month_train = df_hour_month_train.drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns.
# Fix: regex=False, because variable names contain "." and must match literally.
features_train = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_train.loc[
            :, df_hour_train.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_train.index,
)

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Fit OLS for hour 11 on the training rows with every candidate regressor and print
# the summary (presumably used to choose the drop list in the next cell).
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors removed from the hour-11 model; the same list is applied to the
# hold-out and refit matrices.
drop_lst_hour_11 = ["mean_uswrf_top_of_atmosphere", "mean_tcdc_high.cloud.layer", "trend"]

df_model_hour = df_model_hour.drop(columns=drop_lst_hour_11)

# Autoregressive features built from the training production series.
# NOTE(review): ma_3 is a 3-row rolling mean that includes the current row's
# production, i.e. the value being predicted — confirm this leakage is intended.
# The hold-out and refit cells use the same definition, so change them together.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
# It fills the two leading NaNs of ma_3 with the first complete window.
df_model_hour = df_model_hour.bfill()

In [None]:
# Refit hour 11 on the reduced regressor set plus lag_2 and ma_3; kept as model_hour_11.
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_11 = lm.fit()
print(model_hour_11.summary())

In [None]:
# Accuracy on the training rows; predict() is called without new data here.
accu(df_hour_train["production"], model_hour_11.predict())

In [None]:
# Hold-out (2024) design matrix for hour 11, laid out like the training matrix.
# Fix: the old loop set month_5..month_11 to 0 unconditionally, erasing the dummy of
# any of those months that actually occurs in the hold-out rows. Reindexing to the
# training layout (month_1..month_11, December as reference) only adds the months
# that are absent. dtype=float avoids bool dummies (pandas >= 2.0), which would
# turn the matrix into object dtype.
month_cols = [f"month_{i}" for i in range(1, 12)]
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
).reindex(columns=month_cols, fill_value=0.0)

df_test = df_hour_month_test.copy()
# Fix: continue the trend where the training rows stopped (training used
# 0..len(df_hour_train)-1) instead of restarting at 0.
df_test["trend"] = np.arange(len(df_hour_train), len(df_hour_train) + len(df_test))
# has_constant="add" forces the intercept column even if some column happens to be
# constant within this short window.
df_test = sm.add_constant(df_test, has_constant="add")

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features_test = pd.DataFrame(
    {
        f"mean_{feature}": df_hour_test.loc[
            :, df_hour_test.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour_test.index,
)

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)

df_test = df_test.drop(columns=drop_lst_hour_11)

# NOTE(review): both features are computed from the hold-out rows only, and ma_3
# includes the current row's production (the value being predicted) — confirm.
df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy of the training-period model on the 2024 rows.
accu(df_hour_test["production"], model_hour_11.predict(df_test))

In [None]:
# Design matrix over all hour-11 rows (training + hold-out) for the final refit.
# Fix: dtype=float keeps the dummies numeric; pandas >= 2.0 defaults to bool, and a
# frame mixing bool with float columns becomes object dtype, which sm.OLS rejects.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# NOTE(review): this zeroes the month_5..month_11 dummies on every row and keeps
# month_12 when present, unlike the training cell (month_1..month_11 kept, month_12
# dropped). It looks copied from the hold-out cell. The layout is left unchanged
# because forecasting code outside this view may rely on it — confirm, then fix
# both together.
for i in range(5, 12):
    df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Weather variable names. The slice assumes four leading key columns and ten
# trailing "<variable>_identifier" helper columns on weather_df — TODO confirm.
lst = weather_df.columns[4:-10]
# Average each variable over all of its per-location columns (literal name match).
features = pd.DataFrame(
    {
        f"mean_{feature}": df_hour.loc[
            :, df_hour.columns.str.contains(feature, regex=False)
        ].mean(axis=1)
        for feature in lst
    },
    index=df_hour.index,
)

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)

df_predict = df_predict.drop(columns=drop_lst_hour_11)

# NOTE(review): ma_3 includes the current row's production (the value being
# predicted) — confirm this leakage is intended.
df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# Fix: DataFrame.bfill() replaces fillna(method='bfill'), deprecated since pandas 2.1.
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit the hour-11 model on all available rows; this replaces the
# training-only fit stored under the same name.
lm = sm.OLS(endog=df_hour["production"], exog=df_predict)
model_hour_11 = lm.fit()
print(model_hour_11.summary())

In [None]:
# In-sample accuracy metrics of the refitted hour-11 model.
accu(actual=df_hour["production"], forecast=model_hour_11.predict())

# Hour 12

In [None]:
# Isolate the hour-12 rows, then split them at the start of 2024:
# earlier dates form the training set, the rest the hold-out set.
mask = df["hour"].eq(12)
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 12: intercept, month dummies (month_12 is
# dropped as the reference level), linear trend and averaged weather features.
# dtype=float keeps the dummies numeric; the bool default of newer pandas
# would turn the mixed matrix into an object array.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
).drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Average each weather variable over all of its per-location columns.
features_train = df_hour_train.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Preliminary hour-12 fit on every candidate regressor; summary printed for inspection.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors excluded from the hour-12 model.
drop_lst_hour_12 = ["mean_uswrf_top_of_atmosphere", "mean_tcdc_high.cloud.layer", "trend"]

df_model_hour.drop(columns = drop_lst_hour_12, inplace = True)

# Autoregressive features built from the same hour on previous rows.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_model_hour = df_model_hour.bfill()

In [None]:
# Fit the hour-12 model on the reduced training matrix and print its summary.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
model_hour_12 = lm.fit()
print(model_hour_12.summary())

In [None]:
# In-sample accuracy metrics of the hour-12 training fit.
accu(actual=df_hour_train["production"], forecast=model_hour_12.predict())

In [None]:
# Hold-out (2024) design matrix for hour 12, laid out column-for-column like
# the training matrix so it can be passed to model_hour_12.predict().
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
)
# Add month dummies that do not occur in the hold-out period as all-zero
# columns, without overwriting a dummy that get_dummies already produced.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month_test.columns:
        df_hour_month_test[f"month_{i}"] = 0.0

df_test = df_hour_month_test.copy()
# The hold-out rows follow the training rows, so the trend continues from
# where the training index stopped instead of restarting at zero.
df_test["trend"] = len(df_hour_train) + np.arange(len(df_test))
df_test = sm.add_constant(df_test)

# Average each weather variable over all of its per-location columns.
features_test = df_hour_test.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)
df_test = df_test.drop(columns=drop_lst_hour_12)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy: hour-12 training fit evaluated on the 2024 rows.
accu(actual=df_hour_test["production"], forecast=model_hour_12.predict(exog=df_test))

In [None]:
# Design matrix over every hour-12 row (training + hold-out), used to refit
# the final model. Unlike the training matrix, month_12 is not dropped here,
# which keeps the month_1..month_12 layout the final forecasting cell builds.
# NOTE(review): const plus a full set of month dummies is collinear.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# Only create month dummies that are missing from the data; months that do
# occur keep their real indicator values instead of being reset to zero.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month.columns:
        df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Average each weather variable over all of its per-location columns.
features = df_hour.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)
df_predict = df_predict.drop(columns=drop_lst_hour_12)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit the hour-12 model on all available rows; this replaces the
# training-only fit stored under the same name.
lm = sm.OLS(endog=df_hour["production"], exog=df_predict)
model_hour_12 = lm.fit()
print(model_hour_12.summary())

In [None]:
# In-sample accuracy metrics of the refitted hour-12 model.
accu(actual=df_hour["production"], forecast=model_hour_12.predict())

# Hour 13

In [None]:
# Isolate the hour-13 rows, then split them at the start of 2024:
# earlier dates form the training set, the rest the hold-out set.
mask = df["hour"].eq(13)
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 13: intercept, month dummies (month_12 is
# dropped as the reference level), linear trend and averaged weather features.
# dtype=float keeps the dummies numeric; the bool default of newer pandas
# would turn the mixed matrix into an object array.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
).drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Average each weather variable over all of its per-location columns.
features_train = df_hour_train.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Preliminary hour-13 fit on every candidate regressor; summary printed for inspection.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors excluded from the hour-13 model.
drop_lst_hour_13 = ["mean_csnow_surface", "mean_tcdc_high.cloud.layer", "trend"]

df_model_hour.drop(columns = drop_lst_hour_13, inplace = True)

# Autoregressive features built from the same hour on previous rows.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_model_hour = df_model_hour.bfill()

In [None]:
# Fit the hour-13 model on the reduced training matrix and print its summary.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
model_hour_13 = lm.fit()
print(model_hour_13.summary())

In [None]:
# In-sample accuracy metrics of the hour-13 training fit.
accu(actual=df_hour_train["production"], forecast=model_hour_13.predict())

In [None]:
# Hold-out (2024) design matrix for hour 13, laid out column-for-column like
# the training matrix so it can be passed to model_hour_13.predict().
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
)
# Add month dummies that do not occur in the hold-out period as all-zero
# columns, without overwriting a dummy that get_dummies already produced.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month_test.columns:
        df_hour_month_test[f"month_{i}"] = 0.0

df_test = df_hour_month_test.copy()
# The hold-out rows follow the training rows, so the trend continues from
# where the training index stopped instead of restarting at zero.
df_test["trend"] = len(df_hour_train) + np.arange(len(df_test))
df_test = sm.add_constant(df_test)

# Average each weather variable over all of its per-location columns.
features_test = df_hour_test.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)
df_test = df_test.drop(columns=drop_lst_hour_13)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy: hour-13 training fit evaluated on the 2024 rows.
accu(actual=df_hour_test["production"], forecast=model_hour_13.predict(exog=df_test))

In [None]:
# Design matrix over every hour-13 row (training + hold-out), used to refit
# the final model. Unlike the training matrix, month_12 is not dropped here,
# which keeps the month_1..month_12 layout the final forecasting cell builds.
# NOTE(review): const plus a full set of month dummies is collinear.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# Only create month dummies that are missing from the data; months that do
# occur keep their real indicator values instead of being reset to zero.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month.columns:
        df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Average each weather variable over all of its per-location columns.
features = df_hour.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)
df_predict = df_predict.drop(columns=drop_lst_hour_13)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit the hour-13 model on all available rows; this replaces the
# training-only fit stored under the same name.
lm = sm.OLS(endog=df_hour["production"], exog=df_predict)
model_hour_13 = lm.fit()
print(model_hour_13.summary())

In [None]:
# In-sample accuracy metrics of the refitted hour-13 model.
accu(actual=df_hour["production"], forecast=model_hour_13.predict())

# Hour 14

In [None]:
# Isolate the hour-14 rows, then split them at the start of 2024:
# earlier dates form the training set, the rest the hold-out set.
mask = df["hour"].eq(14)
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 14: intercept, month dummies (month_12 is
# dropped as the reference level), linear trend and averaged weather features.
# dtype=float keeps the dummies numeric; the bool default of newer pandas
# would turn the mixed matrix into an object array.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
).drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Average each weather variable over all of its per-location columns.
features_train = df_hour_train.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Preliminary hour-14 fit on every candidate regressor; summary printed for inspection.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors excluded from the hour-14 model.
drop_lst_hour_14 = ["mean_csnow_surface", "mean_tcdc_low.cloud.layer", "mean_dswrf_surface"]

df_model_hour.drop(columns = drop_lst_hour_14, inplace = True)

# Autoregressive features built from the same hour on previous rows.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_model_hour = df_model_hour.bfill()

In [None]:
# Fit the hour-14 model on the reduced training matrix and print its summary.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
model_hour_14 = lm.fit()
print(model_hour_14.summary())

In [None]:
# In-sample accuracy metrics of the hour-14 training fit.
accu(actual=df_hour_train["production"], forecast=model_hour_14.predict())

In [None]:
# Hold-out (2024) design matrix for hour 14, laid out column-for-column like
# the training matrix so it can be passed to model_hour_14.predict().
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
)
# Add month dummies that do not occur in the hold-out period as all-zero
# columns, without overwriting a dummy that get_dummies already produced.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month_test.columns:
        df_hour_month_test[f"month_{i}"] = 0.0

df_test = df_hour_month_test.copy()
# The hold-out rows follow the training rows, so the trend continues from
# where the training index stopped instead of restarting at zero.
df_test["trend"] = len(df_hour_train) + np.arange(len(df_test))
df_test = sm.add_constant(df_test)

# Average each weather variable over all of its per-location columns.
features_test = df_hour_test.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)
df_test = df_test.drop(columns=drop_lst_hour_14)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy: hour-14 training fit evaluated on the 2024 rows.
accu(actual=df_hour_test["production"], forecast=model_hour_14.predict(exog=df_test))

In [None]:
# Design matrix over every hour-14 row (training + hold-out), used to refit
# the final model. Unlike the training matrix, month_12 is not dropped here,
# which keeps the month_1..month_12 layout the final forecasting cell builds.
# NOTE(review): const plus a full set of month dummies is collinear.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# Only create month dummies that are missing from the data; months that do
# occur keep their real indicator values instead of being reset to zero.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month.columns:
        df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Average each weather variable over all of its per-location columns.
features = df_hour.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)
df_predict = df_predict.drop(columns=drop_lst_hour_14)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit the hour-14 model on all available rows; this replaces the
# training-only fit stored under the same name.
lm = sm.OLS(endog=df_hour["production"], exog=df_predict)
model_hour_14 = lm.fit()
print(model_hour_14.summary())

In [None]:
# In-sample accuracy metrics of the refitted hour-14 model.
accu(actual=df_hour["production"], forecast=model_hour_14.predict())

# Hour 15

In [None]:
# Isolate the hour-15 rows, then split them at the start of 2024:
# earlier dates form the training set, the rest the hold-out set.
mask = df["hour"].eq(15)
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 15: intercept, month dummies (month_12 is
# dropped as the reference level), linear trend and averaged weather features.
# dtype=float keeps the dummies numeric; the bool default of newer pandas
# would turn the mixed matrix into an object array.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
).drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Average each weather variable over all of its per-location columns.
features_train = df_hour_train.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Preliminary hour-15 fit on every candidate regressor; summary printed for inspection.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors excluded from the hour-15 model.
drop_lst_hour_15 = ["mean_csnow_surface", "mean_tcdc_high.cloud.layer"]

df_model_hour.drop(columns = drop_lst_hour_15, inplace = True)

# Autoregressive features built from the same hour on previous rows.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_model_hour = df_model_hour.bfill()

In [None]:
# Fit the hour-15 model on the reduced training matrix and print its summary.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
model_hour_15 = lm.fit()
print(model_hour_15.summary())

In [None]:
# In-sample accuracy metrics of the hour-15 training fit.
accu(actual=df_hour_train["production"], forecast=model_hour_15.predict())

In [None]:
# Hold-out (2024) design matrix for hour 15, laid out column-for-column like
# the training matrix so it can be passed to model_hour_15.predict().
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
)
# Add month dummies that do not occur in the hold-out period as all-zero
# columns, without overwriting a dummy that get_dummies already produced.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month_test.columns:
        df_hour_month_test[f"month_{i}"] = 0.0

df_test = df_hour_month_test.copy()
# The hold-out rows follow the training rows, so the trend continues from
# where the training index stopped instead of restarting at zero.
df_test["trend"] = len(df_hour_train) + np.arange(len(df_test))
df_test = sm.add_constant(df_test)

# Average each weather variable over all of its per-location columns.
features_test = df_hour_test.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)
df_test = df_test.drop(columns=drop_lst_hour_15)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy: hour-15 training fit evaluated on the 2024 rows.
accu(actual=df_hour_test["production"], forecast=model_hour_15.predict(exog=df_test))

In [None]:
# Design matrix over every hour-15 row (training + hold-out), used to refit
# the final model. Unlike the training matrix, month_12 is not dropped here,
# which keeps the month_1..month_12 layout the final forecasting cell builds.
# NOTE(review): const plus a full set of month dummies is collinear.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# Only create month dummies that are missing from the data; months that do
# occur keep their real indicator values instead of being reset to zero.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month.columns:
        df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Average each weather variable over all of its per-location columns.
features = df_hour.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)
df_predict = df_predict.drop(columns=drop_lst_hour_15)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit the hour-15 model on all available rows; this replaces the
# training-only fit stored under the same name.
lm = sm.OLS(endog=df_hour["production"], exog=df_predict)
model_hour_15 = lm.fit()
print(model_hour_15.summary())

In [None]:
# In-sample accuracy metrics of the refitted hour-15 model.
accu(actual=df_hour["production"], forecast=model_hour_15.predict())

# Hour 16

In [None]:
# Isolate the hour-16 rows, then split them at the start of 2024:
# earlier dates form the training set, the rest the hold-out set.
mask = df["hour"].eq(16)
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 16: intercept, month dummies (month_12 is
# dropped as the reference level), linear trend and averaged weather features.
# dtype=float keeps the dummies numeric; the bool default of newer pandas
# would turn the mixed matrix into an object array.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
).drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Average each weather variable over all of its per-location columns.
features_train = df_hour_train.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Preliminary hour-16 fit on every candidate regressor; summary printed for inspection.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors excluded from the hour-16 model.
drop_lst_hour_16 = ["mean_csnow_surface", "mean_tcdc_middle.cloud.layer"]

df_model_hour.drop(columns = drop_lst_hour_16, inplace = True)

# Autoregressive features built from the same hour on previous rows.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_model_hour = df_model_hour.bfill()

In [None]:
# Fit the hour-16 model on the reduced training matrix and print its summary.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
model_hour_16 = lm.fit()
print(model_hour_16.summary())

In [None]:
# In-sample accuracy metrics of the hour-16 training fit.
accu(actual=df_hour_train["production"], forecast=model_hour_16.predict())

In [None]:
# Hold-out (2024) design matrix for hour 16, laid out column-for-column like
# the training matrix so it can be passed to model_hour_16.predict().
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
)
# Add month dummies that do not occur in the hold-out period as all-zero
# columns, without overwriting a dummy that get_dummies already produced.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month_test.columns:
        df_hour_month_test[f"month_{i}"] = 0.0

df_test = df_hour_month_test.copy()
# The hold-out rows follow the training rows, so the trend continues from
# where the training index stopped instead of restarting at zero.
df_test["trend"] = len(df_hour_train) + np.arange(len(df_test))
df_test = sm.add_constant(df_test)

# Average each weather variable over all of its per-location columns.
features_test = df_hour_test.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)
df_test = df_test.drop(columns=drop_lst_hour_16)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy: hour-16 training fit evaluated on the 2024 rows.
accu(actual=df_hour_test["production"], forecast=model_hour_16.predict(exog=df_test))

In [None]:
# Design matrix over every hour-16 row (training + hold-out), used to refit
# the final model. Unlike the training matrix, month_12 is not dropped here,
# which keeps the month_1..month_12 layout the final forecasting cell builds.
# NOTE(review): const plus a full set of month dummies is collinear.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# Only create month dummies that are missing from the data; months that do
# occur keep their real indicator values instead of being reset to zero.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month.columns:
        df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Average each weather variable over all of its per-location columns.
features = df_hour.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)
df_predict = df_predict.drop(columns=drop_lst_hour_16)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit the hour-16 model on all available rows; this replaces the
# training-only fit stored under the same name.
lm = sm.OLS(endog=df_hour["production"], exog=df_predict)
model_hour_16 = lm.fit()
print(model_hour_16.summary())

In [None]:
# In-sample accuracy metrics of the refitted hour-16 model.
accu(actual=df_hour["production"], forecast=model_hour_16.predict())

# Hour 17

In [None]:
# Isolate the hour-17 rows, then split them at the start of 2024:
# earlier dates form the training set, the rest the hold-out set.
mask = df["hour"].eq(17)
df_hour = df.loc[mask].reset_index(drop=True)
df_hour_train = df_hour.loc[df_hour["date"] < "2024"].reset_index(drop=True)
df_hour_test = df_hour.loc[df_hour["date"] >= "2024"].reset_index(drop=True)

df_hour

In [None]:
# Training design matrix for hour 17: intercept, month dummies (month_12 is
# dropped as the reference level), linear trend and averaged weather features.
# dtype=float keeps the dummies numeric; the bool default of newer pandas
# would turn the mixed matrix into an object array.
df_hour_month_train = pd.get_dummies(
    df_hour_train["date"].dt.month, prefix="month", dtype=float
).drop(columns=["month_12"])

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)

# Average each weather variable over all of its per-location columns.
features_train = df_hour_train.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')

df_model_hour = pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

In [None]:
# Preliminary hour-17 fit on every candidate regressor; summary printed for inspection.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
result = lm.fit()
print(result.summary())

In [None]:
# Regressors excluded from the hour-17 model.
drop_lst_hour_17 = ["mean_csnow_surface"]

df_model_hour.drop(columns = drop_lst_hour_17, inplace = True)

# Autoregressive features built from the same hour on previous rows.
df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_model_hour = df_model_hour.bfill()

In [None]:
# Fit the hour-17 model on the reduced training matrix and print its summary.
lm = sm.OLS(endog=df_hour_train["production"], exog=df_model_hour)
model_hour_17 = lm.fit()
print(model_hour_17.summary())

In [None]:
# In-sample accuracy metrics of the hour-17 training fit.
accu(actual=df_hour_train["production"], forecast=model_hour_17.predict())

In [None]:
# Hold-out (2024) design matrix for hour 17, laid out column-for-column like
# the training matrix so it can be passed to model_hour_17.predict().
df_hour_month_test = pd.get_dummies(
    df_hour_test["date"].dt.month, prefix="month", dtype=float
)
# Add month dummies that do not occur in the hold-out period as all-zero
# columns, without overwriting a dummy that get_dummies already produced.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month_test.columns:
        df_hour_month_test[f"month_{i}"] = 0.0

df_test = df_hour_month_test.copy()
# The hold-out rows follow the training rows, so the trend continues from
# where the training index stopped instead of restarting at zero.
df_test["trend"] = len(df_hour_train) + np.arange(len(df_test))
df_test = sm.add_constant(df_test)

# Average each weather variable over all of its per-location columns.
features_test = df_hour_test.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')

df_test = pd.concat(
    [df_test.reset_index(drop=True), features_test.reset_index(drop=True)], axis=1
)
df_test = df_test.drop(columns=drop_lst_hour_17)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_test = df_test.bfill()

df_test

In [None]:
# Hold-out accuracy: hour-17 training fit evaluated on the 2024 rows.
accu(actual=df_hour_test["production"], forecast=model_hour_17.predict(exog=df_test))

In [None]:
# Design matrix over every hour-17 row (training + hold-out), used to refit
# the final model. Unlike the training matrix, month_12 is not dropped here,
# which keeps the month_1..month_12 layout the final forecasting cell builds.
# NOTE(review): const plus a full set of month dummies is collinear.
df_hour_month = pd.get_dummies(df_hour["date"].dt.month, prefix="month", dtype=float)
# Only create month dummies that are missing from the data; months that do
# occur keep their real indicator values instead of being reset to zero.
for i in range(5, 12):
    if f"month_{i}" not in df_hour_month.columns:
        df_hour_month[f"month_{i}"] = 0.0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

# Average each weather variable over all of its per-location columns.
features = df_hour.drop(columns=["date", "hour", "production"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict = pd.concat(
    [df_predict.reset_index(drop=True), features.reset_index(drop=True)], axis=1
)
df_predict = df_predict.drop(columns=drop_lst_hour_17)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
# NOTE(review): this 3-row window includes the current row's production,
# i.e. the value being predicted -- confirm that is intended.
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
# The rolling mean leaves its first two rows NaN; back-fill them.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill').
df_predict = df_predict.bfill()

df_predict

In [None]:
# Refit the hour-17 model on all available rows; this replaces the
# training-only fit stored under the same name.
lm = sm.OLS(endog=df_hour["production"], exog=df_predict)
model_hour_17 = lm.fit()
print(model_hour_17.summary())

In [None]:
# In-sample accuracy metrics of the refitted hour-17 model.
accu(actual=df_hour["production"], forecast=model_hour_17.predict())

# Hour 18

# Complete Model

In [None]:
# The forecast target is two calendar days after the last date in the
# production data; keep only the predictor rows for that date.
cut_off_date = production_df["date"].iloc[-1]
next_day = cut_off_date + timedelta(days=2)
predict_df = predict_df.loc[predict_df["date"].eq(next_day)]

In [None]:
# Build the design matrix for the forecast day (one row per row of predict_df)
# with columns: const, month_1..month_12, trend, mean_* weather features,
# lag_2, ma_3. Each hourly model later drops its own excluded columns.

# Average each weather variable over all of its per-location columns.
features = predict_df.drop(columns=["date", "hour"])
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = predict_df.columns[predict_df.columns.str.contains(feature)]
    features[f"mean_{feature}"] = predict_df[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_').reset_index(drop=True)

# One-hot encode the forecast month over all twelve months, so the cell works
# for any forecast date rather than only for dates in May.
forecast_month = predict_df["date"].dt.month.reset_index(drop=True)
df_month = pd.DataFrame(
    {f"month_{i}": (forecast_month == i).astype(float) for i in range(1, 13)}
)

df_predict = df_month.copy()
# NOTE(review): this trend is the row position within the forecast day, whereas
# the hourly models were fitted with trend = observation index of that hour.
df_predict["trend"] = np.arange(len(df_predict))
df_predict.insert(0, 'const', 1)

df_concat = pd.concat([df_predict, features], axis=1)

# NOTE(review): this takes the second-to-last day of production data; confirm
# it is the day the models' lag_2 (shift by two rows) refers to.
production_2_day_before = production_df["production"].iloc[-2*24:-24]
df_concat["lag_2"] = production_2_day_before.values

# Hour-by-hour mean of the last three days of production (24 values per day).
last_3_production = production_df["production"].iloc[-3*24:].reset_index(drop=True)
daily_values = last_3_production.to_numpy()
moving_avg_3 = (daily_values[:24] + daily_values[24:48] + daily_values[48:72]) / 3
df_concat["ma_3"] = moving_avg_3

df_concat

In [None]:
# Assemble the 24-value forecast for `next_day`, one slot per hour of day.
# Hours without a fitted model (0-4 and 18-23) stay at 0.

# Each modelled hour maps to its fitted model and the columns that model
# excludes from the shared design matrix.
hourly_models = {
    5: (model_hour_5, drop_lst_hour_5),
    6: (model_hour_6, drop_lst_hour_6),
    7: (model_hour_7, drop_lst_hour_7),
    8: (model_hour_8, drop_lst_hour_8),
    9: (model_hour_9, drop_lst_hour_9),
    10: (model_hour_10, drop_lst_hour_10),
    11: (model_hour_11, drop_lst_hour_11),
    12: (model_hour_12, drop_lst_hour_12),
    13: (model_hour_13, drop_lst_hour_13),
    14: (model_hour_14, drop_lst_hour_14),
    15: (model_hour_15, drop_lst_hour_15),
    16: (model_hour_16, drop_lst_hour_16),
    17: (model_hour_17, drop_lst_hour_17),
}

# The forecast day lies this many rows past the last fitted observation.
# NOTE(review): assumes each hourly series ends on the last production day
# and that next_day is two days after it -- confirm.
FORECAST_HORIZON_DAYS = 2

# Writing each forecast to prediction[hour] keeps slot i aligned with hour i;
# appending after four leading zeros would place the hour-5 value in slot 4.
prediction = [0] * 24
for hour, (model, drop_cols) in hourly_models.items():
    # Row `hour` of df_concat is used for hour `hour`, so df_concat must hold
    # one row per hour of the day in hour order.
    row = df_concat.drop(columns=drop_cols).iloc[[hour]].astype(float)
    if "trend" in row.columns:
        # The models were fitted with trend = 0..nobs-1 over the hour's rows,
        # so continue that index instead of using the row position.
        row["trend"] = model.nobs - 1 + FORECAST_HORIZON_DAYS
    prediction[hour] = round(float(model.predict(row).iloc[0]), 2)

formatted_list = ",".join(map(str, prediction))

print(next_day)
print("\n")
print(formatted_list)