In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from datetime import timedelta 

In [2]:
def accu(actual, forecast):
    error = actual - forecast
    mean = np.mean(actual)
    sd = np.std(actual)
    CV = sd / mean
    FBias = np.sum(error) / np.sum(actual)
    MAPE = np.sum(np.abs(error / (actual+1))) / len(actual)
    RMSE = np.sqrt(np.sum(error ** 2) / len(actual))
    MAD = np.sum(np.abs(error)) / len(actual)
    MADP = np.sum(np.abs(error)) / np.sum(np.abs(actual))
    WMAPE = MAD / mean
    
    result_dict = {
        "n": len(actual),
        "mean": mean,
        "sd": sd,
        "CV": CV,
        "FBias": FBias,
        "MAPE": MAPE,
        "RMSE": RMSE,
        "MAD": MAD,
        "MADP": MADP,
        "WMAPE": WMAPE
    }
    
    return pd.DataFrame(result_dict, index=[0])

In [3]:
production_data_loc = "data\production.csv"
weather_data_loc = "data\processed_weather.csv"

In [4]:
production_df = pd.read_csv(production_data_loc)
production_df["date"] = pd.to_datetime(production_df["date"])
production_df = production_df.iloc[4:]
production_df = production_df.drop_duplicates()
production_df.reset_index(drop=True, inplace=True)
production_df["production"] = production_df["production"].apply(lambda x: 10 if x > 10 else x)
date = pd.to_datetime("2024-05-12")
production_df = production_df[production_df["date"] != date]

production_df

Unnamed: 0,date,hour,production
0,2022-01-01,4,0.0
1,2022-01-01,5,0.0
2,2022-01-01,6,0.0
3,2022-01-01,7,0.0
4,2022-01-01,8,3.4
...,...,...,...
20823,2024-05-17,19,0.0
20824,2024-05-17,20,0.0
20825,2024-05-17,21,0.0
20826,2024-05-17,22,0.0


In [5]:
weather_df = pd.read_csv(weather_data_loc)
weather_df["date"] = pd.to_datetime(weather_df["date"], format='%Y-%m-%d')
weather_df = weather_df.sort_values(by=['date', 'hour'])
weather_df.reset_index(drop=True, inplace=True) 

columns_to_pivot = weather_df.columns[4:]

for col in columns_to_pivot:
    weather_df[f'{col}_identifier'] = col + "_" + weather_df['lat'].astype(str) + "_" + weather_df['lon'].astype(str)

pivoted_dfs = []
for col in columns_to_pivot:
    pivoted_df = pd.pivot(
        weather_df,
        index=['date', 'hour'],
        columns=f'{col}_identifier',
        values=col
    )
    pivoted_df.columns.name = None 
    pivoted_df.reset_index(inplace=True)  
    pivoted_dfs.append(pivoted_df)

result_df = pivoted_dfs[0]
for df in pivoted_dfs[1:]:
    result_df = result_df.merge(df, on=['date', 'hour'], how='outer')
    
df = result_df.iloc[:, :252]

df

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.5,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5
0,2022-01-01,4,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,271.404,270.804,270.504,271.204,262.204,271.404,271.404,270.904,270.304,271.204
1,2022-01-01,5,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,271.304,270.804,270.604,271.104,262.004,271.304,271.204,270.804,270.104,271.104
2,2022-01-01,6,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,271.331,270.831,270.531,270.931,262.031,271.231,271.131,270.731,270.031,271.131
3,2022-01-01,7,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,271.217,270.817,270.517,270.917,262.317,271.217,271.117,270.717,270.017,271.217
4,2022-01-01,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,271.194,270.894,270.594,270.894,263.394,271.194,271.094,270.594,269.894,270.994
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20941,2024-05-22,17,685.040,682.340,491.880,420.500,610.260,690.980,680.900,675.260,...,301.527,300.027,299.627,299.227,294.827,304.027,303.327,302.127,302.027,301.727
20942,2024-05-22,18,589.360,588.300,457.140,393.680,523.660,594.600,587.140,582.800,...,298.221,297.321,297.021,296.021,291.821,300.521,300.321,299.021,298.921,298.821
20943,2024-05-22,19,488.880,488.864,390.448,339.440,435.104,492.224,488.240,484.656,...,293.141,293.841,293.441,292.341,288.441,297.241,296.841,295.241,294.641,294.541
20944,2024-05-22,20,391.712,395.184,317.344,275.952,352.336,394.816,392.768,390.880,...,289.022,287.622,288.622,289.122,284.522,290.422,290.522,291.722,289.922,289.522


In [6]:
end_date = production_df["date"].iloc[-1]
end_hour = production_df["hour"].iloc[-1]

predict_df = df.copy()

cut_off_index = df[(df["date"] == end_date) & (df["hour"] == end_hour)].index.min()
df = df.loc[:cut_off_index]
df = pd.merge(df, production_df, on=["date", "hour"], how = "inner")
df.fillna(method='ffill', inplace=True)
df

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,4,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,270.804,270.504,271.204,262.204,271.404,271.404,270.904,270.304,271.204,0.0
1,2022-01-01,5,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,270.804,270.604,271.104,262.004,271.304,271.204,270.804,270.104,271.104,0.0
2,2022-01-01,6,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,270.831,270.531,270.931,262.031,271.231,271.131,270.731,270.031,271.131,0.0
3,2022-01-01,7,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,270.817,270.517,270.917,262.317,271.217,271.117,270.717,270.017,271.217,0.0
4,2022-01-01,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,270.894,270.594,270.894,263.394,271.194,271.094,270.594,269.894,270.994,3.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20799,2024-05-17,19,266.448,246.592,229.296,220.912,289.840,257.328,221.200,111.808,...,288.500,288.300,287.000,282.800,292.000,291.400,290.100,289.900,289.700,0.0
20800,2024-05-17,20,214.160,198.336,184.944,179.360,234.720,209.168,179.472,92.048,...,284.621,284.521,285.121,279.621,288.921,290.121,289.321,288.921,287.721,0.0
20801,2024-05-17,21,178.464,165.280,154.112,149.472,195.616,174.304,149.568,76.704,...,284.221,283.621,283.421,280.121,286.621,286.721,285.521,287.221,287.321,0.0
20802,2024-05-17,22,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,283.858,283.658,282.958,279.158,286.458,285.758,284.658,284.558,283.958,0.0


In [7]:
for hour in sorted(df['hour'].unique()):
    mask = df["hour"] == hour
    df_hour = df[mask]
    print(f"Hour {hour} Mean Production = {df_hour.production.mean()}")

Hour 0 Mean Production = 0.0
Hour 1 Mean Production = 0.0
Hour 2 Mean Production = 0.0
Hour 3 Mean Production = 0.0
Hour 4 Mean Production = 0.0047174163783160325
Hour 5 Mean Production = 0.07513264129181085
Hour 6 Mean Production = 0.651603229527105
Hour 7 Mean Production = 2.8404959630911186
Hour 8 Mean Production = 5.497531718569782
Hour 9 Mean Production = 7.350841983852365
Hour 10 Mean Production = 7.86558246828143
Hour 11 Mean Production = 7.888512110726643
Hour 12 Mean Production = 7.78717416378316
Hour 13 Mean Production = 7.375570934256055
Hour 14 Mean Production = 6.36121107266436
Hour 15 Mean Production = 4.57161476355248
Hour 16 Mean Production = 2.383783160322953
Hour 17 Mean Production = 0.839838523644752
Hour 18 Mean Production = 0.0698961937716263
Hour 19 Mean Production = 5.7670126874279124e-05
Hour 20 Mean Production = 0.0
Hour 21 Mean Production = 0.0
Hour 22 Mean Production = 0.0
Hour 23 Mean Production = 0.0


# Hour 5

In [8]:
mask = df["hour"] == 5
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,270.804,270.604,271.104,262.004,271.304,271.204,270.804,270.104,271.104,0.00
1,2022-01-02,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,274.285,274.085,273.685,270.885,277.485,276.985,275.985,275.385,274.885,0.00
2,2022-01-03,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,270.025,269.725,269.425,257.325,271.025,271.325,270.625,270.525,270.325,0.00
3,2022-01-04,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,272.247,272.047,272.047,265.047,272.847,272.947,272.547,271.947,270.847,0.00
4,2022-01-05,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,271.328,270.428,270.728,261.028,271.828,271.728,271.228,270.528,271.028,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,277.900,277.300,279.500,277.600,278.000,280.500,280.900,281.600,281.300,0.76
863,2024-05-14,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,278.502,279.102,278.602,275.602,277.702,278.302,280.202,280.802,280.802,0.96
864,2024-05-15,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,277.931,278.431,277.731,271.731,278.331,278.231,277.531,277.331,277.031,0.84
865,2024-05-16,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,281.194,279.994,279.194,275.894,284.494,284.294,283.894,283.494,282.894,0.60


In [9]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,0.0,2.784,4.324,10.636,19.272,0.0,0.0,227.774,0.0,269.104
1,1.0,1,0,0,0,0,0,0,0,0,...,0.0,92.820,88.048,99.388,100.000,0.0,0.6,311.772,0.0,273.537
2,1.0,1,0,0,0,0,0,0,0,0,...,0.0,9.660,0.000,0.276,10.492,0.0,0.0,198.201,0.0,266.297
3,1.0,1,0,0,0,0,0,0,0,0,...,0.0,0.000,71.672,90.780,93.340,0.0,0.0,253.389,0.0,270.131
4,1.0,1,0,0,0,0,0,0,0,0,...,0.0,0.000,0.000,9.992,12.960,0.0,0.0,223.808,0.0,268.768
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.000,0.000,13.476,14.088,0.0,0.0,244.915,0.0,273.089
726,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.044,0.000,0.000,0.048,0.0,0.0,235.566,0.0,272.347
727,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.052,0.000,0.000,0.052,0.0,0.0,225.852,0.0,271.038
728,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.000,0.000,0.176,0.176,0.0,0.0,228.672,0.0,270.942


In [10]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.396
Model:                            OLS   Adj. R-squared:                  0.380
Method:                 Least Squares   F-statistic:                     24.48
Date:                Sat, 18 May 2024   Prob (F-statistic):           3.59e-65
Time:                        09:02:17   Log-Likelihood:                 400.78
No. Observations:                 730   AIC:                            -761.6
Df Residuals:                     710   BIC:                            -669.7
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [11]:
drop_lst_hour_5 = ["trend", "mean_uswrf_surface", "mean_csnow_surface", "mean_tcdc_entire.atmosphere", "mean_tcdc_middle.cloud.layer"]

df_model_hour.drop(columns = drop_lst_hour_5, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [12]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_5 = lm.fit()
print(model_hour_5.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.868
Model:                            OLS   Adj. R-squared:                  0.865
Method:                 Least Squares   F-statistic:                     275.3
Date:                Sat, 18 May 2024   Prob (F-statistic):          3.42e-299
Time:                        09:02:17   Log-Likelihood:                 955.86
No. Observations:                 730   AIC:                            -1876.
Df Residuals:                     712   BIC:                            -1793.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [13]:
accu(df_hour_train["production"], model_hour_5.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,0.059356,0.179784,3.028897,-2.889321e-14,0.009926,0.065328,0.0139,0.234182,0.234182


In [14]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_5, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_high.cloud.layer,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,0,0,0.0,0.000,99.000,0.0,245.087,271.818,0.00,0.000000
1,1.0,1,0,0,0,0,0,0,0,0,0,0,0.0,22.292,100.000,0.0,294.097,275.316,0.00,0.000000
2,1.0,1,0,0,0,0,0,0,0,0,0,0,0.0,97.128,34.312,0.0,327.443,277.509,0.00,0.000000
3,1.0,1,0,0,0,0,0,0,0,0,0,0,0.0,38.880,99.684,0.0,281.652,275.416,0.00,0.000000
4,1.0,1,0,0,0,0,0,0,0,0,0,0,0.0,97.696,9.560,0.0,331.483,278.367,0.00,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,0,0,0.0,66.208,99.664,0.0,302.364,279.196,0.63,0.676667
133,1.0,0,0,0,0,0,0,0,0,0,0,0,0.0,72.772,0.000,0.0,296.704,278.086,0.64,0.786667
134,1.0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.028,96.860,0.0,268.502,276.987,0.76,0.853333
135,1.0,0,0,0,0,0,0,0,0,0,0,0,0.0,35.132,0.592,0.0,281.086,279.774,0.96,0.800000


In [15]:
accu(df_hour_test["production"], model_hour_5.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,0.159197,0.270946,1.701954,0.018412,0.022683,0.093485,0.03354,0.21068,0.21068


In [16]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_5, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,month_11,month_12,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_high.cloud.layer,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0,0,0.0,2.784,10.636,0.0,227.774,269.104,0.00,0.000000
1,1.0,1,0,0,0,0,0,0,0,0,...,0,0,0.0,92.820,99.388,0.0,311.772,273.537,0.00,0.000000
2,1.0,1,0,0,0,0,0,0,0,0,...,0,0,0.0,9.660,0.276,0.0,198.201,266.297,0.00,0.000000
3,1.0,1,0,0,0,0,0,0,0,0,...,0,0,0.0,0.000,90.780,0.0,253.389,270.131,0.00,0.000000
4,1.0,1,0,0,0,0,0,0,0,0,...,0,0,0.0,0.000,9.992,0.0,223.808,268.768,0.00,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0.0,66.208,99.664,0.0,302.364,279.196,0.63,0.676667
863,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0.0,72.772,0.000,0.0,296.704,278.086,0.64,0.786667
864,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0.0,0.028,96.860,0.0,268.502,276.987,0.76,0.853333
865,1.0,0,0,0,0,0,0,0,0,0,...,0,0,0.0,35.132,0.592,0.0,281.086,279.774,0.96,0.800000


In [17]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_5 = lm.fit()
print(model_hour_5.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.876
Model:                            OLS   Adj. R-squared:                  0.875
Method:                 Least Squares   F-statistic:                     549.6
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        09:02:17   Log-Likelihood:                 1068.9
No. Observations:                 867   AIC:                            -2114.
Df Residuals:                     855   BIC:                            -2057.
Df Model:                          11                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [18]:
accu(df_hour["production"], model_hour_5.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,0.075133,0.200353,2.666655,3.333716e-14,0.011264,0.070524,0.016313,0.217122,0.217122


# Hour 6

In [19]:
mask = df["hour"] == 6
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,6,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,270.831,270.531,270.931,262.031,271.231,271.131,270.731,270.031,271.131,0.00
1,2022-01-02,6,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,274.378,274.078,273.678,270.978,277.478,276.978,276.078,275.578,274.978,0.00
2,2022-01-03,6,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,270.001,269.601,269.101,256.701,270.901,271.201,270.401,270.301,270.101,0.00
3,2022-01-04,6,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,272.368,272.068,271.968,263.268,272.768,273.068,272.768,272.068,270.068,0.00
4,2022-01-05,6,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,271.420,270.320,270.720,260.220,271.720,271.720,271.120,270.420,271.020,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,6,0.46,0.40,1.44,1.48,1.60,0.82,1.62,1.48,...,278.939,279.139,278.739,277.739,278.939,279.739,280.439,281.539,281.939,2.69
863,2024-05-14,6,0.72,0.18,0.86,2.42,2.48,1.68,0.76,0.96,...,277.600,278.600,278.400,275.200,279.900,277.800,279.000,280.000,280.800,2.59
864,2024-05-15,6,1.90,2.16,2.40,2.40,2.42,1.62,1.76,2.02,...,277.921,278.521,277.921,274.221,278.921,278.821,278.421,278.421,278.321,2.94
865,2024-05-16,6,2.02,2.26,2.56,2.72,2.80,2.14,2.38,2.58,...,282.722,282.522,281.422,277.822,284.822,284.422,284.422,284.022,283.822,2.89


In [20]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,0.0,2.964,5.372,11.688,21.772,0.0,0.00,227.764,0.0,269.035
1,1.0,1,0,0,0,0,0,0,0,0,...,0.0,93.304,89.960,99.588,100.000,0.0,0.68,312.206,0.0,273.710
2,1.0,1,0,0,0,0,0,0,0,0,...,0.0,9.296,0.000,0.284,10.084,0.0,0.00,198.536,0.0,266.189
3,1.0,1,0,0,0,0,0,0,0,0,...,0.0,0.000,74.092,93.828,95.536,0.0,0.00,255.377,0.0,270.068
4,1.0,1,0,0,0,0,0,0,0,0,...,0.0,0.000,0.088,10.592,13.740,0.0,0.00,223.964,0.0,268.728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.000,0.000,13.324,13.940,0.0,0.00,244.583,0.0,272.834
726,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.048,0.000,0.000,0.052,0.0,0.00,234.954,0.0,272.071
727,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.064,0.000,0.000,0.064,0.0,0.00,225.549,0.0,270.780
728,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.000,0.000,0.564,0.580,0.0,0.00,228.653,0.0,270.876


In [21]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.525
Model:                            OLS   Adj. R-squared:                  0.510
Method:                 Least Squares   F-statistic:                     35.52
Date:                Sat, 18 May 2024   Prob (F-statistic):           8.03e-99
Time:                        09:02:18   Log-Likelihood:                -749.12
No. Observations:                 730   AIC:                             1544.
Df Residuals:                     707   BIC:                             1650.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [22]:
drop_lst_hour_6 = ["mean_dswrf_surface", "mean_tcdc_middle.cloud.layer", "mean_tcdc_high.cloud.layer", "mean_uswrf_surface"]

df_model_hour.drop(columns = drop_lst_hour_6, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [23]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_6 = lm.fit()
print(model_hour_6.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.872
Model:                            OLS   Adj. R-squared:                  0.868
Method:                 Least Squares   F-statistic:                     241.2
Date:                Sat, 18 May 2024   Prob (F-statistic):          3.72e-300
Time:                        09:02:18   Log-Likelihood:                -270.90
No. Observations:                 730   AIC:                             583.8
Df Residuals:                     709   BIC:                             680.3
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [24]:
accu(df_hour_train["production"], model_hour_6.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,0.596,0.979719,1.643824,1.559679e-14,0.084787,0.350694,0.141331,0.237132,0.237132


In [25]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_6, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,month_11,trend,mean_tcdc_low.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0,0,0.000,99.392,0.00000,0.00,244.884,271.617,0.00,0.000000
1,1.0,1,0,0,0,0,0,0,0,0,...,0,1,23.272,100.000,0.00000,0.04,293.261,275.224,0.00,0.000000
2,1.0,1,0,0,0,0,0,0,0,0,...,0,2,97.064,99.296,0.00000,0.24,327.480,277.352,0.00,0.000000
3,1.0,1,0,0,0,0,0,0,0,0,...,0,3,53.512,100.000,0.00000,0.00,288.892,275.927,0.00,0.000000
4,1.0,1,0,0,0,0,0,0,0,0,...,0,4,97.268,97.992,0.00000,0.08,331.079,278.318,0.00,0.003333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,0,132,71.276,99.960,3.99488,0.04,305.732,279.635,2.82,2.673333
133,1.0,0,0,0,0,0,0,0,0,0,...,0,133,74.492,75.568,3.66080,0.12,298.591,277.988,2.51,2.596667
134,1.0,0,0,0,0,0,0,0,0,0,...,0,134,0.100,98.256,4.11968,0.00,270.013,277.985,2.69,2.740000
135,1.0,0,0,0,0,0,0,0,0,0,...,0,135,36.444,41.088,3.54816,0.00,283.820,280.754,2.59,2.806667


In [26]:
accu(df_hour_test["production"], model_hour_6.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,0.947883,1.096747,1.157048,0.03904,0.085937,0.354414,0.181405,0.191379,0.191379


In [27]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_6, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,month_12,trend,mean_tcdc_low.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0,0,2.964,21.772,0.00000,0.00,227.764,269.035,0.00,0.000000
1,1.0,1,0,0,0,0,0,0,0,0,...,0,1,93.304,100.000,0.00000,0.68,312.206,273.710,0.00,0.000000
2,1.0,1,0,0,0,0,0,0,0,0,...,0,2,9.296,10.084,0.00000,0.00,198.536,266.189,0.00,0.000000
3,1.0,1,0,0,0,0,0,0,0,0,...,0,3,0.000,95.536,0.00000,0.00,255.377,270.068,0.00,0.000000
4,1.0,1,0,0,0,0,0,0,0,0,...,0,4,0.000,13.740,0.00000,0.00,223.964,268.728,0.00,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,0,862,71.276,99.960,3.99488,0.04,305.732,279.635,2.82,2.673333
863,1.0,0,0,0,0,0,0,0,0,0,...,0,863,74.492,75.568,3.66080,0.12,298.591,277.988,2.51,2.596667
864,1.0,0,0,0,0,0,0,0,0,0,...,0,864,0.100,98.256,4.11968,0.00,270.013,277.985,2.69,2.740000
865,1.0,0,0,0,0,0,0,0,0,0,...,0,865,36.444,41.088,3.54816,0.00,283.820,280.754,2.59,2.806667


In [28]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_6 = lm.fit()
print(model_hour_6.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.879
Model:                            OLS   Adj. R-squared:                  0.877
Method:                 Least Squares   F-statistic:                     440.9
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        09:02:18   Log-Likelihood:                -322.02
No. Observations:                 867   AIC:                             674.0
Df Residuals:                     852   BIC:                             745.5
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [29]:
accu(df_hour["production"], model_hour_6.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,0.651603,1.007334,1.545932,-1.439316e-14,0.087214,0.350805,0.149285,0.229105,0.229105


# Hour 7

In [30]:
mask = df["hour"] == 7
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,7,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,270.817,270.517,270.917,262.317,271.217,271.117,270.717,270.017,271.217,0.00
1,2022-01-02,7,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,274.289,273.989,273.589,271.189,277.489,277.089,276.089,275.589,275.089,0.00
2,2022-01-03,7,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,269.798,269.498,268.998,256.998,270.798,271.098,270.198,270.098,269.798,0.85
3,2022-01-04,7,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,272.406,272.106,272.106,267.006,272.606,273.006,272.906,272.606,271.606,0.00
4,2022-01-05,7,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,271.525,270.225,270.725,260.225,271.725,271.625,270.925,270.325,271.125,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,7,3.216,2.800,18.224,17.968,18.768,10.816,20.032,12.576,...,279.772,279.572,281.072,278.272,282.072,282.072,282.172,282.772,283.472,5.75
863,2024-05-14,7,9.580,3.720,5.940,30.200,33.380,26.140,7.580,13.840,...,278.359,278.759,279.659,276.659,280.359,280.159,280.459,280.459,281.459,4.14
864,2024-05-15,7,22.480,21.360,22.928,21.984,19.648,26.080,21.280,19.328,...,281.177,281.177,281.177,277.977,282.377,282.077,281.677,281.777,282.277,6.58
865,2024-05-16,7,31.664,33.024,34.656,35.488,34.400,31.840,33.184,34.224,...,283.396,284.696,284.696,281.096,286.996,287.096,286.696,286.796,286.796,6.45


In [31]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,0.0,3.284,9.212,20.736,31.992,0.0,0.00,228.19600,0.0,269.001
1,1.0,1,0,0,0,0,0,0,0,0,...,0.0,93.492,91.856,99.696,100.000,0.0,0.64,312.99500,0.0,273.837
2,1.0,1,0,0,0,0,0,0,0,0,...,0.0,8.484,0.000,0.208,9.196,0.0,0.00,199.03344,0.0,266.118
3,1.0,1,0,0,0,0,0,0,0,0,...,0.0,0.000,79.024,95.376,96.656,0.0,0.00,258.27100,0.0,270.410
4,1.0,1,0,0,0,0,0,0,0,0,...,0.0,0.000,0.060,12.960,16.304,0.0,0.00,224.13700,0.0,268.765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.000,0.000,11.140,11.752,0.0,0.00,244.30000,0.0,272.634
726,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.040,0.000,0.000,0.044,0.0,0.00,234.50600,0.0,271.821
727,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.048,0.000,0.000,0.048,0.0,0.00,225.29900,0.0,270.589
728,1.0,0,0,0,0,0,0,0,0,0,...,0.0,0.000,0.000,1.020,1.044,0.0,0.00,228.69500,0.0,270.748


In [32]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.587
Model:                            OLS   Adj. R-squared:                  0.574
Method:                 Least Squares   F-statistic:                     45.65
Date:                Sat, 18 May 2024   Prob (F-statistic):          9.49e-120
Time:                        09:02:18   Log-Likelihood:                -1329.3
No. Observations:                 730   AIC:                             2705.
Df Residuals:                     707   BIC:                             2810.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [33]:
drop_lst_hour_7 = ["mean_dswrf_surface", "mean_tcdc_high.cloud.layer", "mean_uswrf_top_of_atmosphere", "mean_tmp_surface"]

df_model_hour.drop(columns = drop_lst_hour_7, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [34]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_7 = lm.fit()
print(model_hour_7.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.859
Model:                            OLS   Adj. R-squared:                  0.855
Method:                 Least Squares   F-statistic:                     216.6
Date:                Sat, 18 May 2024   Prob (F-statistic):          6.99e-286
Time:                        09:02:19   Log-Likelihood:                -935.95
No. Observations:                 730   AIC:                             1914.
Df Residuals:                     709   BIC:                             2010.
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [35]:
accu(df_hour_train["production"], model_hour_7.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,2.847767,2.325628,0.81665,2.287875e-15,0.246442,0.872126,0.549728,0.193038,0.193038


In [36]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_7, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,month_11,trend,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0,0,0.000,23.940,99.520,0.00,245.690,0.00000,0.00,0.390000
1,1.0,1,0,0,0,0,0,0,0,0,...,0,1,27.348,93.332,100.000,0.04,293.079,0.00000,0.00,0.390000
2,1.0,1,0,0,0,0,0,0,0,0,...,0,2,96.964,81.204,99.012,0.24,327.098,0.00000,0.35,0.390000
3,1.0,1,0,0,0,0,0,0,0,0,...,0,3,60.072,75.412,100.000,0.04,293.004,0.00000,0.52,0.326667
4,1.0,1,0,0,0,0,0,0,0,0,...,0,4,96.244,58.216,96.952,0.12,330.051,0.00000,0.30,0.223333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,0,132,74.956,51.568,99.972,0.00,308.447,2.57728,6.29,6.103333
133,1.0,0,0,0,0,0,0,0,0,0,...,0,133,73.384,4.540,74.324,0.16,296.758,4.53824,6.27,5.386667
134,1.0,0,0,0,0,0,0,0,0,0,...,0,134,0.172,68.744,98.136,0.00,271.004,4.95872,5.75,5.490000
135,1.0,0,0,0,0,0,0,0,0,0,...,0,135,36.336,33.408,42.408,0.00,284.448,7.05088,4.14,5.723333


In [37]:
accu(df_hour_test["production"], model_hour_7.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,2.801752,2.091782,0.746598,0.053063,0.178111,0.776126,0.549405,0.196094,0.196094


In [38]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_7, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,month_12,trend,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0,0,3.284,9.212,31.992,0.00,228.19600,0.00000,0.00,0.283333
1,1.0,1,0,0,0,0,0,0,0,0,...,0,1,93.492,91.856,100.000,0.64,312.99500,0.00000,0.00,0.283333
2,1.0,1,0,0,0,0,0,0,0,0,...,0,2,8.484,0.000,9.196,0.00,199.03344,0.00000,0.00,0.283333
3,1.0,1,0,0,0,0,0,0,0,0,...,0,3,0.000,79.024,96.656,0.00,258.27100,0.00000,0.00,0.283333
4,1.0,1,0,0,0,0,0,0,0,0,...,0,4,0.000,0.060,16.304,0.00,224.13700,0.00000,0.85,0.283333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,0,862,74.956,51.568,99.972,0.00,308.44700,2.57728,6.29,6.103333
863,1.0,0,0,0,0,0,0,0,0,0,...,0,863,73.384,4.540,74.324,0.16,296.75800,4.53824,6.27,5.386667
864,1.0,0,0,0,0,0,0,0,0,0,...,0,864,0.172,68.744,98.136,0.00,271.00400,4.95872,5.75,5.490000
865,1.0,0,0,0,0,0,0,0,0,0,...,0,865,36.336,33.408,42.408,0.00,284.44800,7.05088,4.14,5.723333


In [39]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_7 = lm.fit()
print(model_hour_7.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.860
Model:                            OLS   Adj. R-squared:                  0.858
Method:                 Least Squares   F-statistic:                     375.3
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        09:02:19   Log-Likelihood:                -1095.0
No. Observations:                 867   AIC:                             2220.
Df Residuals:                     852   BIC:                             2291.
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [40]:
accu(df_hour["production"], model_hour_7.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,2.840496,2.290327,0.806312,5.241699e-15,0.239897,0.855555,0.552079,0.19436,0.19436


# Hour 8

In [41]:
mask = df["hour"] == 8
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,270.894,270.594,270.894,263.394,271.194,271.094,270.594,269.894,270.994,3.40
1,2022-01-02,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,274.285,273.985,273.385,271.385,277.485,277.085,276.185,275.485,275.185,0.00
2,2022-01-03,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,269.635,269.235,268.835,256.535,270.635,270.835,270.035,269.935,269.635,5.10
3,2022-01-04,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,271.932,271.732,272.332,267.632,272.732,272.832,272.732,272.032,270.332,0.85
4,2022-01-05,8,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,271.530,270.130,270.730,260.030,271.630,271.530,270.831,270.230,271.030,4.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,8,13.168,11.184,54.864,48.288,51.680,16.048,33.200,20.336,...,282.442,282.342,284.242,281.242,285.242,285.642,284.842,285.042,285.842,9.24
863,2024-05-14,8,53.220,33.000,18.140,73.360,92.700,77.620,33.820,42.480,...,282.042,282.441,283.242,279.642,281.242,280.642,282.441,284.242,284.441,7.60
864,2024-05-15,8,66.432,66.112,64.432,56.736,49.136,80.544,75.312,64.880,...,285.697,285.797,285.397,282.197,286.997,286.597,286.197,285.997,286.497,9.13
865,2024-05-16,8,89.376,91.632,94.720,96.320,93.392,85.568,88.624,87.120,...,286.177,285.877,287.877,285.277,291.177,291.077,290.277,290.577,291.577,8.39


In [42]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,0.0000,3.672,11.252,26.432,38.376,0.00000,0.00,228.657,0.0,269.00200
1,1.0,1,0,0,0,0,0,0,0,0,...,0.0000,93.520,93.408,99.756,100.000,0.00000,0.64,313.692,0.0,273.88500
2,1.0,1,0,0,0,0,0,0,0,0,...,0.0000,7.988,0.000,0.180,8.608,0.00000,0.00,199.473,0.0,265.99500
3,1.0,1,0,0,0,0,0,0,0,0,...,0.0000,0.028,81.336,94.888,96.324,0.00000,0.00,257.786,0.0,269.77600
4,1.0,1,0,0,0,0,0,0,0,0,...,0.0000,0.036,0.052,17.812,21.260,0.00000,0.00,224.714,0.0,268.84216
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,0.0008,0.000,0.000,9.684,10.212,0.00192,0.00,244.076,0.0,272.38600
726,1.0,0,0,0,0,0,0,0,0,0,...,0.0008,0.044,0.000,0.000,0.044,0.00128,0.00,234.135,0.0,271.60600
727,1.0,0,0,0,0,0,0,0,0,0,...,0.0008,0.036,0.000,0.000,0.036,0.00064,0.00,225.089,0.0,270.43000
728,1.0,0,0,0,0,0,0,0,0,0,...,0.0000,0.000,0.000,2.484,2.552,0.00064,0.00,228.749,0.0,270.67100


In [43]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.568
Model:                            OLS   Adj. R-squared:                  0.554
Method:                 Least Squares   F-statistic:                     42.19
Date:                Sat, 18 May 2024   Prob (F-statistic):          6.46e-113
Time:                        09:02:19   Log-Likelihood:                -1477.1
No. Observations:                 730   AIC:                             3000.
Df Residuals:                     707   BIC:                             3106.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [44]:
drop_lst_hour_8 = ["mean_dlwrf_surface"]

df_model_hour.drop(columns = drop_lst_hour_8, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [45]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_8 = lm.fit()
print(model_hour_8.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.807
Model:                            OLS   Adj. R-squared:                  0.801
Method:                 Least Squares   F-statistic:                     128.4
Date:                Sat, 18 May 2024   Prob (F-statistic):          3.30e-234
Time:                        09:02:19   Log-Likelihood:                -1182.6
No. Observations:                 730   AIC:                             2413.
Df Residuals:                     706   BIC:                             2523.
Df Model:                          23                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [46]:
accu(df_hour_train["production"], model_hour_8.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,5.542014,2.783724,0.502295,1.540498e-15,0.25584,1.222705,0.883286,0.15938,0.15938


In [47]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_8, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0.000,31.824,99.352,99.560,0.00000,0.00,0.00000,271.6940,0.00,2.163333
1,1.0,1,0,0,0,0,0,0,0,0,...,31.012,92.100,99.844,100.000,0.00000,0.08,0.00000,275.1400,0.00,2.163333
2,1.0,1,0,0,0,0,0,0,0,0,...,96.888,75.408,13.884,98.928,0.00000,0.24,0.00000,277.2280,3.28,2.163333
3,1.0,1,0,0,0,0,0,0,0,0,...,63.640,77.564,98.276,100.000,0.00000,0.04,0.00000,275.7610,2.14,2.010000
4,1.0,1,0,0,0,0,0,0,0,0,...,94.464,52.560,3.932,95.388,0.00000,0.12,0.00000,277.8640,1.07,1.700000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,77.392,54.816,99.840,99.976,86.47424,0.00,7.03680,282.5820,8.86,9.056667
133,1.0,0,0,0,0,0,0,0,0,0,...,72.352,3.632,0.000,73.240,70.20224,0.24,11.83872,281.5176,9.07,8.636667
134,1.0,0,0,0,0,0,0,0,0,0,...,0.184,66.436,96.776,97.780,64.85632,0.00,14.99968,285.4930,9.24,8.656667
135,1.0,0,0,0,0,0,0,0,0,0,...,36.572,37.576,0.748,45.368,52.68928,0.00,18.06400,288.7770,7.60,8.373333


In [48]:
accu(df_hour_test["production"], model_hour_8.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,5.260511,2.740557,0.520968,0.081064,0.241101,1.331003,1.037441,0.197213,0.197213


In [49]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_8, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,3.672,11.252,26.432,38.376,0.00000,0.00,0.00000,269.00200,0.00,2.833333
1,1.0,1,0,0,0,0,0,0,0,0,...,93.520,93.408,99.756,100.000,0.00000,0.64,0.00000,273.88500,0.00,2.833333
2,1.0,1,0,0,0,0,0,0,0,0,...,7.988,0.000,0.180,8.608,0.00000,0.00,0.00000,265.99500,3.40,2.833333
3,1.0,1,0,0,0,0,0,0,0,0,...,0.028,81.336,94.888,96.324,0.00000,0.00,0.00000,269.77600,0.00,1.983333
4,1.0,1,0,0,0,0,0,0,0,0,...,0.036,0.052,17.812,21.260,0.00000,0.00,0.00000,268.84216,5.10,3.400000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,77.392,54.816,99.840,99.976,86.47424,0.00,7.03680,282.58200,8.86,9.056667
863,1.0,0,0,0,0,0,0,0,0,0,...,72.352,3.632,0.000,73.240,70.20224,0.24,11.83872,281.51760,9.07,8.636667
864,1.0,0,0,0,0,0,0,0,0,0,...,0.184,66.436,96.776,97.780,64.85632,0.00,14.99968,285.49300,9.24,8.656667
865,1.0,0,0,0,0,0,0,0,0,0,...,36.572,37.576,0.748,45.368,52.68928,0.00,18.06400,288.77700,7.60,8.373333


In [50]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_8 = lm.fit()
print(model_hour_8.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.806
Model:                            OLS   Adj. R-squared:                  0.802
Method:                 Least Squares   F-statistic:                     207.3
Date:                Sat, 18 May 2024   Prob (F-statistic):          5.37e-288
Time:                        09:02:20   Log-Likelihood:                -1405.8
No. Observations:                 867   AIC:                             2848.
Df Residuals:                     849   BIC:                             2933.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [51]:
accu(df_hour["production"], model_hour_8.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,5.497532,2.778845,0.505471,-1.225392e-15,0.256224,1.224502,0.900243,0.163754,0.163754


# Hour 9

In [52]:
mask = df["hour"] == 9
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,9,6.980,8.100,8.980,9.240,9.040,7.080,7.620,8.140,...,273.110,273.110,273.210,265.310,273.710,273.610,273.210,272.710,273.410,6.80
1,2022-01-02,9,0.300,0.480,1.300,4.680,6.240,0.640,0.500,1.700,...,274.471,274.071,273.471,271.971,277.471,277.171,276.271,275.571,275.271,0.85
2,2022-01-03,9,8.100,9.000,9.860,10.100,10.000,8.480,8.880,9.480,...,271.806,271.606,271.106,259.606,273.106,273.206,272.406,272.406,271.706,9.35
3,2022-01-04,9,6.560,7.160,7.540,8.560,8.780,6.240,6.340,6.640,...,274.149,273.849,273.549,267.949,274.549,274.649,274.449,274.349,273.949,4.25
4,2022-01-05,9,6.600,7.440,8.660,9.300,9.420,6.780,6.960,7.440,...,273.805,272.505,273.305,262.905,274.205,273.905,273.405,273.005,273.305,8.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,9,47.136,18.720,95.984,92.704,108.928,17.920,35.616,25.600,...,285.635,285.035,287.835,284.235,289.035,289.135,287.835,288.035,289.835,8.88
863,2024-05-14,9,128.064,90.848,91.440,147.792,167.104,124.336,103.584,125.616,...,286.300,286.900,282.400,282.900,284.600,283.200,285.600,288.000,288.200,8.83
864,2024-05-15,9,125.344,114.000,112.688,110.480,98.736,144.688,132.000,121.232,...,291.900,291.300,291.000,287.500,292.700,292.400,291.200,290.400,291.300,9.82
865,2024-05-16,9,161.616,164.736,168.496,171.408,166.768,157.680,155.776,147.008,...,291.169,292.569,291.769,288.269,295.469,294.969,294.569,294.469,294.869,8.39


In [53]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,7.3688,4.120,10.880,35.088,45.856,8.96704,0.00,229.41600,2.41280,271.634
1,1.0,1,0,0,0,0,0,0,0,0,...,1.1968,93.872,94.488,99.656,100.000,13.37856,0.64,314.23072,0.53568,274.263
2,1.0,1,0,0,0,0,0,0,0,0,...,8.5648,7.800,0.000,0.180,8.404,7.75168,0.00,200.01300,3.31648,268.494
3,1.0,1,0,0,0,0,0,0,0,0,...,6.0200,0.096,83.096,94.868,96.416,10.65856,0.00,258.46700,2.20096,272.169
4,1.0,1,0,0,0,0,0,0,0,0,...,7.5632,0.064,5.624,25.648,29.064,8.83968,0.00,226.37300,2.67392,271.625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,8.3904,0.000,0.000,8.504,8.940,6.77504,0.00,243.93200,1.98400,275.397
726,1.0,0,0,0,0,0,0,0,0,0,...,8.4400,0.048,0.000,0.000,0.048,6.82560,0.00,233.82300,1.98336,274.644
727,1.0,0,0,0,0,0,0,0,0,0,...,8.7032,0.028,0.000,0.000,0.028,6.71808,0.00,224.97500,2.09280,274.008
728,1.0,0,0,0,0,0,0,0,0,0,...,8.4712,0.000,0.000,7.520,7.696,6.93888,0.00,228.88900,1.98336,274.233


In [54]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.544
Model:                            OLS   Adj. R-squared:                  0.530
Method:                 Least Squares   F-statistic:                     38.33
Date:                Sat, 18 May 2024   Prob (F-statistic):          6.43e-105
Time:                        09:02:20   Log-Likelihood:                -1526.4
No. Observations:                 730   AIC:                             3099.
Df Residuals:                     707   BIC:                             3204.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [55]:
drop_lst_hour_9 = ["trend", "mean_uswrf_surface", "mean_csnow_surface", "mean_tcdc_entire.atmosphere", "mean_tcdc_middle.cloud.layer"]

df_model_hour.drop(columns = drop_lst_hour_9, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [56]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_9 = lm.fit()
print(model_hour_9.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.765
Model:                            OLS   Adj. R-squared:                  0.759
Method:                 Least Squares   F-statistic:                     121.7
Date:                Sat, 18 May 2024   Prob (F-statistic):          1.99e-208
Time:                        09:02:20   Log-Likelihood:                -1284.2
No. Observations:                 730   AIC:                             2608.
Df Residuals:                     710   BIC:                             2700.
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [57]:
accu(df_hour_train["production"], model_hour_9.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,7.442575,2.89969,0.389608,1.449052e-15,0.201284,1.405249,0.969125,0.130214,0.130214


In [58]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_9, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_high.cloud.layer,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,0,0,6.97920,0.000,95.920,8.79232,246.553,274.534,0.00,4.390000
1,1.0,1,0,0,0,0,0,0,0,0,0,0,5.03680,33.112,98.808,10.22208,292.101,276.713,0.00,4.390000
2,1.0,1,0,0,0,0,0,0,0,0,0,0,1.63600,96.776,11.880,11.51552,326.081,277.622,8.07,4.390000
3,1.0,1,0,0,0,0,0,0,0,0,0,0,3.76832,67.056,98.272,11.23200,297.200,277.550,2.50,2.860000
4,1.0,1,0,0,0,0,0,0,0,0,0,0,2.82752,93.328,3.288,10.83456,325.841,278.557,2.60,4.006667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,0,0,71.58592,78.124,99.860,131.93088,310.799,285.027,9.85,9.526667
133,1.0,0,0,0,0,0,0,0,0,0,0,0,109.43296,70.192,0.000,103.81632,293.901,285.140,9.85,9.186667
134,1.0,0,0,0,0,0,0,0,0,0,0,0,133.17440,0.164,96.228,93.23584,272.017,290.060,8.88,9.176667
135,1.0,0,0,0,0,0,0,0,0,0,0,0,148.96896,35.800,0.640,73.73248,286.616,292.969,8.83,9.013333


In [59]:
accu(df_hour_test["production"], model_hour_9.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,6.862044,2.92424,0.426147,0.030159,0.261119,1.607669,1.30034,0.189497,0.189497


In [60]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_9, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,month_11,month_12,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_high.cloud.layer,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0,0,7.36880,4.120,35.088,8.96704,229.41600,271.634,0.00,5.666667
1,1.0,1,0,0,0,0,0,0,0,0,...,0,0,1.19680,93.872,99.656,13.37856,314.23072,274.263,0.00,5.666667
2,1.0,1,0,0,0,0,0,0,0,0,...,0,0,8.56480,7.800,0.180,7.75168,200.01300,268.494,6.80,5.666667
3,1.0,1,0,0,0,0,0,0,0,0,...,0,0,6.02000,0.096,94.868,10.65856,258.46700,272.169,0.85,4.816667
4,1.0,1,0,0,0,0,0,0,0,0,...,0,0,7.56320,0.064,25.648,8.83968,226.37300,271.625,9.35,7.366667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,0,0,71.58592,78.124,99.860,131.93088,310.79900,285.027,9.85,9.526667
863,1.0,0,0,0,0,0,0,0,0,0,...,0,0,109.43296,70.192,0.000,103.81632,293.90100,285.140,9.85,9.186667
864,1.0,0,0,0,0,0,0,0,0,0,...,0,0,133.17440,0.164,96.228,93.23584,272.01700,290.060,8.88,9.176667
865,1.0,0,0,0,0,0,0,0,0,0,...,0,0,148.96896,35.800,0.640,73.73248,286.61600,292.969,8.83,9.013333


In [61]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_9 = lm.fit()
print(model_hour_9.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.759
Model:                            OLS   Adj. R-squared:                  0.755
Method:                 Least Squares   F-statistic:                     206.4
Date:                Sat, 18 May 2024   Prob (F-statistic):          9.51e-253
Time:                        09:02:20   Log-Likelihood:                -1540.3
No. Observations:                 867   AIC:                             3109.
Df Residuals:                     853   BIC:                             3175.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [62]:
accu(df_hour["production"], model_hour_9.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,7.350842,2.911294,0.396049,8.935884e-15,0.213919,1.429908,1.015226,0.13811,0.13811


# Hour 10

In [63]:
mask = df["hour"] == 10
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,10,179.80,188.84,203.88,207.66,208.14,194.22,188.96,184.88,...,276.810,276.510,276.610,268.710,278.610,278.410,277.610,276.810,277.110,9.38
1,2022-01-02,10,6.12,9.52,26.50,92.08,132.98,13.32,11.04,35.52,...,274.453,274.253,273.453,272.453,277.353,276.753,275.753,275.353,274.653,0.85
2,2022-01-03,10,208.86,222.88,232.50,232.52,226.44,217.82,220.22,228.10,...,275.700,275.400,274.700,263.900,277.200,277.100,276.500,276.400,275.500,9.35
3,2022-01-04,10,157.42,166.62,172.68,205.58,202.62,146.36,138.02,149.20,...,276.035,276.035,276.335,269.635,276.735,276.135,276.035,276.335,276.335,5.95
4,2022-01-05,10,180.02,189.50,199.50,216.94,207.70,179.76,181.72,183.44,...,277.561,275.861,277.161,268.061,278.561,277.861,277.561,277.361,277.761,10.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,10,140.14,53.20,230.62,354.68,496.60,32.10,47.40,52.88,...,289.098,287.898,290.998,284.298,293.398,292.698,291.498,292.398,291.398,7.85
863,2024-05-14,10,653.84,613.52,748.20,761.96,727.78,463.24,718.44,750.98,...,290.100,291.000,289.000,284.700,293.000,291.800,289.800,289.100,290.900,7.44
864,2024-05-15,10,712.22,697.08,649.58,585.62,550.84,719.52,713.34,663.12,...,298.065,298.365,294.165,290.065,297.665,297.465,297.565,296.865,296.165,9.87
865,2024-05-16,10,711.58,713.28,693.16,604.94,681.14,713.02,697.68,598.70,...,294.981,294.681,294.881,290.081,299.181,299.081,297.881,297.181,297.081,2.82


In [64]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,180.1384,4.180,13.748,80.764,85.364,135.20448,0.00,237.291,59.18848,275.786
1,1.0,1,0,0,0,0,0,0,0,0,...,26.2784,97.128,98.128,94.464,100.000,232.89360,0.68,317.272,11.94816,274.453
2,1.0,1,0,0,0,0,0,0,0,0,...,213.5536,5.124,0.000,0.000,5.304,121.90784,0.00,206.091,79.80416,272.760
3,1.0,1,0,0,0,0,0,0,0,0,...,133.6632,1.012,88.456,94.208,97.120,183.65888,0.00,260.933,49.27808,274.891
4,1.0,1,0,0,0,0,0,0,0,0,...,183.7200,0.000,75.172,28.408,82.544,138.63680,0.00,241.395,64.67584,275.697
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,203.1864,0.000,0.000,0.508,0.528,92.88160,0.00,245.740,45.73504,280.380
726,1.0,0,0,0,0,0,0,0,0,0,...,205.5584,0.052,0.000,0.000,0.056,94.05920,0.00,234.508,46.02240,279.527
727,1.0,0,0,0,0,0,0,0,0,0,...,211.3432,0.000,0.000,0.000,0.000,93.14688,0.00,226.458,47.66528,279.447
728,1.0,0,0,0,0,0,0,0,0,0,...,203.9376,0.000,0.000,47.604,50.628,98.75392,0.00,232.224,45.13152,279.659


In [65]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.495
Model:                            OLS   Adj. R-squared:                  0.480
Method:                 Least Squares   F-statistic:                     31.54
Date:                Sat, 18 May 2024   Prob (F-statistic):           9.28e-90
Time:                        09:02:21   Log-Likelihood:                -1560.6
No. Observations:                 730   AIC:                             3167.
Df Residuals:                     707   BIC:                             3273.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [66]:
drop_lst_hour_10 = ["mean_uswrf_top_of_atmosphere", "mean_tcdc_high.cloud.layer"]

df_model_hour.drop(columns = drop_lst_hour_10, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [67]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_10 = lm.fit()
print(model_hour_10.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.748
Model:                            OLS   Adj. R-squared:                  0.740
Method:                 Least Squares   F-statistic:                     95.44
Date:                Sat, 18 May 2024   Prob (F-statistic):          1.18e-194
Time:                        09:02:21   Log-Likelihood:                -1306.9
No. Observations:                 730   AIC:                             2660.
Df Residuals:                     707   BIC:                             2766.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [68]:
accu(df_hour_train["production"], model_hour_10.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,7.993521,2.88852,0.361358,-3.417084e-16,0.210855,1.449753,0.958599,0.119922,0.119922


In [69]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_10, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,192.5128,0.000,38.408,54.140,0.00,236.851,41.68640,279.246,0.00,5.380000
1,1.0,1,0,0,0,0,0,0,0,0,...,113.5248,58.780,98.132,99.660,0.04,295.948,20.72704,279.606,0.00,5.380000
2,1.0,1,0,0,0,0,0,0,0,0,...,57.8976,92.180,29.684,92.560,0.08,318.508,11.32736,278.784,9.57,5.380000
3,1.0,1,0,0,0,0,0,0,0,0,...,105.3672,74.440,83.208,98.148,0.12,300.767,19.69216,279.747,3.32,2.630000
4,1.0,1,0,0,0,0,0,0,0,0,...,60.2288,88.200,61.504,89.276,0.16,319.103,11.07264,279.413,3.25,3.690000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,364.8936,78.032,73.688,99.372,0.04,314.369,68.04800,287.034,9.78,9.056667
133,1.0,0,0,0,0,0,0,0,0,0,...,597.8704,44.304,0.000,45.220,0.04,272.469,108.55360,289.020,9.54,8.276667
134,1.0,0,0,0,0,0,0,0,0,0,...,671.2184,0.112,28.724,74.384,0.00,271.943,122.53632,295.173,7.85,8.386667
135,1.0,0,0,0,0,0,0,0,0,0,...,663.9208,39.820,64.916,68.664,0.00,299.552,119.28896,295.977,7.44,6.710000


In [70]:
accu(df_hour_test["production"], model_hour_10.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,7.183869,2.95182,0.410896,0.012661,0.265975,1.747041,1.370515,0.190777,0.190777


In [71]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_10, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,180.1384,4.180,13.748,85.364,0.00,237.291,59.18848,275.786,0.00,6.526667
1,1.0,1,0,0,0,0,0,0,0,0,...,26.2784,97.128,98.128,100.000,0.68,317.272,11.94816,274.453,0.00,6.526667
2,1.0,1,0,0,0,0,0,0,0,0,...,213.5536,5.124,0.000,5.304,0.00,206.091,79.80416,272.760,9.38,6.526667
3,1.0,1,0,0,0,0,0,0,0,0,...,133.6632,1.012,88.456,97.120,0.00,260.933,49.27808,274.891,0.85,5.383333
4,1.0,1,0,0,0,0,0,0,0,0,...,183.7200,0.000,75.172,82.544,0.00,241.395,64.67584,275.697,9.35,8.433333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,364.8936,78.032,73.688,99.372,0.04,314.369,68.04800,287.034,9.78,9.056667
863,1.0,0,0,0,0,0,0,0,0,0,...,597.8704,44.304,0.000,45.220,0.04,272.469,108.55360,289.020,9.54,8.276667
864,1.0,0,0,0,0,0,0,0,0,0,...,671.2184,0.112,28.724,74.384,0.00,271.943,122.53632,295.173,7.85,8.386667
865,1.0,0,0,0,0,0,0,0,0,0,...,663.9208,39.820,64.916,68.664,0.00,299.552,119.28896,295.977,7.44,6.710000


In [72]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_10 = lm.fit()
print(model_hour_10.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.737
Model:                            OLS   Adj. R-squared:                  0.733
Method:                 Least Squares   F-statistic:                     149.2
Date:                Sat, 18 May 2024   Prob (F-statistic):          8.98e-234
Time:                        09:02:21   Log-Likelihood:                -1577.6
No. Observations:                 867   AIC:                             3189.
Df Residuals:                     850   BIC:                             3270.
Df Model:                          16                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [73]:
accu(df_hour["production"], model_hour_10.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,7.865582,2.91362,0.370426,2.137528e-15,0.220702,1.492874,1.015817,0.129147,0.129147


# Hour 11

In [74]:
mask = df["hour"] == 11
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,11,258.34,272.38,286.10,284.68,278.16,266.00,269.52,270.88,...,279.957,279.857,279.857,272.057,281.257,281.557,281.057,280.357,280.557,7.65
1,2022-01-02,11,8.02,13.24,35.58,122.60,179.32,26.62,16.24,47.50,...,274.382,274.282,273.482,272.582,278.082,277.182,276.082,275.282,274.582,0.85
2,2022-01-03,11,283.40,299.60,309.56,308.82,300.50,293.88,295.76,304.88,...,278.900,278.700,277.500,267.100,280.600,280.400,279.600,279.500,279.400,10.00
3,2022-01-04,11,206.24,219.34,226.32,251.20,243.92,208.86,193.28,194.40,...,278.351,278.451,277.951,271.051,279.951,279.251,279.051,279.551,279.351,5.95
4,2022-01-05,11,268.60,279.32,286.36,289.70,274.18,271.16,272.22,276.52,...,281.402,279.402,280.302,271.902,282.202,281.602,281.202,280.402,280.602,9.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,11,130.24,60.78,195.98,418.90,527.08,248.98,275.46,91.32,...,293.235,293.235,294.735,285.235,296.835,296.335,294.635,295.635,294.535,8.36
863,2024-05-14,11,752.20,721.54,827.60,838.12,802.26,551.54,790.90,828.32,...,293.300,294.000,294.100,289.900,295.700,295.300,293.200,290.400,292.400,5.84
864,2024-05-15,11,791.80,786.20,761.08,716.74,682.38,800.62,798.90,773.84,...,302.632,303.032,298.832,294.532,301.832,301.932,302.432,301.332,300.432,9.82
865,2024-05-16,11,785.62,784.16,772.66,717.98,769.32,775.38,780.30,709.04,...,297.777,298.477,297.977,293.477,301.777,301.477,300.277,296.577,296.077,4.96


In [75]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,254.7928,3.972,16.520,69.392,75.468,152.98944,0.00,237.387,82.53120,278.553
1,1.0,1,0,0,0,0,0,0,0,0,...,37.0608,97.924,97.400,74.404,99.948,286.67648,0.72,317.983,16.81472,274.646
2,1.0,1,0,0,0,0,0,0,0,0,...,288.0176,4.660,0.000,0.208,5.020,146.44160,0.00,207.648,104.98496,275.948
3,1.0,1,0,0,0,0,0,0,0,0,...,185.0968,1.520,90.348,96.248,98.496,220.17664,0.00,262.613,66.28992,276.915
4,1.0,1,0,0,0,0,0,0,0,0,...,263.0896,0.000,59.676,14.796,63.920,156.69248,0.00,239.801,91.10144,278.430
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,272.8248,0.000,0.000,0.256,0.264,106.64800,0.00,248.104,57.98784,284.109
726,1.0,0,0,0,0,0,0,0,0,0,...,276.2912,0.028,0.000,0.000,0.028,107.96992,0.00,236.368,58.36224,283.008
727,1.0,0,0,0,0,0,0,0,0,0,...,283.3672,0.000,0.000,0.000,0.000,107.07840,0.00,228.543,60.14016,283.109
728,1.0,0,0,0,0,0,0,0,0,0,...,270.7192,0.000,0.000,53.792,56.228,117.06432,0.00,235.244,56.43136,283.154


In [76]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.489
Model:                            OLS   Adj. R-squared:                  0.473
Method:                 Least Squares   F-statistic:                     30.73
Date:                Sat, 18 May 2024   Prob (F-statistic):           7.56e-88
Time:                        09:02:21   Log-Likelihood:                -1548.9
No. Observations:                 730   AIC:                             3144.
Df Residuals:                     707   BIC:                             3249.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [77]:
drop_lst_hour_11 = ["mean_uswrf_top_of_atmosphere", "mean_tcdc_high.cloud.layer", "trend"]

df_model_hour.drop(columns = drop_lst_hour_11, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [78]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_11 = lm.fit()
print(model_hour_11.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.744
Model:                            OLS   Adj. R-squared:                  0.737
Method:                 Least Squares   F-statistic:                     98.09
Date:                Sat, 18 May 2024   Prob (F-statistic):          2.53e-193
Time:                        09:02:21   Log-Likelihood:                -1296.2
No. Observations:                 730   AIC:                             2636.
Df Residuals:                     708   BIC:                             2737.
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [79]:
accu(df_hour_train["production"], model_hour_11.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,8.056479,2.82455,0.350594,4.418972e-15,0.197261,1.428561,0.95498,0.118536,0.118536


In [80]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_11, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,262.6432,0.000,37.184,59.432,0.00,238.687,54.09728,282.883,0.00,4.923333
1,1.0,1,0,0,0,0,0,0,0,0,...,166.3000,56.996,92.164,97.664,0.04,294.293,30.77824,282.358,0.00,4.923333
2,1.0,1,0,0,0,0,0,0,0,0,...,103.8216,85.792,15.452,86.420,0.04,313.064,20.04416,280.733,9.71,4.923333
3,1.0,1,0,0,0,0,0,0,0,0,...,158.7032,75.452,80.548,94.964,0.08,298.165,29.90336,282.323,2.35,3.506667
4,1.0,1,0,0,0,0,0,0,0,0,...,124.5496,78.776,50.704,81.624,0.16,308.648,23.61280,281.343,2.71,4.220000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,444.5496,74.244,61.020,96.460,0.04,312.076,80.06912,289.819,9.64,7.920000
133,1.0,0,0,0,0,0,0,0,0,0,...,694.0552,42.072,0.000,42.936,0.04,270.755,121.44256,292.428,5.76,6.653333
134,1.0,0,0,0,0,0,0,0,0,0,...,770.0160,0.140,23.212,56.932,0.00,270.559,133.23072,299.452,8.36,8.006667
135,1.0,0,0,0,0,0,0,0,0,0,...,729.2104,39.380,51.952,59.240,0.00,301.285,126.31936,298.061,5.84,6.873333


In [81]:
accu(df_hour_test["production"], model_hour_11.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,6.993504,2.98041,0.426168,0.006129,0.278713,1.779788,1.407282,0.201227,0.201227


In [82]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_11, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,254.7928,3.972,16.520,75.468,0.00,237.387,82.53120,278.553,0.00,6.166667
1,1.0,1,0,0,0,0,0,0,0,0,...,37.0608,97.924,97.400,99.948,0.72,317.983,16.81472,274.646,0.00,6.166667
2,1.0,1,0,0,0,0,0,0,0,0,...,288.0176,4.660,0.000,5.020,0.00,207.648,104.98496,275.948,7.65,6.166667
3,1.0,1,0,0,0,0,0,0,0,0,...,185.0968,1.520,90.348,98.496,0.00,262.613,66.28992,276.915,0.85,5.600000
4,1.0,1,0,0,0,0,0,0,0,0,...,263.0896,0.000,59.676,63.920,0.00,239.801,91.10144,278.430,10.00,8.433333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,444.5496,74.244,61.020,96.460,0.04,312.076,80.06912,289.819,9.64,7.920000
863,1.0,0,0,0,0,0,0,0,0,0,...,694.0552,42.072,0.000,42.936,0.04,270.755,121.44256,292.428,5.76,6.653333
864,1.0,0,0,0,0,0,0,0,0,0,...,770.0160,0.140,23.212,56.932,0.00,270.559,133.23072,299.452,8.36,8.006667
865,1.0,0,0,0,0,0,0,0,0,0,...,729.2104,39.380,51.952,59.240,0.00,301.285,126.31936,298.061,5.84,6.873333


In [83]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_11 = lm.fit()
print(model_hour_11.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.735
Model:                            OLS   Adj. R-squared:                  0.731
Method:                 Least Squares   F-statistic:                     157.7
Date:                Sat, 18 May 2024   Prob (F-statistic):          1.89e-233
Time:                        09:02:22   Log-Likelihood:                -1569.7
No. Observations:                 867   AIC:                             3171.
Df Residuals:                     851   BIC:                             3248.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [84]:
accu(df_hour["production"], model_hour_11.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,7.888512,2.876001,0.364581,-1.079189e-14,0.209619,1.479355,1.014181,0.128564,0.128564


# Hour 12

In [85]:
mask = df["hour"] == 12
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,12,320.12,334.50,348.38,346.44,329.14,324.08,326.20,332.38,...,281.663,281.863,281.763,273.363,283.463,283.663,283.163,282.463,282.863,6.80
1,2022-01-02,12,11.14,17.36,47.56,165.40,221.08,40.88,20.82,49.80,...,274.700,274.500,273.600,272.300,277.800,277.500,276.700,275.900,275.000,1.70
2,2022-01-03,12,343.34,360.82,370.56,368.90,358.84,354.58,355.72,365.70,...,281.065,280.965,279.465,269.065,282.865,282.565,281.665,281.665,281.965,10.00
3,2022-01-04,12,266.72,280.58,286.60,314.08,300.26,262.86,243.88,254.22,...,280.049,280.349,279.949,271.449,281.449,281.049,281.049,281.549,280.749,4.25
4,2022-01-05,12,333.18,345.02,352.86,353.74,335.78,337.44,337.72,343.76,...,283.802,281.802,282.902,273.002,284.402,284.402,284.202,283.402,283.502,10.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,12,116.88,106.76,190.18,452.64,595.46,330.14,329.52,142.14,...,295.964,296.064,296.164,284.264,299.164,299.064,297.464,297.264,296.164,5.34
863,2024-05-14,12,830.16,812.90,889.14,895.36,857.98,643.22,852.84,887.68,...,295.400,295.900,296.100,292.900,297.200,297.400,295.500,293.900,292.900,7.78
864,2024-05-15,12,850.62,849.48,836.14,805.64,765.04,859.58,859.04,843.66,...,301.500,303.400,300.500,296.200,305.300,304.600,305.300,303.400,302.100,7.92
865,2024-05-16,12,811.86,798.04,794.74,731.52,828.36,818.88,780.04,722.64,...,298.789,293.189,291.289,293.989,300.189,302.889,298.789,292.789,292.789,5.52


In [86]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,312.5520,4.408,23.468,60.516,70.368,167.55392,0.00,238.387,99.73056,280.215
1,1.0,1,0,0,0,0,0,0,0,0,...,46.2992,97.724,91.184,64.628,99.324,327.99040,0.68,318.225,20.73024,274.856
2,1.0,1,0,0,0,0,0,0,0,0,...,347.1080,3.376,0.000,0.580,4.120,164.44032,0.00,209.456,124.15680,277.865
3,1.0,1,0,0,0,0,0,0,0,0,...,237.7824,1.784,84.868,95.488,97.768,241.87584,0.00,260.233,84.66624,278.249
4,1.0,1,0,0,0,0,0,0,0,0,...,328.5280,0.056,41.976,9.872,45.068,168.79424,0.00,238.397,111.75552,280.314
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,328.0696,0.000,0.000,0.168,0.176,115.91552,0.00,249.991,66.78720,286.703
726,1.0,0,0,0,0,0,0,0,0,0,...,332.4368,0.076,0.000,0.000,0.080,117.30368,0.00,237.948,67.22816,285.316
727,1.0,0,0,0,0,0,0,0,0,0,...,340.5184,0.000,0.000,0.000,0.000,116.47808,0.00,230.212,69.07904,285.467
728,1.0,0,0,0,0,0,0,0,0,0,...,324.4408,0.000,0.000,55.968,58.012,129.11360,0.00,237.471,64.80704,285.231


In [87]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.511
Model:                            OLS   Adj. R-squared:                  0.496
Method:                 Least Squares   F-statistic:                     33.61
Date:                Sat, 18 May 2024   Prob (F-statistic):           1.59e-94
Time:                        09:02:22   Log-Likelihood:                -1517.0
No. Observations:                 730   AIC:                             3080.
Df Residuals:                     707   BIC:                             3186.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [88]:
drop_lst_hour_12 = ["mean_uswrf_top_of_atmosphere", "mean_tcdc_high.cloud.layer", "trend"]

df_model_hour.drop(columns = drop_lst_hour_12, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [89]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_12 = lm.fit()
print(model_hour_12.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.744
Model:                            OLS   Adj. R-squared:                  0.737
Method:                 Least Squares   F-statistic:                     98.12
Date:                Sat, 18 May 2024   Prob (F-statistic):          2.33e-193
Time:                        09:02:22   Log-Likelihood:                -1280.5
No. Observations:                 730   AIC:                             2605.
Df Residuals:                     708   BIC:                             2706.
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [90]:
accu(df_hour_train["production"], model_hour_12.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,7.975726,2.764879,0.346662,4.718316e-15,0.190842,1.398219,0.958882,0.120225,0.120225


In [91]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_12, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,317.4072,0.000,32.744,63.996,0.00,240.498,62.90304,285.19244,0.00,6.066667
1,1.0,1,0,0,0,0,0,0,0,0,...,214.1000,57.636,88.376,95.008,0.04,292.779,39.47328,284.49000,0.00,6.066667
2,1.0,1,0,0,0,0,0,0,0,0,...,148.9216,81.988,10.712,83.124,0.04,308.892,28.65344,282.64700,8.60,6.066667
3,1.0,1,0,0,0,0,0,0,0,0,...,206.7184,69.164,77.060,94.688,0.04,295.608,38.53696,284.43500,4.82,4.610000
4,1.0,1,0,0,0,0,0,0,0,0,...,176.7088,76.236,47.092,79.336,0.12,304.345,32.85184,282.40000,4.78,5.636667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,498.2280,71.296,47.688,91.608,0.00,312.908,87.48800,291.76000,8.29,5.293333
133,1.0,0,0,0,0,0,0,0,0,0,...,778.6504,36.304,0.000,37.208,0.04,266.665,131.42848,295.26800,2.25,5.123333
134,1.0,0,0,0,0,0,0,0,0,0,...,837.9216,0.480,17.300,42.900,0.00,271.397,139.62432,301.11600,5.34,7.013333
135,1.0,0,0,0,0,0,0,0,0,0,...,718.0504,48.948,48.868,63.392,0.00,310.045,122.92544,296.28500,7.78,7.073333


In [92]:
accu(df_hour_test["production"], model_hour_12.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,6.782482,2.966158,0.437326,0.013006,0.256916,1.765605,1.420194,0.209391,0.209391


In [93]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_12, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,312.5520,4.408,23.468,70.368,0.00,238.387,99.73056,280.215,0.00,6.166667
1,1.0,1,0,0,0,0,0,0,0,0,...,46.2992,97.724,91.184,99.324,0.68,318.225,20.73024,274.856,0.00,6.166667
2,1.0,1,0,0,0,0,0,0,0,0,...,347.1080,3.376,0.000,4.120,0.00,209.456,124.15680,277.865,6.80,6.166667
3,1.0,1,0,0,0,0,0,0,0,0,...,237.7824,1.784,84.868,97.768,0.00,260.233,84.66624,278.249,1.70,5.316667
4,1.0,1,0,0,0,0,0,0,0,0,...,328.5280,0.056,41.976,45.068,0.00,238.397,111.75552,280.314,10.00,8.083333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,498.2280,71.296,47.688,91.608,0.00,312.908,87.48800,291.760,8.29,5.293333
863,1.0,0,0,0,0,0,0,0,0,0,...,778.6504,36.304,0.000,37.208,0.04,266.665,131.42848,295.268,2.25,5.123333
864,1.0,0,0,0,0,0,0,0,0,0,...,837.9216,0.480,17.300,42.900,0.00,271.397,139.62432,301.116,5.34,7.013333
865,1.0,0,0,0,0,0,0,0,0,0,...,718.0504,48.948,48.868,63.392,0.00,310.045,122.92544,296.285,7.78,7.073333


In [94]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_12 = lm.fit()
print(model_hour_12.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.736
Model:                            OLS   Adj. R-squared:                  0.732
Method:                 Least Squares   F-statistic:                     158.3
Date:                Sat, 18 May 2024   Prob (F-statistic):          5.71e-234
Time:                        09:02:22   Log-Likelihood:                -1554.9
No. Observations:                 867   AIC:                             3142.
Df Residuals:                     851   BIC:                             3218.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [95]:
accu(df_hour["production"], model_hour_12.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,7.787174,2.831302,0.363585,1.134383e-14,0.2003,1.454301,1.021944,0.131234,0.131234


# Hour 13

In [96]:
mask = df["hour"] == 13
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,13,362.58,376.98,388.18,382.76,352.94,357.86,362.52,373.12,...,281.777,282.477,282.277,273.877,283.877,283.577,282.877,282.577,283.477,5.10
1,2022-01-02,13,13.70,22.20,60.94,201.08,233.40,56.30,29.72,57.48,...,274.376,274.276,273.776,273.076,283.776,282.477,278.576,276.676,275.676,1.70
2,2022-01-03,13,385.24,403.26,412.46,409.94,398.60,396.74,397.20,407.60,...,281.894,281.794,280.394,269.994,283.594,283.194,282.394,282.394,282.994,9.35
3,2022-01-04,13,297.84,313.38,317.94,340.50,317.30,288.08,266.86,277.38,...,280.365,280.665,280.165,272.165,282.265,281.265,281.465,281.765,281.865,5.95
4,2022-01-05,13,377.56,389.94,397.92,396.42,369.88,382.96,382.54,389.48,...,284.692,282.792,283.492,273.492,286.292,285.192,284.892,284.392,284.492,9.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,13,151.30,232.94,299.18,473.34,634.50,345.88,334.68,184.10,...,297.329,296.329,293.329,282.729,299.829,300.429,298.929,298.229,299.429,4.04
863,2024-05-14,13,880.10,869.28,929.82,932.14,892.46,737.64,897.76,926.94,...,296.715,297.115,297.015,293.915,298.715,298.115,297.815,295.515,293.815,9.64
864,2024-05-15,13,889.96,890.62,881.08,840.96,824.28,898.40,898.26,882.50,...,301.857,302.957,301.257,296.557,302.957,302.757,302.857,302.057,301.457,7.18
865,2024-05-16,13,715.82,770.14,787.76,760.16,865.64,854.28,761.52,681.96,...,297.116,298.616,291.616,293.816,292.416,293.416,295.016,293.716,292.716,8.89


In [97]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,347.4144,5.348,32.996,54.852,70.452,180.69312,0.00,240.887,109.43168,280.60900
1,1.0,1,0,0,0,0,0,0,0,0,...,61.1096,97.172,84.420,49.072,98.604,350.97280,0.68,317.684,25.65248,275.47208
2,1.0,1,0,0,0,0,0,0,0,0,...,387.9000,2.640,0.000,1.060,3.932,176.32704,0.00,211.134,137.00032,278.54200
3,1.0,1,0,0,0,0,0,0,0,0,...,265.5128,2.408,79.360,96.452,98.180,259.67616,0.00,261.613,92.86400,278.51300
4,1.0,1,0,0,0,0,0,0,0,0,...,372.8912,0.120,31.628,7.352,34.112,177.49568,0.00,238.210,124.95744,281.06400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,365.8720,0.000,0.000,0.124,0.132,121.74528,0.00,251.637,72.45568,287.99700
726,1.0,0,0,0,0,0,0,0,0,0,...,370.8936,0.108,0.000,0.004,0.116,123.17888,0.00,239.364,72.94784,286.36200
727,1.0,0,0,0,0,0,0,0,0,0,...,379.7568,0.000,0.000,0.000,0.000,122.41088,0.00,231.713,74.85568,286.58900
728,1.0,0,0,0,0,0,0,0,0,0,...,363.7856,0.000,0.000,52.904,54.700,134.63872,0.00,238.878,70.78016,286.53200


In [98]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.541
Model:                            OLS   Adj. R-squared:                  0.527
Method:                 Least Squares   F-statistic:                     37.95
Date:                Sat, 18 May 2024   Prob (F-statistic):          4.31e-104
Time:                        09:02:23   Log-Likelihood:                -1550.5
No. Observations:                 730   AIC:                             3147.
Df Residuals:                     707   BIC:                             3253.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [99]:
drop_lst_hour_13 = ["mean_csnow_surface", "mean_tcdc_high.cloud.layer", "trend"]

df_model_hour.drop(columns = drop_lst_hour_13, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [100]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_13 = lm.fit()
print(model_hour_13.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.756
Model:                            OLS   Adj. R-squared:                  0.749
Method:                 Least Squares   F-statistic:                     104.3
Date:                Sat, 18 May 2024   Prob (F-statistic):          2.25e-200
Time:                        09:02:23   Log-Likelihood:                -1320.6
No. Observations:                 730   AIC:                             2685.
Df Residuals:                     708   BIC:                             2786.
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [101]:
accu(df_hour_train["production"], model_hour_13.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,7.533603,2.988937,0.396747,2.969676e-15,0.211843,1.477128,1.031958,0.136981,0.136981


In [102]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_13, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,356.2600,0.044,27.196,65.884,137.88672,241.754,68.80192,286.448,0.00,4.380000
1,1.0,1,0,0,0,0,0,0,0,0,...,249.8152,58.904,83.784,92.460,206.53696,292.132,45.73888,285.706,0.00,4.380000
2,1.0,1,0,0,0,0,0,0,0,0,...,186.7640,77.444,8.008,82.520,256.78080,306.095,35.76960,284.019,7.16,4.380000
3,1.0,1,0,0,0,0,0,0,0,0,...,239.9456,68.600,79.464,95.388,222.77056,295.485,44.33728,285.505,0.80,2.746667
4,1.0,1,0,0,0,0,0,0,0,0,...,211.6016,74.684,49.580,77.752,237.65952,301.156,39.22880,282.753,5.18,5.733333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,536.9904,71.124,40.848,88.564,431.33888,314.322,93.25120,292.685,6.97,4.893333
133,1.0,0,0,0,0,0,0,0,0,0,...,835.1480,31.292,0.072,32.200,240.00640,264.266,137.60640,296.527,3.67,5.783333
134,1.0,0,0,0,0,0,0,0,0,0,...,877.8248,0.864,17.044,38.024,199.39392,273.757,143.27296,301.077,4.04,6.953333
135,1.0,0,0,0,0,0,0,0,0,0,...,699.7336,56.600,53.236,68.972,317.77728,316.368,119.09248,295.148,9.64,8.570000


In [103]:
accu(df_hour_test["production"], model_hour_13.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,6.533504,2.977072,0.455662,0.030648,0.256639,1.777177,1.405585,0.215135,0.215135


In [104]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_13, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,347.4144,5.348,32.996,70.452,180.69312,240.887,109.43168,280.60900,0.00,5.383333
1,1.0,1,0,0,0,0,0,0,0,0,...,61.1096,97.172,84.420,98.604,350.97280,317.684,25.65248,275.47208,0.00,5.383333
2,1.0,1,0,0,0,0,0,0,0,0,...,387.9000,2.640,0.000,3.932,176.32704,211.134,137.00032,278.54200,5.10,5.383333
3,1.0,1,0,0,0,0,0,0,0,0,...,265.5128,2.408,79.360,98.180,259.67616,261.613,92.86400,278.51300,1.70,5.666667
4,1.0,1,0,0,0,0,0,0,0,0,...,372.8912,0.120,31.628,34.112,177.49568,238.210,124.95744,281.06400,9.35,8.216667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,536.9904,71.124,40.848,88.564,431.33888,314.322,93.25120,292.68500,6.97,4.893333
863,1.0,0,0,0,0,0,0,0,0,0,...,835.1480,31.292,0.072,32.200,240.00640,264.266,137.60640,296.52700,3.67,5.783333
864,1.0,0,0,0,0,0,0,0,0,0,...,877.8248,0.864,17.044,38.024,199.39392,273.757,143.27296,301.07700,4.04,6.953333
865,1.0,0,0,0,0,0,0,0,0,0,...,699.7336,56.600,53.236,68.972,317.77728,316.368,119.09248,295.14800,9.64,8.570000


In [105]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_13 = lm.fit()
print(model_hour_13.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.748
Model:                            OLS   Adj. R-squared:                  0.743
Method:                 Least Squares   F-statistic:                     168.0
Date:                Sat, 18 May 2024   Prob (F-statistic):          4.57e-242
Time:                        09:02:23   Log-Likelihood:                -1588.7
No. Observations:                 867   AIC:                             3209.
Df Residuals:                     851   BIC:                             3286.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [106]:
accu(df_hour["production"], model_hour_13.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,7.375571,3.009257,0.408003,-5.835797e-15,0.217205,1.512038,1.080752,0.146531,0.146531


# Hour 14

In [107]:
mask = df["hour"] == 14
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,14,384.92,398.06,406.24,396.56,360.98,377.78,381.08,394.48,...,280.689,281.689,281.389,273.889,284.289,283.289,282.589,282.089,282.289,5.10
1,2022-01-02,14,22.60,23.98,74.94,225.64,242.04,62.04,35.24,62.80,...,277.040,276.440,274.140,272.540,284.240,283.240,282.240,281.140,276.940,5.10
2,2022-01-03,14,407.48,425.24,433.90,430.22,418.36,418.68,418.58,429.08,...,281.297,281.197,279.697,269.997,282.797,282.697,281.897,281.797,281.597,7.65
3,2022-01-04,14,315.84,331.58,334.90,347.60,311.10,310.42,290.82,301.86,...,279.297,279.497,279.397,271.597,282.597,281.297,280.597,281.097,280.497,5.95
4,2022-01-05,14,401.52,413.92,420.80,416.76,387.16,407.52,406.54,413.84,...,283.992,282.392,282.592,273.492,286.792,285.492,284.892,283.992,284.092,8.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,14,232.26,294.08,404.22,504.78,624.12,373.12,336.76,210.76,...,292.035,292.735,289.535,281.935,293.735,292.735,294.735,293.535,291.535,5.89
863,2024-05-14,14,905.98,898.84,949.34,946.74,888.80,791.02,919.94,945.18,...,297.028,297.528,297.028,293.628,299.128,298.628,297.828,297.528,295.828,9.70
864,2024-05-15,14,909.18,910.78,899.72,851.36,848.52,916.26,917.20,894.46,...,301.441,302.341,301.042,296.441,302.642,301.841,301.742,301.642,300.841,6.00
865,2024-05-16,14,719.80,695.54,760.30,755.46,882.58,848.32,744.32,624.56,...,294.753,294.453,288.053,292.753,299.853,296.553,291.053,291.153,293.353,9.50


In [108]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,364.6544,6.772,39.460,45.880,71.136,187.03872,0.00,243.01500,113.51040,280.277
1,1.0,1,0,0,0,0,0,0,0,0,...,82.7064,96.184,80.056,39.252,97.536,354.57664,0.68,315.90968,31.95392,276.244
2,1.0,1,0,0,0,0,0,0,0,0,...,408.8584,2.176,0.000,1.492,3.912,182.31936,0.00,212.65400,143.39584,278.193
3,1.0,1,0,0,0,0,0,0,0,0,...,281.1584,2.960,72.272,97.164,98.544,268.75008,0.00,261.97200,97.83936,278.225
4,1.0,1,0,0,0,0,0,0,0,0,...,396.4072,0.176,25.312,5.920,27.360,181.57504,0.00,238.42800,131.42336,280.896
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,384.7200,0.000,0.000,0.104,0.108,124.66112,0.00,252.98700,75.21984,287.814
726,1.0,0,0,0,0,0,0,0,0,0,...,390.2008,0.148,0.000,0.000,0.156,126.14464,0.00,240.53600,75.77664,285.995
727,1.0,0,0,0,0,0,0,0,0,0,...,399.5576,0.000,0.000,0.000,0.000,125.40608,0.00,233.01900,77.71904,286.442
728,1.0,0,0,0,0,0,0,0,0,0,...,386.0296,0.000,0.000,45.124,46.780,135.33888,0.00,239.74900,74.25088,286.557


In [109]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.546
Model:                            OLS   Adj. R-squared:                  0.531
Method:                 Least Squares   F-statistic:                     38.58
Date:                Sat, 18 May 2024   Prob (F-statistic):          1.90e-105
Time:                        09:02:23   Log-Likelihood:                -1540.8
No. Observations:                 730   AIC:                             3128.
Df Residuals:                     707   BIC:                             3233.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [110]:
drop_lst_hour_14 = ["mean_csnow_surface", "mean_tcdc_low.cloud.layer", "mean_dswrf_surface"]

df_model_hour.drop(columns = drop_lst_hour_14, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [111]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_14 = lm.fit()
print(model_hour_14.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.775
Model:                            OLS   Adj. R-squared:                  0.768
Method:                 Least Squares   F-statistic:                     116.1
Date:                Sat, 18 May 2024   Prob (F-statistic):          7.21e-213
Time:                        09:02:24   Log-Likelihood:                -1284.3
No. Observations:                 730   AIC:                             2613.
Df Residuals:                     708   BIC:                             2714.
Df Model:                          21                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [112]:
accu(df_hour_train["production"], model_hour_14.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,6.498466,2.962823,0.455927,4.976471e-16,0.225527,1.405445,1.018552,0.156737,0.156737


In [113]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_14, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,trend,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0,24.944,55.636,69.048,147.63392,244.294,70.20864,285.75600,0.00,2.166667
1,1.0,1,0,0,0,0,0,0,0,0,...,1,76.024,27.368,88.932,209.94496,291.726,49.12512,285.52900,0.00,2.166667
2,1.0,1,0,0,0,0,0,0,0,0,...,2,7.880,30.868,86.016,263.60576,303.875,39.67296,283.80800,2.54,2.166667
3,1.0,1,0,0,0,0,0,0,0,0,...,3,75.924,72.880,94.728,225.43424,295.157,48.04800,285.50900,0.75,1.736667
4,1.0,1,0,0,0,0,0,0,0,0,...,4,46.980,0.000,72.616,232.41792,295.184,45.77984,283.53300,3.21,4.180000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,132,38.216,43.892,89.568,452.88128,318.677,92.70976,290.90300,6.13,4.643333
133,1.0,0,0,0,0,0,0,0,0,0,...,133,0.704,0.000,28.880,231.80416,263.268,141.16736,296.81200,1.91,5.833333
134,1.0,0,0,0,0,0,0,0,0,0,...,134,18.136,25.016,36.932,201.66912,276.612,144.44480,300.16556,5.89,7.196667
135,1.0,0,0,0,0,0,0,0,0,0,...,135,57.632,0.452,71.760,345.98464,320.206,115.85472,294.62500,9.70,8.400000


In [114]:
accu(df_hour_test["production"], model_hour_14.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,5.629854,2.967516,0.527104,0.046422,0.259905,1.610889,1.282401,0.227786,0.227786


In [115]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_14, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,trend,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0,39.460,45.880,71.136,187.03872,243.01500,113.51040,280.27700,0.00,5.950000
1,1.0,1,0,0,0,0,0,0,0,0,...,1,80.056,39.252,97.536,354.57664,315.90968,31.95392,276.24400,0.00,5.950000
2,1.0,1,0,0,0,0,0,0,0,0,...,2,0.000,1.492,3.912,182.31936,212.65400,143.39584,278.19300,5.10,5.950000
3,1.0,1,0,0,0,0,0,0,0,0,...,3,72.272,97.164,98.544,268.75008,261.97200,97.83936,278.22500,5.10,6.233333
4,1.0,1,0,0,0,0,0,0,0,0,...,4,25.312,5.920,27.360,181.57504,238.42800,131.42336,280.89600,7.65,7.366667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,862,38.216,43.892,89.568,452.88128,318.67700,92.70976,290.90300,6.13,4.643333
863,1.0,0,0,0,0,0,0,0,0,0,...,863,0.704,0.000,28.880,231.80416,263.26800,141.16736,296.81200,1.91,5.833333
864,1.0,0,0,0,0,0,0,0,0,0,...,864,18.136,25.016,36.932,201.66912,276.61200,144.44480,300.16556,5.89,7.196667
865,1.0,0,0,0,0,0,0,0,0,0,...,865,57.632,0.452,71.760,345.98464,320.20600,115.85472,294.62500,9.70,8.400000


In [116]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_14 = lm.fit()
print(model_hour_14.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.770
Model:                            OLS   Adj. R-squared:                  0.766
Method:                 Least Squares   F-statistic:                     189.6
Date:                Sat, 18 May 2024   Prob (F-statistic):          5.56e-259
Time:                        09:02:24   Log-Likelihood:                -1540.5
No. Observations:                 867   AIC:                             3113.
Df Residuals:                     851   BIC:                             3189.
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [117]:
accu(df_hour["production"], model_hour_14.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,6.361211,2.980453,0.468536,1.19745e-14,0.23079,1.430251,1.057874,0.166301,0.166301


# Hour 15

In [118]:
mask = df["hour"] == 15
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,15,386.800,399.720,406.180,394.520,352.300,376.020,381.540,395.400,...,279.457,279.957,279.557,273.157,283.057,281.857,281.457,281.057,280.757,1.70
1,2022-01-02,15,28.500,25.080,81.360,238.120,248.880,73.720,46.720,67.340,...,276.435,276.535,274.435,271.335,281.435,281.935,280.835,280.735,278.935,0.85
2,2022-01-03,15,409.900,426.820,434.640,428.760,418.040,420.320,419.720,429.880,...,279.497,279.297,277.597,268.997,281.297,280.797,280.197,280.197,279.897,4.25
3,2022-01-04,15,318.720,335.560,338.260,344.760,295.420,319.640,298.440,307.320,...,278.190,278.090,279.390,270.490,281.990,280.990,280.390,280.790,280.190,3.40
4,2022-01-05,15,405.260,417.160,423.300,417.200,386.400,411.420,409.980,417.160,...,283.010,280.410,281.110,271.310,285.810,284.510,283.710,283.010,282.810,3.40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,15,329.600,332.224,451.760,503.664,581.984,419.632,332.240,219.632,...,286.355,286.855,289.855,282.255,290.555,288.855,288.555,289.355,288.855,5.32
863,2024-05-14,15,909.504,904.720,948.160,937.104,857.984,812.768,920.592,942.896,...,295.797,296.297,295.897,292.297,297.997,297.797,296.597,296.697,295.797,7.82
864,2024-05-15,15,908.320,898.416,878.288,829.712,797.888,915.024,915.504,890.944,...,299.012,300.812,299.312,295.112,301.712,300.512,299.812,300.312,299.912,3.11
865,2024-05-16,15,692.208,688.944,720.864,702.000,871.152,794.112,681.008,569.952,...,290.900,291.100,296.100,285.300,299.100,294.200,295.900,297.600,293.100,5.32


In [119]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,362.2984,9.116,43.452,39.864,71.612,188.88640,0.00,245.296,112.10304,279.1650
1,1.0,1,0,0,0,0,0,0,0,0,...,100.8448,94.664,72.196,32.720,95.932,345.70880,0.72,312.749,37.98080,276.0630
2,1.0,1,0,0,0,0,0,0,0,0,...,409.8368,1.820,0.040,1.872,4.024,182.58880,0.00,214.044,143.45920,276.9770
3,1.0,1,0,0,0,0,0,0,0,0,...,285.3384,3.620,73.972,97.632,98.784,267.23968,0.00,261.319,98.58176,277.6980
4,1.0,1,0,0,0,0,0,0,0,0,...,399.4552,0.200,21.100,4.916,22.856,180.76992,0.00,238.703,131.69856,279.9060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,384.4616,0.004,0.000,0.084,0.088,124.80448,0.00,253.972,75.17696,286.2540
726,1.0,0,0,0,0,0,0,0,0,0,...,390.1504,0.192,0.000,0.000,0.196,126.32128,0.00,241.368,75.79392,284.4600
727,1.0,0,0,0,0,0,0,0,0,0,...,399.7608,0.000,0.000,0.000,0.000,125.60192,0.00,234.035,77.77856,284.8610
728,1.0,0,0,0,0,0,0,0,0,0,...,388.1504,0.000,0.000,39.236,40.772,133.88992,0.00,240.463,74.76928,285.0416


In [120]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.663
Model:                            OLS   Adj. R-squared:                  0.653
Method:                 Least Squares   F-statistic:                     63.35
Date:                Sat, 18 May 2024   Prob (F-statistic):          1.08e-150
Time:                        09:02:24   Log-Likelihood:                -1409.6
No. Observations:                 730   AIC:                             2865.
Df Residuals:                     707   BIC:                             2971.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [121]:
drop_lst_hour_15 = ["mean_csnow_surface", "mean_tcdc_high.cloud.layer"]

df_model_hour.drop(columns = drop_lst_hour_15, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [122]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_15 = lm.fit()
print(model_hour_15.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.841
Model:                            OLS   Adj. R-squared:                  0.837
Method:                 Least Squares   F-statistic:                     170.6
Date:                Sat, 18 May 2024   Prob (F-statistic):          3.09e-265
Time:                        09:02:24   Log-Likelihood:                -1134.8
No. Observations:                 730   AIC:                             2316.
Df Residuals:                     707   BIC:                             2421.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [123]:
accu(df_hour_train["production"], model_hour_15.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,4.71663,2.876147,0.609789,1.832777e-15,0.239368,1.145186,0.846513,0.179474,0.179474


In [124]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_15, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,357.59520,0.188,25.764,73.840,158.50368,247.741,67.60128,283.630,0.00,1.550000
1,1.0,1,0,0,0,0,0,0,0,0,...,270.82640,59.560,69.432,86.868,209.34848,291.756,49.65056,284.310,0.00,1.550000
2,1.0,1,0,0,0,0,0,0,0,0,...,214.97840,66.200,12.832,88.328,261.67808,301.006,41.16736,283.019,2.08,1.550000
3,1.0,1,0,0,0,0,0,0,0,0,...,262.25520,74.520,69.960,94.360,225.50528,296.104,48.40128,284.281,0.46,0.980000
4,1.0,1,0,0,0,0,0,0,0,0,...,268.87280,62.684,41.728,65.928,219.05856,287.797,51.16544,282.805,2.11,2.170000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,505.62432,78.492,40.764,90.644,469.48416,321.361,89.02400,289.299,3.94,3.500000
133,1.0,0,0,0,0,0,0,0,0,0,...,871.79904,25.160,1.452,26.264,225.75744,262.970,142.29824,295.757,1.24,4.793333
134,1.0,0,0,0,0,0,0,0,0,0,...,880.27072,6.796,22.468,40.604,207.31840,280.244,143.31904,298.016,5.32,5.416667
135,1.0,0,0,0,0,0,0,0,0,0,...,665.87776,61.144,60.424,73.620,356.17792,322.512,113.47136,293.912,7.82,5.416667


In [125]:
accu(df_hour_test["production"], model_hour_15.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,3.798905,2.388871,0.628831,0.003818,0.253657,1.146788,0.913325,0.240418,0.240418


In [126]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_15, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,362.29840,9.116,43.452,71.612,188.88640,245.296,112.10304,279.165,0.00,2.266667
1,1.0,1,0,0,0,0,0,0,0,0,...,100.84480,94.664,72.196,95.932,345.70880,312.749,37.98080,276.063,0.00,2.266667
2,1.0,1,0,0,0,0,0,0,0,0,...,409.83680,1.820,0.040,4.024,182.58880,214.044,143.45920,276.977,1.70,2.266667
3,1.0,1,0,0,0,0,0,0,0,0,...,285.33840,3.620,73.972,98.784,267.23968,261.319,98.58176,277.698,0.85,2.833333
4,1.0,1,0,0,0,0,0,0,0,0,...,399.45520,0.200,21.100,22.856,180.76992,238.703,131.69856,279.906,4.25,3.683333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,505.62432,78.492,40.764,90.644,469.48416,321.361,89.02400,289.299,3.94,3.500000
863,1.0,0,0,0,0,0,0,0,0,0,...,871.79904,25.160,1.452,26.264,225.75744,262.970,142.29824,295.757,1.24,4.793333
864,1.0,0,0,0,0,0,0,0,0,0,...,880.27072,6.796,22.468,40.604,207.31840,280.244,143.31904,298.016,5.32,5.416667
865,1.0,0,0,0,0,0,0,0,0,0,...,665.87776,61.144,60.424,73.620,356.17792,322.512,113.47136,293.912,7.82,5.416667


In [127]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_15 = lm.fit()
print(model_hour_15.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.836
Model:                            OLS   Adj. R-squared:                  0.833
Method:                 Least Squares   F-statistic:                     271.4
Date:                Sat, 18 May 2024   Prob (F-statistic):          1.34e-320
Time:                        09:02:25   Log-Likelihood:                -1345.9
No. Observations:                 867   AIC:                             2726.
Df Residuals:                     850   BIC:                             2807.
Df Model:                          16                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [128]:
accu(df_hour["production"], model_hour_15.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,4.571615,2.824692,0.617876,3.7198e-17,0.240549,1.142785,0.856876,0.187434,0.187434


# Hour 16

In [129]:
mask = df["hour"] == 16
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,16,235.90,233.04,259.00,240.86,192.50,231.36,252.10,255.36,...,277.343,277.743,276.943,271.443,280.543,280.143,279.343,278.643,278.343,0.00
1,2022-01-02,16,27.84,96.70,198.84,243.56,223.06,194.06,108.42,69.46,...,275.182,274.882,273.682,269.782,279.782,278.782,277.882,278.382,277.782,0.00
2,2022-01-03,16,295.82,303.04,304.38,289.60,284.24,299.34,295.86,300.22,...,276.397,276.397,275.197,266.497,278.797,278.197,277.397,277.397,277.097,0.00
3,2022-01-04,16,239.56,238.76,236.80,245.34,196.22,240.82,233.64,239.60,...,276.337,276.237,277.137,268.837,280.137,279.037,278.337,278.337,277.637,0.85
4,2022-01-05,16,298.18,304.02,304.80,292.58,262.52,303.12,299.10,302.64,...,280.654,277.954,278.754,267.854,283.454,282.154,281.254,280.654,280.154,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,16,722.06,629.92,719.90,484.22,157.12,657.66,362.34,322.96,...,285.898,285.898,287.198,282.298,292.498,288.098,287.398,287.698,288.198,3.62
863,2024-05-14,16,788.44,792.78,799.42,708.46,436.56,787.50,786.34,790.46,...,293.500,294.000,294.100,290.600,295.500,295.500,294.600,293.900,294.000,4.55
864,2024-05-15,16,769.36,713.72,461.62,427.38,225.40,775.42,772.04,773.34,...,295.200,295.700,296.600,292.500,298.300,298.100,297.100,297.600,298.100,2.50
865,2024-05-16,16,586.50,543.90,259.58,407.54,694.70,493.72,548.72,252.48,...,288.904,290.404,288.104,289.704,292.404,291.604,291.104,290.704,294.604,2.88


In [130]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,221.4584,37.100,82.296,12.680,89.116,168.56448,0.00,260.756,66.06976,277.05900
1,1.0,1,0,0,0,0,0,0,0,0,...,180.0944,70.740,27.392,0.000,73.692,206.52864,0.72,275.812,67.22624,274.99400
2,1.0,1,0,0,0,0,0,0,0,0,...,286.0080,0.000,1.036,7.052,10.080,144.46592,0.00,221.920,101.92000,274.62900
3,1.0,1,0,0,0,0,0,0,0,0,...,208.7968,6.684,92.924,96.968,98.336,203.79968,0.00,256.476,69.98464,275.91700
4,1.0,1,0,0,0,0,0,0,0,0,...,287.3808,0.360,0.000,0.000,0.368,139.80416,0.00,240.253,94.24448,277.84200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,257.0600,0.028,0.000,0.000,0.028,103.89824,0.00,258.628,54.48064,283.46900
726,1.0,0,0,0,0,0,0,0,0,0,...,263.1552,0.196,0.000,0.000,0.204,105.34208,0.00,244.828,55.61216,281.68400
727,1.0,0,0,0,0,0,0,0,0,0,...,272.1896,0.000,0.000,0.000,0.000,104.99008,0.00,239.411,57.70624,281.95836
728,1.0,0,0,0,0,0,0,0,0,0,...,270.3448,0.000,0.000,16.904,18.848,106.39424,0.00,243.848,56.93760,282.05600


In [131]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.650
Model:                            OLS   Adj. R-squared:                  0.639
Method:                 Least Squares   F-statistic:                     59.74
Date:                Sat, 18 May 2024   Prob (F-statistic):          7.16e-145
Time:                        09:02:25   Log-Likelihood:                -1297.8
No. Observations:                 730   AIC:                             2642.
Df Residuals:                     707   BIC:                             2747.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [132]:
drop_lst_hour_16 = ["mean_csnow_surface", "mean_tcdc_middle.cloud.layer"]

df_model_hour.drop(columns = drop_lst_hour_16, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [133]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_16 = lm.fit()
print(model_hour_16.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.875
Model:                            OLS   Adj. R-squared:                  0.871
Method:                 Least Squares   F-statistic:                     224.4
Date:                Sat, 18 May 2024   Prob (F-statistic):          3.19e-301
Time:                        09:02:25   Log-Likelihood:                -923.05
No. Observations:                 730   AIC:                             1892.
Df Residuals:                     707   BIC:                             1998.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [134]:
accu(df_hour_train["production"], model_hour_16.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,2.498,2.420928,0.969147,7.022471e-15,0.214541,0.85686,0.542724,0.217263,0.217263


In [135]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_16, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,177.6936,0.656,100.000,100.000,193.27808,273.44200,31.27296,281.112,0.00,0.086667
1,1.0,1,0,0,0,0,0,0,0,0,...,132.8608,83.916,73.080,97.576,213.17632,315.34300,23.86240,281.811,0.00,0.086667
2,1.0,1,0,0,0,0,0,0,0,0,...,196.2856,20.052,99.816,99.844,183.43360,273.24400,39.11808,281.538,0.10,0.086667
3,1.0,1,0,0,0,0,0,0,0,0,...,185.9528,83.440,0.948,85.160,176.60096,296.63700,36.06592,282.508,0.02,0.053333
4,1.0,1,0,0,0,0,0,0,0,0,...,274.2768,14.212,0.000,15.408,120.86144,243.09600,60.41088,280.611,0.14,0.153333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,356.1088,89.456,0.000,92.420,468.04480,326.53200,66.11328,288.214,2.38,2.260000
133,1.0,0,0,0,0,0,0,0,0,0,...,755.0248,15.048,0.000,16.892,191.94304,264.97300,133.83552,293.624,0.78,2.983333
134,1.0,0,0,0,0,0,0,0,0,0,...,645.3344,16.200,57.868,79.420,259.09504,305.59200,117.19168,295.492,3.62,3.556667
135,1.0,0,0,0,0,0,0,0,0,0,...,427.2560,72.608,38.572,87.900,416.54336,340.30028,76.70720,291.848,4.55,3.310000


In [136]:
accu(df_hour_test["production"], model_hour_16.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,1.775182,1.503903,0.847182,-0.040138,0.19367,0.570023,0.427049,0.240566,0.240566


In [137]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_16, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,221.4584,37.100,12.680,89.116,168.56448,260.75600,66.06976,277.059,0.00,0.000000
1,1.0,1,0,0,0,0,0,0,0,0,...,180.0944,70.740,0.000,73.692,206.52864,275.81200,67.22624,274.994,0.00,0.000000
2,1.0,1,0,0,0,0,0,0,0,0,...,286.0080,0.000,7.052,10.080,144.46592,221.92000,101.92000,274.629,0.00,0.000000
3,1.0,1,0,0,0,0,0,0,0,0,...,208.7968,6.684,96.968,98.336,203.79968,256.47600,69.98464,275.917,0.00,0.283333
4,1.0,1,0,0,0,0,0,0,0,0,...,287.3808,0.360,0.000,0.368,139.80416,240.25300,94.24448,277.842,0.00,0.283333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,356.1088,89.456,0.000,92.420,468.04480,326.53200,66.11328,288.214,2.38,2.260000
863,1.0,0,0,0,0,0,0,0,0,0,...,755.0248,15.048,0.000,16.892,191.94304,264.97300,133.83552,293.624,0.78,2.983333
864,1.0,0,0,0,0,0,0,0,0,0,...,645.3344,16.200,57.868,79.420,259.09504,305.59200,117.19168,295.492,3.62,3.556667
865,1.0,0,0,0,0,0,0,0,0,0,...,427.2560,72.608,38.572,87.900,416.54336,340.30028,76.70720,291.848,4.55,3.310000


In [138]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_16 = lm.fit()
print(model_hour_16.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.875
Model:                            OLS   Adj. R-squared:                  0.873
Method:                 Least Squares   F-statistic:                     371.7
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        09:02:25   Log-Likelihood:                -1057.0
No. Observations:                 867   AIC:                             2148.
Df Residuals:                     850   BIC:                             2229.
Df Model:                          16                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [139]:
accu(df_hour["production"], model_hour_16.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,2.383783,2.31553,0.971368,1.827484e-14,0.209379,0.818867,0.524548,0.220048,0.220048


# Hour 17

In [140]:
mask = df["hour"] == 17
df_hour = df[mask]
df_hour.reset_index(drop=True, inplace=True)
df_hour_train = df_hour[df_hour["date"] < "2024"]
df_hour_test = df_hour[df_hour["date"] >= "2024"]
df_hour_train.reset_index(drop=True, inplace=True)
df_hour_test.reset_index(drop=True, inplace=True)

df_hour

Unnamed: 0,date,hour,dswrf_surface_37.75_34.5,dswrf_surface_37.75_34.75,dswrf_surface_37.75_35.0,dswrf_surface_37.75_35.25,dswrf_surface_37.75_35.5,dswrf_surface_38.0_34.5,dswrf_surface_38.0_34.75,dswrf_surface_38.0_35.0,...,tmp_surface_38.5_34.75,tmp_surface_38.5_35.0,tmp_surface_38.5_35.25,tmp_surface_38.5_35.5,tmp_surface_38.75_34.5,tmp_surface_38.75_34.75,tmp_surface_38.75_35.0,tmp_surface_38.75_35.25,tmp_surface_38.75_35.5,production
0,2022-01-01,17,169.42,165.62,187.44,175.32,137.44,165.38,180.36,183.08,...,274.523,273.923,273.723,268.023,277.823,277.323,276.323,275.323,274.923,0.00
1,2022-01-02,17,45.54,98.42,157.74,178.84,169.42,151.74,103.26,63.40,...,272.808,272.008,270.608,265.408,277.308,276.008,275.408,275.208,274.408,0.00
2,2022-01-03,17,215.80,220.10,215.62,203.82,198.88,217.72,214.54,216.84,...,272.068,272.068,271.468,261.768,274.968,274.368,273.168,272.968,272.968,0.00
3,2022-01-04,17,181.86,174.58,175.28,181.22,144.62,178.88,172.06,173.98,...,273.507,273.107,274.207,266.207,277.007,275.907,275.207,275.407,274.607,0.00
4,2022-01-05,17,218.66,222.36,222.50,214.32,187.00,221.94,218.26,220.16,...,276.485,274.185,275.185,263.185,278.985,277.985,277.185,276.385,276.085,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,2024-05-13,17,606.62,508.82,613.28,441.24,213.76,491.46,330.52,266.28,...,289.398,285.998,286.298,281.198,293.098,290.998,289.298,290.098,288.798,1.30
863,2024-05-14,17,697.96,701.16,706.54,644.44,381.42,697.34,696.14,699.62,...,290.476,290.876,290.876,287.476,292.276,292.376,291.776,291.176,290.776,1.33
864,2024-05-15,17,680.60,647.24,516.74,442.98,174.36,685.66,682.56,682.96,...,292.312,291.212,292.312,288.312,295.212,295.112,294.812,293.412,293.512,0.83
865,2024-05-16,17,534.96,470.48,323.78,468.90,616.26,392.76,380.46,218.62,...,287.915,288.815,289.015,287.415,291.015,292.815,293.415,293.315,293.515,1.27


In [141]:
df_hour_month_train = df_hour_train["date"].dt.month
df_hour_month_train = pd.get_dummies(df_hour_month_train, prefix="month")
df_hour_month_train.drop(columns=["month_12"], inplace=True)
df_hour_month_train

df_model_hour = df_hour_month_train.copy()
df_model_hour["trend"] = np.arange(len(df_model_hour))
df_model_hour = sm.add_constant(df_model_hour)


features_train = df_hour_train.copy()
features_train.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_train.columns[df_hour_train.columns.str.contains(feature)]
    features_train[f"mean_{feature}"] = df_hour_train[feature_col].mean(axis=1)
features_train = features_train.filter(regex=r'^mean_')
features_train

df_model_hour= pd.concat([df_model_hour, features_train], axis=1)

df_model_hour

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_dswrf_surface,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface
0,1.0,1,0,0,0,0,0,0,0,0,...,157.6448,37.032,78.252,17.376,85.688,133.18080,0.00,258.960,47.22432,273.467
1,1.0,1,0,0,0,0,0,0,0,0,...,137.5160,66.484,36.600,0.000,72.272,156.70464,0.76,262.866,52.67264,272.476
2,1.0,1,0,0,0,0,0,0,0,0,...,205.0728,0.000,1.756,13.392,17.180,115.29024,0.00,221.424,73.93472,270.352
3,1.0,1,0,0,0,0,0,0,0,0,...,157.5056,5.848,80.380,82.016,89.380,154.18240,0.00,248.888,53.05024,272.451
4,1.0,1,0,0,0,0,0,0,0,0,...,208.0632,0.272,0.000,0.000,0.272,111.87392,0.00,239.075,69.19232,273.861
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1.0,0,0,0,0,0,0,0,0,0,...,180.0656,0.028,0.000,0.000,0.028,83.73248,0.00,257.324,39.24352,278.904
726,1.0,0,0,0,0,0,0,0,0,0,...,185.2728,0.224,0.000,0.000,0.236,85.07264,0.00,243.435,40.34432,277.550
727,1.0,0,0,0,0,0,0,0,0,0,...,192.5992,0.000,0.000,0.000,0.000,85.02080,0.00,238.503,42.19648,276.938
728,1.0,0,0,0,0,0,0,0,0,0,...,189.1200,0.000,0.000,33.320,35.204,89.46752,0.00,243.523,40.65920,277.070


In [142]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
result = lm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.568
Model:                            OLS   Adj. R-squared:                  0.554
Method:                 Least Squares   F-statistic:                     42.21
Date:                Sat, 18 May 2024   Prob (F-statistic):          6.03e-113
Time:                        09:02:26   Log-Likelihood:                -976.51
No. Observations:                 730   AIC:                             1999.
Df Residuals:                     707   BIC:                             2105.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [143]:
drop_lst_hour_17 = ["mean_csnow_surface"]

df_model_hour.drop(columns = drop_lst_hour_17, inplace = True)

df_model_hour["lag_2"] = df_hour_train["production"].shift(2, fill_value=0)
df_model_hour["ma_3"] = df_hour_train["production"].rolling(window=3).mean()
df_model_hour.fillna(method='bfill', inplace=True)

In [144]:
lm = sm.OLS(df_hour_train["production"], df_model_hour)
model_hour_17 = lm.fit()
print(model_hour_17.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.892
Model:                            OLS   Adj. R-squared:                  0.889
Method:                 Least Squares   F-statistic:                     254.6
Date:                Sat, 18 May 2024   Prob (F-statistic):          2.96e-323
Time:                        09:02:26   Log-Likelihood:                -468.97
No. Observations:                 730   AIC:                             985.9
Df Residuals:                     706   BIC:                             1096.
Df Model:                          23                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [145]:
accu(df_hour_train["production"], model_hour_17.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,730,0.93037,1.402285,1.507234,4.829193e-14,0.121031,0.460007,0.248439,0.267032,0.267032


In [146]:
df_hour_month_test = df_hour_test["date"].dt.month
df_hour_month_test = pd.get_dummies(df_hour_month_test, prefix="month")
for i in range(5,12):
    df_hour_month_test[f"month_{i}"] = 0

df_test = df_hour_month_test.copy()
df_test["trend"] = np.arange(len(df_test))
df_test = sm.add_constant(df_test)

features_test = df_hour_test.copy()
features_test.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour_test.columns[df_hour_test.columns.str.contains(feature)]
    features_test[f"mean_{feature}"] = df_hour_test[feature_col].mean(axis=1)
features_test = features_test.filter(regex=r'^mean_')
features_test

df_test.reset_index(drop=True, inplace=True)
features_test.reset_index(drop=True, inplace=True)
df_test = pd.concat([df_test, features_test], axis=1)

df_test.drop(columns = drop_lst_hour_17, inplace = True)

df_test["lag_2"] = df_hour_test["production"].shift(2, fill_value=0)
df_test["ma_3"] = df_hour_test["production"].rolling(window=3).mean()
df_test.fillna(method='bfill', inplace=True)

df_test

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,0.732,68.668,100.000,100.000,154.59264,276.621,21.69920,278.133,0.00,0.000000
1,1.0,1,0,0,0,0,0,0,0,0,...,83.868,71.736,86.532,98.792,173.87904,317.079,15.89120,279.943,0.00,0.000000
2,1.0,1,0,0,0,0,0,0,0,0,...,17.028,2.732,99.336,99.436,145.53664,269.599,28.25024,278.726,0.00,0.000000
3,1.0,1,0,0,0,0,0,0,0,0,...,74.920,14.560,1.972,77.220,134.02240,288.063,27.87136,279.758,0.00,0.000000
4,1.0,1,0,0,0,0,0,0,0,0,...,11.344,0.620,0.068,12.380,98.27520,239.440,45.46368,277.653,0.00,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,1.0,0,0,0,0,0,0,0,0,0,...,87.160,68.280,0.000,90.560,409.64352,324.750,63.33056,287.406,1.50,1.033333
133,1.0,0,0,0,0,0,0,0,0,0,...,14.224,9.364,0.000,15.724,183.87392,265.093,123.37152,290.568,0.30,0.976667
134,1.0,0,0,0,0,0,0,0,0,0,...,17.192,67.296,56.676,78.460,245.66080,305.315,105.66080,292.580,1.30,1.153333
135,1.0,0,0,0,0,0,0,0,0,0,...,65.192,75.272,35.172,85.516,366.70080,334.503,73.11296,290.619,1.33,1.143333


In [147]:
accu(df_hour_test["production"], model_hour_17.predict(df_test))

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,137,0.357445,0.426077,1.192006,-0.111596,0.08664,0.167226,0.11485,0.321308,0.321308


In [148]:
df_hour_month = df_hour["date"].dt.month
df_hour_month = pd.get_dummies(df_hour_month, prefix="month")
for i in range(5,12):
    df_hour_month[f"month_{i}"] = 0

df_predict = df_hour_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict = sm.add_constant(df_predict)

features = df_hour.copy()
features.drop(columns=["date", "hour", "production"], inplace=True)
lst = weather_df.columns[4:-10]
for feature in lst:
    feature_col = df_hour.columns[df_hour.columns.str.contains(feature)]
    features[f"mean_{feature}"] = df_hour[feature_col].mean(axis=1)
features = features.filter(regex=r'^mean_')

df_predict.reset_index(drop=True, inplace=True)
features.reset_index(drop=True, inplace=True)
df_predict = pd.concat([df_predict, features], axis=1)

df_predict.drop(columns = drop_lst_hour_17, inplace = True)

df_predict["lag_2"] = df_hour["production"].shift(2, fill_value=0)
df_predict["ma_3"] = df_hour["production"].rolling(window=3).mean()
df_predict.fillna(method='bfill', inplace=True)

df_predict

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_tcdc_low.cloud.layer,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1.0,1,0,0,0,0,0,0,0,0,...,37.032,78.252,17.376,85.688,133.18080,258.960,47.22432,273.467,0.00,0.000000
1,1.0,1,0,0,0,0,0,0,0,0,...,66.484,36.600,0.000,72.272,156.70464,262.866,52.67264,272.476,0.00,0.000000
2,1.0,1,0,0,0,0,0,0,0,0,...,0.000,1.756,13.392,17.180,115.29024,221.424,73.93472,270.352,0.00,0.000000
3,1.0,1,0,0,0,0,0,0,0,0,...,5.848,80.380,82.016,89.380,154.18240,248.888,53.05024,272.451,0.00,0.000000
4,1.0,1,0,0,0,0,0,0,0,0,...,0.272,0.000,0.000,0.272,111.87392,239.075,69.19232,273.861,0.00,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.0,0,0,0,0,0,0,0,0,0,...,87.160,68.280,0.000,90.560,409.64352,324.750,63.33056,287.406,1.50,1.033333
863,1.0,0,0,0,0,0,0,0,0,0,...,14.224,9.364,0.000,15.724,183.87392,265.093,123.37152,290.568,0.30,0.976667
864,1.0,0,0,0,0,0,0,0,0,0,...,17.192,67.296,56.676,78.460,245.66080,305.315,105.66080,292.580,1.30,1.153333
865,1.0,0,0,0,0,0,0,0,0,0,...,65.192,75.272,35.172,85.516,366.70080,334.503,73.11296,290.619,1.33,1.143333


In [149]:
lm = sm.OLS(df_hour["production"], df_predict)
model_hour_17 = lm.fit()
print(model_hour_17.summary())

                            OLS Regression Results                            
Dep. Variable:             production   R-squared:                       0.894
Model:                            OLS   Adj. R-squared:                  0.892
Method:                 Least Squares   F-statistic:                     422.8
Date:                Sat, 18 May 2024   Prob (F-statistic):               0.00
Time:                        09:02:26   Log-Likelihood:                -493.00
No. Observations:                 867   AIC:                             1022.
Df Residuals:                     849   BIC:                             1108.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
const           

In [150]:
accu(df_hour["production"], model_hour_17.predict())

Unnamed: 0,n,mean,sd,CV,FBias,MAPE,RMSE,MAD,MADP,WMAPE
0,867,0.839839,1.314549,1.56524,2.309186e-14,0.109779,0.427284,0.220971,0.263111,0.263111


# Hour 18

# Complete Model

In [151]:
cut_off_date = production_df["date"].iloc[-1]
next_day = cut_off_date + timedelta(days=2)
predict_df = predict_df[predict_df["date"] == next_day]

In [152]:
features = predict_df.copy()
features.drop(columns=["date", "hour"], inplace=True)
lst = weather_df.columns[4:-10]

for feature in lst:
    feature_col = predict_df.columns[predict_df.columns.str.contains(feature)]
    features[f"mean_{feature}"] = predict_df[feature_col].mean(axis=1)

features = features.filter(regex=r'^mean_')
features.reset_index(drop=True, inplace=True)

df_month = predict_df["date"].dt.month
df_month = pd.get_dummies(df_month, prefix="month")

for i in range(1,13):
    if i != 5:
        df_month[f"month_{i}"] = 0

column_order = ['month_' + str(i) for i in range(1, 13)] + ['trend']  
df_month = df_month.reindex(columns=column_order)   

df_predict = df_month.copy()
df_predict["trend"] = np.arange(len(df_predict))
df_predict.insert(0, 'const', 1)


df_predict.reset_index(drop=True, inplace=True)
df_concat = pd.concat([df_predict, features], axis=1)

production_2_day_before = production_df["production"].iloc[-2*24:-24]

df_concat["lag_2"] = production_2_day_before.values

last_3_production = production_df["production"].iloc[-3*24:]
last_3_production.reset_index(drop=True, inplace=True)

moving_avg_3 = [(last_3_production[i] + last_3_production[i+24] + last_3_production[i + 48])/3 for i in range(len(last_3_production)-48)]

df_concat["ma_3"] = moving_avg_3

df_concat

Unnamed: 0,const,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,...,mean_tcdc_middle.cloud.layer,mean_tcdc_high.cloud.layer,mean_tcdc_entire.atmosphere,mean_uswrf_top_of_atmosphere,mean_csnow_surface,mean_dlwrf_surface,mean_uswrf_surface,mean_tmp_surface,lag_2,ma_3
0,1,0,0,0,0,1,0,0,0,0,...,17.892,6.064,26.308,0.0,0.0,315.692,0.0,286.537,0.0,0.0
1,1,0,0,0,0,1,0,0,0,0,...,22.044,9.036,32.764,0.0,0.0,317.62252,0.0,285.819,0.0,0.0
2,1,0,0,0,0,1,0,0,0,0,...,29.236,9.568,39.86,0.0,0.0,320.903,0.0,286.159,0.0,0.0
3,1,0,0,0,0,1,0,0,0,0,...,36.38,12.38,46.808,0.0,0.0,324.835,0.0,286.378,0.0,0.0
4,1,0,0,0,0,1,0,0,0,0,...,68.652,65.576,89.624,0.0,0.0,341.979,0.0,286.05,0.04,0.053333
5,1,0,0,0,0,1,0,0,0,0,...,61.392,81.02,93.388,0.0,0.0,335.842,0.0,285.259,0.6,0.733333
6,1,0,0,0,0,1,0,0,0,0,...,49.096,86.852,95.216,5.25184,0.0,329.215,0.52928,285.179,2.89,2.753333
7,1,0,0,0,0,1,0,0,0,0,...,37.816,90.144,96.412,33.68896,0.0,323.959,5.40608,288.032,6.45,5.753333
8,1,0,0,0,0,1,0,0,0,0,...,31.476,91.8,96.932,65.97568,0.0,322.273,14.84544,291.338,8.39,7.66
9,1,0,0,0,0,1,0,0,0,0,...,30.496,92.592,97.048,90.67712,0.0,320.547,27.59488,295.248,8.39,9.21


In [153]:
prediction = [0] * 4

df_concat_hour_5 = df_concat.copy()
df_concat_hour_5.drop(columns = drop_lst_hour_5, inplace = True)
prediction.append(model_hour_5.predict(df_concat_hour_5.iloc[5]).iloc[0])

df_concat_hour_6 = df_concat.copy()
df_concat_hour_6.drop(columns = drop_lst_hour_6, inplace = True)
prediction.append(model_hour_6.predict(df_concat_hour_6.iloc[6]).iloc[0])

df_concat_hour_7 = df_concat.copy()
df_concat_hour_7.drop(columns = drop_lst_hour_7, inplace = True)
prediction.append(model_hour_7.predict(df_concat_hour_7.iloc[7]).iloc[0])

df_concat_hour_8 = df_concat.copy()
df_concat_hour_8.drop(columns = drop_lst_hour_8, inplace = True)
prediction.append(model_hour_8.predict(df_concat_hour_8.iloc[8]).iloc[0])

df_concat_hour_9 = df_concat.copy()
df_concat_hour_9.drop(columns = drop_lst_hour_9, inplace = True)
prediction.append(model_hour_9.predict(df_concat_hour_9.iloc[9]).iloc[0])

df_concat_hour_10 = df_concat.copy()
df_concat_hour_10.drop(columns = drop_lst_hour_10, inplace = True)
prediction.append(model_hour_10.predict(df_concat_hour_10.iloc[10]).iloc[0])

df_concat_hour_11 = df_concat.copy()
df_concat_hour_11.drop(columns = drop_lst_hour_11, inplace = True)
prediction.append(model_hour_11.predict(df_concat_hour_11.iloc[11]).iloc[0])

df_concat_hour_12 = df_concat.copy()
df_concat_hour_12.drop(columns = drop_lst_hour_12, inplace = True)
prediction.append(model_hour_12.predict(df_concat_hour_12.iloc[12]).iloc[0])

df_concat_hour_13 = df_concat.copy()
df_concat_hour_13.drop(columns = drop_lst_hour_13, inplace = True)
prediction.append(model_hour_13.predict(df_concat_hour_13.iloc[13]).iloc[0])

df_concat_hour_14 = df_concat.copy()
df_concat_hour_14.drop(columns = drop_lst_hour_14, inplace = True)
prediction.append(model_hour_14.predict(df_concat_hour_14.iloc[14]).iloc[0])

df_concat_hour_15 = df_concat.copy()
df_concat_hour_15.drop(columns = drop_lst_hour_15, inplace = True)
prediction.append(model_hour_15.predict(df_concat_hour_15.iloc[15]).iloc[0])

df_concat_hour_16 = df_concat.copy()
df_concat_hour_16.drop(columns = drop_lst_hour_16, inplace = True)
prediction.append(model_hour_16.predict(df_concat_hour_16.iloc[16]).iloc[0])

df_concat_hour_17 = df_concat.copy()
df_concat_hour_17.drop(columns = drop_lst_hour_17, inplace = True)
prediction.append(model_hour_17.predict(df_concat_hour_17.iloc[17]).iloc[0])

prediction += [0] * 7
prediction = [round(i,2) for i in prediction]

formatted_list = ",".join(map(str, prediction))

print(next_day)
print("\n")
print(formatted_list)

2024-05-19 00:00:00


0,0,0,0,0.81,2.63,4.83,6.3,8.29,8.77,7.2,7.14,6.93,4.72,4.63,1.27,0.42,0,0,0,0,0,0,0
