In [1]:
# One-time setup (kept commented out so Run All does not reinstall): %pip install h2o

In [9]:
import h2o
from h2o.automl import H2OAutoML
import pandas as pd
import numpy as np

# Attach to the local H2O cluster (h2o.init starts one when none is running)
h2o.init()

# Read the raw consumption records and parse the timestamp column
df = pd.read_csv('modified_electricity_consumption_data.csv')
df['datetime'] = pd.to_datetime(df['datetime'])

# Roll the readings up to one total_KW sum per (calendar month, guri_num)
df['year_month'] = df['datetime'].dt.to_period('M')
monthly_totals = df.groupby(['year_month', 'guri_num'])['total_KW'].sum()
monthly_df = monthly_totals.reset_index()

# Calendar features derived from the monthly period
monthly_df = monthly_df.assign(
    year=monthly_df['year_month'].dt.year,
    month=monthly_df['year_month'].dt.month,
)

# Hand the monthly table over to H2O
h2o_df = h2o.H2OFrame(monthly_df)

# All three predictors are treated as categorical (H2O factors)
predictors = ['guri_num', 'month', 'year']
response = 'total_KW'
for column in predictors:
    h2o_df[column] = h2o_df[column].asfactor()

# Random 80/20 row split; the seed keeps the partition reproducible
train, test = h2o_df.split_frame(ratios=[0.8], seed=42)

# AutoML search capped at 10 models or one hour, whichever comes first
aml = H2OAutoML(max_models=10, seed=1, max_runtime_secs=3600)
aml.train(x=predictors, y=response, training_frame=train)

# Full leaderboard, one row per trained model
lb = aml.leaderboard
print(lb.head(rows=lb.nrows))

# Score the held-out rows with the best model
preds = aml.leader.predict(test)
print(preds.head())

# Metrics of the best model on the held-out rows
performance = aml.leader.model_performance(test)
print(performance)

# Forecast grid: every guri_num present in the monthly data crossed with every
# calendar month from January 2024 through December 2028.
valid_guri_nums = monthly_df['guri_num'].unique()
# period_range builds the monthly periods directly. It is used instead of
# date_range(freq='M').to_period('M') because the 'M' month-end alias for
# date_range was deprecated in pandas 2.2 (renamed 'ME'); the resulting
# PeriodIndex is the same.
future_dates = pd.period_range(start='2024-01', end='2028-12', freq='M')

# Rows are grouped by guri_num: each id is repeated once per month while the
# whole month sequence is tiled once per id.
future_df = pd.DataFrame({
    'year_month': np.tile(future_dates, len(valid_guri_nums)),
    'guri_num': np.repeat(valid_guri_nums, len(future_dates))
})

future_df['year'] = future_df['year_month'].dt.year
future_df['month'] = future_df['year_month'].dt.month

# Convert to an H2O frame with the same factor encoding used for training
future_h2o_df = h2o.H2OFrame(future_df)
future_h2o_df['guri_num'] = future_h2o_df['guri_num'].asfactor()
future_h2o_df['month'] = future_h2o_df['month'].asfactor()
# NOTE(review): `year` is a factor, so forecast years that are absent from the
# training data are presumably unseen levels. The recorded output shows the
# same prediction for a given month in 2024 and 2025 -- confirm whether `year`
# should be numeric (training and every prediction cell must change together).
future_h2o_df['year'] = future_h2o_df['year'].asfactor()

# Predict future electricity consumption with the best AutoML model
future_predictions = aml.leader.predict(future_h2o_df)
# as_list returns a one-column pandas DataFrame; assign that column as a plain
# array so the write is positional and a row-count mismatch raises instead of
# being silently index-aligned.
predicted_kw = h2o.as_list(future_predictions['predict'], use_pandas=True)
future_df['predicted_total_KW'] = predicted_kw.iloc[:, 0].to_numpy()

# Display the first twelve forecast rows
print(future_df.head(12))


Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,1 hour 1 min
H2O_cluster_timezone:,Africa/Nairobi
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.4
H2O_cluster_version_age:,"7 days, 17 hours and 38 minutes"
H2O_cluster_name:,H2O_from_python_xusee_46t06z
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.531 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |█
13:50:39.46: AutoML: XGBoost is not available; skipping it.

██████████████████████████████████████████████████████████████| (done) 100%
model_id                                                    rmse      mse      mae     rmsle    mean_residual_deviance
StackedEnsemble_AllModels_1_AutoML_2_20240717_135039     14.1313  199.693  10.9465  0.158519                   199.693
StackedEnsemble_BestOfFamily_1_AutoML_2_20240717_135039  14.1808  201.095  10.9653  0.158215                   201.095
GBM_5_AutoML_2_20240717_135039                           14.4745  209.512  11.1769  0.160771                   209.512
GBM_grid_1_AutoML_2_20240717_135039_model_1              14.617   213.657  11.4789  0.176657                   213.657
GBM_1_AutoML_2_20240717_135039                           14.6594  214.899  11.3391  0.163273                   214.899
GBM_3_AutoML_2_20240717_135039    




   year_month  guri_num  year  month  predicted_total_KW
0     2024-01  BOO13096  2024      1           98.848583
1     2024-02  BOO13096  2024      2          135.752166
2     2024-03  BOO13096  2024      3          159.775211
3     2024-04  BOO13096  2024      4          151.503282
4     2024-05  BOO13096  2024      5          149.320036
5     2024-06  BOO13096  2024      6           95.126197
6     2024-07  BOO13096  2024      7           91.633663
7     2024-08  BOO13096  2024      8           84.557476
8     2024-09  BOO13096  2024      9           76.634571
9     2024-10  BOO13096  2024     10           79.375192
10    2024-11  BOO13096  2024     11           80.307760
11    2024-12  BOO13096  2024     12           93.147606


In [7]:

# Predictions for the 2018-2022 window (January 2018 through December 2022).
# This cell uses its own `past_*` names so that `future_df`, which later cells
# display and save to future_predictions.csv, keeps holding the 2024-2028
# forecast instead of being overwritten by this window.
valid_guri_nums = monthly_df['guri_num'].unique()
# period_range builds the monthly periods directly. It is used instead of
# date_range(freq='M').to_period('M') because the 'M' month-end alias for
# date_range was deprecated in pandas 2.2 (renamed 'ME'); the resulting
# PeriodIndex is the same.
past_dates = pd.period_range(start='2018-01', end='2022-12', freq='M')

# Rows are grouped by guri_num: each id is repeated once per month while the
# whole month sequence is tiled once per id.
past_df = pd.DataFrame({
    'year_month': np.tile(past_dates, len(valid_guri_nums)),
    'guri_num': np.repeat(valid_guri_nums, len(past_dates))
})

past_df['year'] = past_df['year_month'].dt.year
past_df['month'] = past_df['year_month'].dt.month

# Convert to an H2O frame with the same factor encoding used for training
past_h2o_df = h2o.H2OFrame(past_df)
past_h2o_df['guri_num'] = past_h2o_df['guri_num'].asfactor()
past_h2o_df['month'] = past_h2o_df['month'].asfactor()
past_h2o_df['year'] = past_h2o_df['year'].asfactor()

# Predict electricity consumption for the window with the best AutoML model
past_predictions = aml.leader.predict(past_h2o_df)
# as_list returns a one-column pandas DataFrame; assign that column as a plain
# array so the write is positional and a row-count mismatch raises instead of
# being silently index-aligned.
past_predicted_kw = h2o.as_list(past_predictions['predict'], use_pandas=True)
past_df['predicted_total_KW'] = past_predicted_kw.iloc[:, 0].to_numpy()

# Display the first twelve rows of the 2018-2022 predictions
print(past_df.head(12))

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%





   year_month  guri_num  year  month  predicted_total_KW
0     2018-01  BOO13096  2018      1          102.536671
1     2018-02  BOO13096  2018      2          129.821393
2     2018-03  BOO13096  2018      3          153.223997
3     2018-04  BOO13096  2018      4          142.922192
4     2018-05  BOO13096  2018      5          143.456121
5     2018-06  BOO13096  2018      6           98.712847
6     2018-07  BOO13096  2018      7          102.230077
7     2018-08  BOO13096  2018      8          102.545148
8     2018-09  BOO13096  2018      9           98.343207
9     2018-10  BOO13096  2018     10          101.138740
10    2018-11  BOO13096  2018     11           97.577830
11    2018-12  BOO13096  2018     12          102.044688


In [10]:
# Preview the first 48 rows of the prediction table
forecast_preview = future_df.head(48)
print(forecast_preview)

   year_month  guri_num  year  month  predicted_total_KW
0     2024-01  BOO13096  2024      1           98.848583
1     2024-02  BOO13096  2024      2          135.752166
2     2024-03  BOO13096  2024      3          159.775211
3     2024-04  BOO13096  2024      4          151.503282
4     2024-05  BOO13096  2024      5          149.320036
5     2024-06  BOO13096  2024      6           95.126197
6     2024-07  BOO13096  2024      7           91.633663
7     2024-08  BOO13096  2024      8           84.557476
8     2024-09  BOO13096  2024      9           76.634571
9     2024-10  BOO13096  2024     10           79.375192
10    2024-11  BOO13096  2024     11           80.307760
11    2024-12  BOO13096  2024     12           93.147606
12    2025-01  BOO13096  2025      1           98.848583
13    2025-02  BOO13096  2025      2          135.752166
14    2025-03  BOO13096  2025      3          159.775211
15    2025-04  BOO13096  2025      4          151.503282
16    2025-05  BOO13096  2025  

In [5]:
# Write the prediction table to disk, leaving out the pandas row index
output_path = 'future_predictions.csv'
future_df.to_csv(output_path, index=False)