In [245]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib
from sklearn.tree import DecisionTreeRegressor

In [225]:
df = pd.read_csv("5G_energy_consumption_dataset.csv")

In [231]:
df.head()

Unnamed: 0,Energy,load,ESMODE,TXpower,energy_lag_1,energy_lag_24,timestamp
24,65.171898,0.149776,0.0,7.101719,56.203288,64.275037,1672639000.0
25,89.835575,0.556403,0.0,7.101719,65.171898,55.904335,1672646000.0
26,79.372197,0.533371,0.0,7.101719,89.835575,57.698057,1672650000.0
27,71.150972,0.395061,0.0,7.101719,79.372197,55.156951,1672654000.0
28,74.289985,0.311067,0.0,7.101719,71.150972,56.053812,1672657000.0


In [227]:
df['energy_lag_1'] = df['Energy'].shift(1)  
df['energy_lag_24'] = df['Energy'].shift(24) 
df = df.dropna()  

In [232]:
train_size = int(len(df) * 0.8) 
train_df, test_df = df[:train_size], df[train_size:]

In [233]:
X_train = train_df.drop('Energy', axis=1) 
y_train = train_df['Energy'] 
X_test = test_df.drop('Energy', axis=1) 
y_test = test_df['Energy'] 

In [234]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', RandomForestRegressor(random_state=42))
])

In [235]:
tscv = TimeSeriesSplit(n_splits=5) 

In [236]:
param_grid = {
    'regressor__n_estimators': [50, 100],
    'regressor__max_depth': [None, 10],
    'regressor__min_samples_split': [2, 5]
}

In [237]:
grid_search = GridSearchCV(pipeline, param_grid, cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1, verbose=1)

In [207]:
print(X_train.dtypes)

Time              object
BS                object
load             float64
ESMODE           float64
TXpower          float64
energy_lag_1     float64
energy_lag_24    float64
dtype: object


In [228]:
df['timestamp'] = pd.to_datetime(df['Time'])  
df['timestamp'] = df['timestamp'].astype(int) / 10**9  # Convert to Unix timestamp

In [229]:
del df['Time']

In [230]:
del df['BS']

In [238]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 8 candidates, totalling 40 fits


In [239]:
best_model = grid_search.best_estimator_

In [240]:
joblib.dump(best_model, '5g_energy_model.pkl')

['5g_energy_model.pkl']

In [241]:
y_pred_test = best_model.predict(X_test)

In [242]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
mae = mean_absolute_error(y_test, y_pred_test)
r2 = r2_score(y_test, y_pred_test)

In [243]:
print(f"RMSE (test set): {rmse}")
print(f"MAE (test set): {mae}")
print(f"R-squared (test set): {r2}")

RMSE (test set): 6.907607180767024
MAE (test set): 3.2258831670480346
R-squared (test set): 0.7303411396697056


In [263]:
pipeline1 = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', DecisionTreeRegressor(random_state=42))
])

In [264]:
param_grid1 = {
    'regressor__max_depth': [None, 10],
    'regressor__min_samples_split': [2, 5, 10],  # Min samples needed to split
    'regressor__min_samples_leaf': [1, 2, 5]
}

In [265]:
grid_search1 = GridSearchCV(pipeline, param_grid, cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1, verbose=1)

In [266]:
grid_search1 = GridSearchCV(pipeline1, param_grid1, cv=tscv, scoring='neg_mean_squared_error', n_jobs=-1, verbose=1)

In [267]:
grid_search1.fit(X_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [271]:
rmse1 = np.sqrt(mean_squared_error(y_test, y_pred_test1))
mae1 = mean_absolute_error(y_test, y_pred_test1)
r21 = r2_score(y_test, y_pred_test1)

In [272]:
print(f"RMSE (test set): {rmse1}")
print(f"MAE (test set): {mae1}")
print(f"R-squared (test set): {r21}")

RMSE (test set): 8.0789100596042
MAE (test set): 3.5883246824885244
R-squared (test set): 0.6311370997474


In [270]:
y_pred_test1 = best_model1.predict(X_test)

In [269]:
best_model1 = grid_search1.best_estimator_