## Import Library

In [53]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, LSTM
from sklearn.ensemble import AdaBoostRegressor

## Loading and Cleaning Data

In [54]:
file_path = 'Energy_Mgmt.xlsx'
df = pd.read_excel(file_path)

In [55]:
df.shape

(8487, 107)

In [56]:
df.describe()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,A,A.1,A.2,A.3,V,V.1,V.2,...,V.18,V.19,V.20,V.21,V.22,V.23,V.24,V.25,V.26,Unnamed: 106
count,8487,8487,8487,8487,8487,8487,8487,8487,8487,8487,...,8487,8487,8487,8487,8487,8487,8487,8487,8487,8487
unique,8487,8487,97,95,100,111,47,21,21,21,...,2,2,2,2,2,2,2,2,2,3
top,UTC,UTCTimeZone,12:00,57,48,46,0,396,396,396,...,0,0,0,0,0,0,0,0,0,High
freq,1,1,89,766,834,729,8088,1189,1259,1161,...,8486,8486,8486,8486,8486,8486,8486,8486,8486,8485


In [57]:
df.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,A,A.1,A.2,A.3,V,V.1,V.2,...,V.18,V.19,V.20,V.21,V.22,V.23,V.24,V.25,V.26,Unnamed: 106
0,UTC,UTCTimeZone,Time,Avg IL1 (A),Avg IL2 (A),Avg IL3 (A),Avg INe (A),Avg Phase-to-phase voltages U12 (V),Avg Phase-to-phase voltages U23 (V),Avg Phase-to-phase voltages U31 (V),...,Avg OutV1 (V),Max OutV1 (V),Min OutV1 (V),Avg OutV2 (V),Max OutV2 (V),Min OutV2 (V),Avg OutV3 (V),Max OutV3 (V),Min OutV3 (V),Quality
1,4/30/2021 10:14:40 PM +00:00,5/1/2021 12:14:40 AM +02:00,00:15,57,50,50,0,402,401,399,...,0,0,0,0,0,0,0,0,0,High
2,4/30/2021 10:29:40 PM +00:00,5/1/2021 12:29:40 AM +02:00,00:30,57,49,48,0,398,397,396,...,0,0,0,0,0,0,0,0,0,High
3,4/30/2021 10:44:40 PM +00:00,5/1/2021 12:44:40 AM +02:00,00:45,59,50,51,0,397,396,395,...,0,0,0,0,0,0,0,0,0,High
4,4/30/2021 10:59:40 PM +00:00,5/1/2021 12:59:40 AM +02:00,01:00,58,51,50,0,397,396,395,...,0,0,0,0,0,0,0,0,0,High


In [58]:
df.isna().sum()

Unnamed: 0      0
Unnamed: 1      0
Unnamed: 2      0
A               0
A.1             0
               ..
V.23            0
V.24            0
V.25            0
V.26            0
Unnamed: 106    0
Length: 107, dtype: int64

## Data Preprocessing

### Renaming Columns and Dropping Header Row

In [59]:
df.columns = df.iloc[0]

In [60]:
df = df.drop(0).reset_index(drop=True)

In [67]:
print(df.columns)

Index(['UTC ', 'UTCTimeZone ', 'Time ', 'Avg IL1 (A)', 'Avg IL2 (A)',
       'Avg IL3 (A)', 'Avg INe (A)', 'Avg Phase-to-phase voltages U12 (V)',
       'Avg Phase-to-phase voltages U23 (V)',
       'Avg Phase-to-phase voltages U31 (V)',
       ...
       'Avg OutV1 (V)', 'Max OutV1 (V)', 'Min OutV1 (V)', 'Avg OutV2 (V)',
       'Max OutV2 (V)', 'Min OutV2 (V)', 'Avg OutV3 (V)', 'Max OutV3 (V)',
       'Min OutV3 (V)', 'Quality '],
      dtype='object', name=0, length=107)


In [68]:
df = df.loc[:, ['Avg IL1 (A)', 'Avg IL2 (A)', 'Avg IL3 (A)', 
                 'Avg Phase-to-phase voltages U12 (V)', 
                 'Avg Phase-to-phase voltages U23 (V)', 
                 'Avg Phase-to-phase voltages U31 (V)', 
                 'Avg Total active power (kW)', 
                 'Avg Total apparent power (kVA)', 
                 'Avg Total reactive power (kVAR)', 
                 'Total active energy (kWh)']]


In [69]:
df.columns

Index(['Avg IL1 (A)', 'Avg IL2 (A)', 'Avg IL3 (A)',
       'Avg Phase-to-phase voltages U12 (V)',
       'Avg Phase-to-phase voltages U23 (V)',
       'Avg Phase-to-phase voltages U31 (V)', 'Avg Total active power (kW)',
       'Avg Total apparent power (kVA)', 'Avg Total reactive power (kVAR)',
       'Total active energy (kWh)'],
      dtype='object', name=0)

In [70]:
df.head()

Unnamed: 0,Avg IL1 (A),Avg IL2 (A),Avg IL3 (A),Avg Phase-to-phase voltages U12 (V),Avg Phase-to-phase voltages U23 (V),Avg Phase-to-phase voltages U31 (V),Avg Total active power (kW),Avg Total apparent power (kVA),Avg Total reactive power (kVAR),Total active energy (kWh)
0,57,50,50,402,401,399,35,36,-7,8
1,57,49,48,398,397,396,34,35,-6,9
2,59,50,51,397,396,395,35,35,-5,9
3,58,51,50,397,396,395,35,36,-7,8
4,57,49,50,398,397,396,34,35,-7,9


### Handling Missing Values and Cleaning

In [71]:
df = df.apply(pd.to_numeric, errors='coerce')
df = df.dropna(subset=['Total active energy (kWh)']).reset_index(drop=True)
df = df.dropna(axis=1, how='all')

In [72]:
df.head()

Unnamed: 0,Avg IL1 (A),Avg IL2 (A),Avg IL3 (A),Avg Phase-to-phase voltages U12 (V),Avg Phase-to-phase voltages U23 (V),Avg Phase-to-phase voltages U31 (V),Avg Total active power (kW),Avg Total apparent power (kVA),Avg Total reactive power (kVAR),Total active energy (kWh)
0,57,50,50,402,401,399,35,36,-7,8.0
1,57,49,48,398,397,396,34,35,-6,9.0
2,59,50,51,397,396,395,35,35,-5,9.0
3,58,51,50,397,396,395,35,36,-7,8.0
4,57,49,50,398,397,396,34,35,-7,9.0


### Feature-Target Separation

In [73]:
target_column = 'Total active energy (kWh)'
X = df.drop(columns=[target_column])
y = df[target_column]

In [74]:
print(X.shape)
X.head()

(8480, 9)


Unnamed: 0,Avg IL1 (A),Avg IL2 (A),Avg IL3 (A),Avg Phase-to-phase voltages U12 (V),Avg Phase-to-phase voltages U23 (V),Avg Phase-to-phase voltages U31 (V),Avg Total active power (kW),Avg Total apparent power (kVA),Avg Total reactive power (kVAR)
0,57,50,50,402,401,399,35,36,-7
1,57,49,48,398,397,396,34,35,-6
2,59,50,51,397,396,395,35,35,-5
3,58,51,50,397,396,395,35,36,-7
4,57,49,50,398,397,396,34,35,-7


In [75]:
print(y.shape)
y.head()

(8480,)


0    8.0
1    9.0
2    9.0
3    8.0
4    9.0
Name: Total active energy (kWh), dtype: float64

### Train-Test Split

In [76]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [77]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(6784, 9)
(1696, 9)
(6784,)
(1696,)


### Feature Scaling

In [78]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
imputer = SimpleImputer(strategy='mean')
X_train_scaled_cleaned = imputer.fit_transform(X_train_scaled)
X_test_scaled_cleaned = imputer.fit_transform(X_test_scaled)

In [80]:
nan_present_train = np.isnan(X_train_scaled_cleaned).any()
print("NaN in X_train_scaled_cleaned:", nan_present_train)

NaN in X_train_scaled_cleaned: False


In [81]:
nan_present_test = np.isnan(X_test_scaled_cleaned).any()
print("NaN in X_test_scaled_cleaned:", nan_present_test)

NaN in X_test_scaled_cleaned: False


## Model Evalution

### Model Evaluation Function

In [82]:
def evaluate_model(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f"MSE: {mse}, R2: {r2}")

### CNN Model

In [83]:
X_train_cnn = X_train_scaled_cleaned.reshape(X_train_scaled_cleaned.shape[0], X_train_scaled_cleaned.shape[1], 1)
X_test_cnn = X_test_scaled.reshape(X_test_scaled_cleaned.shape[0], X_test_scaled_cleaned.shape[1], 1)

In [84]:
cnn_model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [85]:
cnn_model.compile(optimizer='adam', loss='mse')
cnn_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, verbose=0)
y_pred_cnn = cnn_model.predict(X_test_cnn).flatten()
evaluate_model(y_test, y_pred_cnn)

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
MSE: 0.19403146583126116, R2: 0.9811950519360754


### LSTM Model

In [86]:
X_train_lstm = X_train_scaled_cleaned.reshape(X_train_scaled_cleaned.shape[0], X_train_scaled_cleaned.shape[1], 1)
X_test_lstm = X_test_scaled_cleaned.reshape(X_test_scaled_cleaned.shape[0], X_test_scaled_cleaned.shape[1], 1)

In [87]:
lstm_model = Sequential([
    LSTM(50, activation='relu', input_shape=(X_train_lstm.shape[1], 1)),
    Dense(1)
])

  super().__init__(**kwargs)


In [89]:
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X_train_lstm, y_train, epochs=10, batch_size=32, verbose=0)
y_pred_lstm = lstm_model.predict(X_test_lstm).flatten()
evaluate_model(y_test, y_pred_lstm)

[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
MSE: 0.22298816873035857, R2: 0.9783886550880839


### AdaBoost Model

In [90]:
adaboost_model = AdaBoostRegressor()
adaboost_model.fit(X_train_scaled_cleaned, y_train)
y_pred_adaboost = adaboost_model.predict(X_test_scaled_cleaned)
evaluate_model(y_test, y_pred_adaboost)

MSE: 0.3717620720621717, R2: 0.9639699343231999


## Summary Table

| Model        | MSE               | R2-score         |
|--------------|-------------------|------------------|
| CNN Model    | 0.19403146583126116 | 0.9811950519360754 |
| LSTM Model   | 0.22298816873035857 | 0.9783886550880839 |
| AdaBoost     | 0.3717620720621717  | 0.9639699343231999 |
