In [79]:
import kagglehub
import numpy as np
import pandas as pd
import keras
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from keras.layers import Input, Dense, Dropout, LSTM, GRU, SimpleRNN
from keras.models import Sequential

# Fetch the newest published version of the dataset via kagglehub.
DATASET_HANDLE = "fedesoriano/electric-power-consumption"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Path to dataset files: /home/aqr/.cache/kagglehub/datasets/fedesoriano/electric-power-consumption/versions/1


In [80]:
# Load the raw CSV from the downloaded dataset directory and preview the first rows.
csv_path = f"{path}/powerconsumption.csv"
_df = pd.read_csv(csv_path)
_df.head()

Unnamed: 0,Datetime,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows,PowerConsumption_Zone1,PowerConsumption_Zone2,PowerConsumption_Zone3
0,1/1/2017 0:00,6.559,73.8,0.083,0.051,0.119,34055.6962,16128.87538,20240.96386
1,1/1/2017 0:10,6.414,74.5,0.083,0.07,0.085,29814.68354,19375.07599,20131.08434
2,1/1/2017 0:20,6.313,74.5,0.08,0.062,0.1,29128.10127,19006.68693,19668.43373
3,1/1/2017 0:30,6.121,75.0,0.083,0.091,0.096,28228.86076,18361.09422,18899.27711
4,1/1/2017 0:40,5.921,75.7,0.081,0.048,0.085,27335.6962,17872.34043,18442.40964


## Understand Data Quality & Structure


In [81]:
# Summarise column dtypes and non-null counts to check for missing values.
_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52416 entries, 0 to 52415
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Datetime                52416 non-null  object 
 1   Temperature             52416 non-null  float64
 2   Humidity                52416 non-null  float64
 3   WindSpeed               52416 non-null  float64
 4   GeneralDiffuseFlows     52416 non-null  float64
 5   DiffuseFlows            52416 non-null  float64
 6   PowerConsumption_Zone1  52416 non-null  float64
 7   PowerConsumption_Zone2  52416 non-null  float64
 8   PowerConsumption_Zone3  52416 non-null  float64
dtypes: float64(8), object(1)
memory usage: 3.6+ MB


In [82]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
_df.describe()

Unnamed: 0,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows,PowerConsumption_Zone1,PowerConsumption_Zone2,PowerConsumption_Zone3
count,52416.0,52416.0,52416.0,52416.0,52416.0,52416.0,52416.0,52416.0
mean,18.810024,68.259518,1.959489,182.696614,75.028022,32344.970564,21042.509082,17835.406218
std,5.815476,15.551177,2.348862,264.40096,124.210949,7130.562564,5201.465892,6622.165099
min,3.247,11.34,0.05,0.004,0.011,13895.6962,8560.081466,5935.17407
25%,14.41,58.31,0.078,0.062,0.122,26310.668692,16980.766032,13129.32663
50%,18.78,69.86,0.086,5.0355,4.456,32265.92034,20823.168405,16415.11747
75%,22.89,81.4,4.915,319.6,101.0,37309.018185,24713.71752,21624.10042
max,40.01,94.8,6.483,1163.0,936.0,52204.39512,37408.86076,47598.32636


We will work with the average power consumption of the three zones (computed below) rather than a single zone.


### Computing Power Consumption Columns

In [83]:
# Collapse the three per-zone readings into one target: their arithmetic mean.
zone_columns = [
    "PowerConsumption_Zone1",
    "PowerConsumption_Zone2",
    "PowerConsumption_Zone3",
]
zone_total = _df[zone_columns[0]] + _df[zone_columns[1]] + _df[zone_columns[2]]
_df["PowerConsumptionAvg"] = zone_total / 3

# Keep the averaged target only; the per-zone columns are dropped from the
# working frame (the raw frame `_df` still holds them).
ec_df = _df.drop(columns=zone_columns)
ec_df.head(10)

Unnamed: 0,Datetime,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows,PowerConsumptionAvg
0,1/1/2017 0:00,6.559,73.8,0.083,0.051,0.119,23475.17848
1,1/1/2017 0:10,6.414,74.5,0.083,0.07,0.085,23106.947957
2,1/1/2017 0:20,6.313,74.5,0.08,0.062,0.1,22601.073977
3,1/1/2017 0:30,6.121,75.0,0.083,0.091,0.096,21829.74403
4,1/1/2017 0:40,5.921,75.7,0.081,0.048,0.085,21216.815423
5,1/1/2017 0:50,5.853,76.9,0.081,0.059,0.108,20723.781327
6,1/1/2017 1:00,5.641,77.7,0.08,0.048,0.096,20312.45355
7,1/1/2017 1:10,5.496,78.2,0.085,0.055,0.093,19855.583747
8,1/1/2017 1:20,5.678,78.1,0.081,0.066,0.141,19343.53977
9,1/1/2017 1:30,5.491,77.3,0.082,0.062,0.111,19004.306757


### Sort by Datetime

In [84]:
# Parse the Datetime strings into real timestamps so that sorting is
# chronological rather than lexicographic.
ec_df["Datetime"] = pd.to_datetime(ec_df["Datetime"])

# Number of non-missing timestamps. `.sum()` counts the True values of the
# mask; `.count()` on the boolean mask would return the total row count no
# matter how many entries were missing, hiding any parsing problem.
print(ec_df["Datetime"].notna().sum())

# Reassign instead of sorting with `inplace=True`; the result is the same
# frame ordered by ascending Datetime.
ec_df = ec_df.sort_values("Datetime", ascending=True)
print(ec_df.head(10))

52416
             Datetime  Temperature  ...  DiffuseFlows  PowerConsumptionAvg
0 2017-01-01 00:00:00        6.559  ...         0.119         23475.178480
1 2017-01-01 00:10:00        6.414  ...         0.085         23106.947957
2 2017-01-01 00:20:00        6.313  ...         0.100         22601.073977
3 2017-01-01 00:30:00        6.121  ...         0.096         21829.744030
4 2017-01-01 00:40:00        5.921  ...         0.085         21216.815423
5 2017-01-01 00:50:00        5.853  ...         0.108         20723.781327
6 2017-01-01 01:00:00        5.641  ...         0.096         20312.453550
7 2017-01-01 01:10:00        5.496  ...         0.093         19855.583747
8 2017-01-01 01:20:00        5.678  ...         0.141         19343.539770
9 2017-01-01 01:30:00        5.491  ...         0.111         19004.306757

[10 rows x 7 columns]


### Remove Datetime Column

In [85]:
# Drop the timestamp so that only numeric columns remain, then inspect their
# pairwise correlations.
ecdf_timeindex = ec_df.drop(columns=["Datetime"])
ecdf_timeindex.corr()

Unnamed: 0,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows,PowerConsumptionAvg
Temperature,1.0,-0.460243,0.477109,0.460294,0.196522,0.488238
Humidity,-0.460243,1.0,-0.135853,-0.468138,-0.256886,-0.299059
WindSpeed,0.477109,-0.135853,1.0,0.133733,-0.000972,0.221706
GeneralDiffuseFlows,0.460294,-0.468138,0.133733,1.0,0.564718,0.150368
DiffuseFlows,0.196522,-0.256886,-0.000972,0.564718,1.0,0.032068
PowerConsumptionAvg,0.488238,-0.299059,0.221706,0.150368,0.032068,1.0


## Preprocessing Steps


In [86]:
# Separate the predictors from the target and hold out 15% of the rows for testing.
# NOTE(review): the rows are time-ordered 10-minute samples and this split
# shuffles them, so neighbouring (highly similar) samples can land in both
# sets — consider a chronological split if an honest forecast error is needed.
target_column = "PowerConsumptionAvg"
x = ecdf_timeindex.drop(columns=target_column).values
y = ecdf_timeindex[target_column].values
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.15,
    random_state=42,
)
print(f"Train set shape: {xtrain.shape}")
print(f"Test set shape: {xtest.shape}")

Train set shape: (44553, 5)
Test set shape: (7863, 5)


### Scaling

In [87]:
# Standardize the features. The scaler is fitted on the training split only,
# so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(xtrain)

# Apply the same transform to both splits and cast to float32.
xtrain_scaled = scaler.transform(xtrain).astype(np.float32)
xtest_scaled = scaler.transform(xtest).astype(np.float32)

print(xtest_scaled[:5])

[[ 0.67960966 -2.4792528  -0.797708    1.6307168   4.750212  ]
 [-0.49832723 -0.8882141  -0.797708   -0.6213205  -0.44979367]
 [-0.6429861  -0.7085703  -0.8015402   0.80411136 -0.2780338 ]
 [-1.1010727   0.26176372 -0.7994112  -0.6932869  -0.6044098 ]
 [-0.72909266  0.7961879  -0.799837   -0.693453   -0.60395867]]


### Target Scaling

In [88]:
# Rescale the target into the range [0, 10]. The scaler expects 2-D input, so
# the 1-D target vectors are reshaped into single-column arrays first. The
# scaler is fitted on the training targets only and reused for the test targets.
ytrain_column = ytrain.reshape(-1, 1)
ytest_column = ytest.reshape(-1, 1)

target_scaler = MinMaxScaler(feature_range=(0, 10))
ytrain_scaled = target_scaler.fit_transform(ytrain_column)
ytest_scaled = target_scaler.transform(ytest_column)

# Training With DeepModel

In [None]:
from keras.activations import relu, leaky_relu, elu, gelu, selu, tanh
import random, string


class DeepModel:
    """Feed-forward (Dense) regression network bundled with its training setup.

    The network is ``Input -> Dense x hidden_layer_count -> Dense(1)`` and is
    compiled with mean-squared-error loss. Training uses two callbacks that
    monitor ``val_loss``: early stopping and learning-rate reduction on plateau.
    """

    def __init__(
        self,
        input_shape,
        hidden_layer_count=4,
        # Tuple rather than list: default arguments are created once and shared
        # between calls, so they should be immutable.
        units_per_layer=(128, 128, 64, 32),
        activation_per_layer=(leaky_relu, leaky_relu, leaky_relu, relu),
        optimizer="adam",
        save_model=False,
    ):
        """Build and compile the network.

        Args:
            input_shape: Shape handed to the Keras ``Input`` layer,
                e.g. ``(n_features,)``.
            hidden_layer_count: Number of hidden ``Dense`` layers.
            units_per_layer: Sequence with the unit count of each hidden layer;
                its length must equal ``hidden_layer_count``.
            activation_per_layer: Sequence with the activation of each hidden
                layer; its length must equal ``hidden_layer_count``.
            optimizer: Optimizer name or instance passed to ``compile``.
            save_model: When True, ``learn`` saves the trained model to
                ``deep_model_<random name>.keras``.

        Raises:
            ValueError: If ``units_per_layer`` or ``activation_per_layer`` does
                not have exactly ``hidden_layer_count`` entries.
        """
        if len(units_per_layer) != hidden_layer_count:
            raise ValueError("Length of units_per_layer must match hidden_layer_count")
        if len(activation_per_layer) != hidden_layer_count:
            raise ValueError(
                "Length of activation_per_layer must match hidden_layer_count"
            )

        self.model = Sequential()
        self.model.add(Input(shape=input_shape))
        for units, activation in zip(units_per_layer, activation_per_layer):
            self.model.add(Dense(units=units, activation=activation))
        # Single linear output unit for the regression target.
        self.model.add(Dense(1))
        self.model.compile(optimizer=optimizer, loss="mse")
        self.callbacks = [
            # restore_best_weights: without it the model keeps the weights of
            # the last epoch, i.e. `patience` epochs after the best val_loss.
            keras.callbacks.EarlyStopping(
                monitor="val_loss",
                patience=10,
                verbose=1,
                restore_best_weights=True,
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor="val_loss", factor=0.2, patience=5, verbose=1
            ),
        ]
        # Random 6-character suffix used in the file name of the saved model.
        self.modelname = "".join(
            random.choices(string.ascii_letters + string.digits, k=6)
        )
        self.save_model = save_model

    def learn(self, x_train, y_train, validation_split=0.2, epochs=100, batch_size=128):
        """Fit the model and optionally save it.

        Args:
            x_train: Training features (should already be scaled).
            y_train: Training targets (should already be scaled).
            validation_split: Fraction of the training data held out by Keras
                for validation; the callbacks monitor ``val_loss`` on it.
            epochs: Maximum number of epochs (early stopping may end sooner).
            batch_size: Mini-batch size.

        Returns:
            The value returned by ``self.model.fit`` (the training history).
        """
        modelhistory = self.model.fit(
            x_train,
            y_train,
            validation_split=validation_split,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=self.callbacks,
        )
        if self.save_model:
            self.model.save(f"deep_model_{self.modelname}.keras")
        return modelhistory

    def predict(self, x):
        """Return the model's predictions for ``x`` (``x`` should already be scaled)."""
        return self.model.predict(x)

    def evaluate(self, x_test, y_test):
        """Return the model's loss on ``x_test``/``y_test`` (both must be scaled)."""
        return self.model.evaluate(x_test, y_test)

In [99]:
# Build the dense network with one input per feature column, using RMSprop
# as the optimizer, and train it on the scaled training split.
n_features = xtrain_scaled.shape[1]
deep_one = DeepModel(
    input_shape=(n_features,),
    optimizer=keras.optimizers.RMSprop(),
)
deep_one_history = deep_one.learn(xtrain_scaled, ytrain_scaled)

Epoch 1/100
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2.2917 - val_loss: 2.1547 - learning_rate: 0.0010
Epoch 2/100
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 2.0355 - val_loss: 1.9694 - learning_rate: 0.0010
Epoch 3/100
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.9973 - val_loss: 2.0693 - learning_rate: 0.0010
Epoch 4/100
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.9778 - val_loss: 2.0454 - learning_rate: 0.0010
Epoch 5/100
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.9591 - val_loss: 1.9448 - learning_rate: 0.0010
Epoch 6/100
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.9451 - val_loss: 1.9056 - learning_rate: 0.0010
Epoch 7/100
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 1.9381 - val_loss: 1.9338 - learnin

# Training With Recurrent Neural Networks

In [None]:
# rnn_mode = {'lstm':0, 'gru':1, 'simple_rnn':2}

# rnn_model = seq