<a href="https://colab.research.google.com/github/MileneBedouhene/Smart-House-Energy-Prediction-Using-LSTM/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np

In [None]:
# Read the training CSV into a DataFrame and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='train_energy.csv')
data.head(n=5)

In [None]:
# Parse the 'DateTime' column into pandas datetimes (the feature cell below
# relies on the .dt accessor), then print the column/dtype summary.
data['DateTime'] = data['DateTime'].pipe(pd.to_datetime)
data.info()

In [None]:
# Derive calendar features from the parsed timestamp.
data['Hour'] = data['DateTime'].dt.hour
data['DayOfWeek'] = data['DateTime'].dt.dayofweek  # Monday=0 ... Sunday=6
data['Month'] = data['DateTime'].dt.month
# Weekend flag: 1 for Saturday/Sunday (day index >= 5), otherwise 0.
data['IsWeekend'] = data['DayOfWeek'].ge(5).astype('int64')
data.info()

In [None]:
# Fit a min-max scaler on the energy column so it is mapped into [0, 1].
# The fitted scaler is kept because later cells call inverse_transform on it.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(data[['EnergyConsumption(kWh)']])
scaled_data = scaler.transform(data[['EnergyConsumption(kWh)']])

In [None]:
# Show the first five scaled energy values as a quick sanity check.
print(scaled_data[:5])

In [None]:
def create_sequences(data, sequence_length):
    """Cut a 2-D array into sliding windows with a next-step target.

    Column 0 of ``data`` is treated as the target and columns 1 onward as
    the features. For every start row ``s`` the window is rows
    ``s .. s + sequence_length - 1`` of the feature columns, and the target
    is column 0 of row ``s + sequence_length``.

    Args:
        data: 2-D NumPy array, one row per time step.
        sequence_length: Number of consecutive rows in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the stacked feature windows and
        the matching targets. Both are empty when ``data`` has no more
        than ``sequence_length`` rows.
    """
    window_starts = range(len(data) - sequence_length)
    feature_windows = [
        data[start:start + sequence_length, 1:] for start in window_starts
    ]
    targets = [data[start + sequence_length, 0] for start in window_starts]
    return np.array(feature_windows), np.array(targets)


In [None]:
# Number of past hourly rows the model sees for each prediction.
sequence_length = 24

# create_sequences() takes the target from column 0 and the features from
# columns 1 onward, so the scaled energy consumption must be the first column.
# Without it the model would be trained to predict 'Hour' instead of energy.
# The feature columns are the same three, in the same order, that the test
# section feeds to the model (Hour, DayOfWeek, Month).
data_values = np.hstack([
    scaled_data,
    data[['Hour', 'DayOfWeek', 'Month']].values,
])
X, y = create_sequences(data_values, sequence_length)

# Ordered 80/20 split (no shuffling): the last 20% of the windows, in row
# order, are held out for validation.
split_index = int(len(X) * 0.8)
X_train, X_val = X[:split_index], X[split_index:]
y_train, y_val = y[:split_index], y[split_index:]

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [None]:
# Two stacked LSTM layers with dropout, ending in a single-unit regression head.
model = Sequential()
# The first LSTM returns the full sequence so the second LSTM can consume it.
# Input shape is (window length, number of features), read from X_train.
model.add(LSTM(64, return_sequences=True, input_shape=X_train.shape[1:]))
model.add(Dropout(0.2))
# The second LSTM returns only its final output.
model.add(LSTM(32, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()


In [None]:
# Train the model and keep the per-epoch loss history for later inspection.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=50,
    shuffle=False,  # IMPORTANT: feed the windows in their original order
)


In [None]:
# Score the model on the held-out windows using the compiled loss (MSE).
val_loss = model.evaluate(x=X_val, y=y_val)
print(f"Validation Loss: {val_loss}")

# Map the ground truth and the predictions back through the fitted scaler.
# inverse_transform expects 2-D input, so the 1-D targets become a column.
y_val_rescaled = scaler.inverse_transform(y_val[:, np.newaxis])

y_pred = model.predict(X_val)
y_pred_rescaled = scaler.inverse_transform(y_pred)


In [None]:
import matplotlib.pyplot as plt

# Overlay the rescaled true values and predictions for the validation windows.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_val_rescaled, label='Vraies Valeurs')
ax.plot(y_pred_rescaled, label='Prédictions')
ax.legend()
plt.show()


In [None]:
# Save the trained model to 'lstm_energy_model.h5'; the test section reloads
# it from this file.
model.save('lstm_energy_model.h5')

# Test

In [None]:
# Read the test CSV and parse its 'DateTime' column into pandas datetimes.
test_data = pd.read_csv(filepath_or_buffer='test_energy.csv')
test_data['DateTime'] = test_data['DateTime'].pipe(pd.to_datetime)

In [None]:
# Derive the same calendar features for the test set as for the training set.
test_data['Hour'] = test_data['DateTime'].dt.hour
test_data['DayOfWeek'] = test_data['DateTime'].dt.dayofweek  # Monday=0 ... Sunday=6
test_data['Month'] = test_data['DateTime'].dt.month
# Weekend flag: 1 for Saturday/Sunday (day index >= 5), otherwise 0.
test_data['IsWeekend'] = test_data['DayOfWeek'].ge(5).astype('int64')

In [None]:
# Feature matrix for the test set (no target column is available here).
# NOTE(review): the columns and their order must match the features the model
# was trained on - confirm against the training sequence cell.
test_data_values = test_data[['Hour', 'DayOfWeek', 'Month']].to_numpy()

In [None]:
def create_test_sequences(data, sequence_length):
    """Cut a 2-D feature array into overlapping fixed-length windows.

    Every column of ``data`` is kept as a feature. Window ``s`` covers rows
    ``s .. s + sequence_length - 1``. There are ``len(data) -
    sequence_length`` windows, so the window ending on the last row is not
    produced.

    Args:
        data: 2-D NumPy array, one row per time step.
        sequence_length: Number of consecutive rows in each window.

    Returns:
        NumPy array of the stacked windows; empty when ``data`` has no more
        than ``sequence_length`` rows.
    """
    window_count = len(data) - sequence_length
    return np.array([
        data[start:start + sequence_length, :] for start in range(window_count)
    ])


# Same 24-row look-back window as used for the training sequences.
sequence_length = 24
# Slice the test feature matrix into overlapping windows for the model.
X_test = create_test_sequences(test_data_values, sequence_length)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Reload the model that was saved after training.
model = load_model('lstm_energy_model.h5')

print(f"Forme de X_test : {X_test.shape}")

y_test_pred = model.predict(X_test)

# Map the predictions back through the fitted scaler, as the validation cell
# does, so the exported 'EnergyConsumption(kWh)' column is in the original
# units rather than raw model output.
y_test_pred_rescaled = scaler.inverse_transform(y_test_pred)

# Window i covers rows i .. i + sequence_length - 1, so each prediction is
# paired with the timestamp of the row right after its window. The first
# `sequence_length` timestamps therefore have no prediction.
predictions_df = pd.DataFrame({
    'DateTime': test_data['DateTime'].iloc[sequence_length:].values,
    'EnergyConsumption(kWh)': y_test_pred_rescaled.flatten()
})

print(predictions_df.head())

predictions_df.to_csv('predictions.csv', index=False)

In [None]:
# Write the predictions to 'predictions.csv' without the index column.
# NOTE(review): the previous cell already performs this exact write, so this
# cell only overwrites the file with the same content.
predictions_df.to_csv('predictions.csv', index=False)

In [None]:
# Load the exported predictions from the CSV file.
input_file = "predictions.csv"
df = pd.read_csv(input_file)

# Build 24 hourly rows for the fixed range 2017-01-01 01:00 .. 2017-01-02 00:00
# and fill each with the mean of the predicted consumption.
# "h" is the current hourly alias; the uppercase "H" is deprecated since
# pandas 2.2.
new_rows = pd.DataFrame({
    "DateTime": pd.date_range("2017-01-01 01:00:00", "2017-01-02 00:00:00", freq="h"),
    "EnergyConsumption(kWh)": df["EnergyConsumption(kWh)"].mean(),
})

# Put the filler rows in front of the existing predictions.
df = pd.concat([new_rows, df], ignore_index=True)

# Save the combined table to a new CSV file.
output_file = "energy_consumption_updated.csv"
df.to_csv(output_file, index=False)

# Show the updated DataFrame.
print(df)