In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt


In [2]:
# Charger les données (adapter le chemin si besoin)
df = pd.read_csv("household_power_consumption.txt", sep=";", na_values="?")

# Afficher les premières lignes
print(df.head())

# Vérifier les types de données
print(df.info())


         Date      Time  Global_active_power  Global_reactive_power  Voltage  \
0  16/12/2006  17:24:00                4.216                  0.418   234.84   
1  16/12/2006  17:25:00                5.360                  0.436   233.63   
2  16/12/2006  17:26:00                5.374                  0.498   233.29   
3  16/12/2006  17:27:00                5.388                  0.502   233.74   
4  16/12/2006  17:28:00                3.666                  0.528   235.68   

   Global_intensity  Sub_metering_1  Sub_metering_2  Sub_metering_3  
0              18.4             0.0             1.0            17.0  
1              23.0             0.0             1.0            16.0  
2              23.0             0.0             2.0            17.0  
3              23.0             0.0             1.0            17.0  
4              15.8             0.0             1.0            17.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2075259 entries, 0 to 2075258
Data columns (total

In [3]:
# Convertir la colonne "Date" et "Time" en datetime
df["Datetime"] = pd.to_datetime(df["Date"] + " " + df["Time"], format="%d/%m/%Y %H:%M:%S")

# Supprimer les colonnes inutiles
df.drop(["Date", "Time"], axis=1, inplace=True)

# Convertir toutes les colonnes numériques au format float
df = df.apply(pd.to_numeric, errors="coerce")

# Supprimer les lignes avec des NaN
df.dropna(inplace=True)

# Afficher les infos après nettoyage
print(df.info())


<class 'pandas.core.frame.DataFrame'>
Index: 2049280 entries, 0 to 2075258
Data columns (total 8 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   Global_active_power    float64
 1   Global_reactive_power  float64
 2   Voltage                float64
 3   Global_intensity       float64
 4   Sub_metering_1         float64
 5   Sub_metering_2         float64
 6   Sub_metering_3         float64
 7   Datetime               int64  
dtypes: float64(7), int64(1)
memory usage: 140.7 MB
None


In [4]:
# Sélectionner la variable cible
target_col = "Global_active_power"

# Normaliser les données
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df)

# Convertir en DataFrame
df_scaled = pd.DataFrame(df_scaled, columns=df.columns)

# Définir X (features) et y (target)
X = df_scaled.drop(columns=[target_col]).values
y = df_scaled[target_col].values

# Séparer en train et test (80% train, 20% test)
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


In [5]:
# Convertir les données en tensors PyTorch
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Créer les DataLoaders
batch_size = 64
train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset = TensorDataset(X_test_t, y_test_t)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [6]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size=50, num_layers=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x.unsqueeze(1))
        return self.fc(lstm_out[:, -1, :])

# Initialiser le modèle
input_size = X_train.shape[1]
model = LSTMModel(input_size)


In [7]:
# Définir l'optimiseur et la fonction de perte
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Boucle d'entraînement
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1/10, Loss: 0.0001
Epoch 2/10, Loss: 0.0000
Epoch 3/10, Loss: 0.0000
Epoch 4/10, Loss: 0.0000
Epoch 5/10, Loss: 0.0000
Epoch 6/10, Loss: 0.0000
Epoch 7/10, Loss: 0.0000
Epoch 8/10, Loss: 0.0000
Epoch 9/10, Loss: 0.0000
Epoch 10/10, Loss: 0.0000


In [8]:
from sklearn.metrics import r2_score

model.eval()
with torch.no_grad():
    y_pred = model(X_test_t.unsqueeze(1))
    y_pred_np = y_pred.numpy().squeeze()
    y_test_np = y_test.numpy().squeeze()

    r2 = r2_score(y_test_np, y_pred_np)

print(f"R² Score: {r2:.4f}")


ValueError: LSTM: Expected input to be 2D or 3D, got 4D instead

In [None]:
def predict_next_day(model, last_day_data):
    model.eval()
    last_day_tensor = torch.tensor(last_day_data, dtype=torch.float32).unsqueeze(0)
    with torch.no_grad():
        prediction = model(last_day_tensor)
    return prediction.item()

# Exemple d'utilisation
last_day = X_test[-1]  # Dernier jour de test
next_day_pred = predict_next_day(model, last_day)

# Inverser la normalisation
next_day_pred_actual = scaler.inverse_transform([[0] * (X.shape[1]) + [next_day_pred]])[0][-1]
print(f"Prédiction du prochain jour: {next_day_pred_actual:.4f}")
