In [None]:
pip install gensim spacy torch scikit-learn


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load the dataset (check the path and the name of the extracted file).
# Only the columns used below are read; '?' is treated as a missing measurement.
df = pd.read_csv("household_power_consumption.txt",
                 sep=';',
                 usecols=['Date', 'Time', 'Global_active_power'],
                 na_values=['?'])

# Build the timestamp explicitly instead of relying on the deprecated
# `parse_dates={...}` / `infer_datetime_format` options of read_csv.
# NOTE(review): the format assumes the UCI file layout (dd/mm/yyyy and hh:mm:ss) —
# confirm against your copy of the file; a mismatch raises immediately.
df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'],
                                format='%d/%m/%Y %H:%M:%S')

# For this example we focus on "Global_active_power": keep only the timestamp and
# that column, drop rows with missing values, and sort by date (just in case).
# Chained, non in-place calls avoid mutating a slice of the loaded frame.
df = (
    df[['Datetime', 'Global_active_power']]
    .dropna()
    .sort_values('Datetime')
)

# Aggregate the measurements per calendar day (daily mean), then create the
# target: the "Global_active_power" of the following day.
df_daily = df.resample('D', on='Datetime').mean()
df_daily['Target'] = df_daily['Global_active_power'].shift(-1)
df_daily = df_daily.dropna()  # drops days without data and the last day (no target)

# Normalize the input column only ("Target" is left in its original units).
# The scaler is fitted on the first 80% of the days — the same chronological share
# the split cell uses for training — so validation/test statistics do not leak
# into the scaling. Later values may therefore fall slightly outside [0, 1].
TRAIN_FRACTION = 0.8
n_fit = max(1, int(len(df_daily) * TRAIN_FRACTION))
scaler = MinMaxScaler()
scaler.fit(df_daily[['Global_active_power']].iloc[:n_fit])
df_daily[['Global_active_power']] = scaler.transform(df_daily[['Global_active_power']])

print(df_daily.head())


In [None]:
def create_sequences(data, time_steps=10):
    """Build sliding windows of inputs and their next-step targets.

    Args:
        data: Table-like object exposing 'Global_active_power' and 'Target'
            columns (each with a ``.values`` array). Row ``j`` of 'Target' is
            expected to already hold the value to predict *after* row ``j``.
        time_steps: Number of consecutive rows in each input window (>= 1).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, time_steps)`` and ``y`` has shape ``(n_windows,)`` where
        ``n_windows = len(data) - time_steps + 1``. Both are empty arrays when
        the data is shorter than ``time_steps``.

    Raises:
        ValueError: If ``time_steps`` is smaller than 1.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be >= 1")

    values = data['Global_active_power'].values
    targets = data['Target'].values

    X, y = [], []
    # A window covering rows [start, end) is paired with the target stored on
    # its LAST row (end - 1). 'Target' is already shifted one step ahead, so
    # indexing it at `end` would skip a step and predict two steps ahead.
    n_windows = len(values) - time_steps + 1
    for start in range(max(n_windows, 0)):
        end = start + time_steps
        X.append(values[start:end])
        y.append(targets[end - 1])
    return np.array(X), np.array(y)

# Length of each input window, in rows of the daily table.
time_steps = 10

# Turn the daily table into (window, target) pairs.
X_seq, y_seq = create_sequences(data=df_daily, time_steps=time_steps)

# Expected layout: (samples, time_steps)
print("Shape des séquences :", X_seq.shape)


In [None]:
# Chronological split: first 80% for training, next 10% for validation,
# remaining 10% for testing. Slices are taken in order, without shuffling.
total_samples = len(X_seq)
train_end = int(0.8 * total_samples)
val_end = int(0.9 * total_samples)

X_train, y_train = X_seq[:train_end], y_seq[:train_end]
X_val, y_val = X_seq[train_end:val_end], y_seq[train_end:val_end]
X_test, y_test = X_seq[val_end:], y_seq[val_end:]


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class PowerDataset(Dataset):
    """Tensor-backed dataset pairing input windows with their targets.

    ``X`` is converted to float32 and given a trailing axis of size 1
    (one feature per time step); ``y`` is converted to float32 and given a
    second axis of size 1.
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.float32)
        # (samples, time_steps) -> (samples, time_steps, 1): a single feature
        self.X = features.unsqueeze(-1)
        # (samples,) -> (samples, 1)
        self.y = labels.unsqueeze(1)

    def __len__(self):
        """Number of samples, taken from the first axis of ``X``."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(inputs, target)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Mini-batch size shared by the three loaders.
batch_size = 32

# One dataset per partition, created in train / validation / test order.
train_dataset, val_dataset, test_dataset = (
    PowerDataset(features, labels)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Only the training loader shuffles; validation and test keep their order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(dataset=partition, batch_size=batch_size, shuffle=False)
    for partition in (val_dataset, test_dataset)
)


In [None]:
import torch.nn as nn

class PowerPredictor(nn.Module):
    """GRU regressor: reads a sequence and predicts from its last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability, applied between stacked GRU layers
            (only when ``num_layers > 1``) and on the last GRU output before
            the linear head.
        output_size: Number of values produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size):
        super().__init__()
        # nn.GRU applies its own dropout only *between* stacked layers; with a
        # single layer a non-zero value has no effect and triggers a UserWarning,
        # so it is disabled in that case.
        gru_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(input_size, hidden_size, num_layers,
                          dropout=gru_dropout, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, time_steps, input_size) to (batch, output_size)."""
        out, _ = self.gru(x)
        out = out[:, -1, :]  # GRU output at the last time step of each sequence
        out = self.dropout(out)
        return self.fc(out)

# Model hyperparameters.
input_size = 1  # a single feature (Global_active_power)
hidden_size = 50
num_layers = 2
dropout = 0.2
output_size = 1

# Seed PyTorch's global RNG before the weights are created so that a
# Restart & Run All produces the same initial model every time.
torch.manual_seed(42)

model = PowerPredictor(input_size, hidden_size, num_layers, dropout, output_size)
print(model)


In [None]:
import torch.optim as optim

# Regression objective: mean squared error between predictions and targets.
criterion = nn.MSELoss()

# Adam over all parameters of `model`, with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
from sklearn.metrics import r2_score

# NOTE(review): no training loop is visible between the optimizer definition and
# this cell; if none exists elsewhere, the score below is for an untrained model.

# Switch to inference mode (disables dropout) and collect predictions on the test set.
model.eval()
predictions = []
actuals = []
with torch.no_grad():  # no gradients needed for evaluation
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        # Squeeze only the trailing axis: a bare .squeeze() on a final batch of
        # size 1 yields a 0-d tensor whose .tolist() is a float, which
        # list.extend() cannot iterate over.
        predictions.extend(outputs.squeeze(-1).tolist())
        actuals.extend(y_batch.squeeze(-1).tolist())

r2 = r2_score(actuals, predictions)
print("R² score sur le test set :", r2)


In [None]:
import pickle

# Serialize the `scaler` object to 'minmax_scaler.pkl' in the working directory.
with open('minmax_scaler.pkl', mode='wb') as scaler_file:
    scaler_file.write(pickle.dumps(scaler))
