In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

In [5]:
# Load the dataset from its CSV file (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="eco2mix/eco2mix_final.csv")

In [6]:
# Convert the 'Date' column to pandas datetime values.
df['Date'] = df['Date'].pipe(pd.to_datetime)

In [7]:
# Derive extra calendar features from 'Date'.
# The right-hand side is evaluated first, then the columns are created left to
# right, so they are added in the order Year, Month, Day, Weekday.
# Weekday follows the pandas convention: Monday = 0 ... Sunday = 6.
df['Year'], df['Month'], df['Day'], df['Weekday'] = (
    df['Date'].dt.year,
    df['Date'].dt.month,
    df['Date'].dt.day,
    df['Date'].dt.weekday,
)

In [9]:
# One-hot encoder used for the categorical columns:
#   - drop='first' omits the first category of each encoded feature,
#   - sparse_output=False makes the encoder return a dense array.
encoder = OneHotEncoder(
    sparse_output=False,
    drop='first',
)

In [12]:
# One-hot encode the 'saison' column, then wrap the result in a DataFrame that
# shares df's index and is labelled with the encoder's generated feature names.
encoded_saison = encoder.fit_transform(df.loc[:, ['saison']])
encoded_saison_df = pd.DataFrame(
    data=encoded_saison,
    index=df.index,
    columns=encoder.get_feature_names_out(['saison']),
)

In [13]:
# One-hot encode the 'Type de jour TEMPO' column into an index-aligned DataFrame.
# NOTE: this calls fit_transform on the same `encoder` object again, so after
# this cell the encoder's fitted state describes 'Type de jour TEMPO' only.
encoded_temp = encoder.fit_transform(df.loc[:, ['Type de jour TEMPO']])
encoded_temp_df = pd.DataFrame(
    data=encoded_temp,
    index=df.index,
    columns=encoder.get_feature_names_out(['Type de jour TEMPO']),
)

In [14]:
# Append the one-hot encoded columns to the DataFrame.
# Columns of df that already carry one of the encoded names are dropped first,
# so re-running this cell replaces the encoded columns instead of appending a
# duplicate copy of each one. On a first run nothing is dropped
# (errors='ignore') and the result is df followed by the 'saison' columns and
# then the 'Type de jour TEMPO' columns.
encoded_df = pd.concat([encoded_saison_df, encoded_temp_df], axis=1)
df = pd.concat(
    [df.drop(columns=encoded_df.columns, errors='ignore'), encoded_df],
    axis=1,
)

In [15]:
# Cast the boolean flag columns to integers (True -> 1, False -> 0).
# Assigning a DataFrame to a list of existing columns sets them one by one,
# so each column takes the integer dtype of the cast result.
df[['weekend', 'jour_ferie']] = df[['weekend', 'jour_ferie']].astype(int)

In [16]:
# Sanity-check the transformations by printing the first five rows.
print(df.head(n=5))

        Date  Consommation      tmin       tmax  weekend  jour_ferie saison  \
0 2018-01-01  56302.041667  6.501796  11.188690        0           1  Hiver   
1 2018-01-02  63102.958333  6.229518  12.299398        0           0  Hiver   
2 2018-01-03  63761.270833  8.244848  14.868485        0           0  Hiver   
3 2018-01-04  63169.187500  9.100599  14.828743        0           0  Hiver   
4 2018-01-05  62318.708333  7.344311  13.331548        0           0  Hiver   

  Type de jour TEMPO  Year  Month  Day  Weekday  BLANC  BLEU  ROUGE  \
0               BLEU  2018      1    1        0    1.0   0.0    0.0   
1               BLEU  2018      1    2        1    1.0   0.0    0.0   
2               BLEU  2018      1    3        2    1.0   0.0    0.0   
3               BLEU  2018      1    4        3    1.0   0.0    0.0   
4               BLEU  2018      1    5        4    1.0   0.0    0.0   

   saison_Hiver  saison_Printemps  saison_Été  Type de jour TEMPO_BLEU  \
0           1.0         