In [1]:
#%% 
# !pip install openpyxl
# !pip install px
# !pip install gpd
# !pip install streamlit
# !pip install pydeck
# !pip install geopandas

#%% 
import streamlit as st
import pandas as pd
import plotly.express as px
import geopandas as gpd
import pydeck as pdk
import plotly.express as px
import tensorflow as tf
import numpy as np
from sklearn import preprocessing

# %% 
# 1. Lire le fichier csv
data = pd.read_excel("consommation-historique-mrc-11mars2024.xlsx")

# %%
# 2. Afficher le contenu pour l'analyser
display(data.head(25))
print(f"Les MRC presents : {data['MRC_TXT'].unique()}")

mask1 = data['MRC_TXT'] == 'Rouville' # focus sur Rouville
mask2 = data['SECTEUR'] == 'AGRICOLE' # focus sur Besoin Agricole
Rouville = data[mask1 & mask2]
Rouville.shape

display(Rouville.shape)
display(Rouville.head())

#%%
# 3. Retire les colonnes qui presentent des donnees non-necessaires ou aberrantes (Features)
Rouville['Simulation '] = np.random.randint(1, 101, size=96)

df = Rouville.copy()

df.drop(columns=['REGION_ADM_QC_TXT','MRC_TXT','ANNEE_MOIS', 'SECTEUR'], axis=1, inplace=True)
display(df.head())

df_open = df[['Total (kWh)']].to_numpy()

# Normalisation ici....
df_restant = df.drop(columns=['Total (kWh)'], axis=1)
scaler = preprocessing.StandardScaler()
df_restant = scaler.fit_transform(df_restant)

df = np.concatenate([df_open , df_restant], axis=1)


df_train = df[ 0 : int(0.8 * df.shape[0]) , : ]
df_test = df[  int(0.8 * df.shape[0]) :  , : ]

print(df_train.shape)
print(df_test.shape)
# %%
# 4. Le generateur
class Generateur(tf.keras.utils.Sequence):

    def __init__(self, dataset, batch_size=8, window_size=7):

        # Normalise le DATASET

        self.X , self.y = self.slide_window(dataset, window_size)

        self.batch_size = batch_size


    def __len__(self):
        
        return self.X.shape[0] // self.batch_size

    def __getitem__(self, idx):

        batch_x = self.X[ self.batch_size * idx : (idx+1) * self.batch_size , : ]
        batch_y = self.y[ idx * self.batch_size : (idx + 1) * self.batch_size]

        return np.asarray(batch_x).astype(np.float32) , np.asarray(batch_y).astype(np.float32)

    def slide_window(self, dataset, window_size=7):

        X, y = [], []

        # Trouver toutes les sequences de 'window_size' donnees, dans le dataset,
        # ainsi que leur 'y' (target)
        for i in range( dataset.shape[0] - window_size ):

            X.append( dataset[ i : i + window_size, : ] )
            y.append( dataset[i + window_size, 0] )

        return np.asarray(X).astype(np.float32) , np.asarray(y).astype(np.float32)

# %%
# 5. Creation du modele LSTM
def create_model(input_shape, output_shape):

    # 1. Definir une couche d'entrée (Input)
    input = tf.keras.Input(shape=input_shape)

    output = tf.keras.layers.Dense(128, activation="relu")(input) # Hidden Layer 1
    output = tf.keras.layers.Dense(256, activation="relu")(output) # Hidden Layer 2

    # Cellule(s) memoire(s)
    output = tf.keras.layers.LSTM(512)(output)

    # Descendre l'entonnoir, en diminuant le nombre de neurones par couche cachée
    output = tf.keras.layers.Dense(256, activation="relu")(output) # Hidden Layer 3
    output = tf.keras.layers.Dense(128, activation="relu")(output) # Hidden Layer 4

    # Creer notre cellule de sortie
    output = tf.keras.layers.Dense(output_shape, activation="relu")(output)

    # Retourner le modele qu'on vient de créer
    return tf.keras.Model(inputs = input , outputs = output)


# Creer une instance du modele
model = create_model(
    input_shape = (7, 2),
    output_shape = 1)

model.summary()

# %%
# 6. Entrainnement 
model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss = "mean_squared_error",
    metrics = [ "mean_absolute_error" ]
)


# Iterer pour faire l'entrainement
model.fit(
    x = Generateur(df_train, batch_size=4),
    epochs = 100,
    validation_data = Generateur(df_test),
)
# %%
# Note: On peut rajouter les predicteurs a volonte...

Unnamed: 0,REGION_ADM_QC_TXT,MRC_TXT,ANNEE_MOIS,SECTEUR,Total (kWh)
0,Abitibi-Témiscamingue,,2016-01,AGRICOLE,360404.0
1,Abitibi-Témiscamingue,,2016-01,COMMERCIAL,15892352.0
2,Abitibi-Témiscamingue,,2016-01,INDUSTRIEL,68781139.7
3,Abitibi-Témiscamingue,,2016-01,INSTITUTIONNEL,5586289.0
4,Abitibi-Témiscamingue,,2016-01,RÉSIDENTIEL,53487411.0
5,Abitibi-Témiscamingue,,2016-02,AGRICOLE,340645.0
6,Abitibi-Témiscamingue,,2016-02,COMMERCIAL,14879018.0
7,Abitibi-Témiscamingue,,2016-02,INDUSTRIEL,64569059.6
8,Abitibi-Témiscamingue,,2016-02,INSTITUTIONNEL,5369762.0
9,Abitibi-Témiscamingue,,2016-02,RÉSIDENTIEL,46508619.0


Les MRC presents : [nan 'Abitibi' 'Abitibi-Ouest' "La Vallée-de-l'Or" 'Témiscamingue'
 'Kamouraska' 'La Matanie' 'La Matapédia' 'La Mitis' 'Les Basques'
 'Rimouski-Neigette' 'Rivière-du-Loup' 'Témiscouata' 'Charlevoix'
 'Charlevoix-Est' "L'Île-d'Orléans" 'La Côte-de-Beaupré'
 'La Jacques-Cartier' 'Portneuf' 'Arthabaska' 'Bécancour' 'Drummond'
 "L'Érable" 'Nicolet-Yamaska' 'Beauce-Centre' 'Beauce-Sartigan'
 'Bellechasse' "L'Islet" 'La Nouvelle-Beauce' 'Les Appalaches'
 'Les Etchemins' 'Lotbinière' 'Montmagny' 'Caniapiscau'
 'La Haute-Côte-Nord' 'Le Golfe-du-Saint-Laurent' 'Manicouagan' 'Minganie'
 'Sept-Rivières' 'Brome-Missisquoi' 'Coaticook' 'La Haute-Yamaska'
 'Le Granit' 'Le Haut-Saint-François' 'Le Val-Saint-François'
 'Les Sources' 'Memphrémagog' 'Avignon' 'Bonaventure' 'La Côte-de-Gaspé'
 'La Haute-Gaspésie' 'Le Rocher-Percé' "D'Autray" 'Joliette'
 "L'Assomption" 'Les Moulins' 'Matawinie' 'Montcalm' 'Antoine-Labelle'
 'Argenteuil' 'Deux-Montagnes' 'La Rivière-du-Nord' 'Les Lauren

(96, 5)

Unnamed: 0,REGION_ADM_QC_TXT,MRC_TXT,ANNEE_MOIS,SECTEUR,Total (kWh)
43086,Montérégie,Rouville,2016-01,AGRICOLE,3740925.0
43091,Montérégie,Rouville,2016-02,AGRICOLE,3437089.0
43096,Montérégie,Rouville,2016-03,AGRICOLE,3454152.0
43101,Montérégie,Rouville,2016-04,AGRICOLE,3140963.0
43106,Montérégie,Rouville,2016-05,AGRICOLE,2997899.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Rouville['Simulation '] = np.random.randint(1, 101, size=96)


Unnamed: 0,Total (kWh),Simulation
43086,3740925.0,25
43091,3437089.0,68
43096,3454152.0,18
43101,3140963.0,11
43106,2997899.0,22


(76, 2)
(20, 2)
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 7, 2)]            0         
                                                                 
 dense (Dense)               (None, 7, 128)            384       
                                                                 
 dense_1 (Dense)             (None, 7, 256)            33024     
                                                                 
 lstm (LSTM)                 (None, 512)               1574912   
                                                                 
 dense_2 (Dense)             (None, 256)               131328    
                                                                 
 dense_3 (Dense)             (None, 128)               32896     
                                                                 
 dense_4 (Dense)             (None, 1)       

<keras.callbacks.History at 0x1dea93a6b90>