In [1]:
import torch
import pandas as pd
# from pytorch_tcn import TCN
import plotly.express as px

# model = TCN(
#     num_inputs = 2,
#     num_channels=[8, 16],
#     kernel_size = 3,
#     # dilation_reset: Optional[ int ] = None,
#     dropout = 0.1, # 0.2, 0.3
#     causal = True,
#     use_norm = 'weight_norm', # 'batch_norm', 'layer_norm', 'weight_norm'
#     activation = 'relu',
#     kernel_initializer = 'normal', # 'xavier_normal', 'normal', 'uniform', 'kaiming_normal', 'kaiming_uniform'
#     use_skip_connections = True,
#     # input_shape: str = 'NCL',
#     # embedding_shapes: Optional[ ArrayLike ] = None,
#     # embedding_mode: str = 'add',
#     # use_gate: bool = False,
#     output_projection = 1,
#     output_activation = 'gelu',
# )
# params = torch.load('tcn.pth')
# model.load_state_dict(params)

# Load the standardized dataset and index it chronologically by its 'date'
# column (the column itself is kept; only the index is replaced).
df = (
    pd.read_csv('dataset_standardized.csv')
    .pipe(lambda frame: frame.set_index(pd.to_datetime(frame['date'], format='%Y-%m-%d')))
    .sort_index()
)

# Year-based splits; string slices on a DatetimeIndex include both end years.
# NOTE(review): the training range ends in 2022, so it contains every
# validation row — confirm this overlap is intended.
train_df = df.loc["2016":"2022"]
validation_df = df.loc["2022":"2022"]
test_df = df.loc["2023":]

In [4]:
def get_sequence(df: pd.DataFrame, sector_mrc: str):
    """Build the model-ready sequence for a single sector_mrc series.

    Rows of ``df`` whose ``sector_mrc`` column equals ``sector_mrc`` are kept
    in their existing order. ``total_kwh`` is replaced by its first difference
    and the label of each row is the differenced ``total_kwh`` of the next row.

    Args:
        df: frame containing at least the columns ``sector_mrc``,
            ``total_kwh`` and ``tavg``.
        sector_mrc: value of the ``sector_mrc`` column to select.

    Returns:
        A tuple ``(sequence, original_sequence)``:
        ``sequence`` is a float32 tensor with one row per retained time step
        and the columns (differenced total_kwh, tavg, label);
        ``original_sequence`` is a DataFrame holding the un-differenced
        ``total_kwh`` and ``tavg`` of every selected row.
    """
    # A single .loc selection followed by .copy() gives an independent frame,
    # so deriving new columns from it never triggers SettingWithCopyWarning
    # and never touches the caller's DataFrame.
    original_sequence = df.loc[df.sector_mrc == sector_mrc, ["total_kwh", "tavg"]].copy()

    kwh_delta = original_sequence["total_kwh"].diff()

    # assign() returns a new frame: total_kwh is overwritten in place (column
    # order is preserved) and the label is appended as the last column.
    sequence = original_sequence.assign(
        total_kwh=kwh_delta,
        label=kwh_delta.shift(-1),  # label is the next differenced total_kwh
    )

    # diff() leaves NaN in the first row and shift(-1) leaves NaN in the last
    # one, so both ends are dropped (along with any row that has a missing value).
    sequence = sequence.dropna()

    return torch.tensor(sequence.values).float(), original_sequence

def get_n_sequences(sector_mrc: list[str], train=True):
    """Stack the sequences of several sector_mrc series into one tensor.

    Args:
        sector_mrc: identifiers of the series to load; each one is passed to
            ``get_sequence``.
        train: when True the series are read from the module-level
            ``train_df``, otherwise from ``test_df``.

    Returns:
        A tuple ``(stacked, original_sequences)``: ``stacked`` is the result
        of ``torch.stack`` over the per-series tensors (one leading entry per
        identifier), and ``original_sequences`` is the list of un-differenced
        DataFrames returned by ``get_sequence``, in the same order.

    Note:
        ``torch.stack`` needs a non-empty list of equally shaped tensors, so
        all requested series must have the same number of retained rows.
    """
    source_df = train_df if train else test_df

    sequences = []
    original_sequences = []
    # The loop variable has its own name so it no longer shadows the
    # ``sector_mrc`` parameter.
    for name in sector_mrc:
        # get_sequence returns exactly two values; the previous three-way
        # unpacking (sequence + two scalers) raised ValueError on every call.
        sequence, original_sequence = get_sequence(source_df, name)
        sequences.append(sequence)
        original_sequences.append(original_sequence)

    return torch.stack(sequences), original_sequences


In [11]:
# NOTE(review): `model` and `X` are not defined in any visible cell (the model
# construction in the first cell is commented out), so this cell relies on
# kernel state — confirm it survives Restart & Run All.
model.eval()  # inference mode for layers such as dropout

# Gradients are not needed for prediction; the leading dimension is squeezed
# away when it has size 1.
with torch.no_grad():
    pred = model(X).squeeze(0)

pred.shape

torch.Size([1, 94])

In [7]:
# Shape of `y` (presumably the target tensor — `y` is not defined in any
# visible cell), displayed so it can be compared with pred.shape.
y.shape

torch.Size([1, 94])

In [73]:
from plotly.subplots import make_subplots
from plotly.graph_objects import Scatter

# Define colors for each type of prediction
# The keys are the legend names given to the traces, so a series keeps the
# same color in every subplot.
colors = {
    "Valeurs réelles": "red",
    "Prévision de SARIMA": "blue",
    "Prévision du TCN": "green",
    "Prévision du LSTM": "orange"
}

# One row of five subplots, one per sector; the titles are fixed here, so the
# order in which sectors are plotted must match this list.
global_fig = make_subplots(rows=1, cols=5, subplot_titles=["Agricole", "Résidentiel", "Commercial", "Institutionnel", "Industriel"])

def build_preds_df(sector, mrc="Drummond"):
    """Gather the SARIMA, LSTM and TCN forecasts of one sector/MRC pair.

    Three CSV files are read relative to the working directory: the SARIMA
    file (whose ``forecast`` column is renamed to ``SARIMA``), the LSTM file
    (its ``Predictions`` column becomes ``LSTM``, aligned on the row index)
    and ``tcn_preds2.csv`` (wide format: ``time`` followed by one column per
    series).

    Args:
        sector: sector name as used in the prediction file names.
        mrc: MRC name as used in the prediction file names.

    Returns:
        The SARIMA frame extended with the ``LSTM`` and ``TCN`` columns.
    """
    preds = pd.read_csv(f"./arima_predictions/{mrc}_{sector}.csv").rename(
        columns={"forecast": "SARIMA"}
    )

    lstm = pd.read_csv(f"./lstm_predictions/LSTM_predictions_vs_true_values_{sector}_{mrc}.csv")
    preds["LSTM"] = lstm["Predictions"]

    # Reshape the wide TCN table to long form so the wanted series can be
    # picked by its identifier.
    tcn_wide = pd.read_csv("tcn_preds2.csv")
    tcn_long = tcn_wide.melt(
        id_vars=["time"],
        value_vars=tcn_wide.columns[1:],
        var_name="sector_mrc",
        value_name="output",
    )
    # Series identifiers are compared with underscores removed.
    tcn_long["sector_mrc"] = tcn_long["sector_mrc"].str.replace("_", "")
    wanted = sector + mrc
    tcn_rows = tcn_long.loc[tcn_long["sector_mrc"] == wanted]

    # Positional assignment: the TCN series must have exactly as many rows as
    # the SARIMA frame.
    preds["TCN"] = tcn_rows["output"].values

    return preds

# (legend name, preds_df column, line width) for the traces of one subplot,
# in drawing order. The legend name is also the key into `colors`.
trace_specs = [
    ("Valeurs réelles", "total_kwh", 6),
    ("Prévision de SARIMA", "SARIMA", 3),
    ("Prévision du TCN", "TCN", 3),
    ("Prévision du LSTM", "LSTM", 3),
]

# The figure has five columns, so at most five sectors are plotted.
# NOTE(review): the column a sector lands in follows the order of
# df["sector"].unique(), while the subplot titles are fixed when the figure is
# created — verify that both orders agree.
for i, sector in enumerate(df["sector"].unique()[:5], start=1):
    # Every subplot repeats the same four series; only the first one
    # contributes legend entries.
    show_legend = i == 1
    preds_df = build_preds_df(sector)
    for trace_name, column, width in trace_specs:
        global_fig.add_trace(
            # All four series are row-aligned columns of preds_df, so they
            # share its "date" column as x. The TCN and LSTM forecasts were
            # previously drawn against a synthetic 10-point date range, which
            # shifted them relative to the actual values.
            Scatter(
                x=preds_df["date"],
                y=preds_df[column],
                mode='lines',
                name=trace_name,
                line=dict(width=width, color=colors[trace_name]),
                showlegend=show_legend,
            ),
            row=1, col=i)

# Global styling. `xaxis` / `yaxis` are the axes of the first subplot, so the
# axis titles below are shown on that subplot only.
global_fig.update_layout(
    # title="Prévisions et consommation pour la MRC de Drummond",
    # title_font=dict(size=28),
    xaxis_title="Date",
    yaxis_title="Consommation mensuelle (kWh)",
    yaxis=dict(
        title_font=dict(size=20),
    ),
    xaxis=dict(
        title_font=dict(size=20),
    ),
    legend_title="Légende",
    legend=dict(
        font=dict(
            size=20,
        ),
        yanchor="top",
        y=0.8,
    ),
    margin=dict(l=0, r=0, t=30, b=0),
    width=2000,
)

# The subplot titles are annotations created by make_subplots. Enlarging their
# font in place keeps their text and positions in sync with the figure, rather
# than re-declaring every annotation with hand-copied coordinates.
global_fig.update_annotations(font=dict(size=20))

global_fig.show()


In [None]:
import plotly.express as px
import plotly.graph_objects as go  # `go` was used below without being imported

# Plot forecast vs actual
# NOTE(review): `fore`, `y`, `mrc` and `sector` come from kernel state that is
# not defined in any visible cell; `y` is indexed here like a DataFrame with a
# "total_kwh" column — confirm which objects are expected.
fig = px.line(fore.rename("Prédiction"))
fig.add_trace(go.Scatter(x=y.index, y=y["total_kwh"], mode="lines", name="Valeurs réelles"))
# The title is set once here; a title passed to px.line would be overwritten
# by this call anyway.
fig.update_layout(legend_title_text="Légende", title=f"Consommation mensuelle pour la MRC de {mrc} - Secteur {sector}", yaxis_title="Valeur (kWh)", xaxis_title="Date")
fig.show()


In [9]:
import pandas as pd 


# results.csv was written together with its row index; reading that first
# column back as the index avoids a spurious "Unnamed: 0" data column.
# NOTE(review): this rebinds `df`, which earlier cells use for the consumption
# dataset — re-running those cells after this one will pick up the wrong frame.
df = pd.read_csv("results.csv", index_col=0)
df.head()

Unnamed: 0,MRC,SECTOR,mean_temp__month_rmse,mean_temp__month_mape
0,Abitibi,AGRICOLE,30310.2,0.039937
1,Matawinie,INDUSTRIEL,821941.3,0.170588
2,Matawinie,COMMERCIAL,666445.5,0.041837
3,Matawinie,AGRICOLE,178803.6,0.088196
4,Les Moulins,RÉSIDENTIEL,7458700.0,0.042321


In [10]:
# Get mean MAPE for each sector
# (average of the 'mean_temp__month_mape' column over all rows sharing a SECTOR)
df.groupby('SECTOR')['mean_temp__month_mape'].mean()

SECTOR
AGRICOLE          0.073025
COMMERCIAL        0.059453
INDUSTRIEL        0.154510
INSTITUTIONNEL    0.058509
RÉSIDENTIEL       0.054587
Name: mean_temp__month_mape, dtype: float64

In [11]:
# Get mean MAPE for each MRC
# Sorted from worst (highest MAPE) to best; MRCs whose mean is NaN are listed last.

df.groupby('MRC')['mean_temp__month_mape'].mean().sort_values(ascending=False)

MRC
Lac-Saint-Jean-Est                  0.443822
La Haute-Gaspésie                   0.266998
Mékinac                             0.263477
La Rivière-du-Nord                  0.224387
Les Sources                         0.181747
                                      ...   
Montcalm                            0.034846
Portneuf                            0.034746
Kamouraska                          0.033210
Rimouski-Neigette                   0.032436
Administration régionale Kativik         NaN
Name: mean_temp__month_mape, Length: 88, dtype: float64

In [13]:
import plotly.express as px

# Box plot of the MAPE values in `df`, one box per SECTOR.
px.box(df, x='SECTOR', y='mean_temp__month_mape')