In [2]:
import torch
import pandas as pd
from pytorch_tcn import TCN
import plotly.express as px

model = TCN(
    num_inputs = 2,
    num_channels=[8, 16],
    kernel_size = 3,
    # dilation_reset: Optional[ int ] = None,
    dropout = 0.1, # 0.2, 0.3
    causal = True,
    use_norm = 'weight_norm', # 'batch_norm', 'layer_norm', 'weight_norm'
    activation = 'relu',
    kernel_initializer = 'normal', # 'xavier_normal', 'normal', 'uniform', 'kaiming_normal', 'kaiming_uniform'
    use_skip_connections = True,
    # input_shape: str = 'NCL',
    # embedding_shapes: Optional[ ArrayLike ] = None,
    # embedding_mode: str = 'add',
    # use_gate: bool = False,
    output_projection = 1,
    output_activation = 'gelu',
)
params = torch.load('tcn.pth')
model.load_state_dict(params)

df = pd.read_csv('dataset_standardized.csv')
df= df.set_index(pd.to_datetime(df['date'], format='%Y-%m-%d')).sort_index()

train_df = df["2016":"2022"]
validation_df = df["2022":"2022"]
test_df = df["2023":]

  params = torch.load('tcn.pth')


In [4]:
def get_sequence(df: pd.DataFrame, sector_mrc: str):
    """Get a sequence of total_kwh and tavg for a given sector_mrc"""

    # print(mrc)
    sequence = df[df.sector_mrc == sector_mrc]
    sequence = sequence[["total_kwh", "tavg"]]

    original_sequence = sequence.copy()
    sequence["total_kwh"] = sequence["total_kwh"].diff()

    # kwh_scaler = StandardScaler()
    # temp_scaler = StandardScaler()
    # sequence[['total_kwh']] = kwh_scaler.fit_transform(sequence[['total_kwh']])
    # sequence[['tavg']] = temp_scaler.fit_transform(sequence[['tavg']])

    # Label is the next total_kwh
    sequence["label"] = sequence.total_kwh.shift(-1)

    # Drop the last row
    sequence = sequence.dropna()

    sequence = torch.tensor(sequence.values).float()

    # print(sequence.shape)

    return sequence, original_sequence

def get_n_sequences(sector_mrc: list[str], train=True):
    """Get a tensor of sequences for a list of sector_mrcs"""
    if train:
        df = train_df
    else:
        df = test_df

    sequences = []
    kwh_scalers = []
    temp_scalers = []
    for sector_mrc in sector_mrc:
        sequence, kwh_scaler, temp_scaler = get_sequence(df, sector_mrc)
        sequences.append(sequence)
        kwh_scalers.append(kwh_scaler)
        temp_scalers.append(temp_scaler)

    stacked = torch.stack(sequences)
    return stacked, kwh_scalers, temp_scalers


In [11]:
model.eval()
with torch.no_grad():
    pred = model(X)

pred = pred.squeeze(0)
pred.shape

torch.Size([1, 94])

In [7]:
y.shape

torch.Size([1, 94])

In [3]:
tcn_preds = pd.read_csv("tcn_preds.csv")

In [5]:
tcn_preds[tcn_preds["sector_mrc"] == "AGRICOLEDrummond"]

Unnamed: 0,sector_mrc,date,label,output
0,AGRICOLEDrummond,2023-01-01,6473865.0,9879138.0
1,AGRICOLEDrummond,2023-02-01,6611757.0,9877893.0
2,AGRICOLEDrummond,2023-03-01,5573804.0,9414407.0
3,AGRICOLEDrummond,2023-04-01,5451344.0,10390560.0
4,AGRICOLEDrummond,2023-05-01,5517997.0,9411602.0
5,AGRICOLEDrummond,2023-06-01,6449576.0,9556909.0
6,AGRICOLEDrummond,2023-07-01,6136128.0,9862333.0
7,AGRICOLEDrummond,2023-08-01,5822133.0,10025440.0
8,AGRICOLEDrummond,2023-09-01,6246672.0,10719130.0
9,AGRICOLEDrummond,2023-10-01,7069067.0,10801980.0


In [12]:
pd.read_csv("arima_predictions/Drummond_AGRICOLE.csv")

Unnamed: 0,date,total_kwh,forecast
0,2023-01-01,6846496.0,7267009.0
1,2023-02-01,6473865.0,6534239.0
2,2023-03-01,6611757.0,6471910.0
3,2023-04-01,5573804.0,5626255.0
4,2023-05-01,5451344.0,5478065.0
5,2023-06-01,5517997.0,5453901.0
6,2023-07-01,6449576.0,6012117.0
7,2023-08-01,6136128.0,6172685.0
8,2023-09-01,5822133.0,5230176.0
9,2023-10-01,6246672.0,6006716.0


In [7]:

sectors = df["sector"].unique()
for mrc in ["Drummond", "Les Etchemins"]:
    for sector in sectors:
        # On importe d'abord les prévisions pré-calculées pour SARIMA
        sarima_preds = pd.read_csv(f"./arima_predictions/{mrc}_{sector}.csv")
        
        fig = px.line(sarima_preds, x="date", y="total_kwh", title=f"{mrc} - {sector}")

        sector_mrc = sector + mrc
        tcn_p = tcn_preds[tcn_preds["sector_mrc"] == sector_mrc]
        tcn_p = tcn_p.drop(columns=["sector_mrc"])

        fig.add_scatter(y=tcn_p.label, x=sarima_preds["date"], mode='lines', name="SARIMA Prediction")
        fig.add_scatter(y=tcn_p.output, x=pd.date_range(start="2023-02-01", end="2023-12-31", periods=10), mode='lines', name="TCN Prediction")
        # fig = px.line(y.squeeze())
        # fig.add_scatter(y=pred.squeeze(), mode='lines', name="pred")
        fig.show()



In [None]:
import plotly.express as px

# Plot forecast vs actual
fig = px.line(fore.rename("Prédiction"), title=f"{mrc} - {sector}")
fig.add_trace(go.Scatter(x=y.index, y=y["total_kwh"], mode="lines", name="Valeurs réelles"))
fig.update_layout(legend_title_text="Légende", title=f"Consommation mensuelle pour la MRC de {mrc} - Secteur {sector}", yaxis_title="Valeur (kWh)", xaxis_title="Date")
fig.show()


In [9]:
import pandas as pd 


df = pd.read_csv("results.csv")
df.head()

Unnamed: 0,MRC,SECTOR,mean_temp__month_rmse,mean_temp__month_mape
0,Abitibi,AGRICOLE,30310.2,0.039937
1,Matawinie,INDUSTRIEL,821941.3,0.170588
2,Matawinie,COMMERCIAL,666445.5,0.041837
3,Matawinie,AGRICOLE,178803.6,0.088196
4,Les Moulins,RÉSIDENTIEL,7458700.0,0.042321


In [10]:
# Get mean MAPE for each sector
df.groupby('SECTOR')['mean_temp__month_mape'].mean()

SECTOR
AGRICOLE          0.073025
COMMERCIAL        0.059453
INDUSTRIEL        0.154510
INSTITUTIONNEL    0.058509
RÉSIDENTIEL       0.054587
Name: mean_temp__month_mape, dtype: float64

In [11]:
# Get mean MAPE for each MRC

df.groupby('MRC')['mean_temp__month_mape'].mean().sort_values(ascending=False)

MRC
Lac-Saint-Jean-Est                  0.443822
La Haute-Gaspésie                   0.266998
Mékinac                             0.263477
La Rivière-du-Nord                  0.224387
Les Sources                         0.181747
                                      ...   
Montcalm                            0.034846
Portneuf                            0.034746
Kamouraska                          0.033210
Rimouski-Neigette                   0.032436
Administration régionale Kativik         NaN
Name: mean_temp__month_mape, Length: 88, dtype: float64

In [13]:
import plotly.express as px

px.box(df, x='SECTOR', y='mean_temp__month_mape')