In [1]:
import torch
import torch.nn as nn
from utilities import DataSet
import pandas as pd
from dl_models.GCN_based_model import graphconv,MGCN_conv

#### Init : 

In [2]:
# Sampling period of the series, written as a pandas offset alias.
freq = '3min'
# Same period as an integer number of minutes (the text in front of the 'min' suffix).
freq_min = int(freq.partition('min')[0])

# Number of time slots per hour (20 for a 3-minute aggregation).
time_step_per_hour = 60 // freq_min

# Arguments handed to DataSet.get_feature_vect further down.
# Judging from the recorded `dates_verif` table: 7 consecutive past steps, 1 day-lag, 1 week-lag,
# and a target 10 steps ahead -- TODO confirm against utilities.DataSet.
historical_len, Days, Weeks, step_ahead = 7, 1, 1, 10


# Load Data

### Load Dataset :

In [3]:
# Raw subway records; the file name suggests a one-month extract -- TODO confirm the covered period.
csv_path = 'data/Sub_Tram_11_2019_03_2020/1_month_df_subway.csv'
subway_nov = pd.read_csv(filepath_or_buffer=csv_path)

#### Keep only lane 'A' and fill missing values with 0:

In [4]:
# Build the [timestamp x station] flow table for lane 'A'.

# Keep lane 'A' only, then sum the flow of every (station, timestamp) pair.
sub_df = (
    subway_nov.loc[subway_nov['lane'] == 'A', ['COD_TRG', 'Flow', 'VAL_DATE']]
    .groupby(['COD_TRG', 'VAL_DATE'])
    .sum()
    .reset_index()
)
sub_df['VAL_DATE'] = pd.to_datetime(sub_df['VAL_DATE'])

# Reindex date
# groupby sorts the rows by station first and by date second, so the first/last rows only give the
# bounds of one station; take the global min/max to cover the whole observed period.
start, end = sub_df['VAL_DATE'].min(), sub_df['VAL_DATE'].max()
# Use the sampling period from the configuration cell instead of a second hard-coded '3min'.
date_index = pd.date_range(start=start, end=end, freq=freq)
sub_df = sub_df.pivot(index='VAL_DATE', columns='COD_TRG', values='Flow')
# Time slots without any record become rows of zeros.
sub_df = sub_df.reindex(date_index).fillna(0)

# Reindex columns: fix the station (column) order; a KeyError here means a station is missing from the data.
stations = ['PER','AMP','BEL','COR','HOT','FOC','MAS','CHA','REP','GRA','FLA','CUS','BON','SOI']
sub_df = sub_df[stations]
sub_df.head()

COD_TRG,PER,AMP,BEL,COR,HOT,FOC,MAS,CHA,REP,GRA,FLA,CUS,BON,SOI
2019-11-01 00:00:00,10.0,10.0,29.0,14.0,43.0,2.0,13.0,4.0,6.0,19.0,2.0,0.0,1.0,1.0
2019-11-01 00:03:00,28.0,2.0,18.0,32.0,41.0,3.0,7.0,16.0,2.0,6.0,0.0,2.0,2.0,1.0
2019-11-01 00:06:00,19.0,5.0,23.0,16.0,41.0,5.0,10.0,7.0,1.0,9.0,1.0,0.0,0.0,0.0
2019-11-01 00:09:00,19.0,3.0,29.0,8.0,64.0,0.0,12.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0
2019-11-01 00:12:00,13.0,0.0,25.0,18.0,34.0,3.0,3.0,15.0,1.0,1.0,0.0,3.0,0.0,1.0


# Build Inputs : 

### Build Feature Vector :  

In [5]:
# Wrap the station-flow table in the project's DataSet helper.
Ut = DataSet(sub_df, time_step_per_hour=time_step_per_hour)

# Normalize before getting the "Feature vector" (or "Feature Tensor").
norm_Ut = Ut.normalize()

# X: model inputs, Y: presumably the targets, dates_verif: timestamps behind each input/target column
# (see the table displayed below) -- TODO confirm against utilities.DataSet.
X, Y, dates_verif = norm_Ut.get_feature_vect(step_ahead, historical_len, Days, Weeks)

print('Feature vector shape: ', X.shape)  # Nb Sample, Nb Nodes, Sequence Length
dates_verif.head()

Feature vector shape:  torch.Size([10560, 14, 9])


Unnamed: 0,t-3360,t-480,t-7,t-6,t-5,t-4,t-3,t-2,t-1,t
3360,2019-11-01 00:00:00,2019-11-07 00:00:00,2019-11-07 23:12:00,2019-11-07 23:15:00,2019-11-07 23:18:00,2019-11-07 23:21:00,2019-11-07 23:24:00,2019-11-07 23:27:00,2019-11-07 23:30:00,2019-11-08 00:00:00
3361,2019-11-01 00:03:00,2019-11-07 00:03:00,2019-11-07 23:15:00,2019-11-07 23:18:00,2019-11-07 23:21:00,2019-11-07 23:24:00,2019-11-07 23:27:00,2019-11-07 23:30:00,2019-11-07 23:33:00,2019-11-08 00:03:00
3362,2019-11-01 00:06:00,2019-11-07 00:06:00,2019-11-07 23:18:00,2019-11-07 23:21:00,2019-11-07 23:24:00,2019-11-07 23:27:00,2019-11-07 23:30:00,2019-11-07 23:33:00,2019-11-07 23:36:00,2019-11-08 00:06:00
3363,2019-11-01 00:09:00,2019-11-07 00:09:00,2019-11-07 23:21:00,2019-11-07 23:24:00,2019-11-07 23:27:00,2019-11-07 23:30:00,2019-11-07 23:33:00,2019-11-07 23:36:00,2019-11-07 23:39:00,2019-11-08 00:09:00
3364,2019-11-01 00:12:00,2019-11-07 00:12:00,2019-11-07 23:24:00,2019-11-07 23:27:00,2019-11-07 23:30:00,2019-11-07 23:33:00,2019-11-07 23:36:00,2019-11-07 23:39:00,2019-11-07 23:42:00,2019-11-08 00:12:00


### Build Adjacency Matrices :

In [11]:
# One graph node per column of the flow table.
n_nodes = len(Ut.df.columns)

# Identity adjacency matrix: nobody is connected to anybody else.
A_indep = torch.eye(n_nodes)

# A single subway line, hence a tri-diagonal matrix (sub-diagonal + diagonal + super-diagonal).
A_Neighbors = (
    torch.diag(torch.ones(n_nodes - 1), -1)
    + torch.diag(torch.ones(n_nodes), 0)
    + torch.diag(torch.ones(n_nodes - 1), 1)
)

# Learnable adjacency matrix, randomly initialised (requires_grad defaults to True for a Parameter).
A_learnable = nn.Parameter(torch.randn(n_nodes, n_nodes))

# Add a leading axis in front of the [N, N] identity matrix -> [1, N, N].
gcnconv_matrix = A_indep[None]

# TODO: then convert into a "Laplacian matrix", a "random-walk matrix", or another normalisation:
#   A_indep = ...
#   A_Neighbors = ...
#   A_learnable = ...

## GCN

In [8]:
# X is [T, N, L]: number of available time slots, number of spatial units, historical length.
T, N, L = X.shape

# Add the channel dimension (here, only "flow"): [T, N, L] -> [T, 1, N, L].
x = X[:, None]

# Select only one batch (the first 32 samples).
x_b = x[:32]

# NOTE(review): this is the side length of A_indep, i.e. the number of nodes, while the name suggests
# a number of adjacency matrices -- confirm which one MGCN_conv expects.
n_adj = A_indep.size(0)

In [7]:
# Graph-convolution layer from dl_models.GCN_based_model, fed with the channel count of `x`.
GCN = graphconv(
    c_in=x.shape[1],
    c_out=64,
    K=2,  # K = 2 in MRGNN because it considers both the Pattern and the Adj matrix
    graph_conv_act_func='relu',
    enable_bias=True,
)

# Batch sizes and hyper-parameters reused by the step-by-step GCN cell further down.
# Note that K is set to 1 here although the layer above was built with K=2.
B, C, N, L = x_b.shape
K, c_out = 1, 64

print(f'B,C,N,L : [{B},{C},{N},{L}]')

B,C,N,L : [32,1,14,9]


In [12]:
# Forward pass of the project's MGCN_conv layer on one batch.
# NOTE(review): the recorded run of this cell raised
#   RuntimeError: einsum(): subscript b has size 448 for operand 1 ... previously seen size 32
# 448 = 32 * 14, so a node-sized factor seems to end up on the batch axis inside MGCN_conv;
# check the `n_adj` argument and the shape of `gcnconv_matrix` against the layer's definition.
# This cell also rebinds `c_out` and `x_b`, which later cells read.
c_out = 16
mgcn_model = MGCN_conv(L, c_out, n_adj)
x_b = X[:32]  # 3-D batch [32, N, L], without the channel axis
(AXW, embedding, reshaped_embedding) = mgcn_model(x_b, gcnconv_matrix)

# The message below explains (in French) that the embedding X*W is applied on the last (temporal)
# dimension first, then the convolution A*(XW) sums the embeddings of each node's neighbours.
print(
    "L'embedding a d'abord été opéré sur la dernière dimension (X*W, temporelle), Puis la convolution (A*(XW)) a sommmé les embedding de chacun des voisins (ou Noeud en lien avec le noeud tagret)."
)
# Prints the shape of the full tensor `x`, not of the batch `x_b` that was fed to the model.
print(f'X.shape: {x.shape}, AXW.shape:{AXW.shape}')

RuntimeError: einsum(): subscript b has size 448 for operand 1 which does not broadcast with previously seen size 32

In [9]:
# Apply the graphconv layer to one batch with the tri-diagonal adjacency matrix.
# NOTE(review): the recorded run raised `ValueError: not enough values to unpack (expected 4, got 2)`.
# A_Neighbors is a 2-D [N, N] tensor, so graphconv presumably unpacks a 4-D adjacency shape
# (the step-by-step cell below builds a [K, B*L, N, N] one) -- confirm in dl_models.GCN_based_model.
GCN(x_b,A_Neighbors)

ValueError: not enough values to unpack (expected 4, got 2)

## GCN Détaillé : 

In [8]:
# Step-by-step graph convolution H = A * (X * W) + b on one batch, printing the shape at every stage.
# Reads B, C, N, L, K, c_out, x_b and A_indep from the earlier cells.

# Trainable parameters. torch.FloatTensor(sizes) hands back uninitialised memory (arbitrary values,
# possibly NaN/inf), so draw the weights from a normal distribution and start the bias at zero.
weight = nn.Parameter(torch.randn(K, C, c_out))
bias = nn.Parameter(torch.zeros(c_out))
print(f'x_b: {x_b.shape}, W: {weight.shape}, Cause there are {K} adjacency matrices (K), {C} C_in and {c_out} C_out')

# Restore the [B, C_in, N, L] layout whatever the current state of x_b (4-D, 3-D without channel axis,
# or already flattened in that memory order), then move the channel axis last BEFORE flattening.
# The rows of x_flat are then ordered (B, L, N), which is what every reshape below assumes;
# flattening [B, C_in, N, L] directly would mix the node and time axes (and break for C_in > 1).
# x_b itself is left untouched so the cell can be re-run.
x_flat = x_b.reshape(B, C, N, L).permute(0, 3, 2, 1).reshape(-1, C)  # -> [B, L, N, C_in] -> [BLN, C_in]
print(f'reshaped x_b: {x_flat.shape}, Cause we have to flatten x_b along the Channel axis, and then pass through a SPATIAL embedding (Linear layer, on C_in of each sample,nodes,historical element)')

# Embedding on C_in:  X*W
embedd_c_in = torch.einsum('ab, cbd->cad', x_flat, weight)   # [BLN,C_in], [K,C_in,C_out] -> [K,BLN,C_out]: K embeddings of C_in
print('shape of reshaped_xb after K embedding on C_in: ',embedd_c_in.shape)
reshaped_embedd_c_in = embedd_c_in.reshape(K, B*L, N, -1)  # [K,BLN,C_out] -> [K,BL,N,C_out]
print('Embedded feature vect reshaped: ',reshaped_embedd_c_in.shape, '\n')

# Batch the adjacency matrix: one copy per (sample, time step) pair -> [1, BL, N, N]
batched_adj_matrix = A_indep.repeat(1,B*L,1,1)
print('Adjacency matrix A_indep: ',A_indep.shape,'batched Multi adj_matrix: ',batched_adj_matrix.shape)

# Convolution A*(XW)
convolutionned = torch.einsum('ecab,ecbd->ecad',batched_adj_matrix,reshaped_embedd_c_in)  # [K,BL,N1,N2], [K,BL,N2,C_out] -> [K,BL,N1,C_out]
print('Convolution A*(XW): ',convolutionned.shape)

# Add bias (broadcast over the trailing C_out axis)
convolutionned_n_biased = convolutionned + bias

# Reshape and permute: [K,BL,N,C_out] -> [K,B,L,N,C_out] -> [K,B,C_out,N,L]
H = convolutionned_n_biased.reshape(K,B,L,-1,c_out).permute(0,1,4,3,2)
print(f'Reshaped convolution output: {H.shape}')

x_b: torch.Size([32, 1, 14, 9]), W: torch.Size([1, 1, 64]), Cause there are 1 adjacency matrices (K), 1 C_in and 64 C_out
reshaped x_b: torch.Size([4032, 1]), Cause we have to flatten x_b along the Channel axis, and then pass through a SPATIAL embedding (Linear layer, on C_in of each sample,nodes,historical element)
shape of reshaped_xb after K embedding on C_in:  torch.Size([1, 4032, 64])
Embedded feature vect reshaped:  torch.Size([1, 288, 14, 64]) 

Adjacency matrix A_indep:  torch.Size([14, 14]) batched Multi adj_matrix:  torch.Size([1, 288, 14, 14])
Convolution A*(XW):  torch.Size([1, 288, 14, 64])
Reshaped convolution output: torch.Size([1, 32, 64, 14, 9])
