# This notebook reproduces the model introspection results
**Prerequisite: this notebook requires a successful run of the experiment [02-train_HM_final.py](../02-train_HM_final.py).**

Thus, the [models/HM/HM.pt](../models/HM/HM.pt) file must be present before running this notebook.

Preface

In [1]:
# Point the working directory at the project root, so that project modules can
# be imported and relative paths such as "models/HM/HM.pt" resolve.
import os

# Resolve the root only once: a bare os.chdir("..") climbs one more level every
# time this cell is re-run, which silently breaks every relative path below.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("..")
os.chdir(PROJECT_ROOT)
print(os.getcwd())

/home/juagudelo/HOMEdev/DF_HM


In [2]:
import matplotlib as mlp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from matplotlib.colors import ListedColormap
from sklearn.manifold import TSNE
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

from toolbox import charge_model
from utilities import data_pretreatment_hm

Defining the hyperparameters

In [3]:
# --- Data handling ----------------------------------------------------------
batch_size = 128   # mini-batch size handed to every DataLoader in this notebook
output_weeks = 6   # number of leading target columns kept as prediction targets

# --- Network size (only forwarded to charge_model from this notebook) --------
hidden_dim = 490
n_layers = 2
ffnn_layers = 2
embed_dim = [3] * 7  # presumably one embedding width per categorical column — confirm

# --- Dropout rates ----------------------------------------------------------
dropout = 0.1
embed_dropout = 0.4

# --- Training settings --------------------------------------------------------
# NOTE(review): no training happens in this notebook; these are presumably kept
# so the dict matches the one used by the training script — confirm.
lr = 7e-5
epochs = 9
clip = 5

Preparing the hyperparameters dict

In [4]:
# Keys under which the hyperparameters are stored, in the same order as the
# values they are paired with below.
hp_name_list = [
    "batch_size", "output_weeks",
    "hidden_dim", "n_layers",
    "ffnn_layers", "dropout",
    "lr", "epochs", "clip",
    "embed_dim", "embed_dropout",
]
hp_values = [
    batch_size, output_weeks,
    hidden_dim, n_layers,
    ffnn_layers, dropout,
    lr, epochs, clip,
    embed_dim, embed_dropout,
]

# Pair each name with its value, then add the three ablation switches (all off).
hyperparameters_dict = dict(zip(hp_name_list, hp_values))
hyperparameters_dict.update({
    "ablation_TS": False,
    "ablation_tabular": False,
    "ablation_attention": False,
})

Importing data

In [5]:
def _split_dataset(x_time, x_tabular, x_tabular_cat, y_target):
    """Bundle the arrays of one data split into a TensorDataset.

    Tensors are stored in the order (time series, tabular, categorical
    tabular, target); only the first `output_weeks` target columns are kept.
    """
    return TensorDataset(
        torch.tensor(x_time),
        torch.tensor(x_tabular),
        torch.tensor(x_tabular_cat),
        torch.tensor(y_target[:, :output_weeks]),
    )


def _ordered_loader(dataset):
    """Batch `dataset` in its stored order, keeping the final partial batch."""
    return DataLoader(dataset, shuffle=False, batch_size=batch_size, drop_last=False)


data = data_pretreatment_hm()

# Training split
X_tabular_train, X_tabular_cat_train, X_time_train, y_target_train = (
    data[key]
    for key in ("X_tabular_train", "X_tabular_cat_train", "X_time_train", "y_target_train")
)

# Validation split
X_tabular_valid, X_tabular_cat_valid, X_time_valid, y_target_valid = (
    data[key]
    for key in (
        "X_tabular_validation",
        "X_tabular_cat_validation",
        "X_time_validation",
        "y_target_validation",
    )
)
valid_fips = data["valid_fips"]

# Test split
X_tabular_test, X_tabular_cat_test, X_time_test, y_target_test = (
    data[key]
    for key in ("X_tabular_test", "X_tabular_cat_test", "X_time_test", "y_target_test")
)
test_fips = data["test_fips"]

train_data = _split_dataset(X_time_train, X_tabular_train, X_tabular_cat_train, y_target_train)
valid_data = _split_dataset(X_time_valid, X_tabular_valid, X_tabular_cat_valid, y_target_valid)
test_data = _split_dataset(X_time_test, X_tabular_test, X_tabular_cat_test, y_target_test)

# None of the loaders shuffles, so batches follow the order of the source arrays.
train_loader = _ordered_loader(train_data)
valid_loader = _ordered_loader(valid_data)
test_loader = _ordered_loader(test_data)

100%|██████████| 40/40 [00:40<00:00,  1.02s/it]
100%|██████████| 23/23 [00:00<00:00, 622.16it/s]
100%|██████████| 40/40 [00:00<00:00, 118.94it/s]
100%|██████████| 23/23 [00:00<00:00, 10351.86it/s]
100%|██████████| 40/40 [00:00<00:00, 119.15it/s]
100%|██████████| 23/23 [00:00<00:00, 10178.20it/s]


Loading the model 

In [6]:
# Number of distinct values found in each categorical training column, plus one.
# NOTE(review): the extra slot is presumably expected by the model's embedding
# tables — confirm in toolbox.charge_model.
n_cat_columns = X_tabular_cat_train.shape[1]
list_cat = [
    len(np.unique(X_tabular_cat_train[:, col])) + 1 for col in range(n_cat_columns)
]

# Input sizes handed to the loader together with the hyperparameters.
dim_info = {
    "static_dim": X_tabular_train.shape[1],
    "n_tf": X_time_train.shape[-1],
    "list_cat": list_cat,
}

model, device = charge_model(
    kind_of_model="HM",
    model_path="models/HM/HM.pt",
    hyperparams=hyperparameters_dict,
    dim_info=dim_info,
)

Using device: cpu
NVIDIA T1000 8GB


# Embeddings visualization

### Inference

In [7]:
# Column of the static feature matrix that is collected into `fips` below.
# NOTE(review): presumably the county FIPS identifier — confirm against
# data_pretreatment_hm.
FIPS_COLUMN = 14

fips = []
embeddings_all = []
cat_data_all = []
target_all = []

# Extract the categorical embeddings over the test set.
model.eval()
with torch.no_grad():
    for x, static, catego, y in tqdm(
        test_loader,
        desc=" Inference",
    ):
        # Only the embedding layers are queried here, so no LSTM hidden state is
        # initialised and the time series `x` is not moved to the device.
        static, cat = static.to(device), catego.to(device)
        # One embedding layer per categorical column, concatenated feature-wise.
        embeddings = [emb(cat[:, i]) for i, emb in enumerate(model.embeddings)]
        x_cat = torch.cat(embeddings, dim=1)
        fips.append(static[:, FIPS_COLUMN].cpu().numpy())
        embeddings_all.append(x_cat.cpu().numpy())
        cat_data_all.append(cat.cpu().numpy())
        target_all.append(y.cpu().numpy())

# Finally, concatenate the per-batch results into one array each.
fips = np.concatenate(fips)
embeddings_all = np.concatenate(embeddings_all)
cat_data_all = np.concatenate(cat_data_all)
target_all = np.concatenate(target_all)
print(embeddings_all.shape, cat_data_all.shape, target_all.shape)

 Inference: 100%|██████████| 20/20 [00:00<00:00, 343.36it/s]

(2477, 21) (2477, 7) (2477, 6)





### Preparing the data resulting from the inference to make a t-SNE

In [8]:
# Tabular views of the inference results, one row per sample.
df_embed = pd.DataFrame(embeddings_all)

# Name the target columns from the array's own width rather than a hard-coded
# 6, so this cell keeps working when `output_weeks` is changed.
n_target_weeks = target_all.shape[1]
target = pd.DataFrame(
    target_all, columns=[f"week_{i}" for i in range(1, n_target_weeks + 1)]
)
# Round the targets to the nearest integer.
target = target.round().astype(int)

# Categorical inputs, labelled SQ1..SQn in column order.
cat_data = pd.DataFrame(cat_data_all, columns=[f"SQ{i+1}" for i in range(cat_data_all.shape[1])])

### Performing t-SNE

In [None]:
# Project the concatenated embeddings to 2-D; the fixed random_state keeps the
# layout reproducible between runs.
# NOTE(review): the saved log reports 361 nearest neighbours, whereas
# perplexity=110 would be expected to give 3 * 110 + 1 = 331, so the stored
# output may come from a run with a different perplexity — re-run to confirm.
# NOTE(review): the log also shows "Mean sigma: 0.000000" and a negative final
# KL divergence, which may indicate many identical rows in df_embed — verify.
tsne_settings = {
    "n_components": 2,
    "perplexity": 110,
    "random_state": 42,
    "verbose": 1,
}
tsne = TSNE(**tsne_settings)
X_embedded = tsne.fit_transform(df_embed)

[t-SNE] Computing 361 nearest neighbors...
[t-SNE] Indexed 2477 samples in 0.000s...
[t-SNE] Computed neighbors for 2477 samples in 0.117s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2477
[t-SNE] Computed conditional probabilities for sample 2000 / 2477
[t-SNE] Computed conditional probabilities for sample 2477 / 2477
[t-SNE] Mean sigma: 0.000000
[t-SNE] KL divergence after 250 iterations with early exaggeration: 39.122368
[t-SNE] KL divergence after 1000 iterations: -0.402381


In [10]:
# One colour per distinct SQ1 category, sampled at evenly spaced positions of
# matplotlib's tab20 palette. ListedColormap comes from the notebook's import
# cell instead of being imported here.
# NOTE(review): tab20 holds 20 colours, so colours would presumably repeat if
# SQ1 ever had more than 20 categories — confirm this cannot happen.
num_colors = len(cat_data["SQ1"].unique())
tab20_samples = plt.cm.tab20(np.linspace(0, 1, num_colors))
# Despite its name, this map holds `num_colors` entries; the name is kept
# because the t-SNE plotting cell refers to it.
color_map_25 = ListedColormap(tab20_samples)

In [11]:
# Figure styling for the exported PDFs: serif (Computer Modern) fonts, larger
# labels, and text rendered through LaTeX ("text.usetex").
mlp.rc("font", family="serif", serif=["Computer Modern Roman"])

params = {
    "backend": "pdf",
    "axes.labelsize": 22,
    "font.size": 22,
    "legend.fontsize": 16,
    "xtick.labelsize": 18,
    "ytick.labelsize": 18,
    "text.usetex": True,
    "axes.unicode_minus": True,
}
mlp.rcParams.update(params)

In [12]:
# Scatter plot of the 2-D t-SNE projection, coloured by the SQ1 category.

# savefig does not create missing folders, so make sure the target exists.
os.makedirs("results", exist_ok=True)

sq1_levels = np.sort(cat_data["SQ1"].unique())
# Colour every point by the rank of its SQ1 value among the sorted levels and
# pin the colour range to [0, n_levels - 1]. Legend entry i then always shows
# the colour of level i, even if the category codes are not contiguous.
sq1_rank = np.searchsorted(sq1_levels, cat_data["SQ1"].to_numpy())

plt.figure(figsize=(11.5, 10))
# No `label=` on the scatter: the legend below is built from explicit handles,
# so a scatter label would never be displayed.
plt.scatter(
    X_embedded[:, 0],
    X_embedded[:, 1],
    c=sq1_rank,
    cmap=color_map_25,
    vmin=0,
    vmax=len(sq1_levels) - 1,
    s=15,
)
handles = [plt.Line2D([0], [0], marker='o',
                      color='w',
                      markerfacecolor=color_map_25(i),
                      markersize=13) for i in range(len(sq1_levels))]
# Labels are the category values shifted by one.
# NOTE(review): presumably converts 0-based codes to 1-based scores — confirm.
plt.legend(handles=handles,
           labels=[level + 1 for level in sq1_levels],
           title = "Nutrient \n availability \n scores")
plt.xticks([])
plt.yticks([])
plt.tight_layout()
plt.savefig("results/t-SNE_plot.pdf")

# Attention curve visualization

### Inference over the LSTM + softmax

In [13]:
# Per-batch accumulators. `cat_data_all` and `target_all` reuse the names from
# the embeddings section and are overwritten here.
attention_all, cat_data_all, num_data_all, target_all = [], [], [], []

# Collect the attention weights for every batch of `valid_loader`.
# NOTE(review): the original comment said "test set", but the loop iterates
# over the validation loader — confirm which split is intended.
model.eval()
with torch.no_grad():
    for x, static, catego, y in tqdm(valid_loader, desc="Predictions :"):
        # Fresh LSTM state sized for this batch.
        batch_len = len(x)
        val_h = tuple(state.data.to(device) for state in model.init_hidden(batch_len, device))

        static = static.to(device)
        cat = catego.to(device)
        y = y.to(device)
        x = x.to(device).to(dtype=torch.float32)

        # Attention weights: softmax of the attention scores along dim 1 of
        # the LSTM output.
        lstm_out, _ = model.lstm(x, val_h)
        attention_scores = model.attention(lstm_out)
        attention_weights = torch.softmax(attention_scores, dim=1)

        attention_all.append(attention_weights.cpu().numpy())
        cat_data_all.append(cat.cpu().numpy())
        num_data_all.append(static.cpu().numpy())
        target_all.append(y.cpu().numpy())

# Stack the per-batch arrays along the sample axis.
attention_all = np.concatenate(attention_all)
cat_data_all = np.concatenate(cat_data_all)
num_data_all = np.concatenate(num_data_all)
target_all = np.concatenate(target_all)

Predictions :: 100%|██████████| 20/20 [00:04<00:00,  4.51it/s]


## Formatting the inference results

In [14]:
# Collapse identical attention profiles (duplicates along the sample axis) and
# report the shape of what remains.
unique_attentions = np.unique(attention_all, axis=0)
print(np.shape(unique_attentions))

(2455, 180, 1)


In [15]:
# Bring the attention weights to a 2-D (time step, sample) layout, then melt
# them to long format for seaborn.
# The reshape is guarded on the array rank so that re-running this cell is
# harmless: once `attention_all` has been flattened and transposed, reshaping
# it again with shape[-2] would use the wrong row length and scramble the
# values without raising any error.
if attention_all.ndim == 3:
    attention_all = attention_all.reshape(-1, attention_all.shape[-2]).transpose()

# 'index' is the time step; melt yields one row per (time step, sample) pair.
att = pd.DataFrame(attention_all).reset_index(drop=False)
att = att.melt(id_vars='index', value_name='attention')

In [16]:
# Line plot of the attention weight per time step, aggregated over samples by
# seaborn.

# savefig does not create missing folders, so make sure the target exists.
os.makedirs("results", exist_ok=True)

# Number of time steps actually present, used for the tick range instead of a
# hard-coded 181.
n_days = att["index"].nunique()

plt.figure(figsize=(10, 8))
# Pass the frame by keyword: older seaborn releases do not accept it
# positionally.
sns.lineplot(data=att, x='index', y='attention')
sns.despine()
plt.xlabel("Days")
plt.xticks(range(0, n_days + 1, 20))
plt.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))
plt.ylabel("Mean attention weight")
plt.tight_layout()
plt.savefig("results/attention_weights.pdf")