# Modèles multimodaux - Voting

## README
Ce notebook permet la création et l'évaluation de modèles de voting.

Il réalise tout d'abord certaines opérations préalables : préparation, chargement des données et chargement des modèles (chapitres 1 à 3)

Le chapitre 5 crée et évalue des modèles de voting simples, par agrégation (moyenne, maximum ou pondération globale) des prédictions de 2 modèles (1 image et 1 texte)

Le chapitre 6 propose une version avancée, avec pondérations par classe, apprises ou non, des résultats des deux modèles utilisés

## 1. Préparation

In [None]:
import sys
from pathlib import Path

# Make the parent of the current working directory importable, so that the
# `from src import ...` statements that follow can be resolved when the
# notebook is executed from its own sub-directory.
project_root = Path().resolve().parent
# Compare resolved paths so that an equivalent entry written differently
# (relative path, trailing separator, ...) is not appended a second time.
if project_root not in [Path(p).resolve() for p in sys.path]:
    sys.path.append(str(project_root))

from src import PATHS, LABELS

In [None]:
import os
import time
import pickle
import joblib
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from src.visualization.visualize import visual_classification_report

## 2. Chargement des données

In [None]:
# Per-document split assignment: the `data_set` column is compared against
# "train", "val" and "test" by the split-selection cell further down.
data_sets = pd.read_parquet(PATHS.metadata / "df_data_sets.parquet")

# NOTE(review): `documents` used to be read from the very same
# "df_data_sets.parquet" file, which looks like a copy-paste of the line
# above. Only `documents.index` is consumed afterwards, so the frame that is
# already in memory is reused instead of reading the file a second time.
# If a dedicated documents file was intended, load it here instead.
documents = data_sets

# Encoded targets: the `label` column is used as y by the split-selection cell.
labels = pd.read_parquet(PATHS.metadata / "df_encoded_labels.parquet")

In [None]:
# Display the shapes of the three frames as one tuple, to check at a glance
# that they describe the same number of rows.
tuple(frame.shape for frame in (documents, data_sets, labels))

In [None]:
# For every split, X holds the index values of the selected documents and y
# holds the matching `label` column. Rows are selected with the boolean mask
# `data_sets.data_set == <split name>`, which presumes that the three frames
# share the same index (TODO confirm).
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    (
        documents[data_sets.data_set == split_name].index,
        labels[data_sets.data_set == split_name].label,
    )
    for split_name in ("train", "val", "test")
)

In [None]:
# Drop the three source frames: the cells below only use the X_* / y_*
# objects derived from them.
del documents, data_sets, labels

## 3. Chargement des modèles

In [None]:
from src.models.model_wrappers import ModelWrapperFactory, AGG_FN

In [None]:
# Display what the factory reports as registered (presumably the names that
# can be passed to `load_existing` below; verify in src.models.model_wrappers).
ModelWrapperFactory.get_registered()

In [None]:
# Load the two unimodal wrappers combined by every voter below: first the
# text-based one, then the image-based one.
txt_model, img_model = (
    ModelWrapperFactory.load_existing(wrapper_name)
    for wrapper_name in ("Text-based Logistic Regressor", "Image-based LGBM")
)

## 5. Voting simple

### 5.1. Averaging 

In [None]:
# Build a voter over the text and image wrappers using the AVERAGE
# aggregation (presumably the mean of the members' predictions; see AGG_FN).
multimodal_voter = ModelWrapperFactory.make_mmo_voter_wrapper(
    name="Averaging Voter",
    model_wrappers=[txt_model, img_model],
    agg_fn=AGG_FN.AVERAGE
)
# Kept as the last expression of the cell so that whatever `visual_report`
# returns or draws is shown as the cell output.
multimodal_voter.visual_report()

### 5.2. Max value

In [None]:
# Same members as the averaging voter, combined with the MAX aggregation
# (presumably keeps the highest score per class; see AGG_FN).
# NOTE: this rebinds `multimodal_voter`, replacing the previous voter.
multimodal_voter = ModelWrapperFactory.make_mmo_voter_wrapper(
    name="Max Voter",
    model_wrappers=[txt_model, img_model],
    agg_fn=AGG_FN.MAX
)
# Kept as the last expression of the cell so that whatever `visual_report`
# returns or draws is shown as the cell output.
multimodal_voter.visual_report()

### 5.3. Weighted
Nous allons utiliser le jeu de validation pour déterminer la meilleure répartition des poids entre les modèles image et texte.

In [None]:

# Sweep the weight `alpha` over 101 evenly spaced values in [0, 1]. For each
# value a weighted voter is built with weights [alpha, 1 - alpha] over
# [txt_model, img_model] and scored on the validation split.
# `accuracies` ends up as a list of [alpha, validation accuracy] pairs.
# NOTE(review): every iteration calls `predict` on a fresh voter; if the
# wrappers do not cache the members' predictions this recomputes them 101
# times (confirm in src.models.model_wrappers).
accuracies = []
for alpha in tqdm(np.linspace(0.0, 1.0, num=101)):
    multimodal_voter = ModelWrapperFactory.make_mmo_voter_wrapper(
        name="Weighted Voter",
        model_wrappers=[txt_model, img_model],
        agg_fn=AGG_FN.WEIGHTED,
        weights=[alpha, 1 - alpha],
    )
    val_predictions = multimodal_voter.predict(X_val)
    accuracies += [[alpha, accuracy_score(y_val, val_predictions)]]

In [None]:
# Operating point highlighted on the chart. These values are hard-coded
# (presumably read off the sweep results of an earlier run) and must be
# updated by hand if the sweep is re-run with different models or data.
best_alpha = 0.49
best_accuracy = 0.846
y_floor = 0.55  # lower bound of the y axis, where the vertical guide starts

plt.figure(figsize=(8, 8))

# Dashed red guides joining each axis to the highlighted point. The
# horizontal guide previously started at y=0.8421 (a stale value), which
# drew a slanted segment instead of a horizontal one.
plt.plot((0, best_alpha), (best_accuracy, best_accuracy), 'r--', lw=1)
plt.plot((best_alpha, best_alpha), (y_floor, best_accuracy), 'r--', lw=1)

# Validation accuracy as a function of the weight, from the sweep results.
plt.plot(
    [ratio for ratio, _ in accuracies],
    [accuracy for _, accuracy in accuracies],
)

# Add the highlighted coordinates to the automatically chosen ticks.
plt.xticks(list(plt.xticks()[0]) + [best_alpha])
plt.yticks(list(plt.yticks()[0]) + [best_accuracy])

# Colour the x tick of the highlighted weight in red.
for label in plt.gca().get_xticklabels():
    if label.get_text() == str(best_alpha):
        label.set_color('red')

# Colour the y tick of the highlighted accuracy in red.
for label in plt.gca().get_yticklabels():
    if label.get_text() == str(best_accuracy):
        label.set_color('red')

plt.xlim(0, 1)
plt.ylim(y_floor, 0.87)
plt.grid()
# NOTE(review): the sweep builds its voters with weights=[alpha, 1-alpha] for
# model_wrappers=[txt_model, img_model]. If weights are paired positionally
# with the wrappers, alpha=0 is "image only" and alpha=1 is "text only",
# i.e. the opposite of this label. Confirm against the WEIGHTED aggregation
# before publishing the figure.
plt.xlabel("Ratio\n(0 = text only // 1 = image only)")
plt.ylabel("Exactitude (données de validation)")
plt.title("Weighted voter - Exactitude en fonction du poids texte/image")

In [None]:
# Text table of the sweep entries at positions 40 to 59 (the weights around
# the middle of the range), with the accuracy shown as a percentage.
table_lines = ["ratio |accuracy", "------+--------"]
table_lines += [f"{al:.2f}  | {100*ac:.2f}%" for al, ac in accuracies[40:60]]
print("\n".join(table_lines))

In [None]:
# Weight retained from the validation sweep above (hard-coded; update it by
# hand if the sweep is re-run and its optimum moves).
alpha = 0.49
# Weights are given in the same order as `model_wrappers`: alpha for the
# first entry and 1 - alpha for the second (presumably paired positionally;
# confirm against the WEIGHTED aggregation).
multimodal_voter = ModelWrapperFactory.make_mmo_voter_wrapper(
    name="0.49-Weighted Voter",
    model_wrappers=[txt_model, img_model],
    agg_fn=AGG_FN.WEIGHTED,
    weights = [alpha, 1-alpha]
)

# Kept as the last expression of the cell so that whatever `visual_report`
# returns or draws is shown as the cell output.
multimodal_voter.visual_report()

## 6. Pondération par classe

In [None]:
# Voter whose weights are one sequence per member: each member contributes
# the `precisions` stored in its own performance summary (presumably one
# precision per class; confirm in src.models.model_wrappers).
multimodal_voter = ModelWrapperFactory.make_mmo_voter_wrapper(
    name="Class-Weighted Voter",
    model_wrappers=[txt_model, img_model],
    agg_fn=AGG_FN.CLASS_WEIGHTED,
    weights=[
        wrapper.performance_summary.precisions
        for wrapper in (txt_model, img_model)
    ],
)
# Kept as the last expression of the cell so that the report is displayed.
multimodal_voter.visual_report()

In [None]:
# Normalise the two members' `precisions` so that, for every class, the text
# and image weights sum to 1. `np.array` copies its input, so the values
# held by the wrappers are not modified; the float dtype keeps the in-place
# divisions valid even if the precisions were stored as integers.
txt_weights = np.array(txt_model.performance_summary.precisions, dtype=float)
img_weights = np.array(img_model.performance_summary.precisions, dtype=float)
summed_weights = txt_weights + img_weights
txt_weights /= summed_weights
img_weights /= summed_weights

indices = np.arange(len(txt_weights))

# Placeholder full-height bar: it consumes the first colour of the cycle
# (blue) so that the two real series get the same colours as in the previous
# charts. It is sized from the data instead of a hard-coded 16 classes.
plt.bar(indices, np.ones_like(txt_weights), alpha=0.75)
# Stacked bars: text share at the bottom, image share on top of it.
plt.bar(indices, img_weights, bottom=txt_weights, label='image model', alpha=0.75)
plt.bar(indices, txt_weights, label='text model', alpha=0.75)

plt.ylabel('Poids')
plt.xlabel('Classe')
plt.title('Valeurs des poids par classe associés aux modèles constitutifs')
plt.xticks(indices)
plt.ylim(0, 1.05)
plt.legend(loc='lower right')
plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()


In [None]:
# Text table of the normalised weights: one row per class index, with the
# text and image shares rounded to two decimals.
table_lines = [" class | txt weight | img weight ", "-------+------------+------------"]
table_lines += [
    f"   {c:02d}  |    {t:.2f}    |    {i:.2f}"
    for c, (t, i) in enumerate(zip(txt_weights, img_weights))
]
print("\n".join(table_lines))