# Modèles multimodaux autonomes

## README
Ce notebook permet la création et l'évaluation de modèles multimodaux autonomes.

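Il réalise tout d'abord certaines opérations préalables (chapitre 1), dont la définition des variables globales d'exécution, puis charge les données (chapitre 2), crée le modèle composite à partir des wrappers existants (chapitre 3), l'entraîne (chapitre 4) et l'évalue (chapitre 5).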

## 1. Préparation

In [None]:
import sys
from pathlib import Path

# Make the project root importable so that `from src import ...` resolves.
# The root is taken to be the parent of the current working directory
# (assumes the notebook is run from a direct sub-directory of the project).
project_root = Path().resolve().parent
# Compare resolved paths so that relative or non-normalized sys.path entries
# pointing at the same directory do not cause a duplicate entry to be appended.
if project_root not in {Path(p).resolve() for p in sys.path}:
    sys.path.append(str(project_root))

from src import PATHS

In [None]:
import os
import time
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from src.models.multimodal import MultiModalVoter, MultiModalClassWeightedVoter, MultiModalCompositeModel
from src.visualization.visualize import visual_classification_report
from lightgbm import LGBMClassifier
from src.models.model_wrappers import ModelWrapperFactory

## 2. Chargement des données

In [None]:
# Load the three metadata tables stored as parquet files under PATHS.metadata.
# Paths are built with the `/` operator throughout, matching how PATHS.models
# is used later in this notebook.
documents = pd.read_parquet(PATHS.metadata / "df_documents.parquet")
data_sets = pd.read_parquet(PATHS.metadata / "df_data_sets.parquet")
labels = pd.read_parquet(PATHS.metadata / "df_encoded_labels.parquet")
# Cell output: the shape of each table, as a quick sanity check.
documents.shape, data_sets.shape, labels.shape

In [None]:
# Work on a sample only: restrict the three tables to the sampled documents.
sample = pd.read_parquet(
    PATHS.metadata / "samples" / "df_documents_sample_4k_3.parquet"
)
# DataFrame.join defaults to a left join on the index, so each result keeps
# exactly the rows of `sample` (plus any columns `sample` itself carries).
documents = sample.join(documents)
data_sets = sample.join(data_sets)
labels = sample.join(labels)
# Cell output: the shapes after sampling.
documents.shape, data_sets.shape, labels.shape

In [None]:
# One boolean mask per split, read from the `data_set` column of `data_sets`.
is_train = data_sets["data_set"] == "train"
is_val = data_sets["data_set"] == "val"
is_test = data_sets["data_set"] == "test"

# Inputs are the index values of the selected documents (not feature columns);
# targets are the matching entries of the `label` column.
X_train, y_train = documents[is_train].index, labels[is_train]["label"]
X_val, y_val = documents[is_val].index, labels[is_val]["label"]
X_test, y_test = documents[is_test].index, labels[is_test]["label"]

## 3. Création du wrapper

In [None]:
# Ask the factory for its registered wrappers; the result is shown as the cell
# output. Presumably these are the names accepted by `load_existing` below --
# TODO confirm against src.models.model_wrappers.
ModelWrapperFactory.get_registered()

In [None]:
# Load the two existing single-modality wrappers (text first, then image).
model_wrappers = [
    ModelWrapperFactory.load_existing(wrapper_name)
    for wrapper_name in ('Text-based Logistic Regressor', 'Image-based LGBM')
]
txt_model, img_model = model_wrappers

# Combine them in a composite model that is given a logistic regression as
# its classifier argument.
clf = LogisticRegression()
model = MultiModalCompositeModel(model_wrappers, clf)

## 4. Apprentissage

In [None]:
# Time the training run. perf_counter() is a monotonic, high-resolution clock,
# so the measured duration is not affected by system clock adjustments
# (unlike time.time(), which reports wall-clock time).
start_time = time.perf_counter()
model.fit(X_train, y_train)
end_time = time.perf_counter()
elapsed = end_time - start_time
print(f" Temps d'exécution total : {elapsed / 60:.2f} minutes ({elapsed:.1f} secondes)")

In [None]:
# Persist the fitted composite model under the models directory.
model_filename = "mmo_comp_logreg_on_img-lgbm+txt-logreg.joblib"
path = PATHS.models / model_filename
model.save(path)

## 5. Évaluation

In [None]:
# File of the saved composite model and the display name to register it under.
# Both are redefined here so this cell can run without re-running the training.
path = PATHS.models / "mmo_comp_logreg_on_img-lgbm+txt-logreg.joblib"
name = "Multimodal Composite LogReg on img-LGBM + txt-LogReg"

# Build a wrapper around the saved model and display its visual report.
wrapper = ModelWrapperFactory.make_mmo_composite_wrapper(name, path)
wrapper.visual_report()