---
**Titre** : Cross-Modalité & Réunion Tardive  
**Auteur** : Dmitrašinović Théotime  
**Date** : 07/11/2023  
  
Il existe plusieurs méthodes de réunification des modalités:
1. Réunion précoce
2. ***Réunion tardive***
3. Réunion intermédiaire  
  
**But** :  
Dans ce NoteBook nous allons traiter la ***Réunion Tardive***.    
Il s'agit de classifier chaque modalité séparément, puis de réunir les probabilités de classification ainsi obtenues.


**Étapes** :
1. Charger les caractéristiques des différentes modalités
2. Classifier chaque modalité séparément et récupérer ses probabilités de classification
3. Réunir (concaténer) les probabilités des différentes modalités
4. Classifier la réunion des probabilités

*Des classes python sont définies pour faciliter les combinaisons.*


---

## Google Drive mount

In [1]:
# Mount Google Drive under /content/gdrive; every data path used below lives there.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import plotly.express as px

In [3]:
# DataFrame of the labelled items (the code below reads its 'name' and 'label' columns)
CSVLabelisedDataPath = "/content/gdrive/MyDrive/Projet_Multimedia/download/CSV/indice_vid_labelled.csv"
df_labels = pd.read_csv(CSVLabelisedDataPath)

In [4]:
# Drop one video because it is absent from the video embeddings (done in place on df_labels)
df_labels.drop(df_labels[df_labels['name'] == '-_YoeHOTJBI4'].index, inplace=True)

In [5]:
# CORPUS
class Corpus:
  """Label table joined with one embedding table and split into train/test/valid.

  Parameters
  ----------
  embeddings : DataFrame joined onto the labels through the 'name' index.
  separation : callable taking a label DataFrame and returning
               (train, test, valid) DataFrames.
  l, s, c    : display names of the label set, the separation and the features.
  df_labels  : DataFrame with at least the columns 'name' and 'label'.

  After construction the instance exposes Xtrain/ytrain, Xtest/ytest,
  Xvalid/yvalid for the three subsets, plus X/y built from the whole,
  unsplit ``df_labels``.
  """

  def __init__(self, embeddings, separation, l, s, c, df_labels):
    # Names: kept part by part and as a single display string.
    self.nom_label = l
    self.nom_sep = s
    self.nom_cara = c
    self.nom = "".join(['||Label:', l, " ||Separation: ", s, " ||CARA: ", c])

    self.df_labels = df_labels
    self.embeddings = embeddings

    # The separation works on a deep copy, so it may mutate its argument
    # without touching the caller's label table.
    labels_copy = self.df_labels.copy(deep=True)
    self.train, self.test, self.valid = separation(labels_copy)
    self.join()
    self.Xy()

    # Whole dataset (no split): features and labels in df_labels order.
    joined_all = self.df_labels.set_index('name').join(self.embeddings)
    self.X = joined_all.drop(columns=['label'])
    self.y = self.df_labels.label

  def join(self):
    """Index each subset by 'name' and attach the embedding columns."""
    for subset in ('train', 'test', 'valid'):
      frame = getattr(self, subset)
      setattr(self, subset, frame.set_index('name').join(self.embeddings))

  def Xy(self):
    """Split each subset into features (X<subset>) and labels (y<subset>)."""
    for subset in ('train', 'test', 'valid'):
      frame = getattr(self, subset)
      setattr(self, 'X' + subset, frame.drop(columns=['label']))
      setattr(self, 'y' + subset, frame.label)

# CLASSIF
class Classif:
  """Result of one model evaluated on one corpus.

  Stores the corpus, the model name, the accuracy and the model output
  passed as ``predic_proba``; ``nom`` is the corpus name suffixed with the
  model name.
  """

  def __init__(self, corpus, nom_model, accuracy, predic_proba):
    self.corpus, self.nom_model = corpus, nom_model
    self.accuracy, self.predic_proba = accuracy, predic_proba
    self.nom = corpus.nom + ' ||Model_Classif:' + nom_model

# FUNCTIONS
def split_TTV(df):
  """Split ``df`` into stratified train / test / validation subsets.

  60 % of the rows go to train; the remaining 40 % are cut in half between
  test and validation. Both cuts are stratified on the 'label' column and
  use random_state=42, so the split is reproducible.

  Returns the tuple (train, test, validation).
  """
  train_part, remainder = train_test_split(
      df, train_size=0.6, stratify=df.label, random_state=42)
  test_part, valid_part = train_test_split(
      remainder, test_size=0.5, stratify=remainder.label, random_state=42)
  return train_part, test_part, valid_part

# Separation 1
def not_balanced(df):
  """Split ``df`` as it is, without any class rebalancing."""
  train, test, valid = split_TTV(df)
  return train, test, valid

# Separation 2
def delHalfEduc(df_labels):
  """Drop a random half of the "Education" rows, then split the rest.

  "Education" is treated as over-represented: half of its rows (rounded
  down, drawn with random_state=42) are removed before the stratified
  train / test / validation split.
  """
  education_rows = df_labels[df_labels['label']=="Education"]
  n_to_drop = len(education_rows)//2
  dropped = education_rows.sample(n_to_drop, random_state=42)
  return split_TTV(df_labels.drop(dropped.index))
# Separation 2 (variant on the "accessibility" label)
def delHalfAccess(df_labels):
  """Drop a random half of the "accessibility" rows, then split the rest.

  Half of the rows labelled "accessibility" (rounded down, drawn with
  random_state=42) are removed before the stratified train / test /
  validation split.
  """
  accessibility_rows = df_labels[df_labels['label']=="accessibility"]
  n_to_drop = len(accessibility_rows)//2
  dropped = accessibility_rows.sample(n_to_drop, random_state=42)
  return split_TTV(df_labels.drop(dropped.index))


# Separation 3
# Over-sampling x2 and x3
def surSamplingBasic(df, seuil=4):
  """Return a copy of ``df`` in which the rarest labels are over-sampled.

  Two passes are run; both draw extra rows, with replacement and
  random_state=42, from the ORIGINAL rows of ``df``:

  1. x2 pass: for every label with fewer than ceil(2 % of len(df)) rows,
     add ``min(2 * count, threshold) - count`` rows;
  2. x3 pass: for every label with fewer than ceil(5 % of len(df)) rows,
     add ``min(3 * count, threshold) - count`` rows.

  A label below the 2 % threshold is therefore handled by both passes.

  Parameters
  ----------
  df    : DataFrame with a 'label' column.
  seuil : unused; kept so that existing callers keep working.

  Returns
  -------
  A new DataFrame: the rows of ``df`` followed by the sampled duplicates.
  ``df`` itself is not modified.
  """
  counts = df['label'].value_counts()
  # Work on a copy to which the new rows are appended.
  balanced_df = df.copy(deep=True)
  for factor, share in ((2, 0.02), (3, 0.05)):
    cap = int(np.ceil(len(df)*share))
    for label in counts[counts < cap].index:
      label_data = df[df['label'] == label]
      # The target is local to this label: the pass threshold is never
      # overwritten, so one label's cap cannot leak into the next one.
      target = min(len(label_data)*factor, cap)
      extra = label_data.sample(target-len(label_data), replace=True, random_state=42)
      balanced_df = pd.concat([balanced_df, extra])
  return balanced_df
def sursamplingx2x3(df, seuil=0.05):
  """Split ``df`` into train / test / validation, then over-sample each subset.

  Each of the three subsets goes through ``surSamplingBasic`` independently,
  with ``seuil`` forwarded unchanged. Returns (train, test, valid).
  """
  return tuple(surSamplingBasic(subset, seuil) for subset in split_TTV(df))

# Separation 4
# Over-sampling and under-sampling
def SurEtSousSampling(df):
  """Under-sample "Education", split, then over-sample each subset.

  ``delHalfEduc`` removes half of the "Education" rows and performs the
  train / test / validation split; each subset is then passed through
  ``surSamplingBasic`` with 0.05 as its second argument.
  Returns (train, test, valid).
  """
  return tuple(surSamplingBasic(subset, 0.05) for subset in delHalfEduc(df))

# Separation 5
# Relabelling of the minority classes
def redistribution(df):
  """Fold minority labels into larger ones and drop "Autos & Vehicles".

  Works IN PLACE on ``df`` (and also returns it):
  1. every row of a minority label receives its replacement label;
  2. a hand-picked list of videos, identified by 'name', receives an
     individually chosen label;
  3. the rows still labelled "Autos & Vehicles" are removed.
  """
  merged_into = {"Gaming":"Entertainment", "Travel & Events":"People & Blogs", "Nonprofits & Activism": "News & Politics"}
  corrected_by_name = {"-AwRAfxBub9M":"Film & Animation",
                       "-1VM2eLhvsSM":"Film & Animation",
                       "-DfPMxdHZKsw":"People & Blogs",
                       "-XsVV0t_cS7Q":"People & Blogs",
                       "-GHDz-XDD8OU":"Film & Animation",
                       "-PXJnZMF3ucc":"People & Blogs",
                       "-CeN6PMEOQEA":"People & Blogs",
                       "-q6rAllJAdWk":"People & Blogs",
                       "-jHCA59ia_NE":"People & Blogs",
                       "-VsJKPdOuzZw":"Comedy"}
  for minority, replacement in merged_into.items():
    df.loc[df.label==minority, ['label']] = replacement
  for video_name, real_label in corrected_by_name.items():
    df.loc[df.name==video_name, ['label']] = real_label
  # Whatever is still labelled "Autos & Vehicles" is removed.
  leftovers = df[df['label']=="Autos & Vehicles"].index
  df.drop(leftovers, inplace=True)
  return df
def redisEtSurSous(df):
  """Relabel the minority classes, then apply the over-/under-sampling split."""
  return SurEtSousSampling(redistribution(df))



from sklearn.metrics import confusion_matrix
def show_conf_matrix(y_test, pred):
  """Display a row-normalised confusion matrix as a Plotly heat-map.

  Parameters
  ----------
  y_test : true labels.
  pred   : predicted labels, in the same order as ``y_test``.

  Every row (true category) is divided by its total, so each cell is the
  proportion of that category predicted as the column category. Only the
  categories present in ``y_test`` are shown.
  """
  # Sorted so that the axes come out in the same order on every run
  # (the iteration order of a set is arbitrary).
  list_cat = sorted(set(y_test))
  conf_matrix = confusion_matrix(y_test, pred, labels=list_cat)
  # Each category in list_cat occurs in y_test, so no row total is zero.
  cf = [[i/sum(ligne) for i in ligne] for ligne in conf_matrix]
  fig = px.imshow(cf,
                  labels=dict(y="Vrai Catégorie", x="Catégorie prédite", color="Proportion"),
                  x=list_cat,
                  y=list_cat
                )
  fig.update_xaxes(side="top")
  fig.show()

# Les différents Corpus

In [7]:
# LABEL SETS (display name -> label DataFrame)
CSV_labels = {"Labels Normaux":df_labels}

# DATASET SEPARATION METHODS (display name -> function returning train, test, valid)
Separations = {"Not balanced":not_balanced,
               "Halfed education":delHalfEduc,
               "Sur Sample x2x3":sursamplingx2x3,
               "Sur & Sous Sample":SurEtSousSampling,
               "Redistribution Small labels & Sur Sous Sample":redisEtSurSous}
# Reduced registry, kept for quick runs:
#Separations = {"Not balanced":not_balanced}

In [8]:
# Hyper-parameter grid for the SVC search
from sklearn.model_selection import GridSearchCV
# 'precomputed' is not part of the grid: that kernel expects a square kernel
# matrix as input, whereas the corpora hold feature matrices.
parameters = {'kernel':('linear', 'poly', 'rbf', 'sigmoid'), 'C':[1, 5, 10, 15], 'gamma':['scale', 'auto'] }

# pipeline & normalisation
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# SVC & OneVsRest & NaiveBayes
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB


# Grid-search variant, stored under its own name so that it is no longer
# silently overwritten by the MODELS registry defined right below.
# Use it by passing GRID_SEARCH_MODELS instead of MODELS to the loops.
GRID_SEARCH_MODELS = {'SVC-GridSearch':make_pipeline(GridSearchCV(SVC(gamma='auto', probability=True), parameters))}


# Models actually run by the classification loops (display name -> pipeline).
# Every pipeline standardises the features before classifying.
MODELS = {'SVC':make_pipeline(StandardScaler(), SVC(gamma='auto', probability=True)),
          '1vsRestSVC':make_pipeline(StandardScaler(), OneVsRestClassifier(SVC(gamma='auto', probability=True))),
          'Naive_Bayes':make_pipeline(StandardScaler(),GaussianNB())
          }

# Probabilités de Classification

## Proba Audio

In [9]:
# Audio embeddings: paths of the pickled files
# VGGish
AverageEmbeddingsPath = "/content/gdrive/MyDrive/Projet_Multimedia/download/Embeddings/Audio_Embeddings/VGGish/embeddings_average_VGGish.pkl"
MaxPoolEmbeddingsPath = "/content/gdrive/MyDrive/Projet_Multimedia/download/Embeddings/Audio_Embeddings/VGGish/embeddings_max_pooling_VGGish.pkl"
# YAMNet
AveragePredictionsPath = "/content/gdrive/MyDrive/Projet_Multimedia/download/Embeddings/Audio_Embeddings/YAMNet/ALL_predictions_YAMNet.pkl"

def getEmbesDF(path):
  """Load the pickled mapping stored at ``path`` as a DataFrame.

  The unpickled object is handed to ``DataFrame.from_dict`` with
  orient='index': one row per key, one column per element of its value.
  """
  # NOTE: unpickling can execute arbitrary code; only load trusted files.
  with open(path, "rb") as pickle_file:
    by_key = pickle.load(pickle_file)
  return pd.DataFrame.from_dict(by_key, orient='index')

# The three audio feature tables
df_avg_embes = getEmbesDF(AverageEmbeddingsPath)
df_max_pool_embes = getEmbesDF(MaxPoolEmbeddingsPath)
df_avg_preds = getEmbesDF(AveragePredictionsPath)

# SUMMARY DICTIONARY (display name -> feature DataFrame)
AUDIO_EMBES = {"Average VGGish":df_avg_embes,
               "Max_pool VGGish":df_max_pool_embes,
               "Average Environment Sound":df_avg_preds}


# Build one Corpus for every (label set, separation, audio feature table) combination
List_Corpus_Embes_Audio = []
for l, labels_df in CSV_labels.items():
  for s, separation_fn in Separations.items():
    for c, features_df in AUDIO_EMBES.items():
      List_Corpus_Embes_Audio.append(
          Corpus(embeddings=features_df, separation=separation_fn, l=l, s=s, c=c, df_labels=labels_df))
      print(List_Corpus_Embes_Audio[-1].nom)
print("\n-", len(CSV_labels)*len(Separations)*len(AUDIO_EMBES), "Combinaisons possibles pour lesquelles on va essayer chaque modèle de Classification")

||Label:Labels Normaux ||Separation: Not balanced ||CARA: Average VGGish
||Label:Labels Normaux ||Separation: Not balanced ||CARA: Max_pool VGGish
||Label:Labels Normaux ||Separation: Not balanced ||CARA: Average Environment Sound
||Label:Labels Normaux ||Separation: Halfed education ||CARA: Average VGGish
||Label:Labels Normaux ||Separation: Halfed education ||CARA: Max_pool VGGish
||Label:Labels Normaux ||Separation: Halfed education ||CARA: Average Environment Sound
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: Average VGGish
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: Max_pool VGGish
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: Average Environment Sound
||Label:Labels Normaux ||Separation: Sur & Sous Sample ||CARA: Average VGGish
||Label:Labels Normaux ||Separation: Sur & Sous Sample ||CARA: Max_pool VGGish
||Label:Labels Normaux ||Separation: Sur & Sous Sample ||CARA: Average Environment Sound
||Label:Labels Normaux ||Separation: Red

In [None]:
from tqdm.notebook import tqdm_notebook

# Every audio corpus x every model
# NOTE(review): probabilities are computed on corpus.X (the whole label table),
# which includes the rows the model was just fitted on — confirm this is intended
# for the later fusion step.
List_Classif_Audio = []
for corpus in tqdm_notebook(List_Corpus_Embes_Audio, desc='Corpus'):
  for nom_model, model in tqdm_notebook(MODELS.items(), desc='Models', leave=False):
    model.fit(corpus.Xtrain, corpus.ytrain)
    predic_proba = model.predict_proba(corpus.X)
    accuracy = model.score(corpus.Xtest, corpus.ytest)
    List_Classif_Audio.append(Classif(corpus, nom_model, accuracy, predic_proba))

Corpus:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Inspect the test-subset features of the first audio corpus
List_Corpus_Embes_Audio[0].Xtest

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-84TouqfIsiI,-0.317788,0.046923,0.126741,-0.626000,-0.107470,-0.402861,0.009373,-0.387678,-0.279874,-0.346340,...,-0.674818,0.322648,-0.128375,0.186918,-0.109746,0.204856,-0.188464,-0.385685,0.527368,0.148123
-HkKRouJqGCg,-0.269944,-0.054643,-0.137082,0.295747,-0.050220,-0.071286,-0.042822,-0.105079,-0.175809,-0.056513,...,-0.092364,-0.033017,-0.118352,-0.031682,-0.033670,-0.104144,0.205717,-0.162219,0.228777,0.177694
-5yGNbyAmkVY,-0.159975,-0.142702,0.132698,0.363610,-0.134945,-0.608175,-0.368546,0.053818,-0.408286,-0.376216,...,-0.268506,0.109308,-0.087392,-0.286156,-0.315084,-0.163723,0.087576,-0.259518,0.049263,-0.161526
-jnaPpgK33Lo,-0.059307,0.246905,-0.017827,-1.022627,0.429280,-0.630799,-0.212256,-0.596609,-0.480968,-0.436278,...,-1.074093,0.058553,-0.282394,0.436431,-0.060606,-0.140736,-0.092484,0.057580,0.303812,0.159018
-jcHt29II6UA,-0.377350,-0.022149,0.698666,-0.726763,0.037554,-0.398928,-0.104971,-0.270677,-0.148561,-0.603327,...,-0.624004,0.285361,0.160752,0.035104,-0.158406,0.060315,-0.169938,-0.424167,0.390144,0.046708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
-4b6ttHSgIFM,-0.298476,0.027462,-0.175030,-1.099011,-0.082205,-0.665951,0.057063,-0.580523,-0.953925,-0.735726,...,-1.530777,-0.028575,0.230128,0.463214,-0.158767,-0.279691,-0.094566,0.123167,1.104595,-0.340322
-Qb2xoiVM7UA,1.231742,-0.483357,-0.077537,-0.991329,-0.315203,-0.477319,-0.362519,-0.478443,-0.475210,0.303993,...,-1.181685,0.294064,-0.247198,-0.183573,-0.524138,0.636093,0.645040,0.126108,-0.120095,-0.670969
-MouujVqnak4,-0.482712,-0.063473,0.399974,-0.416122,-0.109497,-0.597875,-0.147897,-0.259774,-0.503601,-0.366440,...,-0.601590,0.006242,-0.026698,-0.128416,-0.002509,0.028399,-0.271283,-0.491138,0.234140,0.000160
-Ujg7vcIa7kM,-0.085764,0.054661,-0.094694,-0.680653,0.098632,-0.037554,-0.395932,-0.346314,-0.703809,-0.800123,...,-1.157290,0.632938,-0.126773,0.772151,-0.309215,-0.068643,0.814695,0.184597,0.214609,-0.195487


In [None]:
# Inspect the full (unsplit) features of the first audio corpus
List_Corpus_Embes_Audio[0].X

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
-KHQk1_Vq69E,0.243650,-0.180969,0.072254,-0.230461,-0.048902,-0.281404,-0.481016,-0.264177,-0.388678,-0.085953,...,-0.599094,0.095622,-0.221983,-0.235143,-0.189304,0.374680,0.363151,0.318704,0.207495,-0.273022
-mG4Y2Snygfk,-0.155183,-0.182912,0.727288,-0.423727,-0.152796,-0.793291,0.059086,-0.292574,-0.965840,-0.513033,...,-0.631031,-0.115519,-0.138996,0.013345,-0.502213,0.292724,-0.257679,-0.644201,-0.101513,-0.119992
-OdEfx8up7wM,-0.460901,0.125163,0.237003,-0.832720,-0.049428,-0.734300,0.123526,-0.538296,-0.355696,-0.540775,...,-0.894422,0.134754,0.454434,0.005242,0.222413,-0.007404,-0.254512,-0.312417,0.640228,-0.073490
-Opbfh4wNu7Q,-0.352345,0.115966,0.084890,-0.673339,-0.102766,-0.470543,0.098630,-0.468396,-0.267452,-0.234936,...,-0.709823,0.072326,-0.288329,0.370245,-0.071344,0.011924,-0.320420,-0.320643,0.498847,0.529764
-A5340xbbQJU,-0.359609,0.009762,0.288097,-0.566833,0.045186,-0.462260,0.166459,-0.307142,-0.286494,-0.346548,...,-0.067332,-0.161344,0.060424,-0.033002,-0.259420,-0.048369,-0.102566,-0.387203,0.217318,0.003480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
-ymc30meWzfg,0.095368,-0.052058,0.047134,-0.782422,0.376781,0.164262,-0.302934,-0.453255,-0.467201,-0.582196,...,-1.059904,0.248109,-0.128543,0.524558,-0.224435,0.003444,0.430018,0.161990,0.194885,-0.154108
-h7HGWRTGqyg,0.300503,-0.245172,0.294290,-0.713791,-0.293060,-0.693551,-0.376811,-0.477851,-0.621941,-0.109527,...,-1.057379,0.013129,0.182209,-0.100952,-0.141378,0.276118,-0.140132,-0.243263,0.105526,-0.335769
-XVRLTfN1WYw,-0.310879,0.079943,0.447005,-0.411251,0.426900,-0.473754,0.105915,0.026987,-0.326816,-0.595102,...,0.179060,-0.068115,0.005854,0.029373,-0.443495,-0.094631,-0.094392,-0.584419,0.178212,-0.122618
-jTtH7Fq1trw,-0.241761,0.017688,0.304524,-0.588790,0.284942,-0.620436,0.020650,-0.300169,-0.554043,-0.536467,...,-0.295863,-0.037014,-0.110379,0.000770,-0.253064,-0.065521,-0.051388,-0.487066,0.150851,-0.036252


## Proba Video

In [10]:
# Folder holding the pickled video embeddings
VideoEmbeddingsPath = "/content/gdrive/MyDrive/Projet_Multimedia/download/Embeddings/Video_Embeddings/"
#YOLO
# average probabilities; columns are renamed to their integer position 0..n-1
df_YOLO_avgProba = getEmbesDF(VideoEmbeddingsPath + "all_embeddings_avgProba.pkl")
df_YOLO_avgProba.rename(columns={k:i for i,k in enumerate(df_YOLO_avgProba.columns)}, inplace=True)
# max probabilities; same positional renaming of the columns
df_YOLO_maxProba = getEmbesDF(VideoEmbeddingsPath + "all_embeddings_maxProba.pkl")
df_YOLO_maxProba.rename(columns={k:i for i,k in enumerate(df_YOLO_maxProba.columns)}, inplace=True)
# sentence BERT
df_YOLO_sentenceBERT = getEmbesDF(VideoEmbeddingsPath + "all_embeddings_sentenceBERT.pkl")

# SUMMARY DICTIONARY (display name -> feature DataFrame)
VIDEO_EMBES = {"YOLO_avgProba":df_YOLO_avgProba,
               "YOLO_maxProba":df_YOLO_maxProba,
               "YOLO_sentenceBERT":df_YOLO_sentenceBERT
               }



# Build one Corpus for every (label set, separation, video feature table) combination
List_Corpus_Embes_Video = []
for l, labels_df in CSV_labels.items():
  for s, separation_fn in Separations.items():
    for c, features_df in VIDEO_EMBES.items():
      List_Corpus_Embes_Video.append(
          Corpus(embeddings=features_df, separation=separation_fn, l=l, s=s, c=c, df_labels=labels_df))
      print(List_Corpus_Embes_Video[-1].nom)
print("\n-", len(CSV_labels)*len(Separations)*len(VIDEO_EMBES), "Combinaisons possibles pour lesquelles on va essayer chaque modèle de Classification")

||Label:Labels Normaux ||Separation: Not balanced ||CARA: YOLO_avgProba
||Label:Labels Normaux ||Separation: Not balanced ||CARA: YOLO_maxProba
||Label:Labels Normaux ||Separation: Not balanced ||CARA: YOLO_sentenceBERT
||Label:Labels Normaux ||Separation: Halfed education ||CARA: YOLO_avgProba
||Label:Labels Normaux ||Separation: Halfed education ||CARA: YOLO_maxProba
||Label:Labels Normaux ||Separation: Halfed education ||CARA: YOLO_sentenceBERT
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: YOLO_avgProba
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: YOLO_maxProba
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: YOLO_sentenceBERT
||Label:Labels Normaux ||Separation: Sur & Sous Sample ||CARA: YOLO_avgProba
||Label:Labels Normaux ||Separation: Sur & Sous Sample ||CARA: YOLO_maxProba
||Label:Labels Normaux ||Separation: Sur & Sous Sample ||CARA: YOLO_sentenceBERT
||Label:Labels Normaux ||Separation: Redistribution Small labels & Sur Sous Sample |

In [None]:
# Every video corpus x every model
# NOTE(review): probabilities are computed on corpus.X (the whole label table),
# which includes the rows the model was just fitted on — confirm this is intended
# for the later fusion step.
List_Classif_Video = []
for corpus in tqdm_notebook(List_Corpus_Embes_Video, desc='Corpus'):
  for nom_model, model in tqdm_notebook(MODELS.items(), desc='Models', leave=False):
    model.fit(corpus.Xtrain, corpus.ytrain)
    predic_proba = model.predict_proba(corpus.X)
    accuracy = model.score(corpus.Xtest, corpus.ytest)
    List_Classif_Video.append(Classif(corpus, nom_model, accuracy, predic_proba))

Corpus:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

## Proba Texte

In [11]:
# Folder holding the pickled text embeddings
TexteEmbeddingsPath = "/content/gdrive/MyDrive/Projet_Multimedia/download/Embeddings/Texte_Embeddings/"
# pretrained classifier (not loaded yet)
#df_DistilledBERT =
# complex (variable and key are spelled "NPNet" although the file name says "MPnet")
df_NPNet = getEmbesDF(TexteEmbeddingsPath + "all_embeddings_MPnet.pkl")
# classic
# NOTE(review): loaded here but not registered in TEXT_EMBES below — confirm whether that is intended
df_sentenceBERT = getEmbesDF(TexteEmbeddingsPath + "all_embeddings_sentenceBERT_on_Text.pkl")

# SUMMARY DICTIONARY (display name -> feature DataFrame)
TEXT_EMBES = {"NPNet":df_NPNet,
              }

# Build one Corpus for every (label set, separation, text feature table) combination
List_Corpus_Embes_Text = []
for l, labels_df in CSV_labels.items():
  for s, separation_fn in Separations.items():
    for c, features_df in TEXT_EMBES.items():
      List_Corpus_Embes_Text.append(
          Corpus(embeddings=features_df, separation=separation_fn, l=l, s=s, c=c, df_labels=labels_df))
      print(List_Corpus_Embes_Text[-1].nom)
print("\n-", len(CSV_labels)*len(Separations)*len(TEXT_EMBES), "Combinaisons possibles pour lesquelles on va essayer chaque modèle de Classification")

||Label:Labels Normaux ||Separation: Not balanced ||CARA: NPNet
||Label:Labels Normaux ||Separation: Halfed education ||CARA: NPNet
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: NPNet
||Label:Labels Normaux ||Separation: Sur & Sous Sample ||CARA: NPNet
||Label:Labels Normaux ||Separation: Redistribution Small labels & Sur Sous Sample ||CARA: NPNet

- 5 Combinaisons possibles pour lesquelles on va essayer chaque modèle de Classification


In [None]:
# Every text corpus x every model
# NOTE(review): probabilities are computed on corpus.X (the whole label table),
# which includes the rows the model was just fitted on — confirm this is intended
# for the later fusion step.
List_Classif_Text = []
for corpus in tqdm_notebook(List_Corpus_Embes_Text, desc='Corpus'):
  for nom_model, model in tqdm_notebook(MODELS.items(), desc='Models', leave=False):
    model.fit(corpus.Xtrain, corpus.ytrain)
    predic_proba = model.predict_proba(corpus.X)
    accuracy = model.score(corpus.Xtest, corpus.ytest)
    List_Classif_Text.append(Classif(corpus, nom_model, accuracy, predic_proba))

Corpus:   0%|          | 0/1 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Class probabilities of the first row, third text classification result
List_Classif_Text[2].predic_proba[0]

array([2.92871728e-021, 1.98857419e-103, 9.95619350e-058, 1.15917920e-088,
       7.82700891e-088, 9.04139606e-225, 1.91630725e-145, 2.54111872e-160,
       8.94186656e-066, 1.46009499e-036, 1.00000000e+000, 1.84438257e-121,
       1.15884261e-086, 1.69166231e-127, 5.54867705e-126])

In [None]:
# Class probabilities of the first row, first text classification result
List_Classif_Text[0].predic_proba[0]

array([0.00942253, 0.01936109, 0.08631464, 0.05303372, 0.06457656,
       0.00506326, 0.00932372, 0.00521889, 0.01343738, 0.027396  ,
       0.66157494, 0.0136709 , 0.01335384, 0.00871845, 0.00953408])

# Les concaténations des probabilités des différentes prédictions

In [None]:
# Name of the corpus behind the first audio classification result
List_Classif_Audio[0].corpus.nom

'||Label:Labels Normaux ||Separation: Not balanced ||CARA: Average VGGish'

In [None]:
# Name of the corpus behind the first video classification result
List_Classif_Video[0].corpus.nom

'||Label:Labels Normaux ||Separation: Not balanced ||CARA: YOLO_avgProba'

In [None]:
def memeCorpus(c1, c2, c3):
  """Tell whether three classification results share label set and separation.

  Compares the ``nom_label`` and ``nom_sep`` of the corpus attached to each
  result; the feature name (``nom_cara``) is deliberately not compared.
  """
  reference, *others = (c1.corpus, c2.corpus, c3.corpus)
  same_label = all(reference.nom_label == other.nom_label for other in others)
  same_separation = all(reference.nom_sep == other.nom_sep for other in others)
  return same_label and same_separation

In [None]:
# Every (audio, video, text) triple of results obtained on the same label set
# and separation, keyed by a descriptive string.
COMBIS = {}
for audio_classif in List_Classif_Audio:
  for video_classif in List_Classif_Video:
    for text_classif in List_Classif_Text:
      if not memeCorpus(audio_classif, video_classif, text_classif):
        continue
      txt = "".join([
          '||Corpus:', audio_classif.corpus.nom_label, ' & ', audio_classif.corpus.nom_sep,
          '  ||Probas: |A:', audio_classif.corpus.nom_cara, ' IN ', audio_classif.nom_model,
          ' |V:', video_classif.corpus.nom_cara, ' IN ', video_classif.nom_model,
          ' |T:', text_classif.corpus.nom_cara, ' IN ', text_classif.nom_model,
      ])
      COMBIS[txt] = [audio_classif, video_classif, text_classif]

In [None]:
def df_dic_concat(ac, vc, tc, df_labels):
  """Concatenate three probability matrices column-wise, indexed by name.

  Parameters
  ----------
  ac, vc, tc : objects exposing a ``predic_proba`` array (audio, video and
               text results); the three arrays must have the same number of rows.
  df_labels  : DataFrame whose 'name' column gives, in order, the name of
               each row of the probability arrays.

  Returns
  -------
  DataFrame indexed by name, with the audio, video and text columns side by
  side (in that order).

  Raises
  ------
  ValueError : if ``df_labels`` does not have exactly one name per
               probability row. Without this check, ``zip`` would silently
               truncate and leave rows missing.
  """
  conca = np.hstack((ac.predic_proba, vc.predic_proba, tc.predic_proba))
  names = df_labels.name
  if len(names) != len(conca):
    raise ValueError(
        "df_labels has %d names but the probability matrices have %d rows"
        % (len(names), len(conca)))
  dico = dict(zip(names, conca))
  return pd.DataFrame.from_dict(dico, orient='index')

In [None]:
# Build one second-stage Corpus per combination: its features are the
# concatenated class probabilities of the audio, video and text results.
List_Corpus = []
for combi, (ac, vc, tc) in COMBIS.items():
  l = ac.corpus.nom_label
  s = ac.corpus.nom_sep
  # Short feature description; not used below, where the corpus is named
  # after the full combination key instead.
  cara = ac.corpus.nom_cara+', '+vc.corpus.nom_cara+' & '+tc.corpus.nom_cara

  fused_probas = df_dic_concat(ac, vc, tc, CSV_labels[l])
  List_Corpus.append(Corpus(embeddings=fused_probas, separation=Separations[s], l=l, s=s, c=combi, df_labels=CSV_labels[l]))
  print(List_Corpus[-1].nom)
print("\n-", len(List_Corpus), "Combinaisons possibles pour lesquelles on va essayer chaque modèle de Classification")

||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN SVC |V:YOLO_avgProba IN SVC |T:NPNet IN SVC
||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN SVC |V:YOLO_avgProba IN SVC |T:NPNet IN 1vsRestSVC
||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN SVC |V:YOLO_avgProba IN SVC |T:NPNet IN Naive_Bayes
||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN SVC |V:YOLO_avgProba IN 1vsRestSVC |T:NPNet IN SVC
||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN SVC |V:YOLO_avgProba IN 1vsRestSVC |T:NPNet IN 1vsRestSVC
||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  |

In [None]:
# Every fused corpus x every model (second-stage classification)
# Unlike the first-stage loops, the hard predictions on the test subset are
# stored in the Classif slot named ``predic_proba``.
List_Classif = []
for corpus in tqdm_notebook(List_Corpus, desc='Corpus'):
  for nom_model, model in tqdm_notebook(MODELS.items(), desc='Models', leave=False):
    model.fit(corpus.Xtrain, corpus.ytrain)
    predictions = model.predict(corpus.Xtest)
    accuracy = model.score(corpus.Xtest, corpus.ytest)
    List_Classif.append(Classif(corpus, nom_model, accuracy, predictions))

Corpus:   0%|          | 0/243 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

Models:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Report every fusion configuration whose accuracy clears the threshold:
# print its name and accuracy, then show its confusion matrix.
MIN_ACCURACY = 0.53  # only configurations scoring strictly above this are shown

print(len(List_Classif))  # total number of evaluated configurations
# Iterate directly: the positional index was never used, so enumerate is dropped.
for classif in List_Classif:
  if classif.accuracy > MIN_ACCURACY:
    print(classif.nom)
    print("accuracy:",classif.accuracy)
    # NOTE(review): this passes `predic_proba` rather than `predictions` --
    # confirm that show_conf_matrix expects probabilities and not labels.
    show_conf_matrix(classif.corpus.ytest, classif.predic_proba)

729
||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN SVC |V:YOLO_maxProba IN SVC |T:NPNet IN 1vsRestSVC ||Model_Classif:1vsRestSVC
accuracy: 0.5372807017543859


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN SVC |V:YOLO_maxProba IN Naive_Bayes |T:NPNet IN 1vsRestSVC ||Model_Classif:1vsRestSVC
accuracy: 0.5328947368421053


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_avgProba IN SVC |T:NPNet IN 1vsRestSVC ||Model_Classif:1vsRestSVC
accuracy: 0.5394736842105263


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_avgProba IN 1vsRestSVC |T:NPNet IN 1vsRestSVC ||Model_Classif:SVC
accuracy: 0.5307017543859649


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_avgProba IN 1vsRestSVC |T:NPNet IN 1vsRestSVC ||Model_Classif:1vsRestSVC
accuracy: 0.5350877192982456


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_maxProba IN SVC |T:NPNet IN 1vsRestSVC ||Model_Classif:1vsRestSVC
accuracy: 0.5482456140350878


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_maxProba IN 1vsRestSVC |T:NPNet IN 1vsRestSVC ||Model_Classif:SVC
accuracy: 0.5307017543859649


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_maxProba IN 1vsRestSVC |T:NPNet IN 1vsRestSVC ||Model_Classif:1vsRestSVC
accuracy: 0.5394736842105263


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_maxProba IN Naive_Bayes |T:NPNet IN 1vsRestSVC ||Model_Classif:SVC
accuracy: 0.5328947368421053


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_maxProba IN Naive_Bayes |T:NPNet IN 1vsRestSVC ||Model_Classif:1vsRestSVC
accuracy: 0.5328947368421053


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Average VGGish IN 1vsRestSVC |V:YOLO_sentenceBERT IN Naive_Bayes |T:NPNet IN SVC ||Model_Classif:1vsRestSVC
accuracy: 0.5350877192982456


||Label:Labels Normaux ||Separation: Not balanced ||CARA: ||Corpus:Labels Normaux & Not balanced  ||Probas: |A:Max_pool VGGish IN SVC |V:YOLO_avgProba IN SVC |T:NPNet IN SVC ||Model_Classif:1vsRestSVC
accuracy: 0.5307017543859649


In [None]:
# Accuracy of each entry, kept in the same order as List_Classif.
acc = [entry.accuracy for entry in List_Classif]
# Positions in List_Classif ordered from the lowest to the highest accuracy.
i_sorted = np.asarray(acc).argsort()

# Highest accuracy reached; as the last expression it is the cell's output.
max(acc)

0.5482456140350878

# Autre

In [None]:
# For each entry: its name, then the number of rows in its probability
# output, then the length of the first row -- one value per line.
for classif in List_Classif:
  print(classif.nom)
  print(len(classif.predic_proba), len(classif.predic_proba[0]), sep="\n")

||Label:Labels Normaux ||Separation: Not balanced ||CARA: Average VGGish ||Model_Classif:SVC
456
15
||Label:Labels Normaux ||Separation: Not balanced ||CARA: Average VGGish ||Model_Classif:1vsRestSVC
456
15
||Label:Labels Normaux ||Separation: Not balanced ||CARA: Average VGGish ||Model_Classif:Naive_Bayes
456
15
||Label:Labels Normaux ||Separation: Halfed education ||CARA: Average VGGish ||Model_Classif:SVC
383
15
||Label:Labels Normaux ||Separation: Halfed education ||CARA: Average VGGish ||Model_Classif:1vsRestSVC
383
15
||Label:Labels Normaux ||Separation: Halfed education ||CARA: Average VGGish ||Model_Classif:Naive_Bayes
383
15
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: Average VGGish ||Model_Classif:SVC
567
15
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: Average VGGish ||Model_Classif:1vsRestSVC
567
15
||Label:Labels Normaux ||Separation: Sur Sample x2x3 ||CARA: Average VGGish ||Model_Classif:Naive_Bayes
567
15
||Label:Labels Normaux ||Separatio

In [None]:
# Manual check: fit the 'Naive_Bayes' model on the first corpus and keep its
# raw outputs on the test split in notebook globals for inspection.
corpus = List_Corpus[0]
print(corpus.nom)
# NOTE(review): this is the object stored in MODELS, not a copy, so fitting it
# here changes the shared entry -- confirm nothing else relies on its state.
model = MODELS['Naive_Bayes']
model.fit(corpus.Xtrain, corpus.ytrain)
# Output of predict() on the test features.
predictions = model.predict(corpus.Xtest)
# Output of predict_proba() on the same test features.
predic_proba = model.predict_proba(corpus.Xtest)

||Label:Labels Normaux ||Separation: Not balanced ||CARA: Average VGGish


In [None]:
# First row of the predict_proba output (displayed as the cell output).
predic_proba[0]

array([7.03034447e-25, 4.52704242e-18, 4.96172128e-26, 9.97764792e-08,
       9.99994734e-01, 5.16607409e-06, 1.26679972e-31, 2.56455648e-10,
       8.36064806e-26, 1.35836045e-17, 6.50857745e-22, 8.36506677e-24,
       5.93689893e-15, 6.85040041e-42, 1.09060573e-32])

In [None]:
# Name of the first entry in List_Classif (displayed as the cell output).
List_Classif[0].nom

'||Label:Labels Normaux ||Separation: Not balanced ||CARA: Average VGGish ||Model_Classif:SVC'

In [None]:
# Predictions stored on the first entry in List_Classif (displayed as the cell output).
List_Classif[0].predictions

array(['Film & Animation', 'Education', 'Education', 'Education',
       'Film & Animation', 'Education', 'Education', 'Education',
       'Education', 'Film & Animation', 'Film & Animation',
       'Film & Animation', 'People & Blogs', 'Education', 'Education',
       'Film & Animation', 'People & Blogs', 'Education', 'Entertainment',
       'Education', 'Education', 'Education', 'Education', 'Education',
       'Education', 'Film & Animation', 'Education', 'Education',
       'Film & Animation', 'Education', 'Film & Animation', 'Education',
       'Film & Animation', 'Entertainment', 'Education', 'Education',
       'Film & Animation', 'Howto & Style', 'Education', 'Education',
       'People & Blogs', 'Education', 'Film & Animation',
       'Film & Animation', 'Education', 'Entertainment', 'Education',
       'Film & Animation', 'Education', 'People & Blogs',
       'Film & Animation', 'Education', 'Comedy', 'Education',
       'People & Blogs', 'Education', 'Education', 'People & B