In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KNeighborsClassifier
from unidecode import unidecode
from sklearn.preprocessing import MinMaxScaler

1 - Préparer la base qui contient les informations sur les acteurs et réalisateurs

In [3]:
# Ouverture et inspection de la base
df_crew = pd.read_csv(r"C:\Users\emman\Desktop\Projet2-0305\df_crew_en_salle.csv",sep=',')

In [4]:
df_crew.columns

Index(['tconst', 'primaryTitle', 'title', 'directors', 'nconst', 'category',
       'primaryName', 'deathYear'],
      dtype='object')

In [5]:
df_crew.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 111945 entries, 0 to 111944
Data columns (total 8 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   tconst        111945 non-null  object
 1   primaryTitle  111945 non-null  object
 2   title         111945 non-null  object
 3   directors     111945 non-null  object
 4   nconst        111945 non-null  object
 5   category      111945 non-null  object
 6   primaryName   111945 non-null  object
 7   deathYear     111945 non-null  int64 
dtypes: int64(1), object(7)
memory usage: 6.8+ MB


In [6]:
df_crew.head(15)

Unnamed: 0,tconst,primaryTitle,title,directors,nconst,category,primaryName,deathYear
0,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0000859,actor,Lionel Barrymore,1954
1,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0124877,director,David Butler,1979
2,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0000073,actress,Shirley Temple,2014
3,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0892867,actress,Evelyn Venable,1993
4,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0517099,actor,John Lodge,1985
5,tt0020298,Queen Kelly,La reine Kelly,"nm0002233,nm0092915",nm0039276,actress,Sylvia Ashton,1940
6,tt0020298,Queen Kelly,La reine Kelly,"nm0002233,nm0092915",nm0654262,actress,Seena Owen,1966
7,tt0020298,Queen Kelly,La reine Kelly,"nm0002233,nm0092915",nm0002233,director,Erich von Stroheim,1957
8,tt0020298,Queen Kelly,La reine Kelly,"nm0002233,nm0092915",nm0841797,actress,Gloria Swanson,1983
9,tt0020298,Queen Kelly,La reine Kelly,"nm0002233,nm0092915",nm0126430,actor,Walter Byron,1972


In [6]:
# regarder le nombre de nconst unique qui nous donneront le nombre de actrices/acteurs/réalisateurs présents dans la base, sans doublon
df_crew['nconst'].unique().shape

(50033,)

In [8]:
## Supprimer les nconst des acteurs et actrices qui apparaissent 1 seule fois, car peu utile dans le système de recommandation

# Compter le nombre d'occurrences de chaque nconst dans le dataframe
nconst_counts = df_crew['nconst'].value_counts()

# Créer une liste des nconst qui n'apparaissent une seule fois
unpopular_nconsts = nconst_counts[nconst_counts == 1].index.tolist()

print(f"Il y a {len(unpopular_nconsts)} qui n'apparaissent qu'une seule fois")

Il y a 33520 qui n'apparaissent qu'une seule fois


In [10]:
# Sélectionner les lignes correspondantes où la catégorie est différente de "directors". 
# Nous souhaitons ici conserver les réalisateurs car certains acteurs sont aussi réalisateurs et on risquerait de perdre des acteurs importants en les supprimants
index_to_drop = df_crew.loc[(df_crew['nconst'].isin(unpopular_nconsts)) & (df_crew['category'] != 'director')].index

# Supprimer les lignes correspondantes du dataframe
df_crew = df_crew.drop(index_to_drop)

print(df_crew)



           tconst         primaryTitle                           title  \
0       tt0016029   The Little Colonel                Le petit colonel   
1       tt0016029   The Little Colonel                Le petit colonel   
2       tt0016029   The Little Colonel                Le petit colonel   
3       tt0016029   The Little Colonel                Le petit colonel   
4       tt0016029   The Little Colonel                Le petit colonel   
...           ...                  ...                             ...   
111935  tt9911196  The Marriage Escape  De Beentjes van Sint-Hildegard   
111936  tt9911196  The Marriage Escape  De Beentjes van Sint-Hildegard   
111940  tt9916362                Coven        Les sorcières d'Akelarre   
111941  tt9916362                Coven        Les sorcières d'Akelarre   
111942  tt9916362                Coven        Les sorcières d'Akelarre   

        directors     nconst  category         primaryName  deathYear  
0       nm0124877  nm0000859     actor 

In [11]:
# Vérifier le nombre de nconst unique
df_crew['nconst'].unique().shape

(23562,)

In [10]:
# Vérifier la nouvelle shape de df_crew
df_crew.shape

(85474, 8)

In [11]:
# Vérifier le nombre de film restant après le nettoyage
df_crew['tconst'].unique().shape

(22229,)

In [12]:
# Calculer le nombe de film perdu
films_perdus = 22303 - 22229
print(f'A la suite de ce nettoyage, nous avons perdu {films_perdus} films')

A la suite de ce nettoyage, nous avons perdu 74 films


In [13]:
# Faire le get_dummies sur les acteurs/actrices/réalisateurs pour pouvoir les représenter en donées numériques pour les intéger dans le ML
dummies = df_crew['nconst'].str.get_dummies(',')

In [14]:
# On devrait avoir 23562 colonnes à notre df_crew après le get dummies
dummies.shape

(85474, 23562)

In [15]:
dummies.head()

Unnamed: 0,nm0000001,nm0000002,nm0000003,nm0000004,nm0000005,nm0000006,nm0000007,nm0000008,nm0000009,nm0000010,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Vérification du nombre de Nan
dummies.isna().sum()

nm0000001    0
nm0000002    0
nm0000003    0
nm0000004    0
nm0000005    0
            ..
nm9970487    0
nm9974129    0
nm9974256    0
nm9990866    0
nm9993616    0
Length: 23562, dtype: int64

In [17]:
# Assemblage des actrices/acteurs/réalisateurs de cinéma au film qu'ils ont fait.
df_crew_dummies = pd.concat([df_crew, dummies], axis = 1, ignore_index=False)

In [18]:
df_crew_dummies.shape

(85474, 23570)

In [19]:
# Vérification que la concatenisation s'est bien passée, on regarde le nombre de Nan
df_crew_dummies.isna().sum().max()

0

In [20]:
df_crew_dummies.head()

Unnamed: 0,tconst,primaryTitle,title,directors,nconst,category,primaryName,deathYear,nm0000001,nm0000002,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
0,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0000859,actor,Lionel Barrymore,1954,0,0,...,0,0,0,0,0,0,0,0,0,0
1,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0124877,director,David Butler,1979,0,0,...,0,0,0,0,0,0,0,0,0,0
2,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0000073,actress,Shirley Temple,2014,0,0,...,0,0,0,0,0,0,0,0,0,0
3,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0892867,actress,Evelyn Venable,1993,0,0,...,0,0,0,0,0,0,0,0,0,0
4,tt0016029,The Little Colonel,Le petit colonel,nm0124877,nm0517099,actor,John Lodge,1985,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# Créer un dataframe avec tconst unique
df_dummies_gpby = df_crew_dummies.groupby('tconst')

In [22]:
# On vérifie le type du résultat
type(df_dummies_gpby)

pandas.core.groupby.generic.DataFrameGroupBy

In [23]:
# Convertir le DataFrameGroupBy en dataframe
gpby_to_frame = df_dummies_gpby.aggregate(np.sum)

  gpby_to_frame = df_dummies_gpby.aggregate(np.sum)


In [24]:
# Vérification du type du résultat
type(gpby_to_frame)

pandas.core.frame.DataFrame

In [25]:
# Vérification du nombre de film et du nombre d'acteurs
gpby_to_frame.shape

(22229, 23563)

In [26]:
gpby_to_frame.head()

Unnamed: 0_level_0,deathYear,nm0000001,nm0000002,nm0000003,nm0000004,nm0000005,nm0000006,nm0000007,nm0000008,nm0000009,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
tconst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tt0016029,9925,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0020298,7849,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021079,9918,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021128,1980,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021309,7896,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
# drop la colonne death year
df_crew_dummies_final = gpby_to_frame.drop(columns=['deathYear'], axis = 1)

In [29]:
# Vérification de la suppression 
df_crew_dummies_final.head()

Unnamed: 0_level_0,nm0000001,nm0000002,nm0000003,nm0000004,nm0000005,nm0000006,nm0000007,nm0000008,nm0000009,nm0000010,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
tconst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tt0016029,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0020298,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021079,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021128,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021309,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# Vérification des Nans
df_crew_dummies_final.isna().sum().max()

0

In [31]:
# Exporter en csv
df_crew_dummies_final.to_csv('df_crew_dummies.csv')

In [32]:
df_crew_dummies_final = pd.read_csv(r"C:\Users\emman\Desktop\Projet2-0305\df_crew_dummies.csv",sep=',')

In [33]:
df_crew_dummies_final.head()

Unnamed: 0,tconst,nm0000001,nm0000002,nm0000003,nm0000004,nm0000005,nm0000006,nm0000007,nm0000008,nm0000009,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
0,tt0016029,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,tt0020298,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,tt0021079,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,tt0021128,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,tt0021309,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
# Mettre les tconst en index
df_crew_dummies_final.set_index('tconst', inplace=True)

In [37]:
df_crew_dummies_final.head()

Unnamed: 0_level_0,nm0000001,nm0000002,nm0000003,nm0000004,nm0000005,nm0000006,nm0000007,nm0000008,nm0000009,nm0000010,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
tconst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tt0016029,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0020298,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021079,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021128,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021309,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
# sauvagarder en .pickle pour gagner de la place
df_crew_dummies_final.to_pickle("./crew_dummies.pkl")

In [40]:
df_crew_ml = pd.read_pickle(r"C:\Users\emman\Desktop\Projet2-0305\crew_dummies.pkl")

In [41]:
df_crew_ml.head()

Unnamed: 0_level_0,nm0000001,nm0000002,nm0000003,nm0000004,nm0000005,nm0000006,nm0000007,nm0000008,nm0000009,nm0000010,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
tconst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tt0016029,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0020298,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021079,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021128,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021309,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
# Enlever les films perdus lors de l'allégement de la BDD crew dans la BDD film, il ne doit rester que 22229 film et non 22303
# On va extraire les tconst de df_crew_ml pour les merge avec le df_film_en_salle, il ne restera donc que les films présents dans les 2 BDD
tconst_to_keep = df_crew_ml.index

In [61]:
len(tconst_to_keep)

22229

In [65]:
tconst_to_keep_frame = tconst_to_keep.to_frame()

In [67]:
type(tconst_to_keep_frame)

pandas.core.frame.DataFrame

In [69]:
tconst_to_keep_frame.head()

Unnamed: 0_level_0,tconst
tconst,Unnamed: 1_level_1
tt0016029,tt0016029
tt0020298,tt0020298
tt0021079,tt0021079
tt0021128,tt0021128
tt0021309,tt0021309


In [73]:
# Dropper l'index
tconst_to_keep_frame.reset_index(drop=True, inplace=True)

In [51]:
# charger le csv film 
df_film = pd.read_csv(r"C:\Users\emman\Desktop\Projet2-0305\df_film_en_salle_ok.csv",sep=',')

In [52]:
df_film.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22303 entries, 0 to 22302
Data columns (total 31 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   tconst          22303 non-null  object 
 1   primaryTitle    22303 non-null  object 
 2   startYear       22303 non-null  int64  
 3   runtimeMinutes  22303 non-null  float64
 4   title           22303 non-null  object 
 5   averageRating   22303 non-null  float64
 6   numVotes        22303 non-null  float64
 7   Action          22303 non-null  int64  
 8   Adventure       22303 non-null  int64  
 9   Animation       22303 non-null  int64  
 10  Biography       22303 non-null  int64  
 11  Comedy          22303 non-null  int64  
 12  Crime           22303 non-null  int64  
 13  Documentary     22303 non-null  int64  
 14  Drama           22303 non-null  int64  
 15  Family          22303 non-null  int64  
 16  Fantasy         22303 non-null  int64  
 17  Film-Noir       22303 non-null 

In [54]:
df_film.head(5)

Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes,title,averageRating,numVotes,Action,Adventure,Animation,...,Musical,Mystery,News,Romance,Sci-Fi,Sport,Thriller,War,Western,UnknownGenre
0,tt0016029,The Little Colonel,1935,81.0,Le petit colonel,7.0,1775.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,tt0020298,Queen Kelly,1932,101.0,La reine Kelly,7.1,3301.0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,tt0021079,Little Caesar,1931,79.0,Le petit César,7.2,13810.0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,tt0021128,Mary,1931,78.0,Mary,5.8,919.0,0,0,0,...,0,1,0,0,0,0,1,0,0,0
4,tt0021309,The Story of the Fox,1937,63.0,Le roman de Renard,7.7,1417.0,0,1,1,...,0,0,0,0,0,0,0,0,0,0


In [74]:
# Supprimer les tconst perdus lors du nettoyage de la base crew grâce à un merge
df_film_clean = pd.merge(tconst_to_keep_frame,
                         df_film,
                         how='inner',
                         left_on = 'tconst',
                         right_on = 'tconst'
    )

In [75]:
# Vérification
df_film_clean.shape

(22229, 31)

In [76]:
df_film_clean.isna().sum().max()

0

In [84]:
# Créer une colonne titre sans accent pour rendre le titre insensible à la casse
df_film_clean['title_not_case_sensitive'] = df_film_clean['title'].apply(unidecode)

In [85]:
df_film_clean.head()

Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes,title,averageRating,numVotes,Action,Adventure,Animation,...,Mystery,News,Romance,Sci-Fi,Sport,Thriller,War,Western,UnknownGenre,title_not_case_sensitive
0,tt0016029,The Little Colonel,1935,81.0,Le petit colonel,7.0,1775.0,0,0,0,...,0,0,0,0,0,0,0,0,0,Le petit colonel
1,tt0020298,Queen Kelly,1932,101.0,La reine Kelly,7.1,3301.0,0,0,0,...,0,0,0,0,0,0,0,0,0,La reine Kelly
2,tt0021079,Little Caesar,1931,79.0,Le petit César,7.2,13810.0,1,0,0,...,0,0,0,0,0,0,0,0,0,Le petit Cesar
3,tt0021128,Mary,1931,78.0,Mary,5.8,919.0,0,0,0,...,1,0,0,0,0,1,0,0,0,Mary
4,tt0021309,The Story of the Fox,1937,63.0,Le roman de Renard,7.7,1417.0,0,1,1,...,0,0,0,0,0,0,0,0,0,Le roman de Renard


In [87]:
# Faire de même avec les majuscules pour le rendre insensible à la casse
df_film_clean['title_not_case_sensitive'] = df_film_clean['title_not_case_sensitive'].str.lower()

In [88]:
# Vérification 
df_film_clean.head()

Unnamed: 0,tconst,primaryTitle,startYear,runtimeMinutes,title,averageRating,numVotes,Action,Adventure,Animation,...,Mystery,News,Romance,Sci-Fi,Sport,Thriller,War,Western,UnknownGenre,title_not_case_sensitive
0,tt0016029,The Little Colonel,1935,81.0,Le petit colonel,7.0,1775.0,0,0,0,...,0,0,0,0,0,0,0,0,0,le petit colonel
1,tt0020298,Queen Kelly,1932,101.0,La reine Kelly,7.1,3301.0,0,0,0,...,0,0,0,0,0,0,0,0,0,la reine kelly
2,tt0021079,Little Caesar,1931,79.0,Le petit César,7.2,13810.0,1,0,0,...,0,0,0,0,0,0,0,0,0,le petit cesar
3,tt0021128,Mary,1931,78.0,Mary,5.8,919.0,0,0,0,...,1,0,0,0,0,1,0,0,0,mary
4,tt0021309,The Story of the Fox,1937,63.0,Le roman de Renard,7.7,1417.0,0,1,1,...,0,0,0,0,0,0,0,0,0,le roman de renard


In [98]:
df_film_clean.columns

Index(['tconst', 'primaryTitle', 'title', 'title_not_case_sensitive',
       'startYear', 'runtimeMinutes', 'averageRating', 'numVotes', 'Action',
       'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime', 'Documentary',
       'Drama', 'Family', 'Fantasy', 'Film-Noir', 'History', 'Horror', 'Music',
       'Musical', 'Mystery', 'News', 'Romance', 'Sci-Fi', 'Sport', 'Thriller',
       'War', 'Western', 'UnknownGenre', 'primaryTitle_not_case_sensitive'],
      dtype='object')

In [100]:
df_film_clean.head()

Unnamed: 0,tconst,primaryTitle,primaryTitle_not_case_sensitive,title,title_not_case_sensitive,startYear,runtimeMinutes,averageRating,numVotes,Action,...,Musical,Mystery,News,Romance,Sci-Fi,Sport,Thriller,War,Western,UnknownGenre
0,tt0016029,The Little Colonel,the little colonel,Le petit colonel,le petit colonel,1935,81.0,7.0,1775.0,0,...,0,0,0,0,0,0,0,0,0,0
1,tt0020298,Queen Kelly,queen kelly,La reine Kelly,la reine kelly,1932,101.0,7.1,3301.0,0,...,0,0,0,0,0,0,0,0,0,0
2,tt0021079,Little Caesar,little caesar,Le petit César,le petit cesar,1931,79.0,7.2,13810.0,1,...,0,0,0,0,0,0,0,0,0,0
3,tt0021128,Mary,mary,Mary,mary,1931,78.0,5.8,919.0,0,...,0,1,0,0,0,0,1,0,0,0
4,tt0021309,The Story of the Fox,the story of the fox,Le roman de Renard,le roman de renard,1937,63.0,7.7,1417.0,0,...,0,0,0,0,0,0,0,0,0,0


In [101]:
# Vérification des Nan au cas où 
df_film_clean.isna().sum().max()

0

In [110]:
# Setter l'index sur le tconst 
df_film_clean.set_index('tconst',inplace=True)
# ERREUR car lancée à 2 reprises par erreur

KeyError: "None of ['tconst'] are in the columns"

In [113]:
# Exporter df_film_clean en pickle pour l'exploiter en ML
df_film_clean.to_pickle("./movie_clean.pkl")

In [114]:
# Read le pickle pour voir si tout fonctionne 
df_movie_ml = pd.read_pickle(r"C:\Users\emman\Desktop\Projet2-0305\BDD_ML\movie_clean.pkl")

In [115]:
df_movie_ml.head()

Unnamed: 0_level_0,primaryTitle,primaryTitle_not_case_sensitive,title,title_not_case_sensitive,startYear,runtimeMinutes,averageRating,numVotes,Action,Adventure,...,Musical,Mystery,News,Romance,Sci-Fi,Sport,Thriller,War,Western,UnknownGenre
tconst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
tt0016029,The Little Colonel,the little colonel,Le petit colonel,le petit colonel,1935,81.0,7.0,1775.0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0020298,Queen Kelly,queen kelly,La reine Kelly,la reine kelly,1932,101.0,7.1,3301.0,0,0,...,0,0,0,0,0,0,0,0,0,0
tt0021079,Little Caesar,little caesar,Le petit César,le petit cesar,1931,79.0,7.2,13810.0,1,0,...,0,0,0,0,0,0,0,0,0,0
tt0021128,Mary,mary,Mary,mary,1931,78.0,5.8,919.0,0,0,...,0,1,0,0,0,0,1,0,0,0
tt0021309,The Story of the Fox,the story of the fox,Le roman de Renard,le roman de renard,1937,63.0,7.7,1417.0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [116]:
# Merger le df_movie et le df_crew pour avoir une seule base de données avec toutes les infos dessus, films, genres, crew...
df_movie_crew_ml = df_movie_ml.join(df_crew_ml,how='inner')

In [117]:
df_movie_crew_ml.shape

(22229, 23594)

In [118]:
df_movie_crew_ml.isna().sum().max()

0

In [137]:
# Changer index pour le ML. reset l'index sur le tconst
df_movie_crew_ml = df_movie_crew_ml.reset_index()

In [141]:
df_movie_crew_ml.drop(columns='index', inplace = True)

In [142]:
df_movie_crew_ml.head()

Unnamed: 0,tconst,primaryTitle,primaryTitle_not_case_sensitive,title,title_not_case_sensitive,startYear,runtimeMinutes,averageRating,numVotes,Action,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
0,tt0016029,The Little Colonel,the little colonel,Le petit colonel,le petit colonel,1935,81.0,7.0,1775.0,0,...,0,0,0,0,0,0,0,0,0,0
1,tt0020298,Queen Kelly,queen kelly,La reine Kelly,la reine kelly,1932,101.0,7.1,3301.0,0,...,0,0,0,0,0,0,0,0,0,0
2,tt0021079,Little Caesar,little caesar,Le petit César,le petit cesar,1931,79.0,7.2,13810.0,1,...,0,0,0,0,0,0,0,0,0,0
3,tt0021128,Mary,mary,Mary,mary,1931,78.0,5.8,919.0,0,...,0,0,0,0,0,0,0,0,0,0
4,tt0021309,The Story of the Fox,the story of the fox,Le roman de Renard,le roman de renard,1937,63.0,7.7,1417.0,0,...,0,0,0,0,0,0,0,0,0,0


In [143]:
# Setter l'index sur le titre en FR insensible à la casse pour le ML 
df_movie_crew_ml.set_index('title_not_case_sensitive', drop=False, inplace=True)

In [144]:
df_movie_crew_ml.head()

Unnamed: 0_level_0,tconst,primaryTitle,primaryTitle_not_case_sensitive,title,title_not_case_sensitive,startYear,runtimeMinutes,averageRating,numVotes,Action,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
title_not_case_sensitive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
le petit colonel,tt0016029,The Little Colonel,the little colonel,Le petit colonel,le petit colonel,1935,81.0,7.0,1775.0,0,...,0,0,0,0,0,0,0,0,0,0
la reine kelly,tt0020298,Queen Kelly,queen kelly,La reine Kelly,la reine kelly,1932,101.0,7.1,3301.0,0,...,0,0,0,0,0,0,0,0,0,0
le petit cesar,tt0021079,Little Caesar,little caesar,Le petit César,le petit cesar,1931,79.0,7.2,13810.0,1,...,0,0,0,0,0,0,0,0,0,0
mary,tt0021128,Mary,mary,Mary,mary,1931,78.0,5.8,919.0,0,...,0,0,0,0,0,0,0,0,0,0
le roman de renard,tt0021309,The Story of the Fox,the story of the fox,Le roman de Renard,le roman de renard,1937,63.0,7.7,1417.0,0,...,0,0,0,0,0,0,0,0,0,0


In [145]:
# Exporter ce DF total en pickle
df_movie_crew_ml.to_pickle("./df_movie_crew_ml.pkl")

In [27]:
# Lecture du pickle global
global_ml = pd.read_pickle(r"C:\Users\emman\Desktop\Projet2-0305\BDD_ML\df_movie_crew_ml.pkl")

In [28]:
global_ml.head()

Unnamed: 0_level_0,tconst,primaryTitle,primaryTitle_not_case_sensitive,title,title_not_case_sensitive,startYear,runtimeMinutes,averageRating,numVotes,Action,...,nm9937520,nm9950440,nm9953971,nm9957570,nm9958352,nm9970487,nm9974129,nm9974256,nm9990866,nm9993616
title_not_case_sensitive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
le petit colonel,tt0016029,The Little Colonel,the little colonel,Le petit colonel,le petit colonel,1935,81.0,7.0,1775.0,0,...,0,0,0,0,0,0,0,0,0,0
la reine kelly,tt0020298,Queen Kelly,queen kelly,La reine Kelly,la reine kelly,1932,101.0,7.1,3301.0,0,...,0,0,0,0,0,0,0,0,0,0
le petit cesar,tt0021079,Little Caesar,little caesar,Le petit César,le petit cesar,1931,79.0,7.2,13810.0,1,...,0,0,0,0,0,0,0,0,0,0
mary,tt0021128,Mary,mary,Mary,mary,1931,78.0,5.8,919.0,0,...,0,0,0,0,0,0,0,0,0,0
le roman de renard,tt0021309,The Story of the Fox,the story of the fox,Le roman de Renard,le roman de renard,1937,63.0,7.7,1417.0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
# Pour rendre les recommandations plus qualitatives on en enlève les films qui ont une note moyenne de moins de 4.
# La note de 4 a été choisi, après lecture de plusieurs avis sur des sites différents(alloCiné, Senscritique), globalement le 4 été ok mais pas sensationel
global_ml_good = global_ml[global_ml['averageRating'] >= 4]

In [30]:
# Vérification
global_ml_good.shape

(21314, 23595)

In [31]:
global_ml_good.isna().sum().max()

0

In [12]:
# Standardiser les données
# MinMaxScaler va mettre les données entre 0 et 1, de ce fait, les colonnes aux données binaires(genres et acteurs/actrices/directors), 
# à 1 auront plus de poid que les colonnes year, averge rate, runtimeminutes, qui sont moins importantes
# Un essai a été fait avec StandardScaler mais le résultat final n'était pas aussi précis qu'avec MinMaxScaler()

## Création d'un objet MinMaxScaler
scaler = MinMaxScaler()

In [17]:
# Séléction des colonnes à standadiser. Nous avons supprimé plus haut les colonnes dont avait plus besoin, 
# pour le ML on prendra donc toutes les données numériques
to_standardize = global_ml_good.select_dtypes('number')

In [18]:
# On applique l'objet MinMax au dataframe numérique
array_standardized = scaler.fit_transform(to_standardize)

In [19]:
# Transformer l'array en dataframe
df_scaled = pd.DataFrame(array_standardized)

In [20]:
# Vérification du DF final
df_scaled.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23580,23581,23582,23583,23584,23585,23586,23587,23588,23589
0,0.043478,0.029372,0.5,0.00065,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.01087,0.056075,0.516667,0.00121,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.026702,0.533333,0.005069,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.025367,0.3,0.000336,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.065217,0.00534,0.616667,0.000518,0.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Mettre les films en index pour permettre le Machine Learning
df_scaled = df_scaled.set_index(to_standardize.index)

In [22]:
# vérification 
df_scaled.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,23580,23581,23582,23583,23584,23585,23586,23587,23588,23589
title_not_case_sensitive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
le petit colonel,0.043478,0.029372,0.5,0.00065,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
la reine kelly,0.01087,0.056075,0.516667,0.00121,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
le petit cesar,0.0,0.026702,0.533333,0.005069,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mary,0.0,0.025367,0.3,0.000336,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
le roman de renard,0.065217,0.00534,0.616667,0.000518,0.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
# Exporter le df standardisé en .pickle
df_scaled.to_pickle("./df_scaled.pkl")

In [4]:
# Lire le df standardisé
scaled_df = pd.read_pickle(r"C:\Users\emman\Desktop\Projet2-0305\BDD_ML\df_scaled.pkl")

In [5]:
scaled_df.shape

(21314, 23590)

In [6]:
# Enlever les colonnes qui ont une somme de 0, ce sont les acteurs qui n'apparaissent plus suite à la suppression des films de moins de 4 en note
column_sums = scaled_df.sum()
columns_to_delete = column_sums[column_sums == 0].index
df_ml = scaled_df.drop(columns_to_delete, axis=1)

In [7]:
# Vérification
df_ml.shape

(21314, 22980)

In [8]:
df_ml.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,23579,23580,23581,23582,23583,23585,23586,23587,23588,23589
title_not_case_sensitive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
le petit colonel,0.043478,0.029372,0.5,0.00065,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
la reine kelly,0.01087,0.056075,0.516667,0.00121,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
le petit cesar,0.0,0.026702,0.533333,0.005069,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mary,0.0,0.025367,0.3,0.000336,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
le roman de renard,0.065217,0.00534,0.616667,0.000518,0.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
## NOUS EXPORTONS NOTRE DF FINAL QUI SERVIRA AU MACHINE LEARNING

# Exporter en .pickle
df_ml.to_pickle("./df_ml.pkl")

In [97]:
# Lire le pickle final
df_ml_final = pd.read_pickle(r"C:\Users\emman\Desktop\Projet2-0305\BDD_ML\df_ml.pkl")

2 - Préparation du dataframe qui nous servira pour l'affichage du résultat final sur streamlit

In [138]:
# On récupère les colonnes du DF qu'on veut afficher : année durée titre note
global_ml_good.columns

Index(['tconst', 'primaryTitle', 'primaryTitle_not_case_sensitive', 'title',
       'title_not_case_sensitive', 'startYear', 'runtimeMinutes',
       'averageRating', 'numVotes', 'Action',
       ...
       'nm9937520', 'nm9950440', 'nm9953971', 'nm9957570', 'nm9958352',
       'nm9970487', 'nm9974129', 'nm9974256', 'nm9990866', 'nm9993616'],
      dtype='object', length=23595)

In [243]:
to_show = global_ml_good[['title', 'startYear','averageRating', 'runtimeMinutes']]

In [244]:
to_show.head()

Unnamed: 0_level_0,title,startYear,averageRating,runtimeMinutes
title_not_case_sensitive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
le petit colonel,Le petit colonel,1935,7.0,81.0
la reine kelly,La reine Kelly,1932,7.1,101.0
le petit cesar,Le petit César,1931,7.2,79.0
mary,Mary,1931,5.8,78.0
le roman de renard,Le roman de Renard,1937,7.7,63.0


In [245]:
# Renomme les colonnes pour plus de précision
to_show = to_show.rename(columns = {'title': 'Titre', 'startYear': 'Annee_sortie', 'averageRating': 'Note_moyenne','runtimeMinutes': 'Duree_moyenne_min'})

In [246]:
to_show.head()

Unnamed: 0_level_0,Titre,Annee_sortie,Note_moyenne,Duree_moyenne_min
title_not_case_sensitive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
le petit colonel,Le petit colonel,1935,7.0,81.0
la reine kelly,La reine Kelly,1932,7.1,101.0
le petit cesar,Le petit César,1931,7.2,79.0
mary,Mary,1931,5.8,78.0
le roman de renard,Le roman de Renard,1937,7.7,63.0


In [247]:
to_show.shape

(21314, 4)

In [248]:
to_show.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21314 entries, le petit colonel to les sorcieres d'akelarre
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Titre              21314 non-null  object 
 1   Annee_sortie       21314 non-null  int64  
 2   Note_moyenne       21314 non-null  float64
 3   Duree_moyenne_min  21314 non-null  float64
dtypes: float64(2), int64(1), object(1)
memory usage: 1.3+ MB


In [249]:
# Transformer les années en str pour améliorer l'affichage sur streamlit (éviter la virgule qui sépare)
to_show['Annee_sortie'] = to_show['Annee_sortie'].apply(str)

In [250]:
to_show.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21314 entries, le petit colonel to les sorcieres d'akelarre
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Titre              21314 non-null  object 
 1   Annee_sortie       21314 non-null  object 
 2   Note_moyenne       21314 non-null  float64
 3   Duree_moyenne_min  21314 non-null  float64
dtypes: float64(2), object(2)
memory usage: 1.3+ MB


In [251]:
to_show.head()

Unnamed: 0_level_0,Titre,Annee_sortie,Note_moyenne,Duree_moyenne_min
title_not_case_sensitive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
le petit colonel,Le petit colonel,1935,7.0,81.0
la reine kelly,La reine Kelly,1932,7.1,101.0
le petit cesar,Le petit César,1931,7.2,79.0
mary,Mary,1931,5.8,78.0
le roman de renard,Le roman de Renard,1937,7.7,63.0


In [252]:
# enregistrer le df to show
to_show.to_pickle("./to_show.pkl")

In [61]:
to_show_ml = pd.read_pickle(r'C:\Users\emman\Desktop\Projet2-0305\BDD_ML\to_show.pkl')

3 -  Début du Machine Learning

In [52]:
# 1 - Selectionne les colonnes qui rentreront dans l'apprentissage du Machine Learning.
X = df_ml_final.select_dtypes('number')

In [8]:
# 2 - Instancier le modèle des proches voisins. 
modelNN = NearestNeighbors(metric='cosine', n_jobs=-1)

Explication des arguments : 
metric = 'cosine', la mesure de similarité utilisée pour calculer la distance entre les données. La mesure de similarité utilisée est la distance cosinus. La distance cosinus mesure l'angle entre deux vecteurs.
n_jobs = -1, -1 signifie utiliser tous les processeurs. Permet d'accélérer le système de recommandations. 

Nous avons réalisé différents tests avant de choisir ces arguments, notamment : 
- hamming
- manhattan 
- euclide
Cosine restait le plus précis
Nous avons aussi essayé plusieurs algorithm, notamment : 
- brute
- ball-tree
Nous avons laissé en "auto", les tests n'étant pas concluant. 

In [9]:
# Entraîner le modèle 
modelNN.fit(X)

In [10]:
# On stock notre film sur lequel on veut faire des recommandations dans une variable
film_reference = X.loc["avatar"].to_frame().T

In [11]:
# On regarde les proches voisins 
neigh_dist, neigh_index = modelNN.kneighbors(
    film_reference,
    n_neighbors = 6
)

In [12]:
print(neigh_dist)

[[1.11022302e-16 9.22588932e-02 3.82782948e-01 3.96622750e-01
  3.98139849e-01 3.98984214e-01]]


In [13]:
# On récupérer uniquement les index, on ne prend pas le premier car il correspond au film référence. Résultat un array d'index
recommande = neigh_index[0][1:]
recommande

array([13456, 11293, 12700,  4765,  9397], dtype=int64)

In [14]:
# Localiser les recommandations dans X 
films_reco = X.iloc[recommande]

In [15]:
# Extraction de l'index que l'on met dans une liste pour l'affichage des films reco dans le dataframe non standadisé, plus clair pour l'affichage 
liste_film_reco = films_reco.index.to_list()

In [16]:
# Affichage de l'exemple
liste_film_reco

["avatar: la voie de l'eau",
 "zack snyder's justice league",
 'aquaman',
 'amazons',
 'le royaume interdit']

In [17]:
# Affichage des films recommandés
to_show_ml.loc[liste_film_reco].reset_index(drop=True)

Unnamed: 0,Titre,Annee_sortie,Note_moyenne,Duree_moyenne
0,Avatar: la voie de l'eau,2022,7.7,192.0
1,Zack Snyder's Justice League,2021,8.0,242.0
2,Aquaman,2018,6.8,143.0
3,Amazons,1986,4.2,76.0
4,Le Royaume interdit,2008,6.5,104.0
