# Algorithmes de recommandation
---

## Preparation des donnees

In [522]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import re

In [523]:
data_films = pd.read_csv('Datasets/ml-latest-small/movies.csv')
data_notes = pd.read_csv('Datasets/ml-latest-small/ratings.csv')
data_tags = pd.read_csv('Datasets/ml-latest-small/tags.csv')
# data_links = pd.read_csv('Datasets/ml-latest-small/links.csv', index_col=0)

In [524]:
data_films.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [525]:
data_notes.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [526]:
data_tags.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,Highly quotable,1445714996
2,2,60756,will ferrell,1445714992
3,2,89774,Boxing story,1445715207
4,2,89774,MMA,1445715200


In [527]:
genres = list(data_films['genres'])
genres = [ genre.split('|') for genre in genres ]
genres = [ genre for subgenre in genres for genre in subgenre if genre != '(no genres listed)']
genres = set(genres)
genres

{'Action',
 'Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'IMAX',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western'}

for index, film_genres in enumerate(data_films['genres']):
    # print(index, film_genres)
    _genres = film_genres.split('|')
    for genre in genres:
        data_films.loc[index, genre] = 1 if genre in _genres else 0

In [528]:
data_films = data_films.join(data_films.genres.str.get_dummies('|'))

In [529]:
data_films.tail()

Unnamed: 0,movieId,title,genres,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,0,1,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
9739,193585,Flint (2017),Drama,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9741,193609,Andrew Dice Clay: Dice Rules (1991),Comedy,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [530]:
new = data_films.title.str.extract('(?P<title>.*?) \((?P<date>\d{4})\)$')

In [531]:
data_films['title'] = new['title']
data_films['date'] = new['date']

In [532]:
data_films.head()

Unnamed: 0,movieId,title,genres,(no genres listed),Action,Adventure,Animation,Children,Comedy,Crime,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,date
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,0,0,1,1,1,1,0,...,0,0,0,0,0,0,0,0,0,1995
1,2,Jumanji,Adventure|Children|Fantasy,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1995
2,3,Grumpier Old Men,Comedy|Romance,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,1995
4,5,Father of the Bride Part II,Comedy,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1995


In [533]:
data_films.drop('genres', axis='columns', inplace=True)
data_films.drop('(no genres listed)', axis='columns', inplace=True)

In [534]:
data_films.to_csv('Datasets/movies_clean.csv', index=False)

In [535]:
data_films.tail()

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,date
9737,193581,Black Butler: Book of the Atlantic,1,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2017
9738,193583,No Game No Life: Zero,0,0,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2017
9739,193585,Flint,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,2017
9740,193587,Bungo Stray Dogs: Dead Apple,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2018
9741,193609,Andrew Dice Clay: Dice Rules,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1991


In [536]:
from sklearn.metrics.pairwise import cosine_similarity

# Calcul du coefficient de similarite
cos_sim = cosine_similarity(data_films.iloc[:,3:-1])

#### Top 5 des films les plus similaires a Toy Story

In [537]:
ts_top5 = np.argsort(cos_sim[0])[-5:][::-1]

In [538]:
data_films.iloc[ts_top5]

Unnamed: 0,movieId,title,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,...,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western,date
0,1,Toy Story,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1995
3000,4016,"Emperor's New Groove, The",0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2000
6948,65577,"Tale of Despereaux, The",0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2008
9430,166461,Moana,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2016
6486,53121,Shrek the Third,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2007


#### Normalisation des notes

In [539]:
mean_rating = data_notes['rating'].mean()

pref_matrix = data_notes[['userId', 'movieId', 'rating']].pivot(index='userId', columns='movieId', values='rating')

pref_matrix = pref_matrix - mean_rating

item_mean_rating = pref_matrix.mean(axis=0)
pref_matrix = pref_matrix - item_mean_rating

user_mean_rating = pref_matrix.mean(axis=1)
pref_matrix = pref_matrix - user_mean_rating


#### Utilisateur le plus proche de l'utilisateur 0

In [540]:
mat = pref_matrix.values
mat[0,:]

array([-0.73008043,         nan,  1.88952507, ...,         nan,
               nan,         nan])

In [541]:
k = 0
np.nansum((mat - mat[k,:])**2, axis=1).reshape(-1,1)
np.nansum((mat - mat[0,:])**2, axis=1)[1:].argmin()

11

In [542]:
np.nansum(mat[12]-mat[0])

0.0

In [543]:
np.where(~np.isnan(mat[12]) & np.isnan(mat[0]))

(array([304, 596], dtype=int64),)

In [544]:
mat[12][[304,596]]

array([-2.13265214, -0.89476547])

#### L'utilisateur 12 n'a pas aimé les 2 films que l'utilisateur 0 n'a pas vu

## Filtrage Collaboratif

In [545]:
data_films = pd.read_csv('Datasets/ml-latest-small/movies.csv')
data_notes = pd.read_csv('Datasets/ml-latest-small/ratings.csv')

In [546]:
notes_counts = data_notes.groupby('movieId')['rating'].count().sort_values(ascending=False)

In [547]:
notes_counts.head()

movieId
356     329
318     317
296     307
593     279
2571    278
Name: rating, dtype: int64

In [548]:
top = 500
notes = data_notes[data_notes['movieId'].isin((notes_counts).index[0:top])]
notes = notes.set_index(['movieId', 'userId'])

In [549]:
notes.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,rating,timestamp
movieId,userId,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,4.0,964982703
3,1,4.0,964981247
6,1,4.0,964982224
47,1,5.0,964983815
50,1,5.0,964982931


In [550]:
prefs = notes['rating']

mean_0 = notes['rating'].mean()
prefs = prefs - mean_0

mean_i = prefs.groupby("movieId").mean()
prefs = prefs - mean_i

mean_u = prefs.groupby("userId").mean()
prefs = prefs - mean_u

pref_matrix = prefs.reset_index()[['userId', 'movieId', 'rating']].pivot(index='userId', columns='movieId', values='rating')

In [624]:
pref_matrix

movieId,1,2,3,5,6,7,10,11,16,17,...,96079,99114,106782,109374,109487,112852,116797,122882,122904,134130
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.581698,,0.079616,,-0.606847,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,-0.384519,1.142476,,-0.934008,,,1.239994,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,0.251673,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,-1.380050,,,,,-0.644305,,-1.130549,,0.26476,...,,,,,,,,,,
607,-0.000190,,,,,,,-0.750689,,,...,,,,,,,,,,
608,-1.171272,-1.18216,-1.009958,,,,0.753446,,0.822829,,...,,,,,,,,,,
609,-0.613812,,,,,,0.810906,,,,...,,,,,,,,,,


In [551]:
pref_matrix.fillna(0).values.min()

-4.393527454986408

In [552]:
pref_matrix.fillna(0).values.max()

3.248493836391921

### Factorisation de la matrice et utilisation de facteurs latents

In [553]:
import tensorflow as tf

In [554]:
from keras.layers import Input, Dense, Lambda
from keras.models import Model, load_model as keras_load_model
from keras import losses
from keras.callbacks import EarlyStopping

ENCODING_DIM = 25
ITEM_COUNT = 500

### Construction du recommender

In [555]:
input_layer = Input(shape=(ITEM_COUNT,))
# compression
encoded_layer = Dense(ENCODING_DIM, activation='linear', use_bias=False)(input_layer)
# decompression
decoded_layer = Dense(ITEM_COUNT, activation='linear', use_bias=False)(encoded_layer)

# Definition de sous-modèles

recommender = Model(input_layer, decoded_layer)

encoder = Model(input_layer, encoded_layer)

encoded_input = Input(shape=(ENCODING_DIM, ))
decoder = Model(encoded_input, recommender.layers[-1](encoded_input))

### Personnalisation de la fonction d'erreur

In [556]:
def lambda_mse(frac=0.8):
    """
    Fonction de coût spécialisée pour le modèle de recommandation.
    
    :param frac: Importance a donner aux nouvelles notes
    :return: Une fonction de cout a utiliser dans une couche Lambda
    """
    
    def lossfunc(xarray):
        x_in, y_true, y_pred = xarray
        zeros = tf.zeros_like(y_true)
        
        new_mask = tf.not_equal(x_in, y_true)
        known_mask = tf.not_equal(x_in, zeros)
        
        y_true_1 = tf.boolean_mask(y_true, new_mask)
        y_pred_1 = tf.boolean_mask(y_pred, new_mask)
        
        y_true_2 = tf.boolean_mask(y_true, known_mask)
        y_pred_2 = tf.boolean_mask(y_pred, known_mask)
        
        unknown_loss = losses.mean_squared_error(y_true, y_pred)
        known_loss = losses.mean_squared_error(y_true_2, y_pred_2)
        
        # On enleve les NaN
        unknown_loss = tf.where(tf.math.is_nan(unknown_loss), 0.0, unknown_loss)
        
        return frac*unknown_loss + (1.0 - frac) * known_loss
    
    return lossfunc

In [557]:
def final_loss(y_true, y_pred):
    """
    Support de Tensorflow.
    
    :param y_true: true value (pas utilisée mais demandée par Keras)
    :param y_pred: predicted value
    :return: y_pred
    """
    return y_pred

In [558]:
original_inputs = recommender.input
y_true_inputs = Input(shape=(ITEM_COUNT, ))

original_outputs = recommender.output
loss = Lambda(lambda_mse(0.8))([original_inputs, y_true_inputs, original_outputs])

wrapper_model = Model(inputs=[original_inputs, y_true_inputs], outputs=[loss])
wrapper_model.compile(optimizer='adadelta', loss=final_loss)

### Entrainement du modèle

#### Generation de données d'entrainement

In [559]:
def generate(pref_matrix, batch_size=64, mask_fraction=0.2):
    """
    Genère des triplets d'entrainement pour ce dataset.
    
    :param batch_size: Taille de chaque batch d'entrainement
    :param mask_fraction: Fraction de notes a cacher.
    :return: Un générateur qui renvoie des tuples de la forme ([X, y], zeros) de taille batch_size.
             X et y sont utilisés en entrée de notre recommender par la suite.
    
    """
    
    def select_and_mask(frac):
        def applier(row):
            row = row.copy()
            idx = np.where(row != 0)[0]
            if len(idx) > 0:
                masked = np.random.choice(idx, size=(int)(frac*len(idx)), replace=False)
                row[masked] = 0
            return row
        return applier
    
    indices = np.arange(pref_matrix.shape[0])
    batches_per_epoch = int(np.floor(len(indices)/batch_size))
    
    while True:
        np.random.shuffle(indices)
        
        for batch in range(batches_per_epoch):
            idx = indices[batch * batch_size:(batch+1)*batch_size]
            
            y = np.array(pref_matrix[idx,:])
            X = np.apply_along_axis(select_and_mask(frac=mask_fraction), axis=1, arr=y)
            
            yield [X, y], np.zeros(batch_size)

In [560]:
[X, y], _ = next(generate(pref_matrix.fillna(0).values))

In [561]:
len(X[X != 0])/len(y[y != 0])

0.8055446323240267

In [562]:
def fit(wrapper_model, pref_matrix, batch_size=64, mask_fraction=0.2, epochs=1, verbose=1, patience=0):
    stopper = EarlyStopping(monitor="loss", min_delta=0.00001, patience=patience, verbose=verbose)
    batches_per_epoch = int(np.floor(pref_matrix.shape[0]/batch_size))
    
    generator = generate(pref_matrix, batch_size, mask_fraction)
    
    history = wrapper_model.fit_generator(generator,
                                         steps_per_epoch=batches_per_epoch,
                                         epochs=epochs,
                                         callbacks= [stopper] if patience > 0 else []
                                         )
    return history

In [563]:
# On arrete l'entrainement apres 3 epochs sans amélioration
fit(wrapper_model, pref_matrix.fillna(0).values, batch_size=125, epochs=100, patience = 0)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.callbacks.History at 0x1ba0f3c1348>

### Prediction des notes

In [761]:
def predict(ratings, recommender, mean_0, mean_i):
    
    
    ratings = ratings['rating']
    ratings = ratings - mean_0
    ratings = ratings - mean_i
    mean_u = ratings.groupby('userId').mean()
    ratings = ratings - mean_u
   
    
    ids = pd.DataFrame(notes_counts[0:500].index)
    ratings = pd.DataFrame(ratings)
    ids = ids.merge(ratings.reset_index(), on='movieId', how='outer')
    
    pref_mat = ids[['userId', 'movieId', 'rating']].pivot(index='userId', columns='movieId', values='rating')
    pref_mat = pref_mat[1:]
    X = pref_mat.fillna(0).values
    y = recommender.predict(X)
    
    output = pd.DataFrame(y, index=pref_mat.index, columns=pref_mat.columns)
    
    output = output.add(mean_u, axis=0)
    output = output.add(mean_i, axis=1)
    output = output.add(mean_0)

    output = output.transpose()
    
    return output

In [719]:
sample_ratings = pd.DataFrame([
    {"userId": 1, "movieId": 2858, "rating": 1}, # american beauty
    {"userId": 1, "movieId": 225, "rating": 5},  # star wars
    {"userId": 1, "movieId": 480, "rating": 5},  # jurassic park
    {"userId": 1, "movieId": 593, "rating": 2},  # silence of the lambs
    {"userId": 1, "movieId": 2396, "rating": 2}, # shakespeare in love
    {"userId": 1, "movieId": 1197, "rating": 5}  # princess bride
]).set_index(["movieId", "userId"])

In [720]:
sample_ratings

Unnamed: 0_level_0,Unnamed: 1_level_0,rating
movieId,userId,Unnamed: 2_level_1
2858,1,1
225,1,5
480,1,5
593,1,2
2396,1,2
1197,1,5


#### Prevision des top 10 notes pour cet utilisateur

In [721]:
ratings = sample_ratings['rating']
ratings = ratings - mean_0
ratings = ratings - mean_i
ratings

movieId  userId
2858     1        -3.056373
225      1         1.461538
480      1         1.250000
593      1        -2.161290
2396     1        -1.777174
1197     1         0.767606
Name: rating, dtype: float64

In [722]:
mean_u = sample_ratings.groupby('userId').mean()['rating']
mean_u

userId
1    3.333333
Name: rating, dtype: float64

In [723]:
ratings = ratings - mean_u
ratings

movieId  userId
2858     1        -6.389706
225      1        -1.871795
480      1        -2.083333
593      1        -5.494624
2396     1        -5.110507
1197     1        -2.565728
Name: rating, dtype: float64

In [724]:
ids = pd.DataFrame(notes_counts[0:500].index)
ids

Unnamed: 0,movieId
0,356
1,318
2,296
3,593
4,2571
...,...
495,5060
496,47099
497,7254
498,4085


In [725]:
ratings = pd.DataFrame(ratings)
ids = ids.merge(ratings.reset_index(), on='movieId', how='outer')

In [726]:
pref_mat = ids[['userId', 'movieId', 'rating']].pivot(index='userId', columns='movieId', values='rating')

In [727]:
pref_mat = pref_mat[1:]
pref_mat

movieId,1,2,3,5,6,7,10,11,16,17,...,96079,99114,106782,109374,109487,112852,116797,122882,122904,134130
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,,,,,,,,,,,...,,,,,,,,,,


In [728]:
X = pref_mat.fillna(0).values
y = recommender.predict(X)

In [733]:
output = pd.DataFrame(y, index=pref_mat.index, columns=pref_mat.columns)
    
output = output.add(mean_u, axis=0)
output = output.add(mean_i, axis=1)

output = output.transpose()

In [745]:
preds = output.sort_values(by=1, ascending=False).head(10)
preds["title"] = data_films[data_films.movieId.isin(preds.index)].title.values
preds

userId,1.0,title
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
2324,4.277583,Star Wars: Episode IV - A New Hope (1977)
6016,4.217945,Rear Window (1954)
904,4.140124,North by Northwest (1959)
1198,4.069198,Raiders of the Lost Ark (Indiana Jones and the...
58559,4.0671,Cool Hand Luke (1967)
7153,4.010831,Young Frankenstein (1974)
260,4.007597,Life Is Beautiful (La Vita è bella) (1997)
1278,3.993979,City of God (Cidade de Deus) (2002)
908,3.987057,"Lord of the Rings: The Return of the King, The..."
1276,3.971955,"Dark Knight, The (2008)"


In [751]:
sample_ratings_2 = pd.DataFrame([
    {"userId": 1, "movieId": 2858, "rating": 5}, # american beauty
    {"userId": 1, "movieId": 260, "rating": 1},  # star wars
    {"userId": 1, "movieId": 480, "rating": 1},  # jurassic park
    {"userId": 1, "movieId": 593, "rating": 4},  # silence of the lambs
    {"userId": 1, "movieId": 2396, "rating": 3}, # shakespeare in love
    {"userId": 1, "movieId": 1197, "rating": 1}  # princess bride
]).set_index(["movieId", "userId"])

In [765]:
y = predict(sample_ratings_2, recommender, mean_0,  mean_i)

In [766]:
y

userId,1.0
movieId,Unnamed: 1_level_1
1,2.294276
2,1.828054
3,1.693945
5,1.620913
6,2.443799
...,...
112852,2.525074
116797,2.386933
122882,2.141249
122904,2.206899


In [767]:
preds = y.sort_values(by=1, ascending=False).head(10)
preds["title"] = data_films[data_films.movieId.isin(preds.index)].title.values
preds

userId,1.0,title
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
296,3.039184,"Usual Suspects, The (1995)"
318,3.008782,Pulp Fiction (1994)
858,2.927386,"Shawshank Redemption, The (1994)"
2959,2.926354,Forrest Gump (1994)
50,2.876121,"Silence of the Lambs, The (1991)"
593,2.834388,"Godfather, The (1972)"
2329,2.834052,"Godfather: Part II, The (1974)"
2858,2.810937,American History X (1998)
1221,2.785413,American Beauty (1999)
356,2.773975,Fight Club (1999)
