# Table of content:
- [Import libraries](#0)
- [Load the Dataset](#1)
- [Preprocessing](#2)
- [Building the Models](#3)
    - [Content-based Filtering](#4)
    - [Collaborative Filtering](#5)
- [Reference](#6)

# Import libraries <a id = "0"></a> 

In [101]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import seaborn as sns

from sklearn.cluster import AgglomerativeClustering, DBSCAN, KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings("ignore")

# Load the Dataset <a id = "1"></a>

<a href = 'https://www.kaggle.com/ayushimishra2809/movielens-dataset'>Link to the dataset in Kaggle.</a>

- **In movies.csv:**
    - movieId: The ID of the movie.	
    - title: The title of the movie
    - genres: Movie genres.

- **In ratings.csv:**
    - userId: The ID of the use.
    - movieId: The ID of the movie.
    - rating: The rating the user gave the movie.	
    - timestamp: The time the movie was rated.

In [102]:
movies_df = pd.read_csv('movies.csv')
print(f"Shape of movies dataset: {movies_df.shape}")
movies_df.head()

Shape of movies dataset: (9742, 3)


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [103]:
print(f"Null values in movies dataset:\n{movies_df.isnull().sum()}")
print("-" * 15)
movies_df.info() 

Null values in movies dataset:
movieId    0
title      0
genres     0
dtype: int64
---------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  9742 non-null   int64 
 1   title    9742 non-null   object
 2   genres   9742 non-null   object
dtypes: int64(1), object(2)
memory usage: 228.5+ KB


In [104]:
ratings_df = pd.read_csv('ratings.csv')
print(f"Shape of ratings dataset: {ratings_df.shape}")
ratings_df.head()

Shape of ratings dataset: (100836, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [105]:
print(f"Null values in movies dataset:\n{ratings_df.isnull().sum()}")
print("-" * 15)
ratings_df.info() 

Null values in movies dataset:
userId       0
movieId      0
rating       0
timestamp    0
dtype: int64
---------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     100836 non-null  int64  
 1   movieId    100836 non-null  int64  
 2   rating     100836 non-null  float64
 3   timestamp  100836 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB


# Preprocessing <a id = "2"></a>

In [106]:
movies_with_year = movies_df.copy()

movies_with_year['year'] = movies_df['title'].str.extract("(\(\d\d\d\d\))", expand = True)
movies_with_year['year'] = movies_with_year['year'].str.extract("(\d\d\d\d)", expand = True)
movies_with_year['year'] = pd.to_numeric(movies_with_year['year'], downcast = 'float' )

movies_with_year['title'] = movies_with_year['title'].str.replace("(\(\d\d\d\d\))", "")
movies_with_year['title'] = movies_with_year['title'].str.strip()

movies_with_year['genres'] = movies_with_year['genres'].str.split("|")

movies_with_year.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995.0
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995.0
2,3,Grumpier Old Men,"[Comedy, Romance]",1995.0
3,4,Waiting to Exhale,"[Comedy, Drama, Romance]",1995.0
4,5,Father of the Bride Part II,[Comedy],1995.0


In [107]:
movies_with_year_genres = movies_with_year.copy()
for index, row in movies_with_year_genres.iterrows():
    for genres in row['genres']:
        movies_with_year_genres.at[index, genres] = 1.0   
        
movies_with_year_genres.fillna(0.0, inplace = True)        
movies_with_year_genres.head()      

Unnamed: 0,movieId,title,genres,year,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,Grumpier Old Men,"[Comedy, Romance]",1995.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,Waiting to Exhale,"[Comedy, Drama, Romance]",1995.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,Father of the Bride Part II,[Comedy],1995.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [108]:
genres_table = movies_with_year_genres.drop(columns = ['title', 'genres', 'year'])
genres_table.set_index('movieId', inplace = True)
genres_list = genres_table.columns
genres_table.head()

Unnamed: 0_level_0,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [109]:
genres_weighted_with_ratings = ratings_df.merge(genres_table, on = 'movieId', copy = False)
genres_weighted_with_ratings.drop(columns = 'timestamp', inplace = True)

genres_weighted_with_ratings[list(genres_list)] = genres_weighted_with_ratings[list(genres_list)].multiply(genres_weighted_with_ratings['rating'],
                                                                                                           axis = 0)
genres_weighted_with_ratings.head()

Unnamed: 0,userId,movieId,rating,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
0,1,1,4.0,4.0,4.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5,1,4.0,4.0,4.0,4.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,7,1,4.5,4.5,4.5,4.5,4.5,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,15,1,2.5,2.5,2.5,2.5,2.5,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,17,1,4.5,4.5,4.5,4.5,4.5,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [110]:
users_profile = genres_weighted_with_ratings.groupby(by = ['userId']).mean()
users_profile.drop(columns = 'movieId', inplace = True)
print(f"Number of users(profile): {users_profile.shape[0]}")
users_profile.head()

Number of users(profile): 610


Unnamed: 0_level_0,rating,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.366379,1.607759,0.586207,0.823276,1.530172,0.87069,0.482759,1.327586,1.676724,0.844828,0.982759,0.25431,0.323276,0.728448,0.426724,0.443966,0.0,0.0,0.12931,0.021552,0.0
2,3.948276,0.431034,0.0,0.0,0.965517,0.0,0.155172,2.275862,1.5,1.310345,1.275862,0.103448,0.275862,0.534483,0.155172,0.0,0.448276,0.517241,0.12069,0.0,0.0
3,2.435897,0.769231,0.051282,0.064103,0.230769,0.346154,0.064103,0.307692,1.282051,0.025641,0.74359,0.961538,0.128205,1.615385,0.064103,0.012821,0.0,0.0,0.0,0.0,0.0
4,3.555556,0.490741,0.111111,0.175926,1.689815,0.324074,0.907407,1.935185,0.384259,0.476852,0.625,0.078704,0.37037,0.157407,0.115741,0.296296,0.037037,0.013889,0.175926,0.074074,0.0
5,3.636364,0.590909,0.590909,0.840909,1.181818,0.659091,0.772727,2.159091,0.636364,1.045455,0.727273,0.068182,0.090909,0.113636,0.227273,0.5,0.0,0.25,0.136364,0.0,0.0


# Building the Models <a id = "3"></a>

## Content-based Filtering <a id = "4"></a>
Content-based filtering uses item features to recommend other items similar to what the user likes, based on their previous actions or explicit feedback.

- **Advantages** 
   - The model doesn't need any data about other users, since the recommendations are specific to this user. This makes it easier to scale to a large number of users.
   - The model can capture the specific interests of a user, and can recommend niche items that very few other users are interested in.

- **Disadvantages**
    - Since the feature representation of the items are hand-engineered to some extent, this technique requires a lot of domain knowledge. Therefore, the model can only be as good as the hand-engineered features.
    - The model can only make recommendations based on existing interests of the user. In other words, the model has limited ability to expand on the users' existing interests.

In [111]:
def movie_rating(movies, ratings):
    input_movies = pd.DataFrame({
        'title': movies,
        'rating': ratings
        })
    
    return input_movies

def input_movie_rating(movies, ratings):
    movie_rating_df = movie_rating(movies, ratings)
    
    idmovie_rating_df = movies_with_year_genres[movies_with_year_genres['title'].isin(movies)]
    user_movie_genres = movie_rating_df.merge(idmovie_rating_df, on = 'title')
    user_movie_genres.drop(columns = ['genres', 'year'], inplace = True)
    user_movie_genres.set_index('movieId', inplace = True)
    
    return user_movie_genres

In [112]:
my_rating = input_movie_rating(['Rio Bravo', 'Vertigo', 'Modern Times', 
                                'To Be or Not to Be', 'Some Like It Hot'], 
                               [5.0, 5.0, 5.0, 5.0, 5.0])
my_rating

Unnamed: 0_level_0,title,rating,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
4329,Rio Bravo,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
903,Vertigo,5.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3462,Modern Times,5.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
946,To Be or Not to Be,5.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
910,Some Like It Hot,5.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [113]:
my_rating_by_genres = my_rating.copy()
my_rating_by_genres.drop(columns = ['title', 'rating'], inplace = True)
my_rating_by_genres = my_rating_by_genres.multiply(my_rating['rating'], axis = 'index')
my_profile = my_rating_by_genres.mean()
my_profile

Adventure             0.0
Animation             0.0
Children              0.0
Comedy                3.0
Fantasy               0.0
Romance               2.0
Drama                 3.0
Action                0.0
Crime                 1.0
Thriller              1.0
Horror                0.0
Mystery               1.0
Sci-Fi                0.0
War                   1.0
Musical               0.0
Documentary           0.0
IMAX                  0.0
Western               1.0
Film-Noir             0.0
(no genres listed)    0.0
dtype: float64

In [114]:
id_movies_recom = genres_table.multiply(my_profile, axis = 1).sum(axis = 1).sort_values(ascending = False)
id_movies_recom.keys()
movies_df[movies_df['movieId'].isin(id_movies_recom.head(10).keys().to_list())]

Unnamed: 0,movieId,title,genres
743,970,Beat the Devil (1953),Adventure|Comedy|Crime|Drama|Romance
1394,1912,Out of Sight (1998),Comedy|Crime|Drama|Romance|Thriller
2903,3893,Nurse Betty (2000),Comedy|Crime|Drama|Romance|Thriller
3460,4719,Osmosis Jones (2001),Action|Animation|Comedy|Crime|Drama|Romance|Th...
4655,6954,"Barbarian Invasions, The (Les invasions barbar...",Comedy|Crime|Drama|Mystery|Romance
5029,7831,Another Thin Man (1939),Comedy|Crime|Drama|Mystery|Romance
5033,7835,Song of the Thin Man (1947),Comedy|Crime|Drama|Musical|Mystery|Romance
7441,81132,Rubber (2010),Action|Adventure|Comedy|Crime|Drama|Film-Noir|...
8570,116799,Inherent Vice (2014),Comedy|Crime|Drama|Mystery|Romance
9106,144606,Confessions of a Dangerous Mind (2002),Comedy|Crime|Drama|Romance|Thriller


## Collaborative Filtering <a id = "5"></a>
To address some of the limitations of content-based filtering, collaborative filtering uses similarities between users and items simultaneously to provide recommendations. This allows for serendipitous recommendations; that is, collaborative filtering models can recommend an item to user A based on the interests of a similar user B. Furthermore, the embeddings can be learned automatically, without relying on hand-engineering of features.

- **Advantages** 
   - We don't need domain knowledge because the embeddings are automatically learned.
   - The model can help users discover new interests. In isolation, the ML system may not know the user is interested in a given item, but the model might still recommend it because similar users are interested in that item.
   - To some extent, the system needs only the feedback matrix to train a matrix factorization model. In particular, the system doesn't need contextual features. In practice, this can be used as one of multiple candidate generators.

- **Disadvantages**
    - The prediction of the model for a given (user, item) pair is the dot product of the corresponding embeddings. So, if an item is not seen during training, the system can't create an embedding for it and can't query the model with this item. This issue is often called the cold-start problem.
    - Side features are any features beyond the query or item ID. For movie recommendations, the side features might include country or age. Including available side features improves the quality of the model.

In [115]:
def input_movie_rating_without_genres(movies, ratings):
    movie_rating_df = movie_rating(movies, ratings)
    
    idmovie_rating_df = movies_with_year_genres[movies_with_year_genres['title'].isin(movies)]
    user_movie_genres = movie_rating_df.merge(idmovie_rating_df, on = 'title')
    user_movie_genres.drop(columns = ['genres', 'year'], inplace = True)
    user_movie_genres.drop(columns = genres_list.to_list(), inplace = True)
            
    return user_movie_genres

In [116]:
my_new_rating = input_movie_rating_without_genres(['Rio Bravo', 'Vertigo', 'Modern Times',
                                                   'To Be or Not to Be', 'Some Like It Hot'], 
                                                  [5.0, 4.5, 4.0, 4.5, 4.0])
my_new_rating

Unnamed: 0,title,rating,movieId
0,Rio Bravo,5.0,4329
1,Vertigo,4.5,903
2,Modern Times,4.0,3462
3,To Be or Not to Be,4.5,946
4,Some Like It Hot,4.0,910


In [117]:
rating_without_time = ratings_df.drop(columns = ['timestamp'])

users_with_shared_movies = rating_without_time[rating_without_time['movieId'].isin(my_new_rating['movieId'])]
user_groups_with_shared_movies = users_with_shared_movies.groupby('userId')
user_groups_with_shared_movies = sorted(user_groups_with_shared_movies,
                                        key = lambda x: len(x[1]), reverse = True)
user_groups_with_shared_movies[0] #101 users

(89,        userId  movieId  rating
 13565      89      910     1.5
 13572      89      946     0.5
 13629      89     3462     1.0)

<h3 align="left"> Pearson Correlation <h3/>

![alt text](https://wikimedia.org/api/rest_v1/media/math/render/svg/bd1ccc2979b0fd1c1aec96e386f686ae874f9ec0 "Pearson Correlation")

In [118]:
pearsonCorrelationDict = {}

for name, group in user_groups_with_shared_movies:
    
    group = group.sort_values(by='movieId')
    my_new_rating = my_new_rating.sort_values(by='movieId')
    
    nRatings = len(group)
  
    temp_df = my_new_rating[my_new_rating['movieId'].isin(group['movieId'].tolist())]
    tempRatingList = temp_df['rating'].tolist()
    tempGroupList = group['rating'].tolist()

    Sxx = sum([i**2 for i in tempRatingList]) - pow(sum(tempRatingList),2)/float(nRatings)
    Syy = sum([i**2 for i in tempGroupList]) - pow(sum(tempGroupList),2)/float(nRatings)
    Sxy = sum( i*j for i, j in zip(tempRatingList, tempGroupList)) - sum(tempRatingList)*sum(tempGroupList)/float(nRatings)
    

    if Sxx != 0 and Syy != 0:
        pearsonCorrelationDict[name] = Sxy/np.sqrt(Sxx*Syy)
    else:
        pearsonCorrelationDict[name] = 0      

In [119]:
pearson_df = pd.DataFrame.from_dict(pearsonCorrelationDict, orient = 'index')
pearson_df.columns = ['similarity index']
pearson_df['userId'] = pearson_df.index
pearson_df.index = range(len(pearson_df))
top_users = pearson_df.sort_values(by = 'similarity index', ascending = False)[0:10]
top_users

Unnamed: 0,similarity index,userId
9,1.0,50
8,1.0,19
21,1.0,275
15,1.0,168
23,1.0,387
19,1.0,221
20,1.0,260
30,1.0,603
4,0.866025,469
5,0.5,474


In [120]:
top_users_rating = top_users.merge(ratings_df, on = 'userId')
top_users_rating['weighted rating'] = top_users_rating['similarity index'] * top_users_rating['rating']
top_users_rating.head()

Unnamed: 0,similarity index,userId,movieId,rating,timestamp,weighted rating
0,1.0,50,1,3.0,1514238116,3.0
1,1.0,50,32,3.0,1523740563,3.0
2,1.0,50,111,4.0,1534178790,4.0
3,1.0,50,165,3.0,1514238058,3.0
4,1.0,50,296,4.0,1500573696,4.0


In [121]:
recom_movie_id = top_users_rating.groupby(by = 'movieId').mean()[[
    'weighted rating', 'similarity index']].sort_values(by = 'weighted rating', ascending = False)
recom_movie_id['movieId'] = recom_movie_id.index
recom_movie_id.index = range(len(recom_movie_id))

recom_movie_id.head(10)

Unnamed: 0,weighted rating,similarity index,movieId
0,5.0,1.0,4021
1,5.0,1.0,326
2,5.0,1.0,3822
3,5.0,1.0,1095
4,5.0,1.0,2068
5,5.0,1.0,4061
6,5.0,1.0,4116
7,5.0,1.0,1112
8,5.0,1.0,1280
9,5.0,1.0,4334


In [122]:
movies_df[movies_df['movieId'].isin(recom_movie_id['movieId'].head(10))]

Unnamed: 0,movieId,title,genres
284,326,To Live (Huozhe) (1994),Drama
834,1095,Glengarry Glen Ross (1992),Drama
845,1112,Palookaville (1996),Action|Comedy|Drama
979,1280,Raise the Red Lantern (Da hong deng long gao g...,Drama
1533,2068,Fanny and Alexander (Fanny och Alexander) (1982),Drama|Fantasy|Mystery
2856,3822,"Girl on the Bridge, The (Fille sur le pont, La...",Drama|Romance
3005,4021,Before Night Falls (2000),Drama
3033,4061,The Man in the Moon (1991),Drama|Romance
3067,4116,Hollywood Shuffle (1987),Comedy
3210,4334,Yi Yi (2000),Drama


# Reference <a id = "6"></a>

<a href = 'https://developers.google.com/machine-learning/recommendation'>Recommendation Systems by Google</a>