In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ast import literal_eval
from nltk.stem.snowball import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer 
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity

import warnings;
warnings.simplefilter('ignore')

In [2]:
# basic recommender: we first try to recommend the top movies based on ratings
# or popularity or the Bayesian average of ratings and popularity 

# preprocess the movies metadata 
d_frame = pd.read_csv('./data/movies_metadata.csv')
keywords = pd.read_csv('./data/keywords.csv')
production = pd.read_csv('./data/credits.csv')
d_frame['id'] = pd.to_numeric(d_frame['id'], errors="coerce")
keywords['id'] = pd.to_numeric(keywords['id'], errors="coerce")
production['id'] = pd.to_numeric(production['id'], errors="coerce")

d_frame = d_frame.dropna(subset=['id'])
keywords = keywords.dropna(subset=['id'])
production = production.dropna(subset=['id'])

d_frame['id'] = d_frame['id'].astype('int')
keywords['id'] = keywords['id'].astype('int')
production['id'] = production['id'].astype('int')

d_frame = pd.merge(d_frame, keywords, on='id')
d_frame = pd.merge(d_frame, production, on='id')

In [3]:
d_frame_byratings = d_frame[d_frame['vote_count'] > 100].sort_values('vote_average', ascending=[False])
print("Top 10 of movies with more than 100 votes ranked by average user rating")
d_frame_byratings.head(10)

Top 10 of movies with more than 100 votes ranked by average user rating


Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,spoken_languages,status,tagline,title,video,vote_average,vote_count,keywords,cast,crew
10397,False,,13200000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,19404,tt0112870,hi,Dilwale Dulhania Le Jayenge,"Raj is a rich, carefree, happy-go-lucky second...",...,"[{'iso_639_1': 'hi', 'name': 'हिन्दी'}]",Released,Come... Fall In Love,Dilwale Dulhania Le Jayenge,False,9.1,661.0,"[{'id': 4344, 'name': 'musical'}]","[{'cast_id': 1, 'character': 'Raj Malhotra', '...","[{'credit_id': '57a3054a9251417c57000d7a', 'de..."
40253,False,,0,"[{'id': 99, 'name': 'Documentary'}]",,192040,tt0795176,en,Planet Earth,A documentary miniseries described by its make...,...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,planet earth as you've never seen it before,Planet Earth,False,8.8,176.0,"[{'id': 11162, 'name': 'miniseries'}, {'id': 2...","[{'cast_id': 1, 'character': 'Narrator', 'cred...","[{'credit_id': '52fe4c9a9251416c910fa187', 'de..."
841,False,"{'id': 230, 'name': 'The Godfather Collection'...",6000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",http://www.thegodfather.com/,238,tt0068646,en,The Godfather,"Spanning the years 1945 to 1955, a chronicle o...",...,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,An offer you can't refuse.,The Godfather,False,8.5,6024.0,"[{'id': 131, 'name': 'italy'}, {'id': 699, 'na...","[{'cast_id': 5, 'character': 'Don Vito Corleon...","[{'credit_id': '52fe422bc3a36847f80093db', 'de..."
41418,False,,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 16, ...",https://www.funimationfilms.com/movie/yourname/,372058,tt5311514,ja,君の名は。,High schoolers Mitsuha and Taki are complete s...,...,"[{'iso_639_1': 'ja', 'name': '日本語'}]",Released,,Your Name.,False,8.5,1030.0,"[{'id': 6152, 'name': 'supernatural'}, {'id': ...","[{'cast_id': 28, 'character': 'Taki Tachibana ...","[{'credit_id': '58d4dec29251411fc502828e', 'de..."
314,False,,25000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",,278,tt0111161,en,The Shawshank Redemption,Framed in the 1940s for the double murder of h...,...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Fear can hold you prisoner. Hope can set you f...,The Shawshank Redemption,False,8.5,8358.0,"[{'id': 378, 'name': 'prison'}, {'id': 417, 'n...","[{'cast_id': 3, 'character': 'Andy Dufresne', ...","[{'credit_id': '52fe4231c3a36847f800b127', 'de..."
13371,False,,0,"[{'id': 80, 'name': 'Crime'}, {'id': 99, 'name...",,15584,tt1152758,en,Dear Zachary: A Letter to a Son About His Father,"In 2001, Andrew Bagby, a medical resident, is ...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Dear Zachary: A Letter to a Son About His Father,False,8.4,146.0,"[{'id': 1157, 'name': 'wife husband relationsh...","[{'cast_id': 2, 'character': 'Himself (voice)'...","[{'credit_id': '52fe46699251416c7507769b', 'de..."
2231,False,,20000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,637,tt0118799,it,La vita è bella,A touching story of an Italian book seller of ...,...,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,"An unforgettable fable that proves love, famil...",Life Is Beautiful,False,8.3,3643.0,"[{'id': 131, 'name': 'italy'}, {'id': 483, 'na...","[{'cast_id': 7, 'character': 'Dora', 'credit_i...","[{'credit_id': '52fe4262c3a36847f801a10b', 'de..."
1205,False,,30000000,"[{'id': 18, 'name': 'Drama'}, {'id': 80, 'name...",,311,tt0087843,en,Once Upon a Time in America,A former Prohibition-era Jewish gangster retur...,...,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,"Crime, passion and lust for power - Sergio Leo...",Once Upon a Time in America,False,8.3,1104.0,"[{'id': 314, 'name': 'life and death'}, {'id':...","[{'cast_id': 3, 'character': ""David 'Noodles' ...","[{'credit_id': '52fe4236c3a36847f800c805', 'de..."
522,False,,22000000,"[{'id': 18, 'name': 'Drama'}, {'id': 36, 'name...",http://www.schindlerslist.com/,424,tt0108052,en,Schindler's List,The true story of how businessman Oskar Schind...,...,"[{'iso_639_1': 'de', 'name': 'Deutsch'}, {'iso...",Released,"Whoever saves one life, saves the world entire.",Schindler's List,False,8.3,4436.0,"[{'id': 1382, 'name': 'factory'}, {'id': 1631,...","[{'cast_id': 14, 'character': 'Oskar Schindler...","[{'credit_id': '52fe4241c3a36847f801024d', 'de..."
45842,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 27, 'name...",,374430,tt3973198,en,Black Mirror: White Christmas,This feature-length special consists of three ...,...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Black Mirror: White Christmas,False,8.3,211.0,"[{'id': 310, 'name': 'artificial intelligence'...","[{'cast_id': 2, 'character': 'Matt', 'credit_i...","[{'credit_id': '567fec389251414d980031da', 'de..."


In [4]:
# for our base movies dataframe, we filter for movies with an average rating of 7.0 or higher and 
# with the number of votes higher than the 70th percentile of all vote_count, OR popular movies
# with more than 90th percentile of vote_count and an average rating of 6.0 or higher 

v_counts = d_frame[d_frame['vote_count'].notnull()]['vote_count'].astype('int')
v_averages = d_frame[d_frame['vote_average'].notnull()]['vote_average'].astype('int')

v_70q = v_counts.quantile(0.70)
count_threshold = v_counts.quantile(0.90)

pop_rating_thres = 6.0
qua_rating_thres = 7.0 

# next we filter the movies based on our criteria 
filtered_movies = d_frame[(d_frame['vote_count'].notnull()) & (((d_frame['vote_count'] >= count_threshold) & (d_frame['vote_average'] >= pop_rating_thres)) | ((d_frame['vote_count'] >= v_70q) & (d_frame['vote_average'] >= qua_rating_thres)))]

# create base data frame based on selecting specific fields from filtered_movies 
base_df = filtered_movies[['id','title', 'genres', 'overview', 'tagline', 'vote_count', 'vote_average', 'keywords', 'crew', 'production_countries', 'production_companies']]

In [5]:
base_df.shape

(5484, 11)

In [6]:
# helper function to help us extract the director and writers 
# from the crew column
def director_and_writer(crew):
    crew_arr = []
    for rec in crew:
        if rec['job'] == 'Director' or rec['job'] == 'Screenplay':
            crew_arr.append(rec['name'].lower().replace(" ", ""))
    return crew_arr

In [7]:
# We process the genres, keywords, crew, production_countries, production_companies 
# columns to make the data format easier to parse and analyze 

# process genres column to make only names appear 
base_df['genres'] = base_df['genres'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])

# process keywords column to make it only the tags
w_snow = SnowballStemmer('english')
base_df['keywords'] = base_df['keywords'].fillna('[]').apply(literal_eval).apply(lambda x: [j['name'] for j in x] if isinstance(x, list) else [])
base_df['keywords'] = base_df['keywords'].apply(lambda x: [w_snow.stem(w) for w in x])

# process crew to make it only director and writer 
base_df['crew'] = base_df['crew'].fillna('[]').apply(literal_eval).apply(director_and_writer)

# process production_countries to make it only the country names
base_df['production_countries'] = base_df['production_countries'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'].lower().replace(" ", "") for i in x] if isinstance(x, list) else [])

# process production_companies to make it only the company and studio names
base_df['production_companies'] = base_df['production_companies'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'].lower().replace(" ", "") for i in x] if isinstance(x, list) else [])

In [8]:
# df_by_va is base_df sorted by average audience ratings 
df_by_va = base_df.sort_values('vote_average', ascending=[False])

In [9]:
# df_by_nv is base_df sorted by popularity 
df_by_nv = base_df.sort_values('vote_count', ascending=[False])

In [10]:
# prepare coefficients for Bayesian average of ratings 
# and popularity calculation 

# compute the median score of base_df
median_rating = base_df['vote_average'].median()

# Use Bayesian average to compute a combined measure for average user rating + popularity 
overall_average_rating = base_df['vote_average'].mean()

average_counts = base_df['vote_count'].mean()

global_c = average_counts * overall_average_rating

In [11]:
def bayesian_average(mrow):
    cur_movie_rating = mrow['vote_average']
    total_votes = mrow['vote_count']
    b_avg = ((cur_movie_rating * total_votes) + global_c) / (average_counts + total_votes)
    return b_avg

In [12]:
base_df['bavg_rating'] = base_df.apply(bayesian_average, axis=1)

In [13]:
df_by_bavg = base_df.sort_values('bavg_rating', ascending=[False])

In [14]:
# a function to recommend the top movies from each genre 
# based on the user-chosen parameter 
def movies_by_genre(genre, query_type):
    match query_type:
        case "popularity":
            result = df_by_nv[(df_by_nv["genres"].notnull()) & (df_by_nv["genres"].apply(lambda x: genre in x))]
            return result
        case "ratings":
            result = df_by_va[(df_by_nv["genres"].notnull()) & (df_by_va["genres"].apply(lambda x: genre in x))]
            return result 
        case "bayesian":
            result = df_by_bavg[(df_by_nv["genres"].notnull()) & (df_by_bavg["genres"].apply(lambda x: genre in x))]
            return result 

In [15]:
# get top romance movies based on ratings 
movies_by_genre('Romance', 'ratings').head(10)

Unnamed: 0,id,title,genres,overview,tagline,vote_count,vote_average,keywords,crew,production_countries,production_companies
10397,19404,Dilwale Dulhania Le Jayenge,"[Comedy, Drama, Romance]","Raj is a rich, carefree, happy-go-lucky second...",Come... Fall In Love,661.0,9.1,[music],"[adityachopra, adityachopra]",[india],[yashrajfilms]
41418,372058,Your Name.,"[Romance, Animation, Drama]",High schoolers Mitsuha and Taki are complete s...,,1030.0,8.5,"[supernatur, romanc, school, star crossed lov,...",[makotoshinkai],[japan],[comixwavefilms]
11541,15804,A Brighter Summer Day,"[Crime, Drama, Romance]","A boy experiences first love, friendships and ...",,45.0,8.4,"[coming of ag, slow cinema]",[edwardyang],[taiwan],"[yang&hisgangfilmmakers, janebalfourfilms]"
8500,41391,Day of Wrath,"[Drama, Romance]","In a 17th-century Danish village, an old woman...",,41.0,8.3,"[denmark, witchcraft]","[carltheodordreyer, carltheodordreyer]",[denmark],[palladiumproductions]
46599,455661,In a Heartbeat,"[Family, Animation, Romance, Comedy]",A closeted boy runs the risk of being outed by...,The Heart Wants What The Heart Wants,146.0,8.3,"[love, teenag, lgbt, short]","[bethdavid, estebanbravo]",[unitedstatesofamerica],[ringlingcollegeofartanddesign]
33652,76115,The Phantom of the Opera at the Royal Albert Hall,"[Drama, Music, Romance]","A disfigured musical genius, hidden away in th...",,32.0,8.3,[music],"[nickmorris, laurenceconnor]",[unitedkingdom],"[universalpictures, thereallyusefultheatrecomp..."
7312,133919,Scenes from a Marriage,"[Drama, Romance]",Ten years of Marianne and Johan's relationship...,,44.0,8.3,"[midlife crisi, marriag, loneli, forty someth,...",[ingmarbergman],[sweden],[cinematographab]
351,13,Forrest Gump,"[Comedy, Drama, Romance]",A man with a low IQ has accomplished great thi...,"The world will never be the same, once you've ...",8147.0,8.2,"[vietnam veteran, hippi, mentally dis, run, ba...","[robertzemeckis, ericroth]",[unitedstatesofamerica],[paramountpictures]
3216,901,City Lights,"[Comedy, Drama, Romance]",City Lights is the first silent film that Char...,True Blind Love,444.0,8.2,"[suicide attempt, oper, blindness and impaired...","[charliechaplin, charliechaplin, harryclive, h...",[unitedstatesofamerica],[charleschaplinproductions]
6580,19542,The Red Shoes,"[Drama, Romance]","In this classic drama, Vicky Page is an aspiri...","Dance she did, and dance she must - between he...",124.0,8.2,"[new lov, ballet danc, music lov, ballet]","[michaelpowell, emericpressburger, emericpress...",[unitedkingdom],"[thearchers, independentproducers]"


In [16]:
# get top romance movies based on popularity
movies_by_genre('Romance', 'popularity').head(10)

Unnamed: 0,id,title,genres,overview,tagline,vote_count,vote_average,keywords,crew,production_countries,production_companies
351,13,Forrest Gump,"[Comedy, Drama, Romance]",A man with a low IQ has accomplished great thi...,"The world will never be the same, once you've ...",8147.0,8.2,"[vietnam veteran, hippi, mentally dis, run, ba...","[robertzemeckis, ericroth]",[unitedstatesofamerica],[paramountpictures]
1659,597,Titanic,"[Drama, Romance, Thriller]","84 years later, a 101-year-old woman named Ros...",Nothing on Earth could come between them.,7770.0,7.5,"[shipwreck, iceberg, ship, panic, titan, ocean...","[jamescameron, jamescameron]",[unitedstatesofamerica],"[paramountpictures, twentiethcenturyfoxfilmcor..."
43388,321612,Beauty and the Beast,"[Family, Fantasy, Romance]",A live-action adaptation of Disney's version o...,Be our guest.,5530.0,6.8,"[franc, magic, castl, fairy tal, music, curs, ...","[billcondon, stephenchbosky, evanspiliotopoulos]","[unitedkingdom, unitedstatesofamerica]","[waltdisneypictures, mandevillefilms]"
19920,82693,Silver Linings Playbook,"[Drama, Comedy, Romance]",After spending eight months in a mental instit...,Watch For The Signs,4840.0,7.0,"[danc, philadelphia, run, based on novel, depr...","[davido.russell, davido.russell]",[unitedstatesofamerica],[theweinsteincompany]
42048,313369,La La Land,"[Comedy, Drama, Music, Romance]","Mia, an aspiring actress, serves lattes to mov...",Here's to the fools who dream.,4745.0,7.9,"[jazz, danc, passion, music, cast, los angel, ...",[damienchazelle],[unitedstatesofamerica],"[summitentertainment, marcplattproductions, gi..."
23633,102651,Maleficent,"[Fantasy, Adventure, Action, Family, Romance]",The untold story of Disney's most iconic villa...,Don't believe the fairy tale.,4607.0,7.0,"[fairy tal, villain, sleeping beauti, dark fan...","[robertstromberg, lindawoolverton]",[unitedstatesofamerica],"[waltdisneypictures, waltdisneystudiosmotionpi..."
22365,152601,Her,"[Romance, Science Fiction, Drama]","In the not so distant future, Theodore, a lone...",A Spike Jonze Love Story,4215.0,7.9,"[artificial intellig, comput, love, loneli, tr...",[spikejonze],"[china, unitedstatesofamerica]","[warnerbros., annapurnapictures]"
42702,274870,Passengers,"[Adventure, Drama, Romance, Science Fiction]",A spacecraft traveling to a distant colony pla...,There is a reason they woke up.,4134.0,6.7,"[male nud, android, asteroid, isol, shower, nu...",[mortentyldum],[unitedstatesofamerica],"[columbiapictures, villageroadshowpictures, or..."
21111,64682,The Great Gatsby,"[Drama, Romance]",An adaptation of F. Scott Fitzgerald's Long Is...,Reserving judgments is a matter of infinite ho...,3885.0,7.3,"[based on novel, infidel, obsess, hope, 3d]","[bazluhrmann, bazluhrmann, craigpearce]","[unitedstatesofamerica, australia]","[villageroadshowpictures, bazmarkfilms, warner..."
23708,222935,The Fault in Our Stars,"[Romance, Drama]",Despite the tumor-shrinking medical miracle th...,One Sick Love Story,3868.0,7.6,"[amsterdam, based on novel, support group, can...","[scottneustadter, michaelh.weber, joshboone]",[unitedstatesofamerica],"[fox2000pictures, templehillentertainment, tsg..."


In [17]:
# get top romance movies based on Bayesian Average
movies_by_genre('Romance', 'bayesian').head(10)

Unnamed: 0,id,title,genres,overview,tagline,vote_count,vote_average,keywords,crew,production_countries,production_companies,bavg_rating
351,13,Forrest Gump,"[Comedy, Drama, Romance]",A man with a low IQ has accomplished great thi...,"The world will never be the same, once you've ...",8147.0,8.2,"[vietnam veteran, hippi, mentally dis, run, ba...","[robertzemeckis, ericroth]",[unitedstatesofamerica],[paramountpictures],8.113597
10397,19404,Dilwale Dulhania Le Jayenge,"[Comedy, Drama, Romance]","Raj is a rich, carefree, happy-go-lucky second...",Come... Fall In Love,661.0,9.1,[music],"[adityachopra, adityachopra]",[india],[yashrajfilms],8.074521
41418,372058,Your Name.,"[Romance, Animation, Drama]",High schoolers Mitsuha and Taki are complete s...,,1030.0,8.5,"[supernatur, romanc, school, star crossed lov,...",[makotoshinkai],[japan],[comixwavefilms],7.933377
42048,313369,La La Land,"[Comedy, Drama, Music, Romance]","Mia, an aspiring actress, serves lattes to mov...",Here's to the fools who dream.,4745.0,7.9,"[jazz, danc, passion, music, cast, los angel, ...",[damienchazelle],[unitedstatesofamerica],"[summitentertainment, marcplattproductions, gi...",7.796289
22365,152601,Her,"[Romance, Science Fiction, Drama]","In the not so distant future, Theodore, a lone...",A Spike Jonze Love Story,4215.0,7.9,"[artificial intellig, comput, love, loneli, tr...",[spikejonze],"[china, unitedstatesofamerica]","[warnerbros., annapurnapictures]",7.785031
7277,38,Eternal Sunshine of the Spotless Mind,"[Science Fiction, Drama, Romance]","Joel Barish, heartbroken that his girlfriend u...",You can erase someone from your mind. Getting ...,3758.0,7.9,"[deja vu, regret, jealousi, amnesia, dream, op...","[michelgondry, charliekaufman]",[unitedstatesofamerica],"[anonymouscontent, thisisthatproductions, focu...",7.773159
1153,11216,Cinema Paradiso,"[Drama, Romance]","A filmmaker recalls his childhood, when he fel...","A celebration of youth, friendship, and the ev...",834.0,8.2,"[sicili, cinema, film director, kiss, coming o...","[giuseppetornatore, giuseppetornatore]","[italy, france]",[raitreradiotelevisioneitaliana],7.692787
4884,194,Amélie,"[Comedy, Romance]","At a tiny Parisian café, the adorable yet pain...",One person can change your life forever.,3403.0,7.8,"[pari, love triangl, ghost train, sex-shop, sh...",[jean-pierrejeunet],"[france, germany]","[france3cinéma, claudieossardproductions, mmci...",7.678496
25183,266856,The Theory of Everything,"[Drama, Romance]",The Theory of Everything is the extraordinary ...,His Mind Changed Our World. Her Love Changed His.,3403.0,7.8,"[wife husband relationship, biographi, physici...","[jamesmarsh, anthonymccarten]",[unitedkingdom],[workingtitlefilms],7.678496
890,426,Vertigo,"[Mystery, Romance, Thriller]",A retired San Francisco detective suffering fr...,Alfred Hitchcock engulfs you in a whirlpool of...,1162.0,8.0,"[san francisco, sense of guilt, bachelor, obse...","[alfredhitchcock, aleccoppel, samuela.taylor]",[unitedstatesofamerica],"[paramountpictures, alfredj.hitchcockproductions]",7.656693


In [18]:
# Use TF-IDF Vectorizer to help us find similarities between movie descriptions 
def find_cosine_sim(df1, df1_column, df2, df2_column):
    # Use TF-IDF Vectorizer to find similarity scores between movie descriptions
    tfid_vec = TfidfVectorizer(analyzer='word', ngram_range=(1,3), stop_words='english')
    tfid_vec.fit(df1[df1_column])
    df_matrix = tfid_vec.transform(df1[df1_column])

    tfid_vec_2 = TfidfVectorizer(analyzer='word', ngram_range=(1,3), stop_words='english')
    tfid_vec_2.fit(df2[df2_column])
    df_matrix_2 = tfid_vec.transform(df2[df2_column])
    
    # calculate the cosine similarity score between the user defined movie and the movies in the db
    cos_score = linear_kernel(df_matrix, df_matrix_2)
    
    return cos_score

In [19]:
# next, we build a content-based recommender on the base movies 
# we use overview + keywords + tagline for the basis of our comparison 
base_df['overview'] = base_df['overview'].fillna('')
base_df['tagline'] = base_df['tagline'].fillna('')
base_df['kw_str'] = base_df['keywords'].apply(lambda x: ' '.join(x))
base_df['genres_str'] = base_df['genres'].apply(lambda x: ' '.join(x))
base_df['crew_str'] = base_df['crew'].apply(lambda x: ' '.join(x))
base_df['countries_str'] = base_df['production_countries'].apply(lambda x: ' '.join(x))
base_df['studios_str'] = base_df['production_companies'].apply(lambda x: ' '.join(x))
base_df['desc'] = base_df['overview'] + ' ' + base_df['tagline'] + ' ' + base_df['kw_str'] + ' ' + base_df['genres_str'] + ' ' + base_df['crew_str'] + ' ' + base_df['countries_str']
base_df['short_desc'] = base_df['overview'] + ' ' + base_df['tagline'] + ' ' + base_df['kw_str'] + ' ' + base_df['genres_str']
tfid_vec = TfidfVectorizer(analyzer='word', ngram_range=(1,3), stop_words='english')
tfid_vec.fit(base_df['desc'])
df_matrix = tfid_vec.transform(base_df['desc'])

# calculate the cosine similarity score between each movie 
cos_sim_score = linear_kernel(df_matrix, df_matrix)

In [20]:
base_df[base_df['title'] == 'La La Land']

Unnamed: 0,id,title,genres,overview,tagline,vote_count,vote_average,keywords,crew,production_countries,production_companies,bavg_rating,kw_str,genres_str,crew_str,countries_str,studios_str,desc,short_desc
42048,313369,La La Land,"[Comedy, Drama, Music, Romance]","Mia, an aspiring actress, serves lattes to mov...",Here's to the fools who dream.,4745.0,7.9,"[jazz, danc, passion, music, cast, los angel, ...",[damienchazelle],[unitedstatesofamerica],"[summitentertainment, marcplattproductions, gi...",7.796289,jazz danc passion music cast los angel pianist...,Comedy Drama Music Romance,damienchazelle,unitedstatesofamerica,summitentertainment marcplattproductions gilbe...,"Mia, an aspiring actress, serves lattes to mov...","Mia, an aspiring actress, serves lattes to mov..."


In [21]:
base_df = base_df.reset_index()
indices = pd.Series(base_df.index, index=base_df['title'])

In [22]:
# find similar movies by description
def sim_movies_by_desc(movie_id):
    idx = indices[movie_id]
    cos_arr = cos_sim_score[idx]
    scores = sorted(list(enumerate(cos_arr)), key=lambda x: x[1], reverse=True)
    top_match = [(x[0], x[1]*100) for x in scores[1:26]]
    movie_indices = [m[0] for m in top_match]
    match_dict = dict(top_match)
    matched_movies = base_df.iloc[movie_indices]
    matched_movies['desc_sim'] = matched_movies.index.map(match_dict) 
    return matched_movies

In [23]:
sim_movies_by_desc('Brokeback Mountain').head(25)

Unnamed: 0,index,id,title,genres,overview,tagline,vote_count,vote_average,keywords,crew,...,production_companies,bavg_rating,kw_str,genres_str,crew_str,countries_str,studios_str,desc,short_desc,desc_sim
1500,4977,10229,A Walk to Remember,"[Drama, Romance]","When the popular, restless Landon Carter is fo...",She didn't belong. She was misunderstood. And ...,1057.0,7.5,"[based on novel, theatre group, north carolina...","[adamshankman, karenjanszen, nicholassparks]",...,"[dinovipictures, pandorapictures, gaylordfilms...",7.329334,based on novel theatre group north carolina th...,Drama Romance,adamshankman karenjanszen nicholassparks,unitedstatesofamerica,dinovipictures pandorapictures gaylordfilms wa...,"When the popular, restless Landon Carter is fo...","When the popular, restless Landon Carter is fo...",5.818204
1301,4045,843,In the Mood for Love,"[Drama, Romance]",A melancholy story about the love between a wo...,"Feel the heat, keep the feeling burning, let t...",379.0,7.8,"[adulteri, lovesick, martial art, newspap, wif...","[wongkar-wai, wongkar-wai]",...,"[block2pictures, jettoneproduction]",7.327412,adulteri lovesick martial art newspap wife hus...,Drama Romance,wongkar-wai wongkar-wai,china hongkong,block2pictures jettoneproduction,A melancholy story about the love between a wo...,A melancholy story about the love between a wo...,5.687986
2196,8337,2288,Closer,"[Drama, Romance]","A witty, romantic, and very dangerous love sto...","If you believe in love at first sight, you nev...",845.0,6.7,"[father son relationship, love at first sight,...","[mikenichols, patrickmarber]",...,[columbiapictures],6.858485,father son relationship love at first sight ph...,Drama Romance,mikenichols patrickmarber,unitedstatesofamerica,columbiapictures,"A witty, romantic, and very dangerous love sto...","A witty, romantic, and very dangerous love sto...",4.945597
5364,43976,4369,Just a Question of Love,"[Drama, Romance]","After his gay cousin dies from hepatitis, youn...",,32.0,7.5,"[gay, love at first sight, lovesick, homophobi...",[christianfaure],...,[],7.079268,gay love at first sight lovesick homophobia ne...,Drama Romance,christianfaure,france,,"After his gay cousin dies from hepatitis, youn...","After his gay cousin dies from hepatitis, youn...",4.306328
2818,11596,1494,Curse of the Golden Flower,"[Action, Drama, Fantasy]",During China's Tang dynasty the emperor has ta...,Unspeakable secrets are hidden within the Forb...,206.0,6.6,"[poison, china, martial art, swordplay, fight,...","[zhangyimou, zhangyimou, chowyun-fat, bianzhih...",...,[beijingnewpicturefilmco.ltd.],6.950834,poison china martial art swordplay fight toxic...,Action Drama Fantasy,zhangyimou zhangyimou chowyun-fat bianzhihong ...,china hongkong,beijingnewpicturefilmco.ltd.,During China's Tang dynasty the emperor has ta...,During China's Tang dynasty the emperor has ta...,4.157479
888,2641,802,Lolita,"[Drama, Romance]",Humbert Humbert is a middle-aged British novel...,How did they ever make a movie of ...,409.0,7.3,"[sexual obsess, hotel, depress, loss of moth, ...","[stanleykubrick, stanleykubrick, jamesb.harris]",...,"[sevenartsproductions, alliedartists, transwor...",7.150622,sexual obsess hotel depress loss of moth small...,Drama Romance,stanleykubrick stanleykubrick jamesb.harris,unitedkingdom unitedstatesofamerica,sevenartsproductions alliedartists transworldp...,Humbert Humbert is a middle-aged British novel...,Humbert Humbert is a middle-aged British novel...,4.069271
301,1050,454,Romeo + Juliet,"[Drama, Romance]",In director Baz Luhrmann's contemporary take o...,My only love sprung from my only hate.,1406.0,6.7,"[shakespear, forbidden lov, gun viol, star cro...","[bazluhrmann, bazluhrmann, craigpearce]",...,"[bazmarkfilms, twentiethcenturyfoxfilmcorporat...",6.815607,shakespear forbidden lov gun viol star crossed...,Drama Romance,bazluhrmann bazluhrmann craigpearce,unitedstatesofamerica,bazmarkfilms twentiethcenturyfoxfilmcorporation,In director Baz Luhrmann's contemporary take o...,In director Baz Luhrmann's contemporary take o...,4.022069
2826,11635,3549,After the Wedding,[Drama],A manager of an orphanage in India is sent to ...,,102.0,7.2,"[copenhagen, daughter, marriage crisi, orphana...","[susannebier, andersthomasjensen]",...,[zentropaentertainments],7.077775,copenhagen daughter marriage crisi orphanag we...,Drama,susannebier andersthomasjensen,denmark sweden,zentropaentertainments,A manager of an orphanage in India is sent to ...,A manager of an orphanage in India is sent to ...,3.749402
5214,41418,372058,Your Name.,"[Romance, Animation, Drama]",High schoolers Mitsuha and Taki are complete s...,,1030.0,8.5,"[supernatur, romanc, school, star crossed lov,...",[makotoshinkai],...,[comixwavefilms],7.933377,supernatur romanc school star crossed lov anim...,Romance Animation Drama,makotoshinkai,japan,comixwavefilms,High schoolers Mitsuha and Taki are complete s...,High schoolers Mitsuha and Taki are complete s...,3.638107
70,228,10451,Eat Drink Man Woman,"[Comedy, Drama, Romance]",The film tells the story of a retired and wido...,,76.0,7.5,"[cook, sense of lif, daughter, date, famili]","[anglee, anglee, jamesschamus, wanghui-ling]",...,"[angleeproductions, goodmachine, centralmotion...",7.104165,cook sense of lif daughter date famili,Comedy Drama Romance,anglee anglee jamesschamus wanghui-ling,taiwan unitedstatesofamerica,angleeproductions goodmachine centralmotionpic...,The film tells the story of a retired and wido...,The film tells the story of a retired and wido...,3.501339


In [24]:
# overall_sim is the average of desc_sim and tags_sim, when 
# one of the similarity is lower than 25 percentile, this means 
# either the description or the tags is very different from the 
# source movie, then we make the overall similarity to be zero
def find_overall_sim(target_movie, desc_limit, tags_limit, desc_prop, tags_prop):
    if target_movie['desc_sim'] < desc_limit or target_movie['tags_sim'] < tags_limit:
        return 0
    else:
        return ((target_movie['desc_sim'] * desc_prop) + (target_movie['tags_sim'] * tags_prop))

In [25]:
# sort the list of movies by overall similarity scores 
def rank_movies_overall(movies_list, desc_prop, tags_prop):
    desc_25q = movies_list['desc_sim'].quantile(0.25)
    tags_25q = movies_list['tags_sim'].quantile(0.25)
    
    # overall_sim is the overall similarity of both desc_sim and tags_sim
    movies_list['overall_sim'] = movies_list.apply(find_overall_sim, axis=1, args=(desc_25q, tags_25q, desc_prop, tags_prop))
    
    # sort by overall_sim 
    sorted_movies = movies_list.sort_values(by=['overall_sim'], ascending=False)
    return sorted_movies

In [26]:
# come up with custom similarity scores on a scale of 1 - 10 between the source movie and target movies 
def calculate_score(target_movie, src_kw, src_gr, src_st, src_va):
    # source movie is the movie we wish to find similar movies for
    # we take the similarities in keyword (40%), genres(30%), studio (15%), and ratings (15%) into consideration
    maj_weight_factor = 4.0
    med_weight_factor = 3.0
    min_weight_factor = 1.5
    kw_sim = (len(np.intersect1d(src_kw, target_movie['keywords'])) / len(src_kw)) * maj_weight_factor
    gr_sim = (len(np.intersect1d(src_gr, target_movie['genres'])) / len(src_gr)) * med_weight_factor
    st_sim = (len(np.intersect1d(src_st, target_movie['production_companies'])) / len(src_st)) * min_weight_factor
    # formula for calculating rating similarity 1 - (abs(rating diff))/(source rating)
    va_sim = (1 - abs(src_va - target_movie['vote_average'])/src_va) * min_weight_factor
    tags_sim = gr_sim + kw_sim + st_sim + va_sim
    return tags_sim

In [27]:
def find_matches(movie_title, num_movies):
    target_movies = sim_movies_by_desc(movie_title).head(25)
    # find similaritie score for the movies returned 
    src = base_df[base_df['title'] == movie_title]
    
    # tags_sim is the similarity between genres, keywords, ratings, and popularity tags 
    target_movies['tags_sim'] = target_movies.apply(calculate_score, axis=1, args=(src['keywords'].to_list()[0], src['genres'].to_list()[0], src['production_companies'].to_list()[0], src['vote_average']))
    
    # sort the movies list by overall similarity score of tags and description
    sorted_movies = rank_movies_overall(target_movies, 0.20, 0.80)
    return sorted_movies.head(num_movies)

In [28]:
find_matches('Brokeback Mountain', 10)

Unnamed: 0,index,id,title,genres,overview,tagline,vote_count,vote_average,keywords,crew,...,kw_str,genres_str,crew_str,countries_str,studios_str,desc,short_desc,desc_sim,tags_sim,overall_sim
5364,43976,4369,Just a Question of Love,"[Drama, Romance]","After his gay cousin dies from hepatitis, youn...",,32.0,7.5,"[gay, love at first sight, lovesick, homophobi...",[christianfaure],...,gay love at first sight lovesick homophobia ne...,Drama Romance,christianfaure,france,,"After his gay cousin dies from hepatitis, youn...","After his gay cousin dies from hepatitis, youn...",4.306328,5.357143,5.14698
1301,4045,843,In the Mood for Love,"[Drama, Romance]",A melancholy story about the love between a wo...,"Feel the heat, keep the feeling burning, let t...",379.0,7.8,"[adulteri, lovesick, martial art, newspap, wif...","[wongkar-wai, wongkar-wai]",...,adulteri lovesick martial art newspap wife hus...,Drama Romance,wongkar-wai wongkar-wai,china hongkong,block2pictures jettoneproduction,A melancholy story about the love between a wo...,A melancholy story about the love between a wo...,5.687986,5.011429,5.14674
1500,4977,10229,A Walk to Remember,"[Drama, Romance]","When the popular, restless Landon Carter is fo...",She didn't belong. She was misunderstood. And ...,1057.0,7.5,"[based on novel, theatre group, north carolina...","[adamshankman, karenjanszen, nicholassparks]",...,based on novel theatre group north carolina th...,Drama Romance,adamshankman karenjanszen nicholassparks,unitedstatesofamerica,dinovipictures pandorapictures gaylordfilms wa...,"When the popular, restless Landon Carter is fo...","When the popular, restless Landon Carter is fo...",5.818204,4.785714,4.992212
2196,8337,2288,Closer,"[Drama, Romance]","A witty, romantic, and very dangerous love sto...","If you believe in love at first sight, you nev...",845.0,6.7,"[father son relationship, love at first sight,...","[mikenichols, patrickmarber]",...,father son relationship love at first sight ph...,Drama Romance,mikenichols patrickmarber,unitedstatesofamerica,columbiapictures,"A witty, romantic, and very dangerous love sto...","A witty, romantic, and very dangerous love sto...",4.945597,4.911429,4.918262
70,228,10451,Eat Drink Man Woman,"[Comedy, Drama, Romance]",The film tells the story of a retired and wido...,,76.0,7.5,"[cook, sense of lif, daughter, date, famili]","[anglee, anglee, jamesschamus, wanghui-ling]",...,cook sense of lif daughter date famili,Comedy Drama Romance,anglee anglee jamesschamus wanghui-ling,taiwan unitedstatesofamerica,angleeproductions goodmachine centralmotionpic...,The film tells the story of a retired and wido...,The film tells the story of a retired and wido...,3.501339,5.160714,4.828839
37,103,688,The Bridges of Madison County,"[Drama, Romance]",Photographer Robert Kincaid wanders into the l...,The path of Francesca Johnson's future seems d...,397.0,7.3,"[farewel, adulteri, love at first sight, photo...","[clinteastwood, richardlagravenese]",...,farewel adulteri love at first sight photograp...,Drama Romance,clinteastwood richardlagravenese,unitedstatesofamerica,amblinentertainment malpasoproductions warnerb...,Photographer Robert Kincaid wanders into the l...,Photographer Robert Kincaid wanders into the l...,3.077456,5.031429,4.640634
567,1665,547,The Horse Whisperer,"[Drama, Romance]",Based on the novel by the same name from Nicho...,,296.0,6.7,"[love triangl, new york, montana, attachment t...","[robertredford, ericroth, richardlagravenese]",...,love triangl new york montana attachment to na...,Drama Romance,robertredford ericroth richardlagravenese,unitedstatesofamerica,wildwoodenterprises touchstonepictures,Based on the novel by the same name from Nicho...,Based on the novel by the same name from Nicho...,3.461006,4.911429,4.621344
888,2641,802,Lolita,"[Drama, Romance]",Humbert Humbert is a middle-aged British novel...,How did they ever make a movie of ...,409.0,7.3,"[sexual obsess, hotel, depress, loss of moth, ...","[stanleykubrick, stanleykubrick, jamesb.harris]",...,sexual obsess hotel depress loss of moth small...,Drama Romance,stanleykubrick stanleykubrick jamesb.harris,unitedkingdom unitedstatesofamerica,sevenartsproductions alliedartists transworldp...,Humbert Humbert is a middle-aged British novel...,Humbert Humbert is a middle-aged British novel...,4.069271,4.745714,4.610426
315,1077,88,Dirty Dancing,"[Drama, Music, Romance]",Expecting the usual tedium that accompanies a ...,Have the time of your life.,1371.0,7.1,"[danc, sex, hotel, robberi, sister sister rela...",[emileardolino],...,danc sex hotel robberi sister sister relations...,Drama Music Romance,emileardolino,unitedstatesofamerica,greatamericanfilmslimitedpartnership vestronpi...,Expecting the usual tedium that accompanies a ...,Expecting the usual tedium that accompanies a ...,2.876085,4.991429,4.56836
301,1050,454,Romeo + Juliet,"[Drama, Romance]",In director Baz Luhrmann's contemporary take o...,My only love sprung from my only hate.,1406.0,6.7,"[shakespear, forbidden lov, gun viol, star cro...","[bazluhrmann, bazluhrmann, craigpearce]",...,shakespear forbidden lov gun viol star crossed...,Drama Romance,bazluhrmann bazluhrmann craigpearce,unitedstatesofamerica,bazmarkfilms twentiethcenturyfoxfilmcorporation,In director Baz Luhrmann's contemporary take o...,In director Baz Luhrmann's contemporary take o...,4.022069,4.625714,4.504985


In [29]:
MAX_VOTE_COUNT = base_df['vote_count'].max()

In [30]:
# come up with custom similarity scores on a scale of 1 - 10 between the source movie and target movies 
def find_sim_score(target_movie, src_kw, src_gr):
    # source movie is the movie we wish to find similar movies for
    # we take the similarities in keywords (40%), genres (30%), bayesian average(30%) into consideration
    # and take the average of this score with the desc_sim score 
    maj_weight_factor = 4.0
    med_weight_factor = 3.0
    min_weight_factor = 1.5
    perfect_score = 10
    kw_sim = (len(np.intersect1d(src_kw, target_movie['keywords'])) / len(src_kw)) * maj_weight_factor
    gr_sim = (len(np.intersect1d(src_gr, target_movie['genres'])) / len(src_gr)) * med_weight_factor
    # give higher ranks to movies with a higher bayesian average 
    va_sim = (target_movie['bavg_rating']/10) * med_weight_factor
    tags_sim = gr_sim + kw_sim + va_sim
    return tags_sim

In [31]:
# a movie recommender that recommends 10 films based on user input
def user_defined_recommender(input_genres, input_keywords, input_overview, num_movies):
    # parse and process input data
    genres = [input_genres]
    keywords = [input_keywords]
    overview = [input_overview]
    desc_obj = {'genres': genres, 'keywords': keywords, 'overview': overview}
    desc_df = pd.DataFrame(data=desc_obj)
    w_snow = SnowballStemmer('english')
    desc_df['keywords'] = desc_df['keywords'].apply(lambda x: [w_snow.stem(w) for w in x])
    desc_df['genres_str'] = desc_df['genres'].apply(lambda x: ' '.join(x))
    desc_df['keywords_str'] = desc_df['keywords'].apply(lambda x: ' '.join(x))
    desc_df['desc'] = desc_df['overview'] + ' ' + desc_df['genres_str'] + ' ' + desc_df['keywords_str']
    
    # Use TF-IDF Vectorizer to find similarity scores between movie descriptions
    cos_score = find_cosine_sim(base_df, 'short_desc', desc_df, 'desc')
    
    scores = sorted(list(enumerate(cos_score)), key=lambda x: x[1], reverse=True)
    top_match = [(x[0], (x[1][0] * 100)) for x in scores[1:26]]
    match_dict = dict(top_match)
    movie_indices = [m[0] for m in top_match]
    matched_movies = base_df.iloc[movie_indices]
    matched_movies['desc_sim'] = matched_movies.index.map(match_dict) 
    
    # tags_sim is the similarity between genres, keywords, ratings, and popularity tags 
    matched_movies['tags_sim'] = matched_movies.apply(find_sim_score, axis=1, args=(desc_df['keywords'].to_list()[0], desc_df['genres'].to_list()[0]))
    
    # sort the movies list by overall similarity score of tags and description
    sorted_movies = rank_movies_overall(matched_movies, 0.50, 0.50)
    
    return sorted_movies.head(num_movies)
    
    

In [32]:
# custom genres, keywords, and description inputs for a historical romance movie
# in the English countryside to test the user defined recommender
genres = ["Drama", "Romance", "Family", "History"]
keywords = ["Love", "Family", "Aristocracy", "England", "Lovers", "Fate", "Sadness", "Duty", "Period", "Historical"]
overview = "Set in the English countryside, this is a love story in the midst of a declining aristocratic family in the 18th century. Faye, deeply in love with her childhood sweetheart Dustin, must make the choice between duty to her family and following her heart."
user_defined_recommender(genres, keywords, overview, 10)

Unnamed: 0,index,id,title,genres,overview,tagline,vote_count,vote_average,keywords,crew,...,kw_str,genres_str,crew_str,countries_str,studios_str,desc,short_desc,desc_sim,tags_sim,overall_sim
3109,13102,12783,The Duchess,"[Drama, History, Romance]",A chronicle of the life of 18th century aristo...,Based on the incredible true story.,406.0,6.7,"[england, adulteri, duke, gambling debt, marri...","[andersthomasjensen, jeffreyhatcher, sauldibb,...",...,england adulteri duke gambling debt marriage c...,Drama History Romance,andersthomasjensen jeffreyhatcher sauldibb sau...,unitedkingdom,bimdistribuzione bbcfilms paramountvantage qwe...,A chronicle of the life of 18th century aristo...,A chronicle of the life of 18th century aristo...,5.438908,4.726988,5.082948
1755,6465,17920,I Capture the Castle,"[Drama, Romance]",A love story set in 1930s England that follows...,You can't choose who you fall in love with,26.0,7.1,[],"[timfywell, heidithomas]",...,,Drama Romance,timfywell heidithomas,unitedkingdom,isleofmanfilmcommission britishbroadcastingcor...,A love story set in 1930s England that follows...,A love story set in 1930s England that follows...,6.067657,3.61819,4.842923
656,1931,859,Dangerous Liaisons,"[Drama, Romance]",Dangerous Liaisons is the film based on the no...,Lust. Seduction. Revenge. The Game As You've N...,267.0,7.1,"[lovesick, sexual, cheat, cynic, ladykil, fian...","[stephenfrears, christopherhampton, choderlosd...",...,lovesick sexual cheat cynic ladykil fiancé arr...,Drama Romance,stephenfrears christopherhampton choderlosdela...,unitedkingdom unitedstatesofamerica,lorimarfilmentertainment warnerbros. nfhproduc...,Dangerous Liaisons is the film based on the no...,Dangerous Liaisons is the film based on the no...,5.52711,4.021235,4.774173
499,1378,9361,The Last of the Mohicans,"[Action, Adventure, Drama, History, Romance, War]",As the English and French soldiers battle for ...,The first American hero.,747.0,7.1,"[secret lov, mohawk, native american, 18th cen...","[michaelmann, michaelmann, christophercrowe]",...,secret lov mohawk native american 18th centuri...,Action Adventure Drama History Romance War,michaelmann michaelmann christophercrowe,unitedstatesofamerica,morgancreekproductions,As the English and French soldiers battle for ...,As the English and French soldiers battle for ...,5.104316,4.374209,4.739263
591,1792,18937,Quest for Camelot,"[Fantasy, Animation, Drama, Romance, Family]","During the times of King Arthur, Kayley is a b...",An evil knight gives nobility a bad name.,193.0,6.9,[],"[frederikduchau, kirkdemicco, williamschifrin,...",...,,Fantasy Animation Drama Romance Family,frederikduchau kirkdemicco williamschifrin jac...,unitedstatesofamerica,warnerbros.,"During the times of King Arthur, Kayley is a b...","During the times of King Arthur, Kayley is a b...",5.115013,4.357025,4.736019
3378,15010,13949,Persuasion,"[Drama, Romance]",Persuasion is the newest adaptation of the cla...,,52.0,7.1,"[love triangl, poetri, captain, navi, love]","[adrianshergold, simonburke]",...,love triangl poetri captain navi love,Drama Romance,adrianshergold simonburke,unitedkingdom unitedstatesofamerica,wgbh clerkenwellfilms,Persuasion is the newest adaptation of the cla...,Persuasion is the newest adaptation of the cla...,5.426796,4.018617,4.722706
1183,3573,6003,Romeo and Juliet,"[Drama, Romance]",Director Franco Zeffirelli's beloved version o...,No ordinary love story...,145.0,7.3,"[male nud, new lov, lover (female), love of on...","[francozeffirelli, francozeffirelli, francobru...",...,male nud new lov lover (female) love of one's ...,Drama Romance,francozeffirelli francozeffirelli francobrusat...,italy unitedkingdom,paramountpictures dinodelaurentiiscinematograf...,Director Franco Zeffirelli's beloved version o...,Director Franco Zeffirelli's beloved version o...,5.262556,4.030626,4.646591
2412,9483,363,Head-On,"[Drama, Romance]",Head On is German director Fatih Akin’s story ...,,111.0,7.3,"[alcohol, drug abus, religious educ, homicid, ...",[fatihakin],...,alcohol drug abus religious educ homicid ficti...,Drama Romance,fatihakin,germany turkey,corazóninternational arte panfilm wüstefilmpro...,Head On is German director Fatih Akin’s story ...,Head On is German director Fatih Akin’s story ...,4.824319,4.428034,4.626176
3388,15045,22971,Dear John,"[Drama, Romance, War]",Sergeant John Tyree is home on a 2 week leave ...,Love brought them together. Will fate tear the...,1302.0,6.6,"[based on novel, armi, love, u.s. soldi]","[lassehallström, jamielinden]",...,based on novel armi love u.s. soldi,Drama Romance War,lassehallström jamielinden,unitedstatesofamerica,screengems,Sergeant John Tyree is home on a 2 week leave ...,Sergeant John Tyree is home on a 2 week leave ...,5.317239,3.926681,4.62196
2300,8799,21035,Turkish Delight,"[Drama, Romance]","Sort of a cross between ""Love Story"" and an ea...",Turkish Delight is a cross between Love Story ...,34.0,7.0,"[sex, nuditi, love, sculptor]","[paulverhoeven, gerardsoeteman]",...,sex nuditi love sculptor,Drama Romance,paulverhoeven gerardsoeteman,netherlands,verenigdenederlandschefilmcompagnie(vnf),"Sort of a cross between ""Love Story"" and an ea...","Sort of a cross between ""Love Story"" and an ea...",5.1585,4.01687,4.587685
