In [2]:
import math

from IPython.display import display
import matplotlib.pyplot as plt
import pandas as pd

data_folder = "./ml-20m"

ratings_df = pd.read_csv(data_folder + "/ratings.csv")
movies_df = pd.read_csv(data_folder + "/movies.csv")

ratings_df = ratings_df.rename(columns={'userId': 'user', 'movieId': 'item'})
movies_df = movies_df.rename(columns={'movieId': 'item', 'title': 'title_original'})

In [3]:
def compute_title_year(row):
    # read the title of the specified row, and cast it to the str type,
    # then use the strip() method to eliminate the spaces from the beginning and the end of the string
    title_original = str(row['title_original']).strip()

    # select the substring from the fifth-to-last (included) character to the last (excluded)
    year = title_original[-5:-1]

    # use the isdigit method to determine if the substring selected is a number
    if year.isdigit():
        # remove the year from the title
        # select the substring from the beninning to the sixth-to-last (excluded)
        # then use the strip() method to remove spaces at the beginning and at the end of the string
        # finally, use the lower() method to obtain the title in lower case
        title = title_original[:-6].strip().lower()

        # return the year and the title
        return int(year), title
    else:
        # nothing to do, return 0 as year and the title
        return 0, title_original.lower()


movies_df[['year', 'title']] = movies_df.apply(compute_title_year, axis=1, result_type='expand')
movies_df = movies_df[['item', 'title', 'year', 'genres']]
display(movies_df)

Unnamed: 0,item,title,year,genres
0,1,toy story,1995,Adventure|Animation|Children|Comedy|Fantasy
1,2,jumanji,1995,Adventure|Children|Fantasy
2,3,grumpier old men,1995,Comedy|Romance
3,4,waiting to exhale,1995,Comedy|Drama|Romance
4,5,father of the bride part ii,1995,Comedy
...,...,...,...,...
27273,131254,kein bund für's leben,2007,Comedy
27274,131256,"feuer, eis & dosenbier",2002,Comedy
27275,131258,the pirates,2014,Adventure
27276,131260,rentun ruusu,2001,(no genres listed)


In [21]:
movies_df = movies_df.drop_duplicates(subset='title', keep='first')

# Join the ratings to the genres
ratings_genres_df = pd.merge(movies_df, ratings_df,  how='inner', left_on=['item'], right_on = ['item'])
ratings_genres_df = ratings_genres_df[['item', 'user', 'rating', 'title', 'genres']]

# keep only the user and genres
user_genres = ratings_genres_df.loc[:, ['user', 'genres']]

# Create a user only dataframe
user_df = user_genres.drop_duplicates(subset=['user'], keep='first')
user_df = user_df['user']
user_df = pd.DataFrame(user_df)
user_df = user_df.set_index('user')
user_df = user_df.sort_index(ascending=True)

user_genres = user_genres.set_index('user')
user_genres = user_genres.sort_values(by="user", ascending=True)

display(user_genres)
display(user_df)
display(ratings_df)

Unnamed: 0_level_0,genres
user,Unnamed: 1_level_1
1,Action|Crime|Thriller
1,Horror|Mystery|Thriller
1,Action|Drama|War
1,Drama|Horror|Mystery
1,Animation|Children|Fantasy
...,...
138493,Animation|Children|Musical
138493,Adventure|Comedy|Drama
138493,Action|Comedy|Sci-Fi
138493,Drama


1
2
3
4
5
...
138489
138490
138491
138492
138493


Unnamed: 0,user,item,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
2,1,32,3.5,1112484819
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580
...,...,...,...,...
20000258,138493,68954,4.5,1258126920
20000259,138493,69526,4.5,1259865108
20000260,138493,69644,3.0,1260209457
20000261,138493,70286,5.0,1258126944


In [5]:
import numpy as np

def count_rated_genres(user_id, df):

    user_genre = df.loc[user_id,:]
    rated_genres = set()

    for genre in user_genre.genres:
        rated_genres.update(genre.split('|'))

    return len(rated_genres)

user_genres_rated = []
for user in user_df.index:
     user_genres_rated.append(count_rated_genres(user, user_genres))


In [6]:
user_genres_rated_df = pd.DataFrame({
    'user' : user_df.index.tolist(),
    'genres_rated' : user_genres_rated
})

display(user_genres_rated_df)

mean_genres_rated = np.mean(user_genres_rated)
print(mean_genres_rated)
user_genres_rated_df = user_genres_rated_df[user_genres_rated_df['genres_rated'] > mean_genres_rated]

display(user_genres_rated_df)

Unnamed: 0,user,genres_rated
0,1,17
1,2,18
2,3,18
3,4,15
4,5,17
...,...,...
138488,138489,17
138489,138490,18
138490,138491,14
138491,138492,17


16.360133725170225


Unnamed: 0,user,genres_rated
0,1,17
1,2,18
2,3,18
4,5,17
6,7,18
...,...,...
138486,138487,17
138488,138489,17
138489,138490,18
138491,138492,17


In [7]:
count_ratings = ratings_df.groupby(['user']).count()
selected = count_ratings['rating'] > count_ratings['rating'].mean()

expert_users = count_ratings.loc[selected]
expert_users = pd.DataFrame(expert_users)
expert_users = expert_users.reset_index()
expert_users = expert_users.merge(user_genres_rated_df, how='inner', left_on='user', right_on='user')
expert_users = expert_users.drop(['timestamp', 'item'], axis=1)
expert_users.rename(columns={'rating' : 'num_ratings'}, inplace=True)

display(expert_users)

Unnamed: 0,user,num_ratings,genres_rated
0,1,175,17
1,3,187,18
2,7,276,18
3,11,504,19
4,14,243,17
...,...,...,...
35870,138483,276,19
35871,138484,148,18
35872,138486,193,18
35873,138490,151,18


In [8]:

genre_set = set()
for genres in ratings_genres_df.genres:
    genre_set.update(genres.split('|'))


In [9]:
genre_df = ratings_genres_df[['item', 'genres']]
genre_df = pd.DataFrame(genre_df)

i = 2
for genre in genre_set:
    genre_df.insert(i, genre, False)
    i += 1

genre_df = genre_df.drop_duplicates(subset='item', keep='first')
genre_df = genre_df.set_index('item')

display(genre_df)

Unnamed: 0_level_0,genres,Documentary,Film-Noir,IMAX,War,Adventure,Sci-Fi,Action,Comedy,Musical,...,Horror,Fantasy,Crime,Children,(no genres listed),Romance,Drama,Thriller,Western,Animation
item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Adventure|Animation|Children|Comedy|Fantasy,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Adventure|Children|Fantasy,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Comedy|Romance,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Comedy|Drama|Romance,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,Comedy,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131252,Comedy|Horror,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
131254,Comedy,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
131256,Comedy,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
131258,Adventure,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [10]:

for item in genre_df.index:
    genres = genre_df.loc[item, 'genres']
    genre_list = genres.split('|')
    for genre in genre_list:
        genre_df.at[item, genre] = True

genre_df = genre_df.drop(['genres'], axis=1)
genre_df = genre_df.reset_index()

display(genre_df)

Unnamed: 0,item,Documentary,Film-Noir,IMAX,War,Adventure,Sci-Fi,Action,Comedy,Musical,...,Horror,Fantasy,Crime,Children,(no genres listed),Romance,Drama,Thriller,Western,Animation
0,1,False,False,False,False,True,False,False,True,False,...,False,True,False,True,False,False,False,False,False,True
1,2,False,False,False,False,True,False,False,False,False,...,False,True,False,True,False,False,False,False,False,False
2,3,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,True,False,False,False,False
3,4,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,True,True,False,False,False
4,5,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25700,131252,False,False,False,False,False,False,False,True,False,...,True,False,False,False,False,False,False,False,False,False
25701,131254,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
25702,131256,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
25703,131258,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [11]:
tags_df = pd.DataFrame(pd.read_csv(data_folder + "/tags.csv"))
tags_df = tags_df.rename(columns={'movieId': 'item'})
tags_df = tags_df.drop(['timestamp', 'userId'], axis=1)
tags_df = tags_df.sort_values(by="item", ascending=True)
tags_df = tags_df.set_index('item')
# tags_df.index = np.arrange(len(tags_df.index))
display(tags_df)

Unnamed: 0_level_0,tag
item,Unnamed: 1_level_1
1,friendship
1,animation
1,animated
1,adventure
1,animation
...,...
131258,Korea
131258,bandits
131258,whale
131258,pirates


In [12]:
tags_set = set()
for tag in tags_df.tag:
    tag = repr(tag).strip().lower().replace('\'', '')
    tag = tag.replace('"', '')
    tags_set.add(tag)

print(tags_set)



In [147]:
random_user = expert_users.sample()
random_user = random_user['user']
seen_movies = pd.merge(random_user, ratings_genres_df, how='left', left_on='user', right_on='user')

favourite_movies = seen_movies.loc[seen_movies['rating'] == np.max(seen_movies['rating'])]

display(favourite_movies)
display(seen_movies)

Unnamed: 0,user,item,rating,title,genres
2,69462,32,5.0,twelve monkeys (a.k.a. 12 monkeys),Mystery|Sci-Fi|Thriller
3,69462,34,5.0,babe,Children|Drama
4,69462,58,5.0,"postman, the (postino, il)",Comedy|Drama|Romance
5,69462,110,5.0,braveheart,Action|Drama|War
12,69462,332,5.0,village of the damned,Horror|Sci-Fi
...,...,...,...,...,...
192,69462,3361,5.0,bull durham,Comedy|Drama|Romance
201,69462,3496,5.0,madame sousatzka,Drama
210,69462,3735,5.0,serpico,Crime|Drama
212,69462,3811,5.0,breaker morant,Drama|War


Unnamed: 0,user,item,rating,title,genres
0,69462,1,4.0,toy story,Adventure|Animation|Children|Comedy|Fantasy
1,69462,17,4.0,sense and sensibility,Drama|Romance
2,69462,32,5.0,twelve monkeys (a.k.a. 12 monkeys),Mystery|Sci-Fi|Thriller
3,69462,34,5.0,babe,Children|Drama
4,69462,58,5.0,"postman, the (postino, il)",Comedy|Drama|Romance
...,...,...,...,...,...
213,69462,3816,5.0,"official story, the (la historia oficial)",Drama
214,69462,3849,4.0,"spiral staircase, the",Thriller
215,69462,3871,2.0,shane,Drama|Western
216,69462,3911,4.0,best in show,Comedy


In [148]:
genre_set_list = list(genre_set)
genre_count = np.zeros(len(genre_set_list))

for genre in favourite_movies.genres:
    genre_list = genre.split('|')
    for i in range(len(genre_list)):
        for j in range(len(genre_set_list)):
            if genre_list[i] == genre_set_list[j]:
                genre_count[j] += 1


def find_favourite_genres(num_genres=3):
    favourite_genres = []
    while len(favourite_genres) < num_genres:
        if np.max(genre_count) == 0:
            break
        fav_genre = genre_set_list[np.argmax(genre_count)]
        genre_count[np.argmax(genre_count)] = 0
        if fav_genre == "(no genres listed)":
            continue
        favourite_genres.append(fav_genre)
        

    return favourite_genres

Method to find power set of favourite movies

In [149]:
def get_powerset(fav_genres):
  powerset = list(fav_genres)
  n = len(powerset)
  return [[powerset[k] for k in range(n) if i&1<<k] for i in range(2**n)]

find_genres = get_powerset(find_favourite_genres())
display(find_genres)

[[],
 ['Drama'],
 ['Comedy'],
 ['Drama', 'Comedy'],
 ['Romance'],
 ['Drama', 'Romance'],
 ['Comedy', 'Romance'],
 ['Drama', 'Comedy', 'Romance']]

In [150]:
def find_movies_with_genres(find_genres, df):

    items = []

    for genres in find_genres:
        if not genres:
                continue
        for genre in genres:
            temp = df.loc[(df[genre] == True)]
            items.extend(temp.item)

    out = df[df['item'].isin(items)]
    out.index = np.arange(len(out.index))

    return out

movies_with_genres = find_movies_with_genres(find_genres, genre_df)
display(movies_with_genres)

Unnamed: 0,item,Documentary,Film-Noir,IMAX,War,Adventure,Sci-Fi,Action,Comedy,Musical,...,Horror,Fantasy,Crime,Children,(no genres listed),Romance,Drama,Thriller,Western,Animation
0,1,False,False,False,False,True,False,False,True,False,...,False,True,False,True,False,False,False,False,False,True
1,3,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,True,False,False,False,False
2,4,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,True,True,False,False,False
3,5,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
4,7,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18449,131248,False,False,False,False,True,False,False,True,False,...,False,True,False,True,False,False,False,False,False,True
18450,131250,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
18451,131252,False,False,False,False,False,False,False,True,False,...,True,False,False,False,False,False,False,False,False,False
18452,131254,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False


In [151]:
unseen_movies = list(pd.concat([movies_df.item, seen_movies.item]).drop_duplicates(keep=False))
unseen_movies_ratings = ratings_df[ratings_df['item'].isin(unseen_movies)]

display(unseen_movies_ratings)

Unnamed: 0,user,item,rating,timestamp
0,1,2,3.5,1112486027
1,1,29,3.5,1112484676
3,1,47,3.5,1112484727
4,1,50,3.5,1112484580
5,1,112,3.5,1094785740
...,...,...,...,...
20000258,138493,68954,4.5,1258126920
20000259,138493,69526,4.5,1259865108
20000260,138493,69644,3.0,1260209457
20000261,138493,70286,5.0,1258126944


In [159]:
unseen_movies_with_genres = genre_df[genre_df['item'].isin(unseen_movies)]

display(unseen_movies_with_genres)

Unnamed: 0,item,Documentary,Film-Noir,IMAX,War,Adventure,Sci-Fi,Action,Comedy,Musical,...,Horror,Fantasy,Crime,Children,(no genres listed),Romance,Drama,Thriller,Western,Animation
1,2,False,False,False,False,True,False,False,False,False,...,False,True,False,True,False,False,False,False,False,False
2,3,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,True,False,False,False,False
3,4,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,True,True,False,False,False
4,5,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
5,6,False,False,False,False,False,False,True,False,False,...,False,False,True,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25700,131252,False,False,False,False,False,False,False,True,False,...,True,False,False,False,False,False,False,False,False,False
25701,131254,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
25702,131256,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
25703,131258,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [160]:
unseen_movies_ratings = unseen_movies_ratings.sort_values('item')
display(unseen_movies_ratings)

Unnamed: 0,user,item,rating,timestamp
0,1,2,3.5,1112486027
19785539,136989,2,2.0,1134521380
5592030,38455,2,2.5,1268028894
19785350,136988,2,3.0,1117060974
16921826,117053,2,3.5,1111491389
...,...,...,...,...
11528487,79570,131252,4.0,1427743255
11528488,79570,131254,4.0,1427743979
11528489,79570,131256,4.0,1427744888
4243883,28906,131258,2.5,1427745392


In [161]:
# display(unseen_movies_ratings.item == 29)

average_rating = {}
for item in list(unseen_movies_with_genres.item):
    item_rating = ratings_df.loc[ratings_df['item'] == item]
    average_rating[item] = round(np.mean(item_rating.rating), 1)


display(average_rating)



{2: 3.2,
 3: 3.2,
 4: 2.9,
 5: 3.1,
 6: 3.8,
 7: 3.4,
 8: 3.1,
 9: 3.0,
 10: 3.4,
 11: 3.7,
 12: 2.6,
 13: 3.3,
 14: 3.4,
 15: 2.7,
 16: 3.8,
 18: 3.4,
 19: 2.6,
 20: 2.9,
 21: 3.6,
 22: 3.3,
 23: 3.1,
 24: 3.2,
 25: 3.7,
 26: 3.6,
 27: 3.4,
 28: 4.1,
 29: 4.0,
 30: 3.6,
 31: 3.3,
 33: 3.0,
 35: 3.5,
 36: 3.9,
 37: 3.0,
 38: 2.9,
 39: 3.4,
 40: 3.6,
 41: 3.9,
 42: 3.1,
 43: 3.5,
 44: 2.7,
 45: 3.4,
 46: 3.3,
 47: 4.1,
 48: 2.9,
 49: 3.6,
 50: 4.3,
 51: 2.6,
 52: 3.5,
 53: 3.7,
 54: 2.9,
 55: 3.2,
 56: 1.9,
 57: 3.3,
 59: 3.6,
 60: 3.1,
 61: 3.3,
 62: 3.7,
 63: 3.1,
 64: 2.8,
 65: 2.4,
 66: 2.1,
 67: 3.4,
 68: 3.4,
 69: 3.6,
 70: 3.3,
 71: 2.4,
 72: 3.5,
 73: 3.8,
 74: 3.3,
 75: 2.5,
 76: 3.0,
 77: 3.4,
 78: 3.2,
 79: 3.1,
 80: 3.7,
 81: 3.3,
 82: 4.0,
 83: 3.7,
 84: 3.4,
 85: 3.5,
 86: 3.5,
 87: 2.4,
 88: 3.1,
 89: 3.1,
 90: 3.4,
 92: 2.9,
 93: 2.5,
 94: 3.5,
 95: 3.1,
 96: 3.7,
 97: 4.0,
 98: 2.5,
 99: 3.1,
 100: 3.2,
 101: 3.8,
 102: 2.4,
 103: 3.3,
 104: 3.4,
 105: 3.3,
 106: 3.4,
 

In [162]:
avg_movie_rating = list(average_rating.values())
top_rating = np.max(avg_movie_rating)

top_movies = []
for item in average_rating:
    if average_rating[item] < top_rating - (top_rating/10):
        continue
    top_movies.append(item)

display(top_movies)

[3226,
 26718,
 26911,
 27328,
 27914,
 32230,
 40404,
 46651,
 48235,
 49063,
 52365,
 54326,
 56513,
 56548,
 62206,
 66008,
 68265,
 68314,
 69361,
 71702,
 72235,
 72866,
 73139,
 73365,
 77344,
 78329,
 79601,
 79838,
 79842,
 79866,
 80135,
 80193,
 80465,
 80467,
 80560,
 80683,
 81117,
 81501,
 81738,
 81906,
 81959,
 82326,
 86023,
 86055,
 86077,
 86085,
 86719,
 87061,
 87358,
 87644,
 88488,
 89083,
 89133,
 89246,
 89313,
 90170,
 90286,
 90895,
 91007,
 91211,
 92483,
 92520,
 92783,
 92956,
 92963,
 93187,
 93652,
 93707,
 93967,
 94407,
 94431,
 94657,
 94681,
 94737,
 94806,
 94949,
 94972,
 95517,
 95837,
 95977,
 95979,
 96631,
 96702,
 96704,
 96757,
 96950,
 97092,
 97184,
 97763,
 97789,
 97872,
 98275,
 98328,
 98335,
 98693,
 98761,
 98861,
 99243,
 99325,
 99450,
 99489,
 99939,
 100099,
 100262,
 100266,
 100315,
 100521,
 100743,
 100830,
 101188,
 101292,
 101329,
 101538,
 101632,
 101717,
 101986,
 102119,
 102158,
 102219,
 102340,
 102526,
 102596,
 1026

In [141]:
def clean_tags(row):
    return str(row['tag']).strip().lower()

tags_df['tag'] = tags_df.apply(clean_tags, axis=1, result_type='expand')
display(tags_df)

Unnamed: 0_level_0,tag
item,Unnamed: 1_level_1
1,friendship
1,animation
1,animated
1,adventure
1,animation
...,...
131258,korea
131258,bandits
131258,whale
131258,pirates


In [165]:
tags_genome_df = pd.read_csv(data_folder + "/genome-tags.csv")
tags_score_df = pd.read_csv(data_folder + "/genome-scores.csv")

tags_score_df = tags_score_df.rename(columns={'movieId': 'item'})

display(tags_genome_df)
display(tags_score_df)

Unnamed: 0,tagId,tag
0,1,007
1,2,007 (series)
2,3,18th century
3,4,1920s
4,5,1930s
...,...,...
1123,1124,writing
1124,1125,wuxia
1125,1126,wwii
1126,1127,zombie


Unnamed: 0,item,tagId,relevance
0,1,1,0.02500
1,1,2,0.02500
2,1,3,0.05775
3,1,4,0.09675
4,1,5,0.14675
...,...,...,...
11709763,131170,1124,0.58775
11709764,131170,1125,0.01075
11709765,131170,1126,0.01575
11709766,131170,1127,0.11450


In [166]:
favourite_movies_list = list(favourite_movies.item)

fav_tag_set = set()
for item in favourite_movies_list:
    movie_tags = tags_df.loc[tags_df.index == item]

    fav_tag_set.update(list(movie_tags.tag))

display(len(fav_tag_set))

1794

In [167]:
def dice_coefficient(fav_tag, movie_tag):
    return (2 * len(fav_tag.intersection(movie_tag))) / (len(fav_tag) + len(movie_tag))

movie_similarity = {}
for item in top_movies:
    tag_set = set()
    movie_tags = tags_df.loc[tags_df.index == item]
    tag_set.update(list(movie_tags.tag))
    movie_similarity[item] = dice_coefficient(fav_tag_set, tag_set)

display(movie_similarity)

recommend_movie = max(movie_similarity, key=movie_similarity.get)

display(movies_df.loc[movies_df.item == recommend_movie])

{3226: 0.0,
 26718: 0.0,
 26911: 0.0,
 27328: 0.0,
 27914: 0.004449388209121246,
 32230: 0.004439511653718091,
 40404: 0.0,
 46651: 0.002224694104560623,
 48235: 0.0,
 49063: 0.0,
 52365: 0.0,
 54326: 0.0022234574763757642,
 56513: 0.0,
 56548: 0.0,
 62206: 0.0,
 66008: 0.0,
 68265: 0.0,
 68314: 0.0,
 69361: 0.0,
 71702: 0.0,
 72235: 0.0,
 72866: 0.0011129660545353367,
 73139: 0.0011142061281337048,
 73365: 0.00443213296398892,
 77344: 0.0011142061281337048,
 78329: 0.0,
 79601: 0.0,
 79838: 0.0,
 79842: 0.0,
 79866: 0.0,
 80135: 0.0,
 80193: 0.0,
 80465: 0.0,
 80467: 0.0,
 80560: 0.0,
 80683: 0.0,
 81117: 0.0,
 81501: 0.0,
 81738: 0.0,
 81906: 0.0,
 81959: 0.0,
 82326: 0.0,
 86023: 0.0,
 86055: 0.0011123470522803114,
 86077: 0.0,
 86085: 0.0,
 86719: 0.0,
 87061: 0.0011135857461024498,
 87358: 0.0,
 87644: 0.0,
 88488: 0.0,
 89083: 0.0,
 89133: 0.0,
 89246: 0.0,
 89313: 0.0,
 90170: 0.0,
 90286: 0.0011142061281337048,
 90895: 0.0,
 91007: 0.0,
 91211: 0.0,
 92483: 0.002218524681087077

Unnamed: 0,item,title,year,genres
23860,113315,zero motivation (efes beyahasei enosh),2014,Comedy|Drama


Unnamed: 0,item,Documentary,Film-Noir,IMAX,War,Adventure,Sci-Fi,Action,Comedy,Musical,...,Horror,Fantasy,Crime,Children,(no genres listed),Romance,Drama,Thriller,Western,Animation
22821,113315,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,True,False,False,False
