# Imports

In [229]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import requests, zipfile, io
# Lift the column-count and display-width limits so wide frames are shown in full.
for option_name in ('display.max_columns', 'display.width'):
    pd.set_option(option_name, None)

In [230]:
# Download the MovieLens "latest-small" archive and unpack it into the working
# directory (the CSVs end up under ./ml-latest-small/).
response = requests.get(
    'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip',
    timeout=60,  # do not hang the notebook forever on a stalled connection
)
response.raise_for_status()  # fail loudly instead of trying to unzip an HTTP error page
with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
    archive.extractall()

In [231]:
# Load the ratings and the movie metadata from the extracted archive,
# then preview the first rows of the ratings table.
data_ratings = pd.read_csv('ml-latest-small/ratings.csv')
data_movies = pd.read_csv('ml-latest-small/movies.csv')
data_ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [232]:
# One row per (movie, user) rating, carrying the movie title; the genre and
# timestamp columns are discarded.
rated_movies = (
    data_movies
    .merge(data_ratings, how='inner', on='movieId')
    .drop(columns=['genres', 'timestamp'])
)

In [233]:
# Number of distinct users that appear in the merged ratings.
rated_movies['userId'].nunique(dropna=False)

610

In [234]:
def mapping_function(value):
    """Return whether a rating counts as a "like", i.e. is 4 or higher."""
    return 4 <= value
# Flag every rating as liked / not liked; the numeric `rating` column is kept
# alongside the new flag.
rated_movies['liked'] = rated_movies['rating'].map(mapping_function)
rated_movies

Unnamed: 0,movieId,title,userId,rating,liked
0,1,Toy Story (1995),1,4.0,True
1,1,Toy Story (1995),5,4.0,True
2,1,Toy Story (1995),7,4.5,True
3,1,Toy Story (1995),15,2.5,False
4,1,Toy Story (1995),17,4.5,True
...,...,...,...,...,...
100831,193581,Black Butler: Book of the Atlantic (2017),184,4.0,True
100832,193583,No Game No Life: Zero (2017),184,3.5,False
100833,193585,Flint (2017),184,3.5,False
100834,193587,Bungo Stray Dogs: Dead Apple (2018),184,3.5,False


In [235]:
def should_drop(row):
    """Return the value stored under the row's 'liked' key.

    Despite the name, the result is True for liked rows, so using it as a
    boolean selection mask keeps those rows rather than dropping them.
    """
    liked_flag = row['liked']
    return liked_flag


# Keep only the liked ratings. Selecting with the boolean `liked` column gives
# the same mask as calling should_drop on every row with apply(axis=1), without
# the slow Python-level loop over all rows.
rated_movies = rated_movies[rated_movies['liked']]
rated_movies

Unnamed: 0,movieId,title,userId,rating,liked
0,1,Toy Story (1995),1,4.0,True
1,1,Toy Story (1995),5,4.0,True
2,1,Toy Story (1995),7,4.5,True
4,1,Toy Story (1995),17,4.5,True
6,1,Toy Story (1995),19,4.0,True
...,...,...,...,...,...
100825,191005,Gintama (2017),184,4.5,True
100828,193571,Silver Spoon (2014),184,4.0,True
100829,193573,Love Live! The School Idol Movie (2015),184,4.0,True
100831,193581,Black Butler: Book of the Atlantic (2017),184,4.0,True


In [236]:

# How many liked ratings each movie has. This must be defined here: no other
# cell creates `movie_counts`, so a fresh Restart & Run All would otherwise
# fail with a NameError.
movie_counts = rated_movies['movieId'].value_counts()

# Discard movies that have 10 liked ratings or fewer.
rated_movies = rated_movies[~rated_movies['movieId'].isin(movie_counts[movie_counts <= 10].index)]
rated_movies

Unnamed: 0,movieId,title,userId,rating,liked
0,1,Toy Story (1995),1,4.0,True
1,1,Toy Story (1995),5,4.0,True
2,1,Toy Story (1995),7,4.5,True
4,1,Toy Story (1995),17,4.5,True
6,1,Toy Story (1995),19,4.0,True
...,...,...,...,...,...
100587,176371,Blade Runner 2049 (2017),380,4.0,True
100588,176371,Blade Runner 2049 (2017),414,5.0,True
100589,176371,Blade Runner 2049 (2017),515,5.0,True
100590,176371,Blade Runner 2049 (2017),567,5.0,True


In [237]:
# Sanity check: liked-rating count per remaining movie, most liked first.
rated_movies['movieId'].value_counts()


movieId
318       274
356       249
296       244
593       225
2571      222
         ... 
5574       11
5481       11
466        11
1103       11
176371     11
Name: count, Length: 1104, dtype: int64

In [238]:
# For each user, collect the titles of all the movies they liked into one list.
user_movie_lists = rated_movies.groupby('userId')['title'].agg(list).reset_index()

# Attach every user's list to their rows, then collapse to a single row per user.
# The merge suffixes the two `title` columns: `title_x` is the per-row title and
# `title_y` is the per-user list. The `rating` that survives is just the one on
# each user's first row; it is kept only so the frame's columns stay the same.
df = (
    rated_movies
    .merge(user_movie_lists, on='userId')
    .drop(columns=['title_x', 'movieId', 'liked'])
    .drop_duplicates(subset='userId')
    .rename(columns={'title_y': 'liked_movies'})
)
df

Unnamed: 0,userId,rating,liked_movies
0,1,4.0,"[Toy Story (1995), Grumpier Old Men (1995), He..."
150,5,4.0,"[Toy Story (1995), Get Shorty (1995), Babe (19..."
170,7,4.5,"[Toy Story (1995), Usual Suspects, The (1995),..."
236,17,4.5,"[Toy Story (1995), Seven (a.k.a. Se7en) (1995)..."
318,19,4.0,"[Toy Story (1995), Twelve Monkeys (a.k.a. 12 M..."
...,...,...,...
35285,361,4.0,"[South Park: Bigger, Longer and Uncut (1999), ..."
35304,184,5.0,"[Memento (2000), Pianist, The (2002), Cowboy B..."
35340,598,4.0,"[Ocean's Eleven (2001), Bourne Identity, The (..."
35349,306,4.0,"[Finding Nemo (2003), Devil Wears Prada, The (..."


In [239]:
# importing the library
try:
    import apyori
except:
    !pip install apyori

from apyori import apriori # for association rule learning models

In [240]:
# One transaction per user: the list of titles that user liked.
transactions = list(df['liked_movies'])
len(transactions)

609

In [243]:
# Mine association rules between pairs of liked movies.
# apyori reads the keyword `min_confidence`; keywords it does not know are
# silently ignored, so the exact spelling matters. It has no `min_length`
# option: item sets are capped at pairs by `max_length`, and single-item sets
# are already excluded because their lift is 1, which is below `min_lift`.
rules = apriori(
    transactions=transactions,
    min_support=0.00030,
    min_confidence=0.01,
    min_lift=3,
    max_length=2,
)
# Materialise the rules so they can be indexed and traversed more than once.
results = list(rules)

def inspect(results):
    '''
    Flatten apriori rule records into plain tuples ready for a DataFrame.

    Each record is read positionally: index 1 is the support of the item set
    and index 2 is its list of rule statistics, of which only the first entry
    is used. That entry holds (left-hand items, right-hand items, confidence,
    lift); the first element of each item collection is taken as the item.

    `results` is traversed exactly once, so a one-shot iterator works as well
    as a list.

    Returns a list of (lhs, rhs, support, confidence, lift) tuples, one per
    record and in the same order as `results`.
    '''
    rows = []
    for record in results:
        first_rule = record[2][0]
        lhs = tuple(first_rule[0])[0]
        rhs = tuple(first_rule[1])[0]
        rows.append((lhs, rhs, record[1], first_rule[2], first_rule[3]))
    return rows

# Tabulate the mined rules, one row per movie pair, and preview the first rows.
resultsinDataFrame = pd.DataFrame(
    data=inspect(results),
    columns=['Item #1', 'Item #2', 'Support', 'Confidence', 'Lift'],
)
resultsinDataFrame.head()


Unnamed: 0,Item #1,Item #2,Support,Confidence,Lift
0,(500) Days of Summer (2009),10 Things I Hate About You (1999),0.011494,0.269231,6.558462
1,(500) Days of Summer (2009),12 Angry Men (1957),0.013136,0.307692,3.824176
2,(500) Days of Summer (2009),127 Hours (2010),0.004926,0.115385,5.019231
3,(500) Days of Summer (2009),21 Jump Street (2012),0.011494,0.269231,9.108974
4,(500) Days of Summer (2009),22 Jump Street (2014),0.004926,0.115385,6.388112


In [280]:
film = "Now You See Me (2013)"

# Rules that mention the film on either side; keep the 20 with the highest lift.
result = (
    resultsinDataFrame
    .loc[(resultsinDataFrame['Item #1'] == film) | (resultsinDataFrame['Item #2'] == film)]
    .nlargest(n=20, columns='Lift')
)

# Make 'Item #2' always hold the *other* movie of the pair, whichever side the
# film itself was on, then read the recommendations off that column.
modified_results = result.copy()
mask = modified_results['Item #2'] == film
modified_results['Item #2'] = modified_results['Item #2'].where(~mask, modified_results['Item #1'])
recommendations = modified_results["Item #2"].tolist()


In [281]:
# Display the recommended titles, in descending order of lift.
recommendations

['Untitled Spider-Man Reboot (2017)',
 'Perks of Being a Wallflower, The (2012)',
 'Guardians of the Galaxy 2 (2017)',
 'Spotlight (2015)',
 '22 Jump Street (2014)',
 'About Time (2013)',
 'Pitch Perfect (2012)',
 'Secret Life of Walter Mitty, The (2013)',
 'The Hunger Games: Catching Fire (2013)',
 'Thor: Ragnarok (2017)',
 'Logan (2017)',
 '21 Jump Street (2012)',
 'Big Short, The (2015)',
 'Doctor Strange (2016)',
 'Wall Street (1987)',
 "We're the Millers (2013)",
 'Captain America: The First Avenger (2011)',
 'John Wick (2014)',
 'Kingsman: The Secret Service (2015)',
 'Ant-Man (2015)']