## Authors:
### Bishoy George       Bishoy.George.eg@gmail.com
### Toka Abd El Ghafar
### Farah Mohamad
### Zaynab El Agamy
### Omar Ahmed Shihi

In [1]:
import numpy as np
import pandas as pd

In [2]:
ratings_df = pd.read_csv('ratings_small.csv')
movies_df = pd.read_csv("movies_metadata.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
#filter the movies_df to keep only those rows where the title is not missing
title_mask = movies_df['title'].isna()
movies_df = movies_df.loc[title_mask == False]

#Merge the 2 datasets
movies_df = movies_df.astype({'id': 'int64'})
df = pd.merge(ratings_df, movies_df[['id', 'title']], left_on='movieId', right_on='id')
#Id column is repeated and the timestamp is not important for this problem. So, you can drop the two.
df.drop(['timestamp', 'id'], axis=1, inplace=True)
df = df.drop_duplicates(['userId','title'])
df

Unnamed: 0,userId,movieId,rating,title
0,1,1371,2.5,Rocky III
1,4,1371,4.0,Rocky III
2,7,1371,3.0,Rocky III
3,19,1371,4.0,Rocky III
4,21,1371,3.0,Rocky III
...,...,...,...,...
44989,652,129009,4.0,Love Is a Ball
44990,653,2103,3.0,Solaris
44991,659,167,4.0,K-PAX
44992,659,563,3.0,Starship Troopers


In [4]:
df_pivot = df.pivot(index='userId', columns='title', values='rating').fillna(0)
df_pivot = df_pivot.astype('int64')

#function that converts ratings to binary values (0 if the rating is less than or equal to 0, 1 otherwise). Then, it applies this function to each element of the DataFrame df_pivot using applymap().
def encode_ratings(x):
    if x<=0:
        return 0
    if x>=1:
        return 1

df_pivot = df_pivot.applymap(encode_ratings)
df_pivot

title,!Women Art Revolution,'Gator Bait,'Twas the Night Before Christmas,...And God Created Woman,00 Schneider - Jagd auf Nihil Baxter,10 Items or Less,10 Things I Hate About You,"10,000 BC",11'09''01 - September 11,12 Angry Men,...,Zodiac,Zombie Flesh Eaters,Zombie Holocaust,Zozo,eXistenZ,xXx,¡Three Amigos!,À nos amours,Ödipussi,Şaban Oğlu Şaban
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
668,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
669,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
670,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
from mlxtend.frequent_patterns import apriori
frequent_itemset = apriori(df_pivot, min_support=0.07, use_colnames=True)
frequent_itemset.head()



Unnamed: 0,support,itemsets
0,0.129657,"(20,000 Leagues Under the Sea)"
1,0.129657,(2001: A Space Odyssey)
2,0.298063,(48 Hrs.)
3,0.292101,(5 Card Stud)
4,0.09389,(A Brief History of Time)


In [6]:
#generate association rules from the frequent itemsets using the association_rules function. It specifies "lift" as the metric to evaluate the rules and sets a minimum threshold of 1 for the lift.
from mlxtend.frequent_patterns import association_rules
rules = association_rules(frequent_itemset, metric="lift", min_threshold=1)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"(20,000 Leagues Under the Sea)",(48 Hrs.),0.129657,0.298063,0.076006,0.586207,1.966724,0.03736,1.696349,0.564766
1,(48 Hrs.),"(20,000 Leagues Under the Sea)",0.298063,0.129657,0.076006,0.255,1.966724,0.03736,1.168245,0.700262
2,"(20,000 Leagues Under the Sea)",(A Nightmare on Elm Street),0.129657,0.266766,0.081967,0.632184,2.369807,0.047379,1.99348,0.664134
3,(A Nightmare on Elm Street),"(20,000 Leagues Under the Sea)",0.266766,0.129657,0.081967,0.307263,2.369807,0.047379,1.256382,0.788322
4,"(20,000 Leagues Under the Sea)",(Back to the Future Part II),0.129657,0.210134,0.077496,0.597701,2.844379,0.050251,1.963381,0.745028


In [7]:
#sort the generated association rules by 'lift' in descending order
df_res = rules.sort_values(by=['lift'], ascending=False)
df_res.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
70959,(On Guard),"(The Garden of Eden, Muxmäuschenstill)",0.087928,0.099851,0.070045,0.79661,7.977991,0.061265,4.425733,0.958976
70958,"(The Garden of Eden, Muxmäuschenstill)",(On Guard),0.099851,0.087928,0.070045,0.701493,7.977991,0.061265,3.05544,0.971678
197236,"(Big Fish, Rope)","(Psycho, All the Way Boys)",0.092399,0.09538,0.070045,0.758065,7.947833,0.061232,3.739096,0.963176
197233,"(Psycho, All the Way Boys)","(Big Fish, Rope)",0.09538,0.092399,0.070045,0.734375,7.947833,0.061232,3.416849,0.96635
2541044,"(The Hours, Sissi, Rain Man, Monsoon Wedding, ...","(Cockles and Muscles, 48 Hrs., A Nightmare on ...",0.098361,0.09389,0.071535,0.727273,7.746032,0.0623,3.322404,0.965909


In [8]:
#These lines filter association rules where 'Men in Black II' appears as the antecedent and the lift is greater than 2, storing the result in df_MIB.
df_MIB = df_res[df_res['antecedents'].apply(lambda x: len(x) ==1 and next(iter(x)) == 'Men in Black II')]
df_MIB = df_MIB[df_MIB['lift'] > 2]
df_MIB.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
63400,(Men in Black II),"(Nostalgia, Terminator 3: Rise of the Machines)",0.33383,0.080477,0.077496,0.232143,2.88459,0.050631,1.197518,0.980726
56667,(Men in Black II),"(Jarhead, Sleepless in Seattle)",0.33383,0.077496,0.073025,0.21875,2.822716,0.047155,1.180805,0.969319
1167108,(Men in Black II),"(Young and Innocent, Point Break, The Talented...",0.33383,0.080477,0.073025,0.21875,2.718171,0.04616,1.17699,0.948865
380667,(Men in Black II),"(Young and Innocent, Point Break, The Talented...",0.33383,0.080477,0.073025,0.21875,2.718171,0.04616,1.17699,0.948865
63406,(Men in Black II),"(Nostalgia, The 39 Steps)",0.33383,0.080477,0.073025,0.21875,2.718171,0.04616,1.17699,0.948865


In [9]:
#This code extracts consequents (movies associated with 'Men in Black II') from the filtered association rules and creates a list of unique movie titles.
movies = df_MIB['consequents'].values
movie_list = []
for movie in movies:
    for title in movie:
        if title not in movie_list:
            movie_list.append(title)
print("All Recommended Movies: \n")
movie_list

All Recommended Movies: 



['Nostalgia',
 'Terminator 3: Rise of the Machines',
 'Jarhead',
 'Sleepless in Seattle',
 'Young and Innocent',
 'Point Break',
 'The Talented Mr. Ripley',
 'The 39 Steps',
 'Once Were Warriors',
 'The Thomas Crown Affair',
 'Scarface',
 'The Prisoner of Zenda',
 'Solaris',
 'Dawn of the Dead',
 'Sissi',
 'Rope',
 'Fools Rush In',
 'Rain Man',
 'The Million Dollar Hotel',
 'Say Anything...',
 '48 Hrs.',
 'Monsoon Wedding',
 'A Nightmare on Elm Street',
 'M',
 'The Passion of Joan of Arc',
 'All the Way Boys',
 'Titanic',
 'Bridge to Terabithia',
 'Donnie Darko',
 'Psycho',
 'Lonely Hearts',
 'Street Kings',
 'The Forbidden Kingdom',
 'Arlington Road',
 'Beauty and the Beast',
 'The Cave of the Yellow Dog',
 "Jacob's Ladder",
 'High Noon',
 'My Name Is Bruce',
 'Blood: The Last Vampire',
 'The Man with the Golden Arm',
 'Three Colors: Red',
 'The Hours',
 'Twin Peaks: Fire Walk with Me',
 'Superstar: The Karen Carpenter Story',
 'The Conversation',
 'Judgment Night',
 'Rebecca',
 'To K

In [10]:
print("top 10 movies you might like:")
movie_list[:10]

top 10 movies you might like:


['Nostalgia',
 'Terminator 3: Rise of the Machines',
 'Jarhead',
 'Sleepless in Seattle',
 'Young and Innocent',
 'Point Break',
 'The Talented Mr. Ripley',
 'The 39 Steps',
 'Once Were Warriors',
 'The Thomas Crown Affair']

### TESTING

In [11]:
testInput=input()

Titanic


In [12]:
#These lines filter association rules where INPUT appears as the antecedent and the lift is greater than 2, storing the result in df_INPUT.

df_INPUT = df_res[df_res['antecedents'].apply(lambda x: len(x) ==1 and next(iter(x)) == testInput)]
df_INPUT = df_INPUT[df_INPUT['lift'] > 2]
df_INPUT.head()

#This code extracts consequents (movies associated with INPUT) from the filtered association rules and creates a list of unique movie titles.
movies0 = df_INPUT['consequents'].values
movie_list0 = []
for movie in movies0:
    for title in movie:
        if title not in movie_list0:
            movie_list0.append(title)
            
print("All Recommended Movies:")
movie_list0

All Recommended Movies:


['Psycho',
 'Syriana',
 'Big Fish',
 'Monsoon Wedding',
 'Solaris',
 'Terminator 3: Rise of the Machines',
 'Reservoir Dogs',
 'A Nightmare on Elm Street',
 'Rain Man',
 'Romeo + Juliet',
 'The Passion of Joan of Arc',
 'The Hours',
 'Sissi',
 'High Noon',
 'Back to the Future Part II',
 'The Million Dollar Hotel',
 'Batman Returns',
 'To Kill a Mockingbird',
 'Once Were Warriors',
 'The Conversation',
 'Three Colors: Red',
 "Dave Chappelle's Block Party",
 'Bang, Boom, Bang',
 'Wag the Dog',
 '48 Hrs.',
 'Cockles and Muscles',
 'Silent Hill',
 'Tough Enough',
 'Love Actually',
 'The 39 Steps',
 'Beauty and the Beast',
 'Night on Earth',
 'Lost in Translation',
 'Live and Let Die',
 'Grill Point',
 'Men in Black II',
 'Metropolis',
 'All the Way Boys',
 'A River Runs Through It',
 'Aliens vs Predator: Requiem',
 'Bridge to Terabithia',
 'Young and Innocent',
 'Rope',
 'Jurassic Park',
 'Star Wars',
 'Arlington Road',
 'Ariel',
 'Judgment Night',
 'Contempt',
 'Twin Peaks: Fire Walk wit

In [13]:
print("top recomended movies you might like:")
movie_list0[:10]

top recomended movies you might like:


['Psycho',
 'Syriana',
 'Big Fish',
 'Monsoon Wedding',
 'Solaris',
 'Terminator 3: Rise of the Machines',
 'Reservoir Dogs',
 'A Nightmare on Elm Street',
 'Rain Man',
 'Romeo + Juliet']