In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data_movie = pd.read_csv('movie.csv')
data_movie

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
27273,131254,Kein Bund für's Leben (2007),Comedy
27274,131256,"Feuer, Eis & Dosenbier (2002)",Comedy
27275,131258,The Pirates (2014),Adventure
27276,131260,Rentun Ruusu (2001),(no genres listed)


In [3]:
data_rating = pd.read_csv('rating.csv')
data_rating

Unnamed: 0,userId,movieId,rating,timestamp
0,1,2,3.5,2005-04-02 23:53:47
1,1,29,3.5,2005-04-02 23:31:16
2,1,32,3.5,2005-04-02 23:33:39
3,1,47,3.5,2005-04-02 23:32:07
4,1,50,3.5,2005-04-02 23:29:40
...,...,...,...,...
20000258,138493,68954,4.5,2009-11-13 15:42:00
20000259,138493,69526,4.5,2009-12-03 18:31:48
20000260,138493,69644,3.0,2009-12-07 18:10:57
20000261,138493,70286,5.0,2009-11-13 15:42:24


In [4]:
df = pd.merge(data_movie, data_rating, on = 'movieId')

In [5]:
df.isnull().sum()

movieId      0
title        0
genres       0
userId       0
rating       0
timestamp    0
dtype: int64

In [6]:
df.drop('timestamp', axis = 1, inplace=True)

In [7]:
merged_data=df
df[:10]

Unnamed: 0,movieId,title,genres,userId,rating
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3,4.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,6,5.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8,4.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,10,4.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11,4.5
5,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,12,4.0
6,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,13,4.0
7,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,14,4.5
8,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,16,3.0
9,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,19,5.0


In [20]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Input five movie titles with years
input_movies = input("Enter five movie titles with years, separated by commas: ").split(',')

# Strip any extra whitespace from movie titles
input_movies = [movie.strip() for movie in input_movies]

# Filter data for users within the range 2000-5500 who have watched all five input movies
users_with_input_movies = merged_data[(merged_data['userId'] >= 4500) & (merged_data['userId'] <= 5500)]
users_with_input_movies = users_with_input_movies[users_with_input_movies['title'].isin(input_movies)]

# Find users who have watched all five specified movies
common_users = users_with_input_movies.groupby('userId').filter(lambda x: x['title'].nunique() == 5)['userId'].unique()

# If no common users are found, return an error message
if len(common_users) == 0:
    print("No users found who have watched all five specified movies within the user ID range 4500-5500.")
else:
    # Iterate over each user to generate personalized recommendations
    for user_id in common_users:
        print(f"\nRecommendations for User ID: {user_id}")
        
        # Filter data for the selected user
        user_data = merged_data[merged_data['userId'] == user_id]
        
        # Select movies the user has already watched that match the input movies
        watched_movies = user_data[user_data['title'].isin(input_movies)]
        
        # Count of matched movies
        matched_count = watched_movies['title'].nunique()
        
        # Display matched movies
        print("Movies the user has watched (matching the input list):")
        print(watched_movies[['movieId', 'title', 'genres']])
        print(f"Total matched movies: {matched_count}")

        # Extract genres from the watched movies
        watched_movies['genre_list'] = watched_movies['genres'].str.split('|')
        
        # Create a one-hot encoded dataframe for genres
        genre_dummies = merged_data['genres'].str.get_dummies('|')

        # Only include movies with rating > 3 for recommendation purposes
        high_rated_movies = merged_data[merged_data['rating'] > 3]

        # One-hot encoding for genres of movies with rating > 3
        genre_dummies_high_rated = high_rated_movies['genres'].str.get_dummies('|')

        # Use apriori to find frequent itemsets with minimum support
        frequent_itemsets = apriori(genre_dummies_high_rated, min_support=0.1, use_colnames=True)

        # Generate association rules from the frequent itemsets
        rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

        # Filter rules where the antecedent contains genres the user has watched
        relevant_rules = rules[rules['antecedents'].apply(lambda x: any(genre in watched_genres for genre in x))]
        
        # Calculate and display confidence scores for each relevant rule
        print("\nTop Association Rules with Confidence:")
        print(relevant_rules[['antecedents', 'consequents', 'confidence', 'lift']].head())

        # Extract the genres from the watched movies
        watched_genres = watched_movies['genre_list'].explode().unique()

        # Recommend movies that match the genres of the watched movies
        recommended_movies = high_rated_movies[high_rated_movies['genres'].str.contains('|'.join(watched_genres))]

        # Exclude already watched movies from recommendations
        recommended_movies = recommended_movies[~recommended_movies['title'].isin(input_movies)]

        # Get distinct recommended movie titles with genres and rating > 3
        recommended_movie_titles = recommended_movies[['movieId', 'title', 'genres']].drop_duplicates()

        # Display recommended movies with genres
        print("Recommended Movies:")
        print(recommended_movie_titles)

Enter five movie titles with years, separated by commas:  Eraser (1996),Contact (1997),Apocalypse Now (1979),Toy Story 2 (1999),Maverick (1994)



Recommendations for User ID: 4529
Movies the user has watched (matching the input list):
          movieId                  title  \
2648046       368        Maverick (1994)   
4733071       786          Eraser (1996)   
6374924      1208  Apocalypse Now (1979)   
8199241      1584         Contact (1997)   
12570984     3114     Toy Story 2 (1999)   

                                               genres  
2648046                      Adventure|Comedy|Western  
4733071                         Action|Drama|Thriller  
6374924                              Action|Drama|War  
8199241                                  Drama|Sci-Fi  
12570984  Adventure|Animation|Children|Comedy|Fantasy  
Total matched movies: 5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  watched_movies['genre_list'] = watched_movies['genres'].str.split('|')



Top Association Rules with Confidence:
   antecedents consequents  confidence      lift
0  (Adventure)    (Action)     0.54333  2.051337
Recommended Movies:
          movieId                                              title  \
0               1                                   Toy Story (1995)   
49695           2                                     Jumanji (1995)   
71938           3                            Grumpier Old Men (1995)   
84681           4                           Waiting to Exhale (1995)   
87435           5                 Father of the Bride Part II (1995)   
...           ...                                                ...   
20000256   131250                              No More School (2000)   
20000257   131252  Forklift Driver Klaus: The First Day on the Jo...   
20000258   131254                       Kein Bund für's Leben (2007)   
20000259   131256                      Feuer, Eis & Dosenbier (2002)   
20000262   131262                                 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  watched_movies['genre_list'] = watched_movies['genres'].str.split('|')



Top Association Rules with Confidence:
   antecedents consequents  confidence      lift
0  (Adventure)    (Action)     0.54333  2.051337
Recommended Movies:
          movieId                                              title  \
0               1                                   Toy Story (1995)   
49695           2                                     Jumanji (1995)   
71938           3                            Grumpier Old Men (1995)   
84681           4                           Waiting to Exhale (1995)   
87435           5                 Father of the Bride Part II (1995)   
...           ...                                                ...   
20000256   131250                              No More School (2000)   
20000257   131252  Forklift Driver Klaus: The First Day on the Jo...   
20000258   131254                       Kein Bund für's Leben (2007)   
20000259   131256                      Feuer, Eis & Dosenbier (2002)   
20000262   131262                                 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  watched_movies['genre_list'] = watched_movies['genres'].str.split('|')



Top Association Rules with Confidence:
   antecedents consequents  confidence      lift
0  (Adventure)    (Action)     0.54333  2.051337
Recommended Movies:
          movieId                                              title  \
0               1                                   Toy Story (1995)   
49695           2                                     Jumanji (1995)   
71938           3                            Grumpier Old Men (1995)   
84681           4                           Waiting to Exhale (1995)   
87435           5                 Father of the Bride Part II (1995)   
...           ...                                                ...   
20000256   131250                              No More School (2000)   
20000257   131252  Forklift Driver Klaus: The First Day on the Jo...   
20000258   131254                       Kein Bund für's Leben (2007)   
20000259   131256                      Feuer, Eis & Dosenbier (2002)   
20000262   131262                                 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  watched_movies['genre_list'] = watched_movies['genres'].str.split('|')



Top Association Rules with Confidence:
   antecedents consequents  confidence      lift
0  (Adventure)    (Action)     0.54333  2.051337
Recommended Movies:
          movieId                                              title  \
0               1                                   Toy Story (1995)   
49695           2                                     Jumanji (1995)   
71938           3                            Grumpier Old Men (1995)   
84681           4                           Waiting to Exhale (1995)   
87435           5                 Father of the Bride Part II (1995)   
...           ...                                                ...   
20000256   131250                              No More School (2000)   
20000257   131252  Forklift Driver Klaus: The First Day on the Jo...   
20000258   131254                       Kein Bund für's Leben (2007)   
20000259   131256                      Feuer, Eis & Dosenbier (2002)   
20000262   131262                                 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  watched_movies['genre_list'] = watched_movies['genres'].str.split('|')



Top Association Rules with Confidence:
   antecedents consequents  confidence      lift
0  (Adventure)    (Action)     0.54333  2.051337
Recommended Movies:
          movieId                                              title  \
0               1                                   Toy Story (1995)   
49695           2                                     Jumanji (1995)   
71938           3                            Grumpier Old Men (1995)   
84681           4                           Waiting to Exhale (1995)   
87435           5                 Father of the Bride Part II (1995)   
...           ...                                                ...   
20000256   131250                              No More School (2000)   
20000257   131252  Forklift Driver Klaus: The First Day on the Jo...   
20000258   131254                       Kein Bund für's Leben (2007)   
20000259   131256                      Feuer, Eis & Dosenbier (2002)   
20000262   131262                                 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  watched_movies['genre_list'] = watched_movies['genres'].str.split('|')



Top Association Rules with Confidence:
   antecedents consequents  confidence      lift
0  (Adventure)    (Action)     0.54333  2.051337
Recommended Movies:
          movieId                                              title  \
0               1                                   Toy Story (1995)   
49695           2                                     Jumanji (1995)   
71938           3                            Grumpier Old Men (1995)   
84681           4                           Waiting to Exhale (1995)   
87435           5                 Father of the Bride Part II (1995)   
...           ...                                                ...   
20000256   131250                              No More School (2000)   
20000257   131252  Forklift Driver Klaus: The First Day on the Jo...   
20000258   131254                       Kein Bund für's Leben (2007)   
20000259   131256                      Feuer, Eis & Dosenbier (2002)   
20000262   131262                                 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  watched_movies['genre_list'] = watched_movies['genres'].str.split('|')



Top Association Rules with Confidence:
   antecedents consequents  confidence      lift
0  (Adventure)    (Action)     0.54333  2.051337
Recommended Movies:
          movieId                                              title  \
0               1                                   Toy Story (1995)   
49695           2                                     Jumanji (1995)   
71938           3                            Grumpier Old Men (1995)   
84681           4                           Waiting to Exhale (1995)   
87435           5                 Father of the Bride Part II (1995)   
...           ...                                                ...   
20000256   131250                              No More School (2000)   
20000257   131252  Forklift Driver Klaus: The First Day on the Jo...   
20000258   131254                       Kein Bund für's Leben (2007)   
20000259   131256                      Feuer, Eis & Dosenbier (2002)   
20000262   131262                                 