In [225]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [226]:
# Read the two input tables from the local data folder:
# per-user ratings and the movie catalogue they refer to.
ratings = pd.read_csv('data/ratings.csv')
movies = pd.read_csv('data/movies.csv')

In [227]:
# Attach the title to every rating. The inner join keeps only ratings whose
# movieId is present in `movies`; the timestamp column is not used afterwards.
movies_ratings = (
    ratings
    .merge(movies[['movieId', 'title']], how="inner", on="movieId")
    .drop(columns='timestamp')
)

# Ratings of 4 or more form the "well rated" subset.
positive_mask = movies_ratings['rating'] >= 4
good_movies = movies_ratings.loc[positive_mask]

# Ratings of 2 or less form the "poorly rated" subset.
negative_mask = movies_ratings['rating'] <= 2
bad_movies = movies_ratings.loc[negative_mask]

In [228]:
def create_association_rules(movies_df, metric="confidence", min_support=0.05, min_threshold=0.7):
    """Build association rules from the titles each user has in `movies_df`.

    The rows are grouped by 'userId' and each user's 'title' values become one
    transaction. The transactions are encoded with ``TransactionEncoder``,
    passed to ``apriori`` and the resulting itemsets to ``association_rules``.

    Parameters
    ----------
    movies_df : pandas.DataFrame
        Must contain the columns 'userId' and 'title'.
    metric : str
        Forwarded to ``association_rules`` as ``metric``.
    min_support : float
        Forwarded to ``apriori`` as ``min_support``.
    min_threshold : float
        Forwarded to ``association_rules`` as ``min_threshold``.

    Returns
    -------
    The object returned by ``association_rules`` for the mined itemsets.
    """
    # One transaction per user: the list of titles found for that userId.
    transactions = movies_df.groupby('userId')['title'].apply(list).tolist()

    encoder = TransactionEncoder()
    encoder.fit(transactions)
    # Columns are labelled with the encoder's item names so that apriori can
    # report itemsets by title (use_colnames=True).
    onehot = pd.DataFrame(encoder.transform(transactions), columns=encoder.columns_)

    itemsets = apriori(onehot, min_support=min_support, use_colnames=True)
    rules = association_rules(itemsets, metric=metric, min_threshold=min_threshold)
    return rules
    

In [229]:
def get_recommendations(rules, movies):
    """Collect the titles implied by every rule whose antecedent covers `movies`.

    A rule is used when its antecedent contains *all* of the given titles
    (``antecedent.issuperset(movies)``). The titles found in the consequents
    of those rules are gathered into one flat list.

    Parameters
    ----------
    rules : pandas.DataFrame
        Table with an 'antecedents' and a 'consequents' column whose cells are
        set-like collections of titles (they must offer ``issuperset`` and be
        iterable), e.g. the frame built by ``create_association_rules``.
    movies : iterable
        Titles that must all be present in a rule's antecedent.

    Returns
    -------
    list
        Recommended titles, each listed once. Titles appear in the order of the
        first matching rule that mentions them; titles coming from the same
        consequent are sorted so the result does not depend on set iteration
        order. The list is empty when no rule matches.
    """
    # Materialise once so a one-shot iterable (e.g. a generator) can be
    # compared against every rule, not just the first one.
    wanted = frozenset(movies)

    # Dict keys keep insertion order and silently ignore repeats, which gives
    # an order-preserving de-duplication across overlapping consequents.
    ordered_titles = {}
    for antecedent, consequent in zip(rules['antecedents'], rules['consequents']):
        if not antecedent.issuperset(wanted):
            continue
        # key=str keeps the sort well defined even for non-string items.
        for title in sorted(consequent, key=str):
            ordered_titles.setdefault(title, None)

    return list(ordered_titles)


### Well-rated movies

In [246]:
# Mine rules from the well-rated subset; the metric is left at the function's
# default, so min_threshold applies to that default metric.
rules = create_association_rules(
    movies_df=good_movies,
    min_support=0.06,
    min_threshold=0.75,
)

In [247]:
# Seed titles for the lookup. They get their own name so the `movies` DataFrame
# read from data/movies.csv is not overwritten by a set, which would break the
# merge cell if it is run again later in the same kernel.
liked_titles = {'Pulp Fiction (1994)', 'Reservoir Dogs (1992)'}
recommendations = get_recommendations(rules, liked_titles)
display(recommendations)

['American Beauty (1999)',
 'Silence of the Lambs, The (1991)',
 'Godfather, The (1972)',
 'Usual Suspects, The (1995)',
 'Seven (a.k.a. Se7en) (1995)',
 'Star Wars: Episode V - The Empire Strikes Back (1980)',
 'Star Wars: Episode IV - A New Hope (1977)']

### Poorly rated movies

In [258]:
# Mine rules from the poorly rated subset; the metric is left at the function's
# default, so min_threshold applies to that default metric.
rules = create_association_rules(
    movies_df=bad_movies,
    min_support=0.01,
    min_threshold=0.6,
)

In [259]:
# Seed titles for the lookup. They get their own name so the `movies` DataFrame
# read from data/movies.csv is not overwritten by a set, which would break the
# merge cell if it is run again later in the same kernel.
disliked_titles = {'Mask, The (1994)'}
recommendations = get_recommendations(rules, disliked_titles)
display(recommendations)

['Ace Ventura: Pet Detective (1994)',
 'Ace Ventura: When Nature Calls (1995)',
 'Armageddon (1998)']