# Odkrywanie reguł asocjacyjnych
### Bartłomiej Kowalewski, nr 145204

In [184]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

In [185]:
# Read the two source tables (movies and user ratings) from the local data/ directory.
movies, ratings = map(pd.read_csv, ("data/movies.csv", "data/ratings.csv"))

In [186]:
# Preview the first rows of the movies table.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [187]:
# Preview the first rows of the ratings table.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [188]:
# Attach each rating to its movie's title and genres; rows are matched on movieId.
movies_ratings = pd.merge(movies, ratings, on="movieId")

In [189]:
# Preview the merged movies/ratings table.
movies_ratings.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,3.0,851866703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,9,4.0,938629179
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,13,5.0,1331380058
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.0,997938310
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,19,3.0,855190091


In [190]:
# Split the ratings into liked (rating >= 4) and disliked (rating <= 2) rows.
# Ratings strictly between 2 and 4 end up in neither frame.
good_movies = movies_ratings.loc[movies_ratings['rating'].ge(4)]
bad_movies = movies_ratings.loc[movies_ratings['rating'].le(2)]

In [191]:
# Build one transaction per user: the list of titles that user rated in the given group.
# Iterating a groupby yields (userId, titles) pairs in ascending userId order.
good_movies_list = [
    titles.tolist() for _, titles in good_movies.groupby('userId')['title']
]
bad_movies_list = [
    titles.tolist() for _, titles in bad_movies.groupby('userId')['title']
]

In [192]:
# Show the first liked-movies transaction as a sanity check.
good_movies_list[0]

['Cinema Paradiso (Nuovo cinema Paradiso) (1989)',
 'French Connection, The (1971)',
 'Tron (1982)']

In [193]:
# Show the first disliked-movies transaction as a sanity check.
bad_movies_list[0]

['Escape from New York (1981)',
 'Deer Hunter, The (1978)',
 'Ben-Hur (1959)',
 'Gandhi (1982)',
 'Cape Fear (1991)',
 'Beavis and Butt-Head Do America (1996)',
 'Willow (1988)',
 'Antz (1998)',
 'Time Bandits (1981)']

In [194]:
# Encoder instance used to encode the liked-movies transactions.
te = TransactionEncoder()

In [195]:
# Learn the set of liked titles and encode every transaction in a single step,
# then wrap the encoded array in a DataFrame with one column per title.
te_array_good = te.fit_transform(good_movies_list)
dataframe_good_movies = pd.DataFrame(data=te_array_good, columns=te.columns_)

In [196]:
# A second, separate encoder instance for the disliked-movies transactions.
te = TransactionEncoder()

In [197]:
# Learn the set of disliked titles and encode every transaction in a single step,
# then wrap the encoded array in a DataFrame with one column per title.
te_array_bad = te.fit_transform(bad_movies_list)
dataframe_bad_movies = pd.DataFrame(data=te_array_bad, columns=te.columns_)

In [198]:
# Mine frequent itemsets of liked movies with a minimum support of 0.05.
frequent_itemsets_good = apriori(
    dataframe_good_movies,
    min_support=0.05,
    use_colnames=True,  # itemsets carry movie titles instead of column indices
)

In [199]:
# Mine frequent itemsets of disliked movies; the support threshold (0.01) is
# lower than the 0.05 used for the liked-movies itemsets.
frequent_itemsets_bad = apriori(
    dataframe_bad_movies,
    min_support=0.01,
    use_colnames=True,  # itemsets carry movie titles instead of column indices
)

In [200]:
# Display the frequent itemsets found among liked movies.
frequent_itemsets_good

Unnamed: 0,support,itemsets
0,0.090909,(12 Angry Men (1957))
1,0.123696,(2001: A Space Odyssey (1968))
2,0.062593,"(Abyss, The (1989))"
3,0.071535,(Ace Ventura: Pet Detective (1994))
4,0.062593,"(African Queen, The (1951))"
...,...,...
17192,0.050671,(Raiders of the Lost Ark (Indiana Jones and th...
17193,0.053651,(Raiders of the Lost Ark (Indiana Jones and th...
17194,0.050671,(Raiders of the Lost Ark (Indiana Jones and th...
17195,0.056632,(Raiders of the Lost Ark (Indiana Jones and th...


In [201]:
# Display the frequent itemsets found among disliked movies.
frequent_itemsets_bad

Unnamed: 0,support,itemsets
0,0.016722,(10 Things I Hate About You (1999))
1,0.011706,"(10,000 BC (2008))"
2,0.013378,(101 Dalmatians (1996))
3,0.010033,(102 Dalmatians (2000))
4,0.010033,(13 Going on 30 (2004))
...,...,...
1147,0.010033,"(Twister (1996), Batman & Robin (1997), Congo ..."
1148,0.010033,"(Twister (1996), Batman & Robin (1997), Godzil..."
1149,0.010033,"(Twister (1996), Batman & Robin (1997), Waterw..."
1150,0.010033,"(Waterworld (1995), Lost in Space (1998), Cong..."


In [202]:
# Derive association rules from the liked-movies itemsets, keeping only rules
# with confidence of at least 0.9.
good_movies_rules = association_rules(
    frequent_itemsets_good,
    metric='confidence',
    min_threshold=0.9,
)

In [203]:
# Derive association rules from the disliked-movies itemsets, keeping only rules
# with confidence of at least 0.6.
bad_movies_rules = association_rules(
    frequent_itemsets_bad,
    metric='confidence',
    min_threshold=0.6,
)

In [204]:
# Boolean mask over the rules: True where the antecedent set contains both
# 'Pulp Fiction (1994)' and 'Reservoir Dogs (1992)'.
# NOTE(review): this also matches rules whose antecedents include further films
# beyond these two -- confirm that is the intended notion of "applicable rule".
positive_rating_idx = good_movies_rules['antecedents'].map(
    lambda antecedent: {'Pulp Fiction (1994)', 'Reservoir Dogs (1992)'}.issubset(antecedent)
)
good_movies_rules.loc[positive_rating_idx]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
1655,"(Godfather: Part II, The (1974), Reservoir Dog...","(Godfather, The (1972))",0.067064,0.265276,0.064083,0.955556,3.602122,0.046293,16.531297
2281,"(Pulp Fiction (1994), L.A. Confidential (1997)...","(Silence of the Lambs, The (1991))",0.056632,0.356185,0.052161,0.921053,2.585884,0.03199,8.154993
2903,"(Pulp Fiction (1994), Star Wars: Episode VI - ...",(Raiders of the Lost Ark (Indiana Jones and th...,0.055142,0.265276,0.050671,0.918919,3.464015,0.036043,9.0616
2922,"(Sixth Sense, The (1999), Reservoir Dogs (1992...","(Silence of the Lambs, The (1991))",0.056632,0.356185,0.052161,0.921053,2.585884,0.03199,8.154993
2925,"(Pulp Fiction (1994), Star Wars: Episode V - T...",(Star Wars: Episode IV - A New Hope (1977)),0.070045,0.345753,0.064083,0.914894,2.646093,0.039865,7.687407
2926,"(Pulp Fiction (1994), Star Wars: Episode VI - ...",(Star Wars: Episode IV - A New Hope (1977)),0.055142,0.345753,0.053651,0.972973,2.814073,0.034586,24.207154
2929,"(Pulp Fiction (1994), Star Wars: Episode VI - ...",(Star Wars: Episode V - The Empire Strikes Bac...,0.055142,0.281669,0.052161,0.945946,3.358358,0.036629,13.289121
3236,"(Matrix, The (1999), Pulp Fiction (1994), Rese...","(Silence of the Lambs, The (1991))",0.053651,0.356185,0.050671,0.944444,2.651557,0.031561,11.588674
4361,"(Godfather: Part II, The (1974), Pulp Fiction ...","(Godfather, The (1972))",0.053651,0.265276,0.050671,0.944444,3.560237,0.036438,13.225037
4372,"(Silence of the Lambs, The (1991), Godfather: ...","(Godfather, The (1972))",0.055142,0.265276,0.053651,0.972973,3.66778,0.039024,27.184799


In [205]:
# Collect the distinct consequents of the matching rules, in order of first appearance.
recommended_movies = good_movies_rules[positive_rating_idx]['consequents'].unique().tolist()
for consequent in recommended_movies:
    # Each consequent is a set of titles. Sort it before printing so the order of
    # titles within a line does not depend on set iteration order, which can
    # differ between kernel sessions because string hashes are randomized.
    print(sorted(consequent))

['Godfather, The (1972)']
['Silence of the Lambs, The (1991)']
['Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)']
['Star Wars: Episode IV - A New Hope (1977)']
['Star Wars: Episode V - The Empire Strikes Back (1980)']
['Star Wars: Episode V - The Empire Strikes Back (1980)', 'Star Wars: Episode IV - A New Hope (1977)']


In [206]:
# Boolean mask over the rules: True where 'Mask, The (1994)' is among the antecedents.
# NOTE(review): antecedents may contain other films as well -- confirm that is intended.
negative_rating_idx = bad_movies_rules['antecedents'].map(
    lambda antecedent: 'Mask, The (1994)' in antecedent
)
bad_movies_rules.loc[negative_rating_idx]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
75,"(Mask, The (1994), Ace Ventura: When Nature Ca...",(Ace Ventura: Pet Detective (1994)),0.016722,0.090301,0.011706,0.7,7.751852,0.010196,3.03233
86,"(Mask, The (1994), Armageddon (1998))",(Ace Ventura: Pet Detective (1994)),0.011706,0.090301,0.010033,0.857143,9.492063,0.008976,6.367893
94,"(Mask, The (1994), There's Something About Mar...",(Ace Ventura: Pet Detective (1994)),0.011706,0.090301,0.010033,0.857143,9.492063,0.008976,6.367893
96,"(Mask, The (1994), Waterworld (1995))",(Ace Ventura: Pet Detective (1994)),0.013378,0.090301,0.010033,0.75,8.305556,0.008825,3.638796
97,"(Mask, The (1994), Armageddon (1998))",(Ace Ventura: When Nature Calls (1995)),0.011706,0.058528,0.010033,0.857143,14.644898,0.009348,6.590301
98,"(Mask, The (1994), Ace Ventura: When Nature Ca...",(Armageddon (1998)),0.016722,0.046823,0.010033,0.6,12.814286,0.00925,2.382943


In [207]:
# Collect the distinct consequents of the matching rules, in order of first appearance.
not_recommended_movies = bad_movies_rules[negative_rating_idx]['consequents'].unique().tolist()
for consequent in not_recommended_movies:
    # Each consequent is a set of titles. Sort it before printing so the order of
    # titles within a line does not depend on set iteration order, which can
    # differ between kernel sessions because string hashes are randomized.
    print(sorted(consequent))

['Ace Ventura: Pet Detective (1994)']
['Ace Ventura: When Nature Calls (1995)']
['Armageddon (1998)']


### 1. Obejrzałem już „Pulp Fiction” i „Reservoir Dogs”, oba filmy bardzo mi się podobały. Jaki film należy mi zarekomendować?

Według przygotowanego modelu rekomendacyjnego do generowania pozytywnych rekomendacji jako następny film należałoby wybrać np. „Ojca chrzestnego” lub „Milczenie owiec”.

### 2. Bardzo nie podobał mi się film „Maska”. Jakich filmów powinienem unikać?

Według przygotowanego modelu rekomendacyjnego do generowania negatywnych rekomendacji filmy, których należy unikać, to „Ace Ventura: Psi detektyw”, „Ace Ventura: Zew natury” oraz „Armageddon”.