# Rule Mining for Genres using Apriori

## Loading functions and modules

In [18]:
import ast

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

## Loading Data

In [19]:
# Load the games table; only the "Genres" column is used by the cells below.
# NOTE(review): the stray echo of this line in the cell output looks like the
# tail of a pandas DtypeWarning (mixed-type columns) -- confirm. Reading with
# low_memory=False infers each column's dtype from the whole file at once.
df = pd.read_csv("../data/final_0_80509.csv", low_memory=False)
df.head()

  df = pd.read_csv("../data/final_0_80509.csv")


Unnamed: 0,Index,AppID,Title,Initial_Price,Final_Price,Discount_Percent,Developers,Publishers,Genres,Categories,...,Subtitle_Languages,Positive_Reviews,Negative_Reviews,Total_Reviews,Overall_Review_Summary,Recent_Reviews,Recent_Review_Summary,Mature_Content_Desc,Awards,Curators
0,0,20200,Galactic Bowling,,,,['Perpetual FX Creative'],['Perpetual FX Creative'],"['Casual', 'Indie', 'Sports']","['Single-player', 'Multi-player', 'Steam Achie...",...,['English'],6.0,11.0,12.0,Mostly Negative,,,,,6.0
1,1,655370,Train Bandit,52.0,52.0,0.0,['Rusty Moyher'],['Wild Rooster'],"['Action', 'Indie']","['Single-player', 'Steam Achievements', 'Full ...",...,"['English', 'French', 'Italian', 'German', 'Sp...",57.0,7.0,53.0,Very Positive,,,,,8.0
2,2,1732930,Jolt Project,199.0,199.0,0.0,['Campião Games'],['Campião Games'],"['Action', 'Adventure', 'Indie', 'Strategy']",['Single-player'],...,['English'],,,,,,,,,1.0
3,3,1355720,Henosis™,,,,['Odd Critter Games'],['Odd Critter Games'],"['Adventure', 'Casual', 'Indie']","['Single-player', 'Full controller support']",...,['English'],5.0,0.0,5.0,5 user reviews,,,,,5.0
4,4,1139950,Two Weeks in Painland,0.0,0.0,0.0,['Unusual Games'],['Unusual Games'],"['Adventure', 'Indie']","['Single-player', 'Steam Achievements']",...,"['English', 'Spanish - Spain']",53.0,6.0,59.0,Very Positive,,,This Game may contain content not appropriate ...,,2.0


## Looking at all genres

In [36]:
# Build the transaction list: one list of genre names per game.
# Each "Genres" cell holds a stringified Python list such as
# "['Action', 'Indie']"; rows with no genres are missing values and are skipped.
n = 80509  # number of leading rows to use (presumably the full file -- confirm)

# ast.literal_eval only parses Python literals, so unlike eval() it cannot
# execute arbitrary code embedded in the CSV. dropna() removes missing values
# regardless of which NaN object pandas used, unlike an `is np.nan` identity test.
dataset = [ast.literal_eval(genres) for genres in df["Genres"][:n].dropna()]

dataset[:10]

[['Casual', 'Indie', 'Sports'],
 ['Action', 'Indie'],
 ['Action', 'Adventure', 'Indie', 'Strategy'],
 ['Adventure', 'Casual', 'Indie'],
 ['Adventure', 'Indie'],
 ['Adventure',
  'Casual',
  'Free to Play',
  'Massively Multiplayer',
  'RPG',
  'Strategy'],
 ['Indie', 'Strategy'],
 ['Casual'],
 ['Adventure', 'RPG', 'Simulation', 'Strategy'],
 ['Action', 'Adventure', 'Indie']]

## Converting genres to asymmetric binary attributes

In [37]:
# One-hot encode the transactions: one row per game, one boolean column per
# distinct genre found in `dataset`.
te = TransactionEncoder()
te.fit(dataset)  # learn the set of genre labels
te_ary = te.transform(dataset)  # boolean array aligned with te.columns_
df2 = pd.DataFrame(data=te_ary, columns=te.columns_)
df2

Unnamed: 0,360 Video,Accounting,Action,Adventure,Animation & Modeling,Audio Production,Casual,Design & Illustration,Documentary,Early Access,...,Short,Simulation,Software Training,Sports,Strategy,Tutorial,Utilities,Video Production,Violent,Web Publishing
0,False,False,False,False,False,False,True,False,False,False,...,False,False,False,True,False,False,False,False,False,False
1,False,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,True,True,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
3,False,False,False,True,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75616,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
75617,False,False,True,False,False,False,True,False,False,False,...,False,True,False,True,False,False,False,False,False,False
75618,False,False,False,False,False,False,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
75619,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False


## Rule mining
### min support 20%, confidence 40%

In [38]:
# Frequent itemsets: genre combinations whose support is at least 0.2.
frq_items = apriori(df2, min_support=0.2, use_colnames=True)
# Derive association rules, keeping those with confidence of at least 0.4.
rules = association_rules(frq_items, metric="confidence", min_threshold=0.4)
# Show the rules with the highest support first.
rules.sort_values("support", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,(Casual),(Indie),0.407109,0.688407,0.310165,0.761872,1.106718,0.029909,1.308514,0.16264
5,(Indie),(Casual),0.688407,0.407109,0.310165,0.450555,1.106718,0.029909,1.079073,0.309467
0,(Indie),(Action),0.688407,0.41954,0.31002,0.450344,1.073424,0.021206,1.056043,0.219522
1,(Action),(Indie),0.41954,0.688407,0.31002,0.738952,1.073424,0.021206,1.193626,0.11784
2,(Adventure),(Indie),0.383941,0.688407,0.290316,0.756148,1.098403,0.026009,1.277797,0.14542
3,(Indie),(Adventure),0.688407,0.383941,0.290316,0.421722,1.098403,0.026009,1.065334,0.287514


## Conclusion of good rules
1. Casual <-> Indie. Most Casual games (about 76%) are also Indie, i.e. independently developed; the reverse is weaker, as only about 45% of Indie games are Casual.
2. Action <-> Indie. Most Action games (about 74%) are also Indie; only about 45% of Indie games are Action.
3. Adventure <-> Indie. Most Adventure games (about 76%) are also Indie; only about 42% of Indie games are Adventure.