## Imports

In [205]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

## Load dataframe

In [206]:
df = pd.read_csv('combined_df.csv')

## binning numeric columns

In [207]:
count_quantiles = 3
# binning well distributed columns
for column in ['Normal_Rating_Count', 'Normal_Rating', 'Normal_Reviews', 'Normal_Size', 'Normal_Installs']:
    df[column + '_binned'] = pd.qcut(df[column], q=count_quantiles, labels=['Low', 'Medium', 'High'])
# binning skew distributed column
df[['Normal_Revenue_binned']] = "Zero"
non_zero_mask = df['Normal_Revenue'] > 0
df.loc[non_zero_mask, 'Normal_Revenue_binned'] = pd.qcut(df.loc[non_zero_mask, 'Normal_Revenue'], q=count_quantiles, labels=['Low', 'Medium', 'High'])
# binning skew distributed column
df[['Normal_Price_binned']] = "Zero"
non_zero_mask = df['Normal_Price'] > 0
df.loc[non_zero_mask, 'Normal_Price_binned'] = pd.qcut(df.loc[non_zero_mask, 'Normal_Price'], q=count_quantiles, labels=['Low', 'Medium', 'High'])

In [208]:
df_relevant = df[
    ['Type', 'Genres', 'Normal_Rating_Count_binned', 'Released', 'Category', 'Normal_Rating_binned', 'Normal_Reviews_binned', 'Normal_Size_binned',
     'Normal_Installs_binned', 'Normal_Price_binned', 'Content Rating', 'Normal_Revenue_binned']]

## Extract Frequent Pattern

In [210]:
def apply_apriori(transactions, min_support=0.5, min_confidence=0.5):
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
    return frequent_itemsets

In [213]:
transactions = df_relevant.apply(lambda row: [f"{col}: {val}" for col, val in row.items() if pd.notna(val)], axis=1).tolist()
frequent_itemsets = apply_apriori(transactions)
frequent_itemsets.sort_values(by='support', ascending=False)

Unnamed: 0,support,itemsets
2,0.94256,(Normal_Revenue_binned: Zero)
1,0.9424,(Normal_Price_binned: Zero)
7,0.9424,"(Normal_Revenue_binned: Zero, Normal_Price_bin..."
3,0.93056,(Type: 0)
8,0.93056,"(Type: 0, Normal_Price_binned: Zero)"
9,0.93056,"(Normal_Revenue_binned: Zero, Type: 0)"
13,0.93056,"(Normal_Revenue_binned: Zero, Type: 0, Normal_..."
0,0.8424,(Content Rating: Everyone)
5,0.7952,"(Normal_Revenue_binned: Zero, Content Rating: ..."
4,0.79504,"(Content Rating: Everyone, Normal_Price_binned..."


## Find maximal itemsets

In [217]:
def is_maximal(frequent_itemsets):
    maximal_itemsets = []
    for i, itemset in frequent_itemsets.iterrows():
        is_subset = False
        for j, other_itemset in frequent_itemsets.iterrows():
            if itemset['itemsets'] != other_itemset['itemsets'] and itemset['itemsets'].issubset(other_itemset['itemsets']):
                is_subset = True
                break
        if not is_subset:
            maximal_itemsets.append(itemset)
    return pd.DataFrame(maximal_itemsets)

maximal_itemsets = is_maximal(frequent_itemsets)
maximal_itemsets

Unnamed: 0,support,itemsets
14,0.78352,"(Normal_Revenue_binned: Zero, Content Rating: ..."


## extract association rules based on maximal itemsets

In [219]:
rules = association_rules(maximal_itemsets, metric="confidence", min_threshold=0.7, support_only=True)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"(Normal_Revenue_binned: Zero, Content Rating: ...",(Normal_Price_binned: Zero),,,0.78352,,,,,
1,"(Normal_Revenue_binned: Zero, Content Rating: ...",(Type: 0),,,0.78352,,,,,
2,"(Normal_Revenue_binned: Zero, Type: 0, Normal_...",(Content Rating: Everyone),,,0.78352,,,,,
3,"(Content Rating: Everyone, Type: 0, Normal_Pri...",(Normal_Revenue_binned: Zero),,,0.78352,,,,,
4,"(Normal_Revenue_binned: Zero, Content Rating: ...","(Type: 0, Normal_Price_binned: Zero)",,,0.78352,,,,,
5,"(Normal_Revenue_binned: Zero, Type: 0)","(Content Rating: Everyone, Normal_Price_binned...",,,0.78352,,,,,
6,"(Normal_Revenue_binned: Zero, Normal_Price_bin...","(Content Rating: Everyone, Type: 0)",,,0.78352,,,,,
7,"(Content Rating: Everyone, Type: 0)","(Normal_Revenue_binned: Zero, Normal_Price_bin...",,,0.78352,,,,,
8,"(Content Rating: Everyone, Normal_Price_binned...","(Normal_Revenue_binned: Zero, Type: 0)",,,0.78352,,,,,
9,"(Type: 0, Normal_Price_binned: Zero)","(Normal_Revenue_binned: Zero, Content Rating: ...",,,0.78352,,,,,
