# Libraries

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import plotly.express as px
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
pd.options.display.float_format = '{:.0f}'.format

# Preprocessing

In [3]:
df= pd.read_parquet('May.parquet')

In [None]:
df.head()

In [4]:
def categorization(data):
    data['merchant_category']= np.where(data['mcc'].isin(['5942', '5943']), 'Books and stationery',
                               np.where(data['mcc'].isin(['7832','7991','7922']), 'Entertainment',
                               np.where(data['mcc'].isin(['5137', '5139', '5611', '5621', '5631', '5641', '5651', '5655', '5661', '5681', '5691', '5697', '5698', '5699', '7296']), 'Clothing stores',
                               np.where(data['mcc'].isin(['7941', '7230', '7298']), 'Sport and Beauty',
                               np.where(data['mcc'].isin(['5812','5814', '5813']), 'Cafe/Restaurant',
                               np.where(data['mcc'].isin(['5411']), 'Grocery stores',
                               np.where(data['mcc'].isin(['5541', '5542', '5983']), 'Petrol',
                               np.where(data['mcc'].isin(['5193', '5992']), 'Flower shops',
                               np.where(data['mcc'].isin(['5977']), 'Perfumery',
                               np.where(data['mcc'].isin(['5021', '5712']), 'Furniture stores',
                               np.where(data['mcc'].isin(['4582', '7011', '4511', '3246', '3543', '3614', '3047', '3182', '3504', '3301', '3640', '3509', '3533', '3826', '3025', '3519', '3685', '3590', '3512', '3612', '3051', '3008', '3068', '3665', '3634', '3649', '3583', '3026', '3256', '3245', '3790']), 'Air ticket and hotel',
                               np.where(data['mcc'].isin(['4121']), 'Taxi',
                               np.where(data['mcc'].isin(['5732']), 'Electronics stores','Other')))))))))))))
    return data

In [5]:
df= categorization(df)

In [6]:
del df['trn_id']

In [7]:
df = df[~df.merchant_category.isin(['Other'])]

In [8]:
df=df[['customer_no','merchant_category']]

In [9]:
# Pivot the dataframe
pivot_df = pd.pivot_table(df, index='customer_no', columns='merchant_category', aggfunc=lambda x: True, fill_value=False)

# Rename columns
pivot_df.columns = ['category_' + str(col) for col in pivot_df.columns]

In [10]:
pivot_df.reset_index().iloc[0:,1:]

Unnamed: 0,category_Air ticket and hotel,category_Books and stationery,category_Cafe/Restaurant,category_Clothing stores,category_Electronics stores,category_Entertainment,category_Flower shops,category_Furniture stores,category_Grocery stores,category_Perfumery,category_Petrol,category_Sport and Beauty,category_Taxi
0,True,True,True,True,False,True,False,False,True,False,True,True,True
1,False,False,True,True,False,False,False,False,True,False,True,False,True
2,False,False,False,False,False,False,False,False,True,False,False,False,False
3,False,False,False,False,False,False,False,False,True,False,False,False,False
4,False,False,False,False,False,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
337798,False,False,False,False,True,False,False,False,True,False,False,False,False
337799,False,False,False,False,False,False,False,False,True,False,False,False,False
337800,False,False,False,False,False,False,False,False,True,False,False,False,False
337801,False,False,False,False,False,False,False,False,True,False,False,False,False


In [11]:
# Apply the Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(pivot_df.reset_index().iloc[0:,1:], min_support=0.01, use_colnames=True,max_len=5)
frequent_itemsets_size_4 = frequent_itemsets[frequent_itemsets['itemsets'].apply(lambda x: len(x) == 4)]

# Generate association rules
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.7)

In [12]:
frequent_itemsets_size_4_df = pd.DataFrame(frequent_itemsets_size_4['itemsets'].tolist(),
                                           columns=['Item 1', 'Item 2', 'Item 3', 'Item 4'])

In [13]:
frequent_itemsets_size_4

Unnamed: 0,support,itemsets
83,0,"(category_Grocery stores, category_Clothing st..."
84,0,"(category_Petrol, category_Grocery stores, cat..."
85,0,"(category_Grocery stores, category_Clothing st..."
86,0,"(category_Grocery stores, category_Clothing st..."
87,0,"(category_Grocery stores, category_Clothing st..."
88,0,"(category_Grocery stores, category_Clothing st..."
89,0,"(category_Clothing stores, category_Grocery st..."
90,0,"(category_Taxi, category_Grocery stores, categ..."
91,0,"(category_Petrol, category_Grocery stores, cat..."
92,0,"(category_Taxi, category_Grocery stores, categ..."


In [14]:
frequent_itemsets_size_4_df

Unnamed: 0,Item 1,Item 2,Item 3,Item 4
0,category_Grocery stores,category_Clothing stores,category_Cafe/Restaurant,category_Air ticket and hotel
1,category_Petrol,category_Grocery stores,category_Cafe/Restaurant,category_Air ticket and hotel
2,category_Grocery stores,category_Clothing stores,category_Cafe/Restaurant,category_Books and stationery
3,category_Grocery stores,category_Clothing stores,category_Cafe/Restaurant,category_Entertainment
4,category_Grocery stores,category_Clothing stores,category_Cafe/Restaurant,category_Perfumery
5,category_Grocery stores,category_Clothing stores,category_Petrol,category_Cafe/Restaurant
6,category_Clothing stores,category_Grocery stores,category_Sport and Beauty,category_Cafe/Restaurant
7,category_Taxi,category_Grocery stores,category_Clothing stores,category_Cafe/Restaurant
8,category_Petrol,category_Grocery stores,category_Cafe/Restaurant,category_Perfumery
9,category_Taxi,category_Grocery stores,category_Cafe/Restaurant,category_Perfumery


In [15]:
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(category_Air ticket and hotel),(category_Cafe/Restaurant),0,0,0,1,3,0,3
1,(category_Air ticket and hotel),(category_Grocery stores),0,1,0,1,1,-0,1
2,(category_Books and stationery),(category_Grocery stores),0,1,0,1,1,0,2
3,(category_Entertainment),(category_Cafe/Restaurant),0,0,0,1,3,0,4
4,(category_Cafe/Restaurant),(category_Grocery stores),0,1,0,1,1,0,1
...,...,...,...,...,...,...,...,...,...
80,"(category_Taxi, category_Perfumery)","(category_Grocery stores, category_Cafe/Restau...",0,0,0,1,3,0,5
81,"(category_Taxi, category_Grocery stores, categ...",(category_Cafe/Restaurant),0,0,0,1,3,0,3
82,"(category_Taxi, category_Petrol, category_Cafe...",(category_Grocery stores),0,1,0,1,1,0,5
83,"(category_Taxi, category_Petrol)","(category_Grocery stores, category_Cafe/Restau...",0,0,0,1,3,0,3
