In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import KNNImputer
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.model_selection import train_test_split
from sklearn.metrics import silhouette_score, confusion_matrix
from scipy.cluster.hierarchy import dendrogram
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [11]:
def association_rules_pipeline(customers, basket, join_column='customer_id', list_column='list_of_goods',
                              min_support=0.2, metric='lift', min_threshold=1):
    """
    Perform the association rules pipeline on customer-basket data.

    Args:
        customers (pandas.DataFrame): The customer data.
        basket (pandas.DataFrame): The basket data.
        join_column (str, optional): The column name used for joining the customer and basket data. Defaults to 'customer_id'.
        list_column (str, optional): The column name containing the list of goods in the basket. Defaults to 'list_of_goods'.
        min_support (float, optional): The minimum support threshold for generating frequent itemsets. Defaults to 0.2.
        metric (str, optional): The metric used for evaluating association rules. Defaults to 'lift'.
        min_threshold (float, optional): The minimum threshold for the metric to consider a rule. Defaults to 1.

    Returns:
        pandas.DataFrame: The generated association rules.
    """
    # Merge customer and basket data on the specified join column
    data_merged = pd.merge(basket, customers, on=join_column, how='inner')

    # Extract transactions from the merged data
    transactions = data_merged[list_column].apply(lambda x: [item.strip() for item in x[1:-1].split(',')])

    # Convert transactions to transaction matrix using TransactionEncoder
    te = TransactionEncoder()
    te_fit = te.fit(transactions).transform(transactions)
    transactions_items = pd.DataFrame(te_fit, columns=te.columns_)

    # Generate frequent itemsets using Apriori algorithm
    frequent_itemsets = apriori(transactions_items, min_support=min_support, use_colnames=True)

    # Generate association rules from frequent itemsets
    rules = association_rules(frequent_itemsets, metric=metric, min_threshold=min_threshold)

    return rules

In [8]:
customer_basket = pd.read_csv('Datasets\customer_basket.csv')

Big_Families = pd.read_csv('Clusters\Big Families.csv', index_col=0)
Big_Spenders = pd.read_csv('Clusters\Big Spenders.csv', index_col=0)
Fishy_Pals = pd.read_csv('Clusters\Fishy Pals.csv', index_col=0)
Gamer_Community = pd.read_csv('Clusters\Gamer Community.csv', index_col=0)
Pet_Lovers = pd.read_csv('Clusters\Pet Lovers.csv', index_col=0)
Savings_Squad = pd.read_csv('Clusters\Savings Squad.csv', index_col=0)
Veggies_Society = pd.read_csv('Clusters\Veggies Society.csv', index_col=0)
Drunkards = pd.read_csv('Clusters\Drunkards.csv', index_col=0)

  customer_basket = pd.read_csv('Datasets\customer_basket.csv')
  Big_Families = pd.read_csv('Clusters\Big Families.csv', index_col=0)
  Big_Spenders = pd.read_csv('Clusters\Big Spenders.csv', index_col=0)
  Fishy_Pals = pd.read_csv('Clusters\Fishy Pals.csv', index_col=0)
  Gamer_Community = pd.read_csv('Clusters\Gamer Community.csv', index_col=0)
  Pet_Lovers = pd.read_csv('Clusters\Pet Lovers.csv', index_col=0)
  Savings_Squad = pd.read_csv('Clusters\Savings Squad.csv', index_col=0)
  Veggies_Society = pd.read_csv('Clusters\Veggies Society.csv', index_col=0)
  Drunkards = pd.read_csv('Clusters\Drunkards.csv', index_col=0)


In [22]:
Pet_Lovers

Unnamed: 0,customer_id,customer_name,customer_gender,kids_home,teens_home,number_complaints,distinct_stores_visited,spend_groceries,spend_electronics,typical_hour,...,percentage_spend_electronics,percentage_spend_vegetables,percentage_spend_nonalcohol_drinks,percentage_spend_alcohol_drinks,percentage_spend_meat,percentage_spend_fish,percentage_spend_hygiene,percentage_spend_videogames,percentage_spend_petfood,cluster
7,8,Arthur Dematteo,male,0,0,1,1,7902.0,201.0,13.00000,...,0.008473,0.007503,0.018421,0.000422,0.013784,0.000042,0.001981,0.0,0.616280,Pet Lovers
27,33,Stan Vian,male,0,0,1,1,9889.0,178.0,7.00000,...,0.006730,0.001361,0.019775,0.000832,0.004991,0.007600,0.002836,0.0,0.581956,Pet Lovers
43,50,Doloris Atwell,female,0,0,0,1,10575.0,136.0,14.00000,...,0.006903,0.003857,0.020150,0.002792,0.020962,0.013450,0.005380,0.0,0.389757,Pet Lovers
46,53,Salvatore Blackmon,male,0,0,0,1,11573.0,214.0,17.00000,...,0.008424,0.006928,0.016848,0.001535,0.001496,0.010786,0.002716,0.0,0.495689,Pet Lovers
51,58,Ann Nall,female,0,0,0,1,12934.0,56.0,11.00000,...,0.002080,0.008470,0.017721,0.000557,0.002192,0.010031,0.001746,0.0,0.476688,Pet Lovers
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43208,49961,Dorothy Laughman,female,0,0,0,1,9408.0,214.0,13.52058,...,0.010191,0.005476,0.029335,0.000190,0.014096,0.010477,0.002714,0.0,0.479499,Pet Lovers
43224,49980,Joesph Elizalde,male,0,0,1,1,1144.0,420.0,15.00000,...,0.041745,0.008250,0.049399,0.008647,0.020674,0.030116,0.008548,0.0,0.718915,Pet Lovers
43228,49985,Cesar Harmon,male,0,0,0,1,18463.0,93.0,12.00000,...,0.002670,0.002469,0.013524,0.000029,0.002986,0.004594,0.002584,0.0,0.441024,Pet Lovers
43229,49987,Anna Wojcik,female,0,0,1,1,10105.0,49.0,13.52058,...,0.002231,0.009242,0.023173,0.002777,0.009561,0.009242,0.003187,0.0,0.480537,Pet Lovers


In [17]:
clusters = {'Big_Families': Big_Families, 'Big_Spenders': Big_Spenders, 'Drunkards': Drunkards, 'Fishy_Pals': Fishy_Pals, 'Gamer_Community': Gamer_Community, 'Savings_Squad': Savings_Squad, 'Veggies_Society': Veggies_Society}
for cluster_name, cluster in clusters.items():
    print(cluster_name.upper())
    display(association_rules_pipeline(cluster, customer_basket).sort_values('lift', ascending = False)[:50])

BIG_FAMILIES


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
603,"('cooking oil', 'candy bars')","('napkins', 'babies food')",0.22009,0.191176,0.053091,0.241223,1.261782,0.011015,1.065957,0.266018
598,"('napkins', 'babies food')","('cooking oil', 'candy bars')",0.191176,0.22009,0.053091,0.277705,1.261782,0.011015,1.079767,0.256509
589,"('cooking oil', 'candy bars')","('muffins', 'babies food')",0.22009,0.220214,0.060631,0.275481,1.250969,0.012164,1.076281,0.257234
584,"('muffins', 'babies food')","('cooking oil', 'candy bars')",0.220214,0.22009,0.060631,0.275325,1.250969,0.012164,1.076221,0.257275
604,('napkins'),"('candy bars', 'cooking oil', 'babies food')",0.228938,0.188871,0.053091,0.2319,1.227822,0.009851,1.05602,0.240642
597,"('candy bars', 'cooking oil', 'babies food')",('napkins'),0.188871,0.228938,0.053091,0.281095,1.227822,0.009851,1.072551,0.228755
599,"('napkins', 'cooking oil')","('candy bars', 'babies food')",0.144068,0.301408,0.053091,0.368512,1.222634,0.009668,1.106263,0.212743
602,"('candy bars', 'babies food')","('napkins', 'cooking oil')",0.301408,0.144068,0.053091,0.176142,1.222634,0.009668,1.038932,0.260659
612,"('muffins', 'gums')","('cooking oil', 'babies food')",0.08755,0.475324,0.050785,0.580071,1.22037,0.009171,1.24944,0.197903
617,"('cooking oil', 'babies food')","('muffins', 'gums')",0.475324,0.08755,0.050785,0.106843,1.22037,0.009171,1.021601,0.344167


BIG_SPENDERS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
521,"('cake', 'cooking oil')","('napkins', 'oil')",0.267542,0.19302,0.063453,0.237169,1.228732,0.011812,1.057876,0.254148
516,"('napkins', 'oil')","('cake', 'cooking oil')",0.19302,0.267542,0.063453,0.328737,1.228732,0.011812,1.091164,0.230678
530,"('soup', 'oil')","('cake', 'cooking oil')",0.170276,0.267542,0.055952,0.328597,1.228208,0.010396,1.090937,0.223936
535,"('cake', 'cooking oil')","('soup', 'oil')",0.267542,0.170276,0.055952,0.209134,1.228208,0.010396,1.049134,0.253674
577,"('cooking oil', 'candy bars')","('napkins', 'oil')",0.211227,0.19302,0.050024,0.236827,1.226958,0.009253,1.057402,0.234512
572,"('napkins', 'oil')","('cooking oil', 'candy bars')",0.19302,0.211227,0.050024,0.259166,1.226958,0.009253,1.06471,0.229221
532,"('soup', 'cooking oil')","('cake', 'oil')",0.123579,0.369526,0.055952,0.452766,1.225261,0.010287,1.15211,0.209771
533,"('cake', 'oil')","('soup', 'cooking oil')",0.369526,0.123579,0.055952,0.151416,1.225261,0.010287,1.032805,0.291602
559,"('muffins', 'cooking oil')","('candy bars', 'oil')",0.154488,0.290769,0.054924,0.355521,1.22269,0.010003,1.100471,0.215409
562,"('candy bars', 'oil')","('muffins', 'cooking oil')",0.290769,0.154488,0.054924,0.188891,1.22269,0.010003,1.042415,0.256801


DRUNKARDS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
213,"('white wine', 'cider')","('beer', 'dessert wine')",0.371758,0.113545,0.051873,0.139535,1.228899,0.009662,1.030205,0.296483
212,"('beer', 'dessert wine')","('white wine', 'cider')",0.113545,0.371758,0.051873,0.456853,1.228899,0.009662,1.15667,0.210121
130,"('white wine', 'cider')",('beer'),0.371758,0.29683,0.135447,0.364341,1.22744,0.025098,1.106207,0.294944
131,('beer'),"('white wine', 'cider')",0.29683,0.371758,0.135447,0.456311,1.22744,0.025098,1.155517,0.263516
59,('yogurt cake'),('cider'),0.094524,0.482997,0.055908,0.591463,1.224569,0.010253,1.2655,0.20253
58,('cider'),('yogurt cake'),0.482997,0.094524,0.055908,0.115752,1.224569,0.010253,1.024006,0.35471
158,"('cider', 'champagne')",('dessert wine'),0.125072,0.342939,0.051873,0.414747,1.209387,0.008981,1.122694,0.197885
163,('dessert wine'),"('cider', 'champagne')",0.342939,0.125072,0.051873,0.151261,1.209387,0.008981,1.030856,0.263499
40,('champagne'),('french wine'),0.243804,0.17464,0.051297,0.210402,1.204777,0.008719,1.045292,0.22477
41,('french wine'),('champagne'),0.17464,0.243804,0.051297,0.293729,1.204777,0.008719,1.070689,0.205935


FISHY_PALS


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
476,"('shrimp', 'salmon')","('fresh tuna', 'oil')",0.342134,0.114411,0.057206,0.167203,1.461415,0.018062,1.06339,0.479933
477,"('fresh tuna', 'oil')","('shrimp', 'salmon')",0.114411,0.342134,0.057206,0.5,1.461415,0.018062,1.315732,0.356522
482,('oil'),"('shrimp', 'fresh tuna', 'salmon')",0.193619,0.206821,0.057206,0.295455,1.428554,0.017161,1.125803,0.372022
471,"('shrimp', 'fresh tuna', 'salmon')",('oil'),0.206821,0.193619,0.057206,0.276596,1.428554,0.017161,1.114703,0.378214
475,"('shrimp', 'oil')","('fresh tuna', 'salmon')",0.157316,0.255226,0.057206,0.363636,1.424765,0.017055,1.17036,0.353786
478,"('fresh tuna', 'salmon')","('shrimp', 'oil')",0.255226,0.157316,0.057206,0.224138,1.424765,0.017055,1.086126,0.400295
483,('salmon'),"('shrimp', 'fresh tuna', 'oil')",0.426843,0.09571,0.057206,0.134021,1.400284,0.016353,1.04424,0.498745
470,"('shrimp', 'fresh tuna', 'oil')",('salmon'),0.09571,0.426843,0.057206,0.597701,1.400284,0.016353,1.424705,0.316115
223,"('shrimp', 'oil')",('canned_tuna'),0.157316,0.320132,0.070407,0.447552,1.398025,0.020045,1.230647,0.337855
226,('canned_tuna'),"('shrimp', 'oil')",0.320132,0.157316,0.070407,0.219931,1.398025,0.020045,1.080269,0.418765


GAMER_COMMUNITY


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
565,"('champagne', 'airpods')","('samsung galaxy 10', 'bluetooth headphones')",0.271345,0.210128,0.070326,0.259175,1.233414,0.013309,1.066206,0.259714
560,"('samsung galaxy 10', 'bluetooth headphones')","('champagne', 'airpods')",0.210128,0.271345,0.070326,0.33468,1.233414,0.013309,1.095196,0.239586
588,"('samsung galaxy 10', 'bluetooth headphones')","('champagne', 'laptop')",0.210128,0.203721,0.052339,0.249082,1.222661,0.009532,1.060407,0.230559
593,"('champagne', 'laptop')","('samsung galaxy 10', 'bluetooth headphones')",0.203721,0.210128,0.052339,0.256915,1.222661,0.009532,1.062964,0.228704
589,"('samsung galaxy 10', 'champagne')","('bluetooth headphones', 'laptop')",0.395553,0.108384,0.052339,0.132319,1.220836,0.009468,1.027585,0.299264
592,"('bluetooth headphones', 'laptop')","('samsung galaxy 10', 'champagne')",0.108384,0.395553,0.052339,0.482906,1.220836,0.009468,1.16893,0.202878
607,"('champagne', 'spaghetti')","('samsung galaxy 10', 'bluetooth headphones')",0.236992,0.210128,0.060522,0.255375,1.215328,0.010723,1.060764,0.232208
602,"('samsung galaxy 10', 'bluetooth headphones')","('champagne', 'spaghetti')",0.210128,0.236992,0.060522,0.288024,1.215328,0.010723,1.071675,0.224311
563,"('bluetooth headphones', 'champagne')","('samsung galaxy 10', 'airpods')",0.304925,0.190906,0.070326,0.230633,1.208095,0.012114,1.051635,0.247816
562,"('samsung galaxy 10', 'airpods')","('bluetooth headphones', 'champagne')",0.190906,0.304925,0.070326,0.368378,1.208095,0.012114,1.100461,0.212893


SAVINGS_SQUAD


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
212,"('cider', 'champagne')",('white wine'),0.05917,0.276375,0.050493,0.853351,3.087654,0.03414,4.934401,0.718652
213,('white wine'),"('cider', 'champagne')",0.276375,0.05917,0.050493,0.182696,3.087654,0.03414,1.151139,0.934365
121,"('beer', 'white wine')",('cider'),0.098355,0.203738,0.06184,0.628743,3.086036,0.041801,2.144771,0.749696
124,('cider'),"('beer', 'white wine')",0.203738,0.098355,0.06184,0.303527,3.086036,0.041801,1.294587,0.848916
125,('white wine'),"('beer', 'cider')",0.276375,0.072677,0.06184,0.223753,3.078755,0.041754,1.194625,0.933071
120,"('beer', 'cider')",('white wine'),0.072677,0.276375,0.06184,0.850891,3.078755,0.041754,4.853005,0.72811
219,('white wine'),"('cider', 'dessert wine')",0.276375,0.087479,0.07409,0.268078,3.064486,0.049913,1.246746,0.930981
218,"('cider', 'dessert wine')",('white wine'),0.087479,0.276375,0.07409,0.846948,3.064486,0.049913,4.727965,0.738263
220,('cider'),"('white wine', 'dessert wine')",0.203738,0.119007,0.07409,0.363654,3.055724,0.049844,1.384455,0.844879
217,"('white wine', 'dessert wine')",('cider'),0.119007,0.203738,0.07409,0.622567,3.055724,0.049844,2.109677,0.763622


VEGGIES_SOCIETY


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
595,"('tomatoes', 'frozen vegetables')","('mashed potato', 'asparagus')",0.183563,0.247347,0.055872,0.304377,1.230564,0.010469,1.081983,0.229491
594,"('mashed potato', 'asparagus')","('tomatoes', 'frozen vegetables')",0.247347,0.183563,0.055872,0.225886,1.230564,0.010469,1.054673,0.248939
592,"('mashed potato', 'tomatoes')","('frozen vegetables', 'asparagus')",0.286906,0.160994,0.055872,0.194741,1.209622,0.009682,1.041909,0.243019
597,"('frozen vegetables', 'asparagus')","('mashed potato', 'tomatoes')",0.160994,0.286906,0.055872,0.347048,1.209622,0.009682,1.092107,0.206548
527,"('carrots', 'asparagus')","('tomatoes', 'green beans')",0.393254,0.152714,0.07237,0.18403,1.205064,0.012315,1.038379,0.28046
522,"('tomatoes', 'green beans')","('carrots', 'asparagus')",0.152714,0.393254,0.07237,0.473896,1.205064,0.012315,1.153281,0.200839
608,"('mashed potato', 'asparagus')","('tomatoes', 'melons')",0.247347,0.203987,0.060595,0.244979,1.200957,0.010139,1.054293,0.222321
609,"('tomatoes', 'melons')","('mashed potato', 'asparagus')",0.203987,0.247347,0.060595,0.297054,1.200957,0.010139,1.070711,0.21021
569,"('carrots', 'asparagus')","('tomatoes', 'shallot')",0.393254,0.113462,0.053235,0.135371,1.193096,0.008616,1.025339,0.266741
564,"('tomatoes', 'shallot')","('carrots', 'asparagus')",0.113462,0.393254,0.053235,0.469189,1.193096,0.008616,1.143056,0.182558
