In [98]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules

In [99]:
# Load the survey export and discard the two columns the analysis never reads.
unused_cols = ["Timestamp", "Name (Optional)"]
raw_df = pd.read_csv("../res/responses.csv").drop(columns=unused_cols)

In [100]:
# Normalise the survey's column headers.
#
# Step 1: for any header containing "[", keep only the text after the first
# "[" and drop the header's final character (presumably the closing "]" of a
# grid question such as "<question> [<item>]" -- confirm against the export).
# Headers without a "[" pass through unchanged.
bracket_stripped = [
    header[header.index("[") + 1:-1] if "[" in header else header
    for header in raw_df.columns
]

# Step 2: swap the remaining long headers for short aliases.
to_replace = {
    "Would you agree that you frequently shop online?":"Frequency",
    "Where do you shop more often?":"Where",
    "BACK TO SCHOOL - Please select all items that you would typically buy during this season.":"Back To School",
    "SUMMER - Please select all items that you would typically buy during this season.":"Summer",
    "Home Entertainment (TV, Speakers, Online Streaming Subscription, Etc.)":"Home Entertainment"
}
raw_df.columns = [to_replace.get(header, header) for header in bracket_stripped]

raw_df.columns

Index(['Age', 'Gender', 'Marital Status', 'Employment Status',
       'Current Region ', 'Frequency', 'Where', 'Apparel/Clothing',
       'Health & Personal Care', 'Home Entertainment', 'Babies & Kids',
       'Food/Groceries', 'Toys, Games, Collectibles', 'Bags', 'Stationery',
       'Books', 'Sports & Travel', 'Wearable Accessories',
       'Automobile Accessories', 'Cameras/Camera Accessories',
       'Laptops/Computers', 'Tech Accessories', 'Pet Care/Accessories',
       'Power Tools', 'Back To School', 'Summer'],
      dtype='object')

In [101]:
# Create one basket per purchase occasion.
#
# Every item column of raw_df holds the respondent's answer text for that
# item. A basket marks an item with 1 when that text contains the occasion
# keyword and with 0 otherwise; a missing answer counts as 0.

# Respondent-profile columns; they are carried through the baskets unchanged.
user_data_cols = ['Age', 'Gender', 'Marital Status', 'Employment Status', 'Current Region ', 'Frequency', 'Where']

# Keyword identifying each occasion, in the same order as seasonal_baskets:
# holidays, birthdays, periodic shopping, when needed.
keywords = ["Christmas", "Birthdays", "Monthly", "Needed"]


def _occasion_basket(responses, keyword):
    """Return a copy of ``responses`` whose item columns are 0/1 keyword flags.

    Parameters
    ----------
    responses : pd.DataFrame
        Survey frame holding the profile columns, the item columns and the
        "Summer" / "Back To School" columns (the latter two are dropped).
    keyword : str
        Literal text (not a regular expression) searched for in each answer.

    Returns
    -------
    pd.DataFrame
        New frame; ``responses`` itself is not modified. Profile columns are
        untouched, every other column is an integer 0/1 flag.
    """
    basket = responses.drop(["Summer", "Back To School"], axis=1)
    for col in basket.columns:
        if col in user_data_cols:
            continue
        # Cast to the string dtype first: a column in which nobody answered is
        # read by pandas as float NaN, and the .str accessor rejects it.
        matched = basket[col].astype("string").str.contains(keyword, regex=False, na=False)
        basket[col] = matched.astype(int)
    return basket


holidays_basket, bdays_basket, periods_basket, wneeded_basket = (
    _occasion_basket(raw_df, keyword) for keyword in keywords
)
seasonal_baskets = [holidays_basket, bdays_basket, periods_basket, wneeded_basket]

wneeded_basket.head()

Unnamed: 0,Age,Gender,Marital Status,Employment Status,Current Region,Frequency,Where,Apparel/Clothing,Health & Personal Care,Home Entertainment,...,Stationery,Books,Sports & Travel,Wearable Accessories,Automobile Accessories,Cameras/Camera Accessories,Laptops/Computers,Tech Accessories,Pet Care/Accessories,Power Tools
0,21,Male,Single,Employed,Region III - Central Luzon,2,Online Stores,1,1,1,...,0,0,0,1,0,0,0,0,0,0
1,21,Male,Single,Student,CAR - Cordillera Administrative Region,1,Online Stores,0,1,1,...,1,1,1,1,1,1,1,1,1,1
2,20,Male,Single,Student,Region III - Central Luzon,2,Online Stores,1,0,0,...,1,1,1,1,1,1,1,0,1,1
3,21,Female,Single,Student,Region IV-A - CALABARZON,4,Online Stores,0,0,1,...,1,1,1,0,0,0,1,1,1,0
4,22,Female,Single,Student,Region IV-A - CALABARZON,5,Online Stores,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [102]:
# Answer options the survey offers for the two seasonal questions.
# These labels double as the item columns of the seasonal baskets, and they
# are searched for verbatim inside the response text, so keep every label
# exactly as written here (spelling and punctuation included).
summer_items = [
    "Clothing",
    "Umbrellas",
    "Sunscreen/Heat-Protectant Skin Products",
    "Swimsuit",
    "Swim Cap",
    "Swim Goggles",
    "Swim Snorkles",
    "Towels",
    "Bags",
    "Water Bottles",
    "Ice Cooler",
    "Inflatable Pools",
    "Pool Toys",
    "Swimming Safety Equipment (Swimming Rings/Life Jacket/Floaties/Etc.)",
    "Electric Fan",
]

bts_items = [
    "Pen/Marker/Pencil/Scriber",
    "Paper/Envelopes/Post-it Notes/",
    "Notebooks",
    "Academic Books",
    "Art Materials",
    "Laptop",
    "Phone/Tablet",
    "Tech Products/Tech Accessories",
    "School Uniforms/Clothing",
    "Umbrella",
    "Raincoat",
    "Bags/School Bags",
    "Make up/Beauty Products",
]

In [103]:
# Create the Summer and Back To School baskets.
#
# Unlike the occasion baskets, each of these seasons is a single survey column
# whose text lists everything the respondent selected. Every known answer
# option becomes its own 0/1 column: 1 when the option's label occurs in the
# respondent's answer text, 0 otherwise (a missing answer counts as 0).
#
# NOTE(review): matching is by plain substring, so an option whose label is
# contained in another option's label would be flagged by both -- verify
# against the full list of survey options.


def _season_basket(responses, answers_col, items):
    """Build a basket of profile columns plus one 0/1 flag column per item.

    Parameters
    ----------
    responses : pd.DataFrame
        Survey frame containing ``user_data_cols`` and ``answers_col``.
    answers_col : str
        Name of the column holding the respondent's selected items as text.
    items : list of str
        Item labels to look for; each becomes a column, in this order.

    Returns
    -------
    pd.DataFrame
        New frame with the profile columns followed by the item flags.
    """
    # Cast once to the string dtype so an entirely empty answers column
    # (float NaN) is still accepted by the .str accessor.
    answers = responses[answers_col].astype("string")
    flags = pd.DataFrame(
        {
            item: answers.str.contains(item, regex=False, na=False).astype(int)
            for item in items
        },
        index=responses.index,
    )
    return pd.concat([responses[user_data_cols], flags], axis=1)


summer_basket = _season_basket(raw_df, "Summer", summer_items)
bts_basket = _season_basket(raw_df, "Back To School", bts_items)

# Shorten the one unwieldy summer label now that matching is done.
to_replace_summer = {
    "Swimming Safety Equipment (Swimming Rings/Life Jacket/Floaties/Etc.)":"Swimming Safety Equipment",
}
summer_basket = summer_basket.rename(columns=to_replace_summer)

summer_basket.head()

Unnamed: 0,Age,Gender,Marital Status,Employment Status,Current Region,Frequency,Where,Clothing,Umbrellas,Sunscreen/Heat-Protectant Skin Products,...,Swim Goggles,Swim Snorkles,Towels,Bags,Water Bottles,Ice Cooler,Inflatable Pools,Pool Toys,Swimming Safety Equipment,Electric Fan
0,21,Male,Single,Employed,Region III - Central Luzon,2,Online Stores,1,1,0,...,0,0,1,0,1,0,0,0,0,0
1,21,Male,Single,Student,CAR - Cordillera Administrative Region,1,Online Stores,1,0,0,...,0,0,0,0,1,0,0,0,0,0
2,20,Male,Single,Student,Region III - Central Luzon,2,Online Stores,1,1,1,...,1,0,0,0,1,0,0,0,0,0
3,21,Female,Single,Student,Region IV-A - CALABARZON,4,Online Stores,1,0,1,...,0,0,0,0,0,0,0,0,0,0
4,22,Female,Single,Student,Region IV-A - CALABARZON,5,Online Stores,1,0,1,...,0,0,0,0,0,0,0,0,0,0


List of baskets created:

- holidays_basket (xmas, new year)

- bdays_basket (self/others)

- periods_basket (monthly/weekly/shopping sales)

- wneeded_basket (when needed)

- summer_basket (summer)

- bts_basket (back to school)

In [175]:
# Registry of every basket with the minimum support used when mining it.
# One (name, basket, min_support) entry per basket; tune min_support here.
basket_specs = [
    ("holidays_basket", holidays_basket, 0.05),
    ("bdays_basket", bdays_basket, 0.1),
    ("periods_basket", periods_basket, 0.05),
    ("wneeded_basket", wneeded_basket, 0.5),
    ("summer_basket", summer_basket, 0.1),
    ("bts_basket", bts_basket, 0.2),
]

# Split the entries back into the three parallel lists used for the table.
basket_names, all_baskets, min_supports = (list(part) for part in zip(*basket_specs))

baskets = pd.DataFrame({"name":basket_names,"basket":all_baskets, "min_support":min_supports})
baskets

Unnamed: 0,name,basket,min_support
0,holidays_basket,Age Gender Marital Status Employment Stat...,0.05
1,bdays_basket,Age Gender Marital Status Employment Stat...,0.1
2,periods_basket,Age Gender Marital Status Employment Stat...,0.05
3,wneeded_basket,Age Gender Marital Status Employment Stat...,0.5
4,summer_basket,Age Gender Marital Status Employment Stat...,0.1
5,bts_basket,Age Gender Marital Status Employment Stat...,0.2


In [177]:
# Mine association rules for every basket registered in `baskets` and show,
# per basket, the ten rules with the highest support.

# Iterate the columns directly instead of feeding index labels to .iloc, so
# the loop does not depend on `baskets` having a default 0..n-1 index.
for basket_name, basket, min_support in zip(
    baskets["name"], baskets["basket"], baskets["min_support"]
):
    print(f'basket name: {basket_name}')

    # Hook for segment analysis: filter respondents on `basket` here (e.g.
    # keep only female respondents) while the profile columns are still
    # present. After that only the item columns are kept.
    # The 0/1 flags are cast to bool, the input type apriori expects;
    # integer frames make mlxtend emit a deprecation warning.
    basket_sorted = basket.drop(user_data_cols, axis=1).astype(bool)

    # Frequent item sets. No sorting is done here: the rules are ordered by
    # support when displayed below.
    freq_items = apriori(basket_sorted, min_support=min_support, use_colnames=True)

    print(f'length of frequent item sets: {len(freq_items)}')
    print(f'minimum support: {min_support}')

    # association_rules raises on an empty frame; report and move on so one
    # overly strict min_support does not abort the remaining baskets.
    if freq_items.empty:
        print('no frequent item sets at this minimum support; skipping rules')
        continue

    # Keep rules whose lift is at least 1.
    rules = association_rules(freq_items, metric="lift", min_threshold=1)
    display(rules.sort_values("support", ascending=False).head(10))

basket name: holidays_basket
length of frequent item sets: 35
minimum support: 0.05


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
15,(Apparel/Clothing),(Wearable Accessories),0.32967,0.208791,0.120879,0.366667,1.75614,0.052047,1.249277
14,(Wearable Accessories),(Apparel/Clothing),0.208791,0.32967,0.120879,0.578947,1.75614,0.052047,1.592033
12,(Apparel/Clothing),(Sports & Travel),0.32967,0.120879,0.087912,0.266667,2.206061,0.048062,1.198801
36,(Wearable Accessories),(Bags),0.208791,0.131868,0.087912,0.421053,3.192982,0.060379,1.4995
37,(Bags),(Wearable Accessories),0.131868,0.208791,0.087912,0.666667,3.192982,0.060379,2.373626
13,(Sports & Travel),(Apparel/Clothing),0.120879,0.32967,0.087912,0.727273,2.206061,0.048062,2.457875
8,(Apparel/Clothing),(Bags),0.32967,0.131868,0.087912,0.266667,2.022222,0.044439,1.183816
9,(Bags),(Apparel/Clothing),0.131868,0.32967,0.087912,0.666667,2.022222,0.044439,2.010989
11,(Apparel/Clothing),(Books),0.32967,0.087912,0.076923,0.233333,2.654167,0.047941,1.18968
42,"(Wearable Accessories, Apparel/Clothing)",(Bags),0.120879,0.131868,0.076923,0.636364,4.825758,0.060983,2.387363


basket name: bdays_basket
length of frequent item sets: 32
minimum support: 0.1


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
6,(Wearable Accessories),(Apparel/Clothing),0.340659,0.318681,0.21978,0.645161,2.024472,0.111218,1.92008
7,(Apparel/Clothing),(Wearable Accessories),0.318681,0.340659,0.21978,0.689655,2.024472,0.111218,2.124542
15,"(Toys, Games, Collectibles)",(Wearable Accessories),0.373626,0.340659,0.186813,0.5,1.467742,0.059534,1.318681
14,(Wearable Accessories),"(Toys, Games, Collectibles)",0.340659,0.373626,0.186813,0.548387,1.467742,0.059534,1.38697
19,(Tech Accessories),"(Toys, Games, Collectibles)",0.274725,0.373626,0.175824,0.64,1.712941,0.07318,1.739927
18,"(Toys, Games, Collectibles)",(Tech Accessories),0.373626,0.274725,0.175824,0.470588,1.712941,0.07318,1.369963
21,(Bags),(Wearable Accessories),0.241758,0.340659,0.153846,0.636364,1.868035,0.071489,1.813187
20,(Wearable Accessories),(Bags),0.340659,0.241758,0.153846,0.451613,1.868035,0.071489,1.382676
35,(Laptops/Computers),(Tech Accessories),0.208791,0.274725,0.131868,0.631579,2.298947,0.074508,1.968603
34,(Tech Accessories),(Laptops/Computers),0.274725,0.208791,0.131868,0.48,2.298947,0.074508,1.521555


basket name: periods_basket
length of frequent item sets: 34
minimum support: 0.05


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
8,(Food/Groceries),(Health & Personal Care),0.516484,0.318681,0.274725,0.531915,1.669112,0.110132,1.455544
9,(Health & Personal Care),(Food/Groceries),0.318681,0.516484,0.274725,0.862069,1.669112,0.110132,3.505495
2,(Food/Groceries),(Apparel/Clothing),0.516484,0.175824,0.131868,0.255319,1.452128,0.041058,1.10675
3,(Apparel/Clothing),(Food/Groceries),0.175824,0.516484,0.131868,0.75,1.452128,0.041058,1.934066
14,(Food/Groceries),(Home Entertainment),0.516484,0.153846,0.098901,0.191489,1.244681,0.019442,1.046559
15,(Home Entertainment),(Food/Groceries),0.153846,0.516484,0.098901,0.642857,1.244681,0.019442,1.353846
30,(Food/Groceries),(Pet Care/Accessories),0.516484,0.120879,0.087912,0.170213,1.408124,0.02548,1.059453
31,(Pet Care/Accessories),(Food/Groceries),0.120879,0.516484,0.087912,0.727273,1.408124,0.02548,1.772894
24,(Books),(Food/Groceries),0.120879,0.516484,0.076923,0.636364,1.232108,0.014491,1.32967
25,(Food/Groceries),(Books),0.516484,0.120879,0.076923,0.148936,1.232108,0.014491,1.032967


basket name: wneeded_basket
length of frequent item sets: 30
minimum support: 0.5


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
26,(Tech Accessories),(Laptops/Computers),0.681319,0.747253,0.582418,0.854839,1.143975,0.0733,1.741148
27,(Laptops/Computers),(Tech Accessories),0.747253,0.681319,0.582418,0.779412,1.143975,0.0733,1.444689
13,(Laptops/Computers),(Stationery),0.747253,0.692308,0.582418,0.779412,1.125817,0.065089,1.394872
12,(Stationery),(Laptops/Computers),0.692308,0.747253,0.582418,0.84127,1.125817,0.065089,1.592308
20,(Laptops/Computers),(Sports & Travel),0.747253,0.604396,0.549451,0.735294,1.216578,0.097814,1.494505
21,(Sports & Travel),(Laptops/Computers),0.604396,0.747253,0.549451,0.909091,1.216578,0.097814,2.78022
19,(Laptops/Computers),(Books),0.747253,0.626374,0.538462,0.720588,1.150413,0.070402,1.337189
18,(Books),(Laptops/Computers),0.626374,0.747253,0.538462,0.859649,1.150413,0.070402,1.800824
28,(Pet Care/Accessories),(Laptops/Computers),0.626374,0.747253,0.538462,0.859649,1.150413,0.070402,1.800824
29,(Laptops/Computers),(Pet Care/Accessories),0.747253,0.626374,0.538462,0.720588,1.150413,0.070402,1.337189


basket name: summer_basket
length of frequent item sets: 90
minimum support: 0.1


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Sunscreen/Heat-Protectant Skin Products),(Clothing),0.56044,0.736264,0.417582,0.745098,1.011999,0.004951,1.034658
1,(Clothing),(Sunscreen/Heat-Protectant Skin Products),0.736264,0.56044,0.417582,0.567164,1.011999,0.004951,1.015536
27,(Sunscreen/Heat-Protectant Skin Products),(Water Bottles),0.56044,0.538462,0.395604,0.705882,1.310924,0.093829,1.569231
26,(Water Bottles),(Sunscreen/Heat-Protectant Skin Products),0.538462,0.56044,0.395604,0.734694,1.310924,0.093829,1.656805
4,(Towels),(Clothing),0.406593,0.736264,0.32967,0.810811,1.101251,0.03031,1.394035
5,(Clothing),(Towels),0.736264,0.406593,0.32967,0.447761,1.101251,0.03031,1.074547
37,(Water Bottles),(Towels),0.538462,0.406593,0.318681,0.591837,1.455598,0.099746,1.453846
36,(Towels),(Water Bottles),0.406593,0.538462,0.318681,0.783784,1.455598,0.099746,2.134615
22,(Towels),(Sunscreen/Heat-Protectant Skin Products),0.406593,0.56044,0.307692,0.756757,1.350291,0.079821,1.807082
23,(Sunscreen/Heat-Protectant Skin Products),(Towels),0.56044,0.406593,0.307692,0.54902,1.350291,0.079821,1.315815


basket name: bts_basket
length of frequent item sets: 248
minimum support: 0.2


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
2,(Notebooks),(Pen/Marker/Pencil/Scriber),0.879121,0.956044,0.846154,0.9625,1.006753,0.005676,1.172161
3,(Pen/Marker/Pencil/Scriber),(Notebooks),0.956044,0.879121,0.846154,0.885057,1.006753,0.005676,1.051648
0,(Paper/Envelopes/Post-it Notes/),(Pen/Marker/Pencil/Scriber),0.813187,0.956044,0.791209,0.972973,1.017707,0.013766,1.626374
1,(Pen/Marker/Pencil/Scriber),(Paper/Envelopes/Post-it Notes/),0.956044,0.813187,0.791209,0.827586,1.017707,0.013766,1.083516
78,"(Paper/Envelopes/Post-it Notes/, Notebooks)",(Pen/Marker/Pencil/Scriber),0.714286,0.956044,0.692308,0.969231,1.013793,0.009419,1.428571
80,(Paper/Envelopes/Post-it Notes/),"(Notebooks, Pen/Marker/Pencil/Scriber)",0.813187,0.846154,0.692308,0.851351,1.006143,0.004227,1.034965
81,(Pen/Marker/Pencil/Scriber),"(Paper/Envelopes/Post-it Notes/, Notebooks)",0.956044,0.714286,0.692308,0.724138,1.013793,0.009419,1.035714
79,"(Notebooks, Pen/Marker/Pencil/Scriber)",(Paper/Envelopes/Post-it Notes/),0.846154,0.813187,0.692308,0.818182,1.006143,0.004227,1.027473
36,(Bags/School Bags),(Notebooks),0.637363,0.879121,0.593407,0.931034,1.059052,0.033088,1.752747
37,(Notebooks),(Bags/School Bags),0.879121,0.637363,0.593407,0.675,1.059052,0.033088,1.115807
