This notebook performs association rule mining (ADM) on the 'ExteriorComponents' and 'InteriorComponents' survey columns to identify sets of components that are typically customized together.

This information can help in the marketing of customization services, by introducing promotional packages/discounts to convince customers to customize more than one component at a time. 

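`ADM(component, support, confidence, lift)` analyses a single column, either 'ExteriorComponents' or 'InteriorComponents'. `support` is the minimum support for frequent itemsets, `confidence` is the minimum confidence for a rule, and `lift` is the minimum lift a rule must reach to be printed.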

For ADM on both exterior and interior, use the last cell. 

In [19]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules


In [20]:
# Load the survey responses from the Excel workbook into a DataFrame.
df = pd.read_excel(
    r"C:\Users\verlyn\OneDrive\Desktop\SCH5\4829\PRODUCT_SURVEY RESULTS_2023.xlsx"
)

# Rename the columns. The assignment is positional, so this list must match
# the sheet's column order and count.
df.columns = [
    "No.",
    "Age",
    "Gender",
    "Category",
    "MarriageStatus",
    "FactorsPurchase",
    "FreeCustomization",
    "ExteriorComponents",
    "InteriorComponents",
    "WTSCustomization",
    "WantOwnPersonalization",
    "WTSPersonalization",
    "PersonalizationJob",
]


In [4]:
def ADM(component, support, confidence, lift):
    """Print association rules mined from one multi-select survey column.

    Every cell of ``df[component]`` is treated as one transaction whose items
    are separated by ';'. Missing answers are replaced by the literal item
    "None", so "None" can itself show up in itemsets and rules.

    Args:
        component: Name of the column of the global ``df`` to analyse, e.g.
            'ExteriorComponents' or 'InteriorComponents'.
        support: Minimum support passed to ``apriori``.
        confidence: Minimum confidence passed to ``association_rules``.
        lift: Rules with a lift below this value are dropped before printing.

    Returns:
        None. The remaining rules are printed.

    Side effects:
        Missing values in ``df[component]`` are replaced by "None" in the
        global ``df``.
    """
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # the df[component] selection: the chained in-place form is deprecated and
    # does not update df when pandas Copy-on-Write is active, which would
    # leave NaN rows that break the encoding step below.
    df[component] = df[component].fillna("None")

    # Work on a string view of the column so non-string cells do not turn
    # into NaN when split.
    df_component = df[component].astype(str)

    # One-hot encode: one boolean column per distinct item, one row per response.
    mlb = MultiLabelBinarizer()
    encoded_df = pd.DataFrame(
        mlb.fit_transform(df_component.str.split(';')),
        columns=mlb.classes_,
        index=df.index,
    )
    encoded_df = encoded_df.astype(bool)

    # Find frequent itemsets using the apriori algorithm.
    frequent_itemsets = apriori(encoded_df, min_support=support, use_colnames=True)

    # Generate association rules from the frequent itemsets. The rules are
    # selected on confidence here and filtered on lift afterwards, using the
    # 'lift' column of the result.
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence)
    rules = rules[rules['lift'] >= lift]

    print(rules)

In [9]:
# Interior components: min support 0.2, min confidence 0.8, min lift 1.
ADM(component='InteriorComponents', support=0.2, confidence=0.8, lift=1)

                            antecedents       consequents  antecedent support  \
0                  (Centre compartment)       (Dashboard)                0.48   
3    (Door handles, Centre compartment)       (Dashboard)                0.24   
4  (Centre compartment, Steering wheel)       (Dashboard)                0.32   
5    (Door handles, Centre compartment)  (Steering wheel)                0.24   
6             (Dashboard, Door handles)  (Steering wheel)                0.30   
7        (Door handles, Steering wheel)       (Dashboard)                0.28   

   consequent support  support  confidence      lift  leverage  conviction  
0                0.84     0.42    0.875000  1.041667    0.0168        1.28  
3                0.84     0.22    0.916667  1.091270    0.0184        1.92  
4                0.84     0.28    0.875000  1.041667    0.0112        1.28  
5                0.60     0.20    0.833333  1.388889    0.0560        2.40  
6                0.60     0.24    0.800000  1.3

In [11]:
# Exterior components: min support 0.2, min confidence 0.5, min lift 1.
ADM(component='ExteriorComponents', support=0.2, confidence=0.5, lift=1)

      antecedents   consequents  antecedent support  consequent support  \
1  (Side mirrors)  (Headlights)                0.32                0.44   
2        (Wheels)  (Headlights)                0.66                0.44   
3    (Headlights)      (Wheels)                0.44                0.66   

   support  confidence      lift  leverage  conviction  
1     0.20    0.625000  1.420455    0.0592    1.493333  
2     0.34    0.515152  1.170799    0.0496    1.155000  
3     0.34    0.772727  1.170799    0.0496    1.496000  


In [18]:
# Combined analysis: mine rules across interior and exterior components
# together, so a rule can link an interior item to an exterior one.
support = 0.2      # minimum support for a frequent itemset
confidence = 0.6   # minimum confidence for a rule
lift = 2           # minimum lift for a rule to be printed

# Replace missing answers with the literal item "None". The filled column is
# assigned back instead of calling fillna(inplace=True) on the selection:
# the chained in-place form is deprecated and does not update df when pandas
# Copy-on-Write is active.
df['ExteriorComponents'] = df['ExteriorComponents'].fillna("None")
df['InteriorComponents'] = df['InteriorComponents'].fillna("None")

# Not used below; kept in case other cells reference these names.
ext_components = df['ExteriorComponents']
int_components = df["InteriorComponents"]

# Make sure both columns are strings before concatenating them.
df["InteriorComponents"] = df["InteriorComponents"].astype(str)
df["ExteriorComponents"] = df["ExteriorComponents"].astype(str)

# One transaction per respondent: interior items followed by exterior items,
# all separated by ';'.
df_component = df["InteriorComponents"].str.cat(df["ExteriorComponents"], sep=';')

# One-hot encode: one boolean column per distinct item.
mlb = MultiLabelBinarizer()
encoded_df = pd.DataFrame(
    mlb.fit_transform(df_component.str.split(';')),
    columns=mlb.classes_,
    index=df.index,
)
encoded_df = encoded_df.astype(bool)

# Find frequent itemsets using the apriori algorithm.
frequent_itemsets = apriori(encoded_df, min_support=support, use_colnames=True)

# Generate association rules from the frequent itemsets. The rules are
# selected on confidence here and filtered on lift afterwards, using the
# 'lift' column of the result.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence)
rules = rules[rules['lift'] >= lift]

print(rules)

                  antecedents                consequents  antecedent support  \
37  (Dashboard, Door handles)             (Side mirrors)                0.30   
40             (Side mirrors)  (Dashboard, Door handles)                0.32   

    consequent support  support  confidence      lift  leverage  conviction  
37                0.32      0.2    0.666667  2.083333     0.104    2.040000  
40                0.30      0.2    0.625000  2.083333     0.104    1.866667  
