In [None]:
import warnings
warnings.filterwarnings('ignore')

from collections import Counter
from tabulate import tabulate

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer

%matplotlib inline
pd.set_option('display.max_rows', None)

In [None]:
# Load the raw survey export; the 'No.' column is promoted to the index in the next cell.
df = pd.read_excel('PRODUCT_SURVEY RESULTS_2023.xlsx', index_col = None)

In [None]:
# Use the survey's running number as the row label.
df = df.set_index('No.')

### General Overview of data

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df.columns

### Renaming col_names for readability

In [None]:
col_names = list(df.columns)

# Readable snake_case names, listed in the same order as the survey's columns.
new_col_names = [
    'age_group',
    'gender',
    'car_ownership_history',
    'marital_status',
    'car_purchase_factors',
    'customize_vehicle_likelihood_no_extra_charge',
    'desired_exterior_component_customization',
    'desired_interior_component_customization',
    'customization_surcharge_tolerance',
    'design_own_component_likelihood',
    'personalized_design_surcharge_tolerance',
    'require_designer'
]

# The mapping is purely positional, so a column added to or removed from the
# export would shift or drop labels; fail loudly instead of mislabelling.
if len(col_names) != len(new_col_names):
    raise ValueError(
        f"Expected {len(new_col_names)} survey columns but found {len(col_names)}: {col_names}"
    )

dict_to_rename = dict(zip(col_names, new_col_names))

renamed_df = df.rename(columns = dict_to_rename)

In [None]:
renamed_df.dtypes

In [None]:
renamed_df.head()

### Check for Null Data
* 3 features with null data
    * car_purchase_factors
    * desired_exterior_component_customization
    * require_designer

In [None]:
print("Features", "                                    Null Values")
renamed_df.isnull().sum()

In [None]:
# Cardinality of every feature; a missing value counts as one distinct value.
print('features', 'unique_values'.rjust(50))
print('-' * 60)
print(renamed_df.nunique(dropna = False))

### Handling car_purchase_factors Null value
* The 37 unique values are actually different permutations of the same `;`-separated purchase factors

In [None]:
renamed_df['car_purchase_factors'].unique()

In [None]:
renamed_df[renamed_df['car_purchase_factors'].isna()]

##### Only 1 entry is missing in the car_purchase_factors column
* age_group : 20-30
* gender : Male
* marital_status : single
* car_ownership_history : never own car but planning to purchase

In [None]:
# Respondents sharing the demographic profile of the row whose purchase factors are missing.
is_male_youth = (
    (renamed_df['age_group'] == '20-30')
    & (renamed_df['gender'] == 'Male')
    & (renamed_df['marital_status'] == 'Single')
    & (renamed_df['car_ownership_history'] == 'Do not own a car, but planning to purchase in future')
)

# dropna() removes the incomplete row itself, along with any other row that has a missing answer.
# The former `rename(dict_to_rename, inplace=True)` call was dropped: without `axis=1` it targets
# the index labels, and the columns of `renamed_df` are already renamed.
male_youths = renamed_df[is_male_youth].dropna()
male_youths

In [None]:
len(male_youths)

In [None]:
# One list of factors per respondent. Iterating the column itself (rather than
# `.unique()`) keeps respondents who gave identical answers, so the tallies
# below are per respondent and not per distinct answer string.
male_youths_factor_lists = [answer.split(';') for answer in male_youths['car_purchase_factors']]

male_youths_num_factors = [len(factors) for factors in male_youths_factor_lists]
male_youths_purchase_factors = [factor for factors in male_youths_factor_lists for factor in factors]

# How many respondents picked 1, 2, 3, ... factors.
print("Factors | Count")
for num_factors, respondents in Counter(male_youths_num_factors).most_common():
    print(str(num_factors).center(7), '|' ,str(respondents).rjust(3))

print()
# How often each individual factor was picked.
for factor, mentions in Counter(male_youths_purchase_factors).most_common():
    print(factor , ':', mentions)

* Most male (age 20-30) (single) have 4 purchase_factors
* Price, Technological features, Brand name & functionality emerge as top 4

In [None]:
# Impute the single missing answer (row label 18) with the four factors
# reported above as the most common among comparable respondents.
renamed_df.at[18,'car_purchase_factors'] = 'Price;Technological features;Brand name;Functionality'

In [None]:
renamed_df.isnull().sum()

### Handling desired_exterior_component_customization Null values

In [None]:
renamed_df['desired_exterior_component_customization'].unique()

In [None]:
renamed_df[renamed_df['desired_exterior_component_customization'].isna()]

Since both null data entries owned a car before, it is highly likely that NaN represents that owner does not want to customize the exterior of their car.

In [None]:
# Row labels 3 and 4 are the two respondents with no exterior answer.
renamed_df.loc[[3, 4], 'desired_exterior_component_customization'] = 'Not customizing'

In [None]:
renamed_df.isnull().sum()

### Handling require_designer Null values

In [None]:
renamed_df['require_designer'].unique()

In [None]:
renamed_df[renamed_df['require_designer'].isna()]

##### 9 customers have info that can be extracted from another column
* i.e set require_designer = Yes 
* Because they indicated under "design_own_component_likelihood" that they will do it "Only with professional help"

In [None]:
# Respondents with no designer answer who would only design with professional help.
missing_designer = renamed_df['require_designer'].isna()
wants_professional_help = renamed_df['design_own_component_likelihood'] == 'Only with professional help'

require_professional_df = renamed_df[missing_designer & wants_professional_help]
require_professional_df # to extract indexes of customers

In [None]:
# Pick the two answer labels by their position in unique(), which lists values
# in order of first appearance.
# NOTE(review): positions 1 and 2 depend on the row order of the data and may
# shift once the missing values are filled — confirm against the printed values.
no_need_designer = renamed_df['require_designer'].unique()[2]
need_designer = renamed_df['require_designer'].unique()[1]

print(no_need_designer)
print(need_designer)

In [None]:
# Everyone who asked for professional help is recorded as needing a designer.
renamed_df.loc[require_professional_df.index, 'require_designer'] = need_designer

In [None]:
renamed_df[renamed_df['require_designer'].isna()]

* 4 out of 6 remaining Null value customers indicated that they will not be designing own component
* Therefore, set require_designer = No

In [None]:
# Respondents with no designer answer who will not design their own component.
missing_designer = renamed_df['require_designer'].isna()
not_designing = renamed_df['design_own_component_likelihood'] == 'No'

Not_Designing_df = renamed_df[missing_designer & not_designing]
Not_Designing_df

In [None]:
# Respondents who will not design a component get their own 'Not Designing' label.
renamed_df.loc[Not_Designing_df.index, 'require_designer'] = 'Not Designing'

In [None]:
renamed_df[renamed_df['require_designer'].isna()]

##### Assume that the last 2 customers with NaN in require_designer do not require a personal designer

In [None]:
# Row labels 6 and 14 are the two respondents still lacking a designer answer.
renamed_df.loc[[6, 14], 'require_designer'] = no_need_designer

In [None]:
renamed_df.isnull().sum()

### Data Analysis

In [None]:
sb.set_style('darkgrid')

In [None]:
# Single-choice survey answers are stored as pandas categoricals.
categorical_cols = [
    'age_group', 'gender', 'car_ownership_history', 'marital_status',
    'customize_vehicle_likelihood_no_extra_charge',
    'customization_surcharge_tolerance',
    'design_own_component_likelihood',
    'personalized_design_surcharge_tolerance',
    'require_designer',
]
renamed_df = renamed_df.astype({col: 'category' for col in categorical_cols})

In [None]:
# Respondents per age group, split by gender.
sb.countplot(x = 'age_group', data = renamed_df, order = renamed_df.age_group.cat.categories,
            hue = 'gender', hue_order = ['Male', 'Female'],
            palette = ['lightskyblue', 'lightpink'],
            ec = 'black', hatch = '/')
plt.xlabel("age_group", fontsize = 14)
plt.ylabel("count", fontsize = 14)
plt.xticks(fontsize = 14)
plt.yticks(fontsize = 14)
plt.legend(prop={'size': 14})

# Keep the table as the cell's last expression: a bare expression in the middle
# of a cell is evaluated and thrown away, so the counts were never shown.
renamed_df.groupby(['age_group'])['gender'].value_counts()

* Majority of data are in age_group 20-30
* In age_group 20-30, there are twice as many males as females
* Possible observation: Young Males are more interested in getting a car

In [None]:
# Respondents per age group, split by marital status.
font_size = 14
marital_order = ['Single', 'Married with no children', 'Married with children']

sb.countplot(data = renamed_df, x = 'age_group',
             hue = 'marital_status', hue_order = marital_order,
             palette = ['lightgreen', 'lightyellow', 'lightcoral'],
             ec = 'black', hatch = '/')
plt.legend(loc = 'upper right', prop = {'size': font_size})
plt.xlabel("age_group", fontsize = font_size)
plt.ylabel("count", fontsize = font_size)
plt.xticks(fontsize = font_size)
plt.yticks(fontsize = font_size)

# The same breakdown as a table, shown below the figure.
renamed_df.groupby(['age_group'])['marital_status'].value_counts()

In [None]:
# Respondents per age group, split by car ownership history.
font_size = 14

sb.countplot(data = renamed_df, x = 'age_group',
             hue = 'car_ownership_history',
             palette = ['chocolate', 'silver', 'gold'],
             ec = 'black', hatch = '/')
plt.xlabel("age_group", fontsize = font_size)
plt.ylabel("count", fontsize = font_size)
plt.xticks(fontsize = font_size)
plt.yticks(fontsize = font_size)

plt.legend(loc = 'upper right');

In [None]:
renamed_df.columns

In [None]:
# Flatten the ';'-separated answers of each multi-select question into one list of tokens.
# NOTE(review): `.unique()` contributes each distinct answer string once, so the tallies built
# from these lists count distinct answers, not respondents — confirm this is intended.
df_purchase_factors = [
    factor
    for answer in renamed_df['car_purchase_factors'].unique()
    for factor in answer.split(';')
]

exterior_components = [
    component
    for answer in renamed_df['desired_exterior_component_customization'].unique()
    for component in answer.split(';')
]

interior_components = [
    component
    for answer in renamed_df['desired_interior_component_customization'].unique()
    for component in answer.split(';')
]
    


In [None]:
Counter(df_purchase_factors).values()

In [None]:
# Bar chart of how often each purchase factor occurs, in order of first appearance.
purchase_counter = Counter(df_purchase_factors)
purchase_keys = list(purchase_counter)
purchase_values = list(purchase_counter.values())

# Shorten the second-to-last label so it fits on the axis.
purchase_keys[-2] = "ESG consideration"

bar_colors = ['#2274a5', '#690500', '#183A37', '#16F4D0', '#EFCB68', '#C7A27C', '#FF6F59', '#93A3BC']

plt.figure(figsize = (15, 6), dpi = 300)
plt.bar(purchase_keys, purchase_values, width = 0.3, color = bar_colors)

plt.xticks(rotation = -30, fontsize = 20)
plt.yticks(fontsize = 14)
plt.ylabel("count", fontsize = 14)

In [None]:
# Exterior components, most frequent first.
for component, _ in Counter(exterior_components).most_common():
    print(component)

In [None]:
# Bar chart of exterior components, most frequent first.
exterior_ranked = Counter(exterior_components).most_common()
exterior_keys = [component for component, _ in exterior_ranked]
exterior_values = [mentions for _, mentions in exterior_ranked]

# Shorten the label of the least frequent component so it fits on the axis.
exterior_keys[-1] = 'add body kit'

bar_colors = ['#2274a5', '#690500', '#183A37', '#16F4D0', '#EFCB68', '#C7A27C', '#FF6F59']

plt.figure(figsize = (15, 6), dpi = 300)
plt.bar(exterior_keys, exterior_values, width = 0.3, color = bar_colors)
plt.xticks(rotation = -30, fontsize = 20)
plt.yticks(fontsize = 14)
plt.ylabel("count", fontsize = 14)

In [None]:
# Bar chart of interior components, most frequent first.
interior_ranked = Counter(interior_components).most_common()
interior_keys = [component for component, _ in interior_ranked]
interior_values = [mentions for _, mentions in interior_ranked]

# Shorten the labels in positions 4 and 5 so they fit on the axis.
interior_keys[4] = 'Sunblocker'
interior_keys[5] = 'Music Player'

bar_colors = ['#2274a5', '#690500', '#183A37', '#16F4D0', '#EFCB68', '#C7A27C', '#FF6F59']

plt.figure(figsize = (15, 6), dpi = 300)
plt.bar(interior_keys, interior_values, width = 0.3, color = bar_colors)
plt.xticks(rotation = -30, fontsize = 20)
plt.yticks(fontsize = 14)
plt.ylabel("count", fontsize = 14)

### Encoding Data
* Nominal Encoding (Rank does not matter)
    * One-hot encoding technique

In [None]:
# Cardinality of every feature; a missing value counts as one distinct value.
print('features', 'unique_values'.rjust(50))
print('-' * 60)
print(renamed_df.nunique(dropna = False))

##### car_purchase_factors | exterior_components | interior_components have high number of unique values
* permutations of same factors
* Can perform association mining with these features

##### Example Pipeline:
* Given:
    * age_group
    * gender
    * car_ownership_history
    * marital status
* Association Factors:
    * customize_vehicle_likelihood_no_extra_charge
    * desired_exterior_component_customization
    * desired_interior_component_customization
    * design_own_component_likelihood
* Sub association:
    * require_designer

### Encoding features with low unique values first

In [None]:
renamed_df.columns

In [None]:
# Go through 'string' first so integer answers become text labels, then back
# to 'category' for one-hot encoding.
relabelled_cols = [
    'customization_surcharge_tolerance',
    'personalized_design_surcharge_tolerance',
    'require_designer',
]

renamed_df = (
    renamed_df
    .astype(dict.fromkeys(relabelled_cols, 'string'))
    .astype(dict.fromkeys(relabelled_cols, 'category'))
)

In [None]:
# Single-choice features that get one indicator column per category.
cat_cols = [
    'age_group', 'gender', 'car_ownership_history', 'marital_status',
    'customize_vehicle_likelihood_no_extra_charge',
    'customization_surcharge_tolerance', 'design_own_component_likelihood',
    'personalized_design_surcharge_tolerance', 'require_designer',
]

oheObj = OneHotEncoder()

# fit_transform returns a sparse matrix; densify it and store as float32.
sparse_features = oheObj.fit_transform(renamed_df[cat_cols])
feature_arr = sparse_features.toarray().astype('float32')

In [None]:
# Print the shape explicitly: a bare expression in the middle of a cell is
# evaluated and discarded, so it never appeared in the output.
print(feature_arr.shape)
print(feature_arr.dtype)
print(feature_arr)

In [None]:
oheObj.categories_

In [None]:
feature_labels = oheObj.get_feature_names_out(cat_cols)
feature_labels

In [None]:
# Reuse the survey's own row labels instead of assuming they run 1..N. The rows
# of feature_arr follow the row order of renamed_df, and the column-wise concat
# with multi_encoded_df later aligns on these labels.
feature_encoded_df = pd.DataFrame(feature_arr, columns = feature_labels, index = renamed_df.index)
feature_encoded_df.head()


In [None]:
feature_encoded_df.shape

##### Number of columns increased to 29 columns, representing 29 unique categories from the previous 9 categories specified.

### Handling 3 columns with permutations of unique values
* car_purchase_factors
* desired_exterior_component_customization
* desired_interior_component_customization

In [None]:
def _split_or_keep(answer):
    """Return the ';'-separated tokens of `answer`, or `[answer]` if it has no `.split`."""
    try:
        return answer.split(';')
    except AttributeError:
        return [answer]


unique_purchase_factors = set()
unique_exterior_components = set()
unique_interior_components = set()

# Purchase factors are split directly, so a non-string value raises here.
for answer in renamed_df['car_purchase_factors']:
    unique_purchase_factors.update(answer.split(';'))

# Component answers tolerate non-string values by keeping them as a single token.
for answer in renamed_df['desired_exterior_component_customization']:
    unique_exterior_components.update(_split_or_keep(answer))

for answer in renamed_df['desired_interior_component_customization']:
    unique_interior_components.update(_split_or_keep(answer))

# Print each set followed by its size, with blank lines in between.
token_sets = (unique_purchase_factors, unique_exterior_components, unique_interior_components)
for position, tokens in enumerate(token_sets):
    if position:
        print()
    print(tokens)
    print()
    print(len(tokens))

In [None]:
renamed_df.head(1)

In [None]:
# Turn each ';'-separated answer into a list of tokens.
# Values that are already lists are left untouched, so re-running this cell is
# harmless (previously a second run raised AttributeError: lists have no .split).
multi_answer_cols = [
    'car_purchase_factors',
    'desired_exterior_component_customization',
    'desired_interior_component_customization',
]

for col in multi_answer_cols:
    renamed_df[col] = renamed_df[col].map(
        lambda answer: answer.split(';') if isinstance(answer, str) else answer
    )
    

In [None]:
renamed_df.head(1)

* Only 8 unique car_purchase_factors, can encode with A - H and perform association mining
* 7 unique exterior & interior components

In [None]:
mlbObj = MultiLabelBinarizer()
mlb_cols = ['desired_exterior_component_customization', 'desired_interior_component_customization']

# Binarize each multi-answer column into its own indicator frame. The binarizer
# is refit per column, so classes_ must be read right after each fit.
indicator_frames = []
for col in ['car_purchase_factors'] + mlb_cols:
    answers = renamed_df[col]
    indicators = mlbObj.fit_transform(answers)
    indicator_frames.append(
        pd.DataFrame(indicators, columns = mlbObj.classes_, index = answers.index)
    )

# Join the frames side by side, purchase factors first.
multi_encoded_df = indicator_frames[0]
for frame in indicator_frames[1:]:
    multi_encoded_df = multi_encoded_df.join(frame)

In [None]:
new_encoded_cols = list(multi_encoded_df.columns)

In [None]:
# Match the float32 dtype of the one-hot encoded frame.
multi_encoded_df = multi_encoded_df.astype({col: 'float32' for col in new_encoded_cols})

In [None]:
multi_encoded_df

### Combining feature_encoded_df & multi_encoded_df

In [None]:
feature_encoded_df.head(2)

In [None]:
multi_encoded_df.head(2)

In [None]:
# Place the one-hot columns and the multi-label columns side by side.
# Column-wise concat aligns rows on the index labels of the two frames.
final_encoded_df = pd.concat([feature_encoded_df, multi_encoded_df], axis = 1)

In [None]:
final_encoded_df.head()

In [None]:
renamed_df

### Associative Rule Mining

#### Market Basket Analysis
* Association between items
* Bread -> 60% likelihood with Jam
* Wants to find product with association for product suggestion / placement


##### Possible Association
* Aged 20-30 Then exterior component / interior component / customize vehicle / design_own_component

* if-then relationship
    * if aged 20-30 then customize vehicle
    * if : antecedent
    * then : consequent
    * A => B
    * Right placement = earn money
* Above rules
    * single cardinality
    * multi cardinality
    
    
#### Measure Association
* 3 metrics to assist in measuring association
    * Support
        * Frequency of items
        * Filter out items that appears less frequently
        * frequency / total occurrence
    * Confidence
        * How often items occur together
        * Removes anomaly group
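    * Lift
        * Strength of any rule (weight)
        * $\text{lift}(A \Rightarrow B) = \dfrac{\text{support}(A \cup B)}{\text{support}(A) \times \text{support}(B)}$
        * Lift > 1 means A and B occur together more often than expected if they were independent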

### Apriori Algorithm
* Uses frequent item sets to generate association rule
* subset of a frequent item set must also be a frequent itemset
* Eliminates many infrequent itemsets, which reduces the number of rules drastically


##### Frequent Itemset
* Support value greater than threshold value
* Threshold value specified by user / domain expert

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules


In [None]:
def convert(x):
    """Binarise one encoded cell for frequent-itemset mining.

    Parameters
    ----------
    x : number
        Value of a single cell of the encoded frame.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    Values strictly between 0 and 1, and NaN, used to fall through both
    conditions and return ``None``. They are now mapped to 0 so the result is
    always a 0/1 flag.
    """
    return 1 if x >= 1 else 0
# Pass every cell of the encoded frame through convert() before mining.
final_encoded_df = final_encoded_df.applymap(convert)
final_encoded_df

In [None]:
final_encoded_df.columns

In [None]:
# Group the encoded columns by the survey question they came from, by position.
encoded_cols = list(final_encoded_df.columns)

# NOTE(review): the age slice stops at 3 while the gender slice starts at 4,
# so column 3 belongs to no group — confirm this is intended.
age_groups = encoded_cols[:3]
genders = encoded_cols[4:6]
car_ownerships = encoded_cols[6:9]
marital_status = encoded_cols[9:12]
customize_veh_likelihood = encoded_cols[12:15]
customize_surcharge_tolerance = encoded_cols[15:19]
design_own_component_likelihood = encoded_cols[19:22]
personalized_design_surcharge_tolerance = encoded_cols[22:26]
require_designer = encoded_cols[26:29]
purchase_factors = encoded_cols[29:37]
exterior_components = encoded_cols[37:44]
interior_components = encoded_cols[44:]

In [None]:
# Demographic columns shared by the two gender-based datasets.
demographic_cols = age_groups + marital_status + genders

# dataset1: demographics vs. exterior components.
dataset1 = final_encoded_df[demographic_cols + exterior_components]

# dataset2: demographics vs. interior components.
dataset2 = final_encoded_df[demographic_cols + interior_components]

# dataset3: age groups vs. interior and exterior components.
dataset3 = final_encoded_df[age_groups + interior_components + exterior_components]

In [None]:
# For each dataset, report how many frequent itemsets survive at min_support 0.1 ... 0.9.
labelled_datasets = [('Dataset 1', dataset1), ('Dataset 2', dataset2), ('Dataset 3', dataset3)]

for position, (label, dataset) in enumerate(labelled_datasets):
    if position:
        print()
    print(label)
    for i in np.arange(0.1, 1.0, 0.1):
        frequent_itemsets = apriori(dataset, min_support = i.round(2), use_colnames = True)
        print('support: ',i.round(2),'| Unique Itemsets: ',frequent_itemsets.shape[0])

In [None]:
# Mine frequent itemsets; dataset3 uses a lower support threshold than the other two.
apriori_options = {'use_colnames': True}

frequent_itemsets_1 = apriori(dataset1, min_support = 0.1, **apriori_options)
frequent_itemsets_2 = apriori(dataset2, min_support = 0.1, **apriori_options)
frequent_itemsets_3 = apriori(dataset3, min_support = 0.05, **apriori_options)

frequent_itemsets_3.shape

In [None]:
# Derive association rules from each set of frequent itemsets, keeping rules whose lift reaches the threshold.
# Datasets 1 and 2 require lift >= 1.5; dataset 3 uses the looser lift >= 1.
rules_1 = association_rules(frequent_itemsets_1, metric = 'lift', min_threshold = 1.5)
# rules_1[rules_1['confidence'] > 0.8]
rules_2 = association_rules(frequent_itemsets_2, metric = 'lift', min_threshold = 1.5)
# rules_2[rules_2.confidence>=0.5].reset_index(drop = True)
rules_3 = association_rules(frequent_itemsets_3, metric = 'lift', min_threshold = 1)

In [None]:
rules_3.shape

In [None]:
rules_3[rules_3.confidence>=0.3].shape

### Exterior Component Association

In [None]:
# Split the exterior rules (confidence >= 0.3) by the gender found in their antecedents.
female_consequents, male_consequents = [], []
female_confidence, male_confidence = [], []

for _, rule in rules_1[rules_1.confidence >= 0.3].iterrows():
    consequents = list(rule['consequents'])
    if 'gender_Female' in rule['antecedents']:
        female_consequents.extend(consequents)
        female_confidence.append([consequents, rule['confidence']])
    elif 'gender_Male' in rule['antecedents']:
        male_consequents.extend(consequents)
        male_confidence.append([consequents, rule['confidence']])

# Number of rules per exterior component (non-component consequents are dropped).
updated_female_consequents = {k: v for k, v in Counter(female_consequents).items() if k in exterior_components}
updated_male_consequents = {k: v for k, v in Counter(male_consequents).items() if k in exterior_components}

# Mean confidence per component, keyed by the component itself rather than by a
# hard-coded position. Every rule that contains the component contributes, so a
# rule with several components is no longer credited only to the first match
# while still being counted in the divisor of the others.
female_conf = [
    round(sum(conf for items, conf in female_confidence if component in items) / rule_count, 2)
    for component, rule_count in updated_female_consequents.items()
]
male_conf = [
    round(sum(conf for items, conf in male_confidence if component in items) / rule_count, 2)
    for component, rule_count in updated_male_consequents.items()
]

table_headers = ['Exterior Component','Count','Confidence']

print('Female Exterior Component Association')
print(tabulate(
    list(zip(updated_female_consequents.keys(), updated_female_consequents.values(), female_conf)),
    headers = table_headers, tablefmt = 'fancy_grid'))
print('Male Exterior Component Association')
print(tabulate(
    list(zip(updated_male_consequents.keys(), updated_male_consequents.values(), male_conf)),
    headers = table_headers, tablefmt = 'fancy_grid'))


### Interior Component Association

In [None]:
rules_2[rules_2.confidence>=0.3].shape

In [None]:
# Split the interior rules (confidence >= 0.3) by the gender found in their antecedents.
female_consequents_2, male_consequents_2 = [], []
female_confidence_2, male_confidence_2 = [], []

gender_buckets = {
    'gender_Female': (female_consequents_2, female_confidence_2),
    'gender_Male': (male_consequents_2, male_confidence_2),
}

for _, rule in rules_2[rules_2.confidence >= 0.3].iterrows():
    antecedents = list(rule['antecedents'])
    for gender_label, (consequent_items, confidence_pairs) in gender_buckets.items():
        if gender_label in antecedents:
            consequents = list(rule['consequents'])
            consequent_items.extend(consequents)
            confidence_pairs.append([consequents, rule['confidence']])
            break  # a rule is assigned to the first matching gender only

In [None]:
# Rule counts per consequent, restricted to interior component columns.
female_counts_2 = Counter(female_consequents_2)
male_counts_2 = Counter(male_consequents_2)

updated_female_consequents_2 = {
    item: female_counts_2[item] for item in female_counts_2 if item in interior_components
}
updated_male_consequents_2 = {
    item: male_counts_2[item] for item in male_counts_2 if item in interior_components
}


In [None]:
updated_male_consequents_2

In [None]:
# Mean confidence per interior component, keyed by the component itself rather
# than by a hard-coded position. Every rule that contains the component
# contributes, so a rule with several components is no longer credited only to
# the first match while still being counted in the divisor of the others.
female_conf_2 = [
    round(sum(conf for items, conf in female_confidence_2 if component in items) / rule_count, 2)
    for component, rule_count in updated_female_consequents_2.items()
]
male_conf_2 = [
    round(sum(conf for items, conf in male_confidence_2 if component in items) / rule_count, 2)
    for component, rule_count in updated_male_consequents_2.items()
]

table_headers = ['Interior Component','Count','Confidence']

print('Female Interior Component Association')
print(tabulate(
    list(zip(updated_female_consequents_2.keys(), updated_female_consequents_2.values(), female_conf_2)),
    headers = table_headers, tablefmt = 'fancy_grid'))
print('Male Interior Component Association')
print(tabulate(
    list(zip(updated_male_consequents_2.keys(), updated_male_consequents_2.values(), male_conf_2)),
    headers = table_headers, tablefmt = 'fancy_grid'))

In [None]:
# rules_1[rules_1['confidence'] > 0.8].reset_index(drop = True).iloc[[9,19,24]][['antecedents', 'consequents', 'confidence']]

# rules_2[rules_2['confidence'] > 0.5].reset_index(drop = True).iloc[[12,17]][['antecedents', 'consequents', 'confidence']]

# rules_2[rules_2['confidence'] > 0.5].reset_index(drop = True).iloc[[20,22,27]][['antecedents', 'consequents', 'confidence']]

### Age_Group Association

In [None]:
age_groups

In [None]:
# Split the age rules (confidence >= 0.01) by the age group found in their antecedents.
age20_30_consequents, age31_40_consequents, age41_50_consequents = [], [], []
age20_30_confidence, age31_40_confidence, age41_50_confidence = [], [], []

age_buckets = {
    'age_group_20-30': (age20_30_consequents, age20_30_confidence),
    'age_group_31-40': (age31_40_consequents, age31_40_confidence),
    'age_group_41-50': (age41_50_consequents, age41_50_confidence),
}

for _, rule in rules_3[rules_3.confidence >= 0.01].iterrows():
    antecedents = list(rule['antecedents'])
    for age_label, (consequent_items, confidence_pairs) in age_buckets.items():
        if age_label in antecedents:
            consequents = list(rule['consequents'])
            consequent_items.extend(consequents)
            confidence_pairs.append([consequents, rule['confidence']])
            break  # a rule is assigned to the first matching age group only

In [None]:
# Rule counts per consequent, restricted to interior or exterior component columns.
component_cols = set(interior_components) | set(exterior_components)

updated_20_30_consequents = {
    item: count for item, count in Counter(age20_30_consequents).items() if item in component_cols
}
updated_31_40_consequents = {
    item: count for item, count in Counter(age31_40_consequents).items() if item in component_cols
}
updated_41_50_consequents = {
    item: count for item, count in Counter(age41_50_consequents).items() if item in component_cols
}

In [None]:
# (table title, rule count per component, [consequents, confidence] pairs) for each age group.
age_group_inputs = [
    ('Age 20-30 Components', updated_20_30_consequents, age20_30_confidence),
    ('Age 31-40 Components', updated_31_40_consequents, age31_40_confidence),
    ('Age 41-50 Components', updated_41_50_consequents, age41_50_confidence),
]

# Mean confidence per component, computed in the same order as the component
# counts so every value lands on its own table row. The hand-written lists
# depended on hard-coded positions, and the 31-40 list was ordered
# wheels/dashboard/grilles while its rows were dashboard/wheels/grilles.
age_2030_conf, age_3140_conf, age_4150_conf = [
    [
        round(sum(conf for items, conf in rule_confidences if component in items) / rule_count, 2)
        for component, rule_count in component_counts.items()
    ]
    for _, component_counts, rule_confidences in age_group_inputs
]

# These tables mix interior and exterior components, hence the neutral header.
table_headers = ['Component', 'Count', 'Confidence']

for (title, component_counts, _), mean_confidences in zip(
        age_group_inputs, (age_2030_conf, age_3140_conf, age_4150_conf)):
    print(title)
    print(tabulate(
        list(zip(component_counts.keys(), component_counts.values(), mean_confidences)),
        headers = table_headers, tablefmt = 'fancy_grid'))