In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, log_loss, silhouette_score
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaseEnsemble, VotingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy.cluster.hierarchy import linkage, dendrogram
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.cluster import AgglomerativeClustering, KMeans
import warnings
warnings.filterwarnings('ignore')

In [3]:
fp_df = pd.read_csv(r'..\Datasets\Cosmetics.csv', index_col=0)
fp_df.head()

Unnamed: 0_level_0,Bag,Blush,Nail Polish,Brushes,Concealer,Eyebrow Pencils,Bronzer,Lip liner,Mascara,Eye shadow,Foundation,Lip Gloss,Lipstick,Eyeliner
Trans.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,0,1,1,1,1,0,1,1,1,0,0,0,0,1
2,0,0,1,0,1,0,1,1,0,0,1,1,0,0
3,0,1,0,0,1,1,1,1,1,1,1,1,1,0
4,0,0,1,1,1,0,1,0,0,0,1,0,0,1
5,0,1,0,0,1,0,1,1,1,1,0,1,1,0


In [4]:
fp_df = fp_df.astype(bool)
itemsets = apriori(fp_df, min_support=0.2, use_colnames=True)
print(fp_df)
print(itemsets)

           Bag  Blush  Nail Polish  Brushes  Concealer  Eyebrow Pencils  \
Trans.                                                                    
1        False   True         True     True       True            False   
2        False  False         True    False       True            False   
3        False   True        False    False       True             True   
4        False  False         True     True       True            False   
5        False   True        False    False       True            False   
...        ...    ...          ...      ...        ...              ...   
996      False  False        False    False      False            False   
997      False  False        False    False      False            False   
998      False   True         True     True       True            False   
999       True   True        False    False       True            False   
1000     False  False        False    False       True            False   

         Bronzer  Lip li

In [5]:
rules = association_rules(itemsets, metric='confidence', min_threshold=0.6)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Blush),(Concealer),0.363,0.442,0.22,0.606061,1.371178,0.059554,1.416462,0.424961
1,(Eyeliner),(Concealer),0.457,0.442,0.297,0.649891,1.470341,0.095006,1.593787,0.589108
2,(Concealer),(Eyeliner),0.442,0.457,0.297,0.671946,1.470341,0.095006,1.655214,0.573272
3,(Mascara),(Eye shadow),0.357,0.381,0.321,0.89916,2.359999,0.184983,6.138417,0.896222
4,(Eye shadow),(Mascara),0.381,0.357,0.321,0.84252,2.359999,0.184983,4.08305,0.930971
5,(Lip Gloss),(Foundation),0.49,0.536,0.356,0.726531,1.355468,0.09336,1.696716,0.51421
6,(Foundation),(Lip Gloss),0.536,0.49,0.356,0.664179,1.355468,0.09336,1.518667,0.565188


In [6]:
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

    antecedents   consequents  support  confidence      lift
0       (Blush)   (Concealer)    0.220    0.606061  1.371178
1    (Eyeliner)   (Concealer)    0.297    0.649891  1.470341
2   (Concealer)    (Eyeliner)    0.297    0.671946  1.470341
3     (Mascara)  (Eye shadow)    0.321    0.899160  2.359999
4  (Eye shadow)     (Mascara)    0.321    0.842520  2.359999
5   (Lip Gloss)  (Foundation)    0.356    0.726531  1.355468
6  (Foundation)   (Lip Gloss)    0.356    0.664179  1.355468


In [7]:
rules.sort_values(by=['lift'], ascending=False).head(6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,(Eye shadow),(Mascara),0.381,0.357,0.321,0.84252,2.359999,0.184983,4.08305,0.930971
3,(Mascara),(Eye shadow),0.357,0.381,0.321,0.89916,2.359999,0.184983,6.138417,0.896222
1,(Eyeliner),(Concealer),0.457,0.442,0.297,0.649891,1.470341,0.095006,1.593787,0.589108
2,(Concealer),(Eyeliner),0.442,0.457,0.297,0.671946,1.470341,0.095006,1.655214,0.573272
0,(Blush),(Concealer),0.363,0.442,0.22,0.606061,1.371178,0.059554,1.416462,0.424961
6,(Foundation),(Lip Gloss),0.536,0.49,0.356,0.664179,1.355468,0.09336,1.518667,0.565188


In [8]:
rule_df = rules.sort_values(by=['lift', 'confidence'], ascending=False)
rule_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,(Eye shadow),(Mascara),0.381,0.357,0.321,0.84252,2.359999,0.184983,4.08305,0.930971
3,(Mascara),(Eye shadow),0.357,0.381,0.321,0.89916,2.359999,0.184983,6.138417,0.896222
2,(Concealer),(Eyeliner),0.442,0.457,0.297,0.671946,1.470341,0.095006,1.655214,0.573272
1,(Eyeliner),(Concealer),0.457,0.442,0.297,0.649891,1.470341,0.095006,1.593787,0.589108
0,(Blush),(Concealer),0.363,0.442,0.22,0.606061,1.371178,0.059554,1.416462,0.424961
6,(Foundation),(Lip Gloss),0.536,0.49,0.356,0.664179,1.355468,0.09336,1.518667,0.565188
5,(Lip Gloss),(Foundation),0.49,0.536,0.356,0.726531,1.355468,0.09336,1.696716,0.51421


In [9]:
print(rule_df[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

    antecedents   consequents  support  confidence      lift
4  (Eye shadow)     (Mascara)    0.321    0.842520  2.359999
3     (Mascara)  (Eye shadow)    0.321    0.899160  2.359999
2   (Concealer)    (Eyeliner)    0.297    0.671946  1.470341
1    (Eyeliner)   (Concealer)    0.297    0.649891  1.470341
0       (Blush)   (Concealer)    0.220    0.606061  1.371178
6  (Foundation)   (Lip Gloss)    0.356    0.664179  1.355468
5   (Lip Gloss)  (Foundation)    0.356    0.726531  1.355468


In [10]:
relv_df = rule_df[rule_df['lift']>1]
relv_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4,(Eye shadow),(Mascara),0.381,0.357,0.321,0.84252,2.359999,0.184983,4.08305,0.930971
3,(Mascara),(Eye shadow),0.357,0.381,0.321,0.89916,2.359999,0.184983,6.138417,0.896222
2,(Concealer),(Eyeliner),0.442,0.457,0.297,0.671946,1.470341,0.095006,1.655214,0.573272
1,(Eyeliner),(Concealer),0.457,0.442,0.297,0.649891,1.470341,0.095006,1.593787,0.589108
0,(Blush),(Concealer),0.363,0.442,0.22,0.606061,1.371178,0.059554,1.416462,0.424961
6,(Foundation),(Lip Gloss),0.536,0.49,0.356,0.664179,1.355468,0.09336,1.518667,0.565188
5,(Lip Gloss),(Foundation),0.49,0.536,0.356,0.726531,1.355468,0.09336,1.696716,0.51421


In [11]:
print(relv_df[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

    antecedents   consequents  support  confidence      lift
4  (Eye shadow)     (Mascara)    0.321    0.842520  2.359999
3     (Mascara)  (Eye shadow)    0.321    0.899160  2.359999
2   (Concealer)    (Eyeliner)    0.297    0.671946  1.470341
1    (Eyeliner)   (Concealer)    0.297    0.649891  1.470341
0       (Blush)   (Concealer)    0.220    0.606061  1.371178
6  (Foundation)   (Lip Gloss)    0.356    0.664179  1.355468
5   (Lip Gloss)  (Foundation)    0.356    0.726531  1.355468
