In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, log_loss, silhouette_score
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaseEnsemble, VotingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy.cluster.hierarchy import linkage, dendrogram
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.cluster import AgglomerativeClustering, KMeans
import warnings
warnings.filterwarnings('ignore')

In [2]:
fp_df = pd.read_csv(r'..\Datasets\CatalogCrossSell.csv', index_col=0)
fp_df.head()

Unnamed: 0_level_0,Housewares Division,Health Products Division,Automotive Division,Personal Electronics Division,Computers Division,Garden Division,Novelty Gift Division,Jewelry Division
Clothing Division,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,1,1,1,1,0,0,1,0
0,1,1,1,1,0,1,1,1
0,1,1,1,1,0,1,1,1
0,0,1,1,1,0,1,1,0
0,0,1,0,1,0,1,1,0


In [3]:
fp_df = fp_df.astype(bool)
itemsets = apriori(fp_df, min_support=0.2, use_colnames=True)
print(fp_df)
print(itemsets)

                   Housewares Division  Health Products Division  \
Clothing Division                                                  
0                                 True                      True   
0                                 True                      True   
0                                 True                      True   
0                                False                      True   
0                                False                      True   
...                                ...                       ...   
0                                False                      True   
0                                 True                      True   
0                                False                      True   
0                                False                      True   
0                                False                      True   

                   Automotive Division  Personal Electronics Division  \
Clothing Division                         

In [4]:
rules = association_rules(itemsets, metric='confidence', min_threshold=0.6)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Housewares Division),(Health Products Division),0.393557,1.0,0.393557,1.0,1.0,0.0,inf,0.0
1,(Personal Electronics Division),(Health Products Division),0.467387,1.0,0.467387,1.0,1.0,0.0,inf,0.0
2,(Garden Division),(Health Products Division),0.272109,1.0,0.272109,1.0,1.0,0.0,inf,0.0
3,(Novelty Gift Division),(Health Products Division),0.227491,1.0,0.227491,1.0,1.0,0.0,inf,0.0
4,(Jewelry Division),(Health Products Division),0.356943,1.0,0.356943,1.0,1.0,0.0,inf,0.0
5,"(Personal Electronics Division, Housewares Div...",(Health Products Division),0.235494,1.0,0.235494,1.0,1.0,0.0,inf,0.0


In [5]:
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

                                         antecedents  \
0                              (Housewares Division)   
1                    (Personal Electronics Division)   
2                                  (Garden Division)   
3                            (Novelty Gift Division)   
4                                 (Jewelry Division)   
5  (Personal Electronics Division, Housewares Div...   

                  consequents   support  confidence  lift  
0  (Health Products Division)  0.393557         1.0   1.0  
1  (Health Products Division)  0.467387         1.0   1.0  
2  (Health Products Division)  0.272109         1.0   1.0  
3  (Health Products Division)  0.227491         1.0   1.0  
4  (Health Products Division)  0.356943         1.0   1.0  
5  (Health Products Division)  0.235494         1.0   1.0  


In [6]:
rules.sort_values(by=['lift'], ascending=False).head(6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Housewares Division),(Health Products Division),0.393557,1.0,0.393557,1.0,1.0,0.0,inf,0.0
1,(Personal Electronics Division),(Health Products Division),0.467387,1.0,0.467387,1.0,1.0,0.0,inf,0.0
2,(Garden Division),(Health Products Division),0.272109,1.0,0.272109,1.0,1.0,0.0,inf,0.0
3,(Novelty Gift Division),(Health Products Division),0.227491,1.0,0.227491,1.0,1.0,0.0,inf,0.0
4,(Jewelry Division),(Health Products Division),0.356943,1.0,0.356943,1.0,1.0,0.0,inf,0.0
5,"(Personal Electronics Division, Housewares Div...",(Health Products Division),0.235494,1.0,0.235494,1.0,1.0,0.0,inf,0.0


In [7]:
rule_df = rules.sort_values(by=['lift', 'confidence'], ascending=False)
rule_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Housewares Division),(Health Products Division),0.393557,1.0,0.393557,1.0,1.0,0.0,inf,0.0
1,(Personal Electronics Division),(Health Products Division),0.467387,1.0,0.467387,1.0,1.0,0.0,inf,0.0
2,(Garden Division),(Health Products Division),0.272109,1.0,0.272109,1.0,1.0,0.0,inf,0.0
3,(Novelty Gift Division),(Health Products Division),0.227491,1.0,0.227491,1.0,1.0,0.0,inf,0.0
4,(Jewelry Division),(Health Products Division),0.356943,1.0,0.356943,1.0,1.0,0.0,inf,0.0
5,"(Personal Electronics Division, Housewares Div...",(Health Products Division),0.235494,1.0,0.235494,1.0,1.0,0.0,inf,0.0


In [8]:
print(rule_df[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

                                         antecedents  \
0                              (Housewares Division)   
1                    (Personal Electronics Division)   
2                                  (Garden Division)   
3                            (Novelty Gift Division)   
4                                 (Jewelry Division)   
5  (Personal Electronics Division, Housewares Div...   

                  consequents   support  confidence  lift  
0  (Health Products Division)  0.393557         1.0   1.0  
1  (Health Products Division)  0.467387         1.0   1.0  
2  (Health Products Division)  0.272109         1.0   1.0  
3  (Health Products Division)  0.227491         1.0   1.0  
4  (Health Products Division)  0.356943         1.0   1.0  
5  (Health Products Division)  0.235494         1.0   1.0  


In [9]:
relv_df = rule_df[rule_df['lift']>1]
relv_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [10]:
print(relv_df[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

Empty DataFrame
Columns: [antecedents, consequents, support, confidence, lift]
Index: []
