In [2]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.23.1-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   --- ------------------------------------ 0.1/1.4 MB 3.3 MB/s eta 0:00:01
   ------------- -------------------------- 0.5/1.4 MB 5.1 MB/s eta 0:00:01
   ------------------------ --------------- 0.9/1.4 MB 7.1 MB/s eta 0:00:01
   ------------------------------------- -- 1.3/1.4 MB 7.7 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 6.6 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.1


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import accuracy_score, log_loss, silhouette_score
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaseEnsemble, VotingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy.cluster.hierarchy import linkage, dendrogram
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.cluster import AgglomerativeClustering, KMeans
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Load the faceplate purchase data; the first CSV column becomes the index.
# Forward slashes keep the relative path valid on Windows, Linux and macOS
# (a backslash-separated literal only resolves on Windows).
fp_df = pd.read_csv('../Datasets/Faceplate.csv', index_col=0)
fp_df.head()

Unnamed: 0_level_0,Red,White,Blue,Orange,Green,Yellow
Transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,1,0,0,1,0
2,0,1,0,1,0,0
3,0,1,1,0,0,0
4,1,1,0,1,0,0
5,1,0,1,0,0,0


In [10]:
# Cast the 0/1 purchase indicators to booleans before mining itemsets.
fp_df = fp_df.astype(bool)

# Frequent itemsets with a minimum support of 0.2, labelled by column name.
itemsets = apriori(
    fp_df,
    min_support=0.2,
    use_colnames=True,
)

# Show the boolean basket matrix followed by the mined itemsets.
print(fp_df, itemsets, sep='\n')

               Red  White   Blue  Orange  Green  Yellow
Transaction                                            
1             True   True  False   False   True   False
2            False   True  False    True  False   False
3            False   True   True   False  False   False
4             True   True  False    True  False   False
5             True  False   True   False  False   False
6            False   True   True   False  False   False
7             True  False   True   False  False   False
8             True   True   True   False   True   False
9             True   True   True   False  False   False
10           False  False  False   False  False    True
    support             itemsets
0       0.6                (Red)
1       0.7              (White)
2       0.6               (Blue)
3       0.2             (Orange)
4       0.2              (Green)
5       0.4         (Red, White)
6       0.4          (Red, Blue)
7       0.2         (Green, Red)
8       0.4        (Blue, White

In [11]:
# Derive association rules from the frequent itemsets, filtering on the
# confidence metric with a threshold of 0.6.
rules = association_rules(
    itemsets,
    metric='confidence',
    min_threshold=0.6,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Red),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9,-0.111111
1,(Red),(Blue),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
2,(Blue),(Red),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
3,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
4,(Blue),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9,-0.111111
5,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
6,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
7,"(Green, Red)",(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
8,"(Green, White)",(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
9,(Green),"(Red, White)",0.2,0.4,0.2,1.0,2.5,0.12,inf,0.75


In [8]:
# Plain-text view restricted to the core rule columns.
print(
    rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]
)

      antecedents   consequents  support  confidence      lift
0           (Red)       (White)      0.4    0.666667  0.952381
1           (Red)        (Blue)      0.4    0.666667  1.111111
2          (Blue)         (Red)      0.4    0.666667  1.111111
3         (Green)         (Red)      0.2    1.000000  1.666667
4          (Blue)       (White)      0.4    0.666667  0.952381
5        (Orange)       (White)      0.2    1.000000  1.428571
6         (Green)       (White)      0.2    1.000000  1.428571
7    (Green, Red)       (White)      0.2    1.000000  1.428571
8  (Green, White)         (Red)      0.2    1.000000  1.666667
9         (Green)  (Red, White)      0.2    1.000000  2.500000


In [9]:
# First six rules after ordering by lift, highest first.
rules.sort_values(by='lift', ascending=False).iloc[:6]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
9,(Green),"(Red, White)",0.2,0.4,0.2,1.0,2.5,0.12,inf,0.75
3,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
8,"(Green, White)",(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
5,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
6,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
7,"(Green, Red)",(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375


In [12]:
# Order every rule by lift, then by confidence, both descending.
rule_df = rules.sort_values(
    by=['lift', 'confidence'],
    ascending=[False, False],
)
rule_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
9,(Green),"(Red, White)",0.2,0.4,0.2,1.0,2.5,0.12,inf,0.75
3,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
8,"(Green, White)",(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
5,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
6,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
7,"(Green, Red)",(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
1,(Red),(Blue),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
2,(Blue),(Red),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
0,(Red),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9,-0.111111
4,(Blue),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9,-0.111111


In [13]:
# Plain-text view of the sorted rules, restricted to the core columns.
print(
    rule_df.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]
)

      antecedents   consequents  support  confidence      lift
9         (Green)  (Red, White)      0.2    1.000000  2.500000
3         (Green)         (Red)      0.2    1.000000  1.666667
8  (Green, White)         (Red)      0.2    1.000000  1.666667
5        (Orange)       (White)      0.2    1.000000  1.428571
6         (Green)       (White)      0.2    1.000000  1.428571
7    (Green, Red)       (White)      0.2    1.000000  1.428571
1           (Red)        (Blue)      0.4    0.666667  1.111111
2          (Blue)         (Red)      0.4    0.666667  1.111111
0           (Red)       (White)      0.4    0.666667  0.952381
4          (Blue)       (White)      0.4    0.666667  0.952381


In [15]:
# Keep only the rules whose lift is strictly greater than 1.
relv_df = rule_df.loc[rule_df['lift'].gt(1)]
relv_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
9,(Green),"(Red, White)",0.2,0.4,0.2,1.0,2.5,0.12,inf,0.75
3,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
8,"(Green, White)",(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
5,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
6,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
7,"(Green, Red)",(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
1,(Red),(Blue),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
2,(Blue),(Red),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25


In [16]:
# Plain-text view of the lift > 1 rules, restricted to the core columns.
print(
    relv_df.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]
)

      antecedents   consequents  support  confidence      lift
9         (Green)  (Red, White)      0.2    1.000000  2.500000
3         (Green)         (Red)      0.2    1.000000  1.666667
8  (Green, White)         (Red)      0.2    1.000000  1.666667
5        (Orange)       (White)      0.2    1.000000  1.428571
6         (Green)       (White)      0.2    1.000000  1.428571
7    (Green, Red)       (White)      0.2    1.000000  1.428571
1           (Red)        (Blue)      0.4    0.666667  1.111111
2          (Blue)         (Red)      0.4    0.666667  1.111111
