In [None]:
# 1. Import Necessary Libraries
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from google.colab import drive

In [None]:
# 2. Mount Google Drive & Load Dataset
# Attach Google Drive at a named mount point so the dataset CSV can be read
# through the regular filesystem.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Update path according to your Drive
# Location of the heart-disease CSV inside the mounted Drive folder.
DATASET_CSV_PATH = '/content/drive/MyDrive/UITS/Data Mining Lab/Project 2/heart_disease.csv'
df = pd.read_csv(DATASET_CSV_PATH)

# Preview the first ten rows of the loaded frame.
df.head(n=10)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,high,1,3,high,medium,1,0,medium,0,high,0,0,1,1
1,low,1,2,medium,medium,0,1,high,0,high,0,0,2,1
2,low,0,1,medium,low,0,0,high,0,high,2,0,2,1
3,medium,1,1,low,medium,0,1,high,0,medium,2,0,2,1
4,medium,0,0,low,high,0,1,high,1,medium,2,0,2,1
5,medium,1,0,high,low,0,1,medium,0,medium,1,0,1,1
6,medium,0,1,high,high,0,0,medium,0,medium,1,0,2,1
7,low,1,1,low,high,0,1,high,0,low,2,0,3,1
8,medium,1,2,high,low,1,1,high,0,medium,2,0,3,1
9,medium,1,2,high,low,0,1,high,0,high,2,0,2,1


In [None]:
# 3. Preprocess the Heart Disease Dataset for Association Rule Mining
# List the distinct values of every column to confirm each one is categorical
# (labels such as 'low'/'medium'/'high' or small integer codes) before encoding.
for col in df.columns:
    print(f"{col} : {df[col].unique()}")

# One-hot encoding: every column is expanded into one indicator column per
# distinct value (e.g. age -> age_high / age_low / age_medium).
# dtype=bool is requested explicitly so the indicators are boolean on every
# pandas version; pandas < 2.0 defaults to uint8, which mlxtend's apriori
# only accepts with a non-bool warning.
df_encoded = pd.get_dummies(df, columns=df.columns, dtype=bool)
df_encoded.head()

age : ['high' 'low' 'medium']
sex : [1 0]
cp : [3 2 1 0]
trestbps : ['high' 'medium' 'low']
chol : ['medium' 'low' 'high']
fbs : [1 0]
restecg : [0 1 2]
thalach : ['medium' 'high' 'low']
exang : [0 1]
oldpeak : ['high' 'medium' 'low']
slope : [0 2 1]
ca : [0 2 1 3 4]
thal : [1 2 3 0]
target : [1 0]


Unnamed: 0,age_high,age_low,age_medium,sex_0,sex_1,cp_0,cp_1,cp_2,cp_3,trestbps_high,...,ca_1,ca_2,ca_3,ca_4,thal_0,thal_1,thal_2,thal_3,target_0,target_1
0,True,False,False,False,True,False,False,False,True,True,...,False,False,False,False,False,True,False,False,False,True
1,False,True,False,False,True,False,False,True,False,False,...,False,False,False,False,False,False,True,False,False,True
2,False,True,False,True,False,False,True,False,False,False,...,False,False,False,False,False,False,True,False,False,True
3,False,False,True,False,True,False,True,False,False,False,...,False,False,False,False,False,False,True,False,False,True
4,False,False,True,True,False,True,False,False,False,False,...,False,False,False,False,False,False,True,False,False,True


In [None]:
# 4. Frequent Itemset Generation Using Apriori Algorithm
# Mining options gathered in one place: a 0.3 minimum support threshold, and
# itemsets reported with the encoded column names (e.g. age_high).
apriori_options = {"min_support": 0.3, "use_colnames": True}
frequent_itemsets = apriori(df_encoded, **apriori_options)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.343234,(age_high)
1,0.313531,(age_low)
2,0.343234,(age_medium)
3,0.316832,(sex_0)
4,0.683168,(sex_1)
...,...,...
88,0.376238,"(target_1, exang_0, thal_2)"
89,0.336634,"(ca_0, target_1, thal_2)"
90,0.323432,"(fbs_0, target_1, exang_0, ca_0)"
91,0.330033,"(fbs_0, target_1, exang_0, thal_2)"


In [None]:
# 5. Generate Association Rules
# Derive rules with a confidence threshold of 0.7, then rank them from
# strongest to weakest by confidence, breaking ties on lift and then support.
ranking_keys = ['confidence', 'lift', 'support']
display_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

rules = (
    association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
    .sort_values(by=ranking_keys, ascending=False)
)

# Show the ten top-ranked rules, restricted to the columns of interest.
rules.loc[:, display_columns].head(10)

Unnamed: 0,antecedents,consequents,support,confidence,lift
8,(trestbps_low),(fbs_0),0.313531,0.940594,1.104651
55,"(thal_2, ca_0)",(fbs_0),0.343234,0.912281,1.071399
95,"(target_1, thal_2, ca_0)",(fbs_0),0.30363,0.901961,1.05928
76,"(thal_2, ca_0)",(target_1),0.336634,0.894737,1.643062
45,"(exang_0, ca_0)",(fbs_0),0.386139,0.89313,1.048908
62,"(slope_2, thal_2)",(exang_0),0.30033,0.892157,1.325115
15,(ca_0),(fbs_0),0.511551,0.885714,1.040199
93,"(fbs_0, thal_2, ca_0)",(target_1),0.30363,0.884615,1.624476
58,"(target_1, ca_0)",(fbs_0),0.379538,0.884615,1.038909
16,(thal_2),(fbs_0),0.481848,0.879518,1.032922


In [None]:
# 6. Select One Rule and Explain
# `rules` is already sorted, so position 0 holds the top-ranked rule.
first_rule = rules.iloc[0]

# Assemble the report line by line and emit it with a single print call.
report_lines = [
    "Selected Rule:",
    f"Antecedents: {first_rule['antecedents']}",
    f"Consequents: {first_rule['consequents']}",
    f"Support: {first_rule['support']:.2f}",
    f"Confidence: {first_rule['confidence']:.2f}",
    f"Lift: {first_rule['lift']:.2f}",
]
print("\n".join(report_lines))

Selected Rule:
Antecedents: frozenset({'trestbps_low'})
Consequents: frozenset({'fbs_0'})
Support: 0.31
Confidence: 0.94
Lift: 1.10
