Step 1 - Load the data

In [1]:
import pandas as pd

# Read the three heart attack dataset variants in one pass: the raw archive
# file plus two pre-filtered copies (presumably with outliers removed via
# z-scores and via KNN respectively -- inferred from the file names, confirm).
original_df, z_score_df, KNN_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'archive/heart.csv',
        'data/no_z_scores_outliers.csv',
        'data/no_KNN_outliers.csv',
    )
)

# Quick sanity check: first rows of a few columns from the raw data
print(original_df.head()[['age', 'sex', 'cp']])

   age  sex  cp
0   63    1   3
1   37    1   2
2   41    0   1
3   56    1   1
4   57    0   0


Step 2 - Preprocessing

In [2]:
from preprocessing import one_hot_encode

# Columns carried into association rule mining ('output' is presumably the
# target label -- confirm against the dataset description).
columns_to_use = ['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh', 'exng', 'oldpeak', 'slp', 'caa',
                  'thall', 'output']
arm_original_df = original_df[columns_to_use]
arm_z_score_df = z_score_df[columns_to_use]
arm_KNN_df = KNN_df[columns_to_use]

# Numerical columns.
# NOTE(review): these are one-hot encoded on their raw values, so every
# distinct value becomes its own indicator column (e.g. 'oldpeak_3.2').
# Binning them first may yield more useful itemsets -- confirm intent.
numerical_columns = ['age', 'trtbps', 'chol', 'thalachh', 'oldpeak']
# Categorical columns
categorical_columns = ['cp', 'restecg', 'slp', 'caa', 'thall']


def _encode_for_arm(df):
    """One-hot encode the numerical columns of `df`, then its categorical columns.

    Both passes go through the project's `one_hot_encode` helper, in the same
    order for every dataset variant so the resulting frames are comparable.
    """
    numerical_encoded = one_hot_encode(df, numerical_columns)
    return one_hot_encode(numerical_encoded, categorical_columns)


arm_original_df_encoded = _encode_for_arm(arm_original_df)
arm_z_score_df_encoded = _encode_for_arm(arm_z_score_df)
arm_KNN_df_encoded = _encode_for_arm(arm_KNN_df)

# Preview each encoded variant (the third preview previously repeated the
# original frame instead of showing the KNN-filtered one).
print(arm_original_df_encoded.head())
print(arm_z_score_df_encoded.head())
print(arm_KNN_df_encoded.head())

   sex  cp  fbs  restecg  exng  slp  caa  thall  output  age_29  ...  \
0    1   3    1        0     0    0    0      1       1       0  ...   
1    1   2    0        1     0    0    0      2       1       0  ...   
2    0   1    0        0     0    2    0      2       1       0  ...   
3    1   1    0        1     0    2    0      2       1       0  ...   
4    0   0    0        1     1    2    0      2       1       0  ...   

   oldpeak_3.2  oldpeak_3.4  oldpeak_3.5  oldpeak_3.6  oldpeak_3.8  \
0            0            0            0            0            0   
1            0            0            1            0            0   
2            0            0            0            0            0   
3            0            0            0            0            0   
4            0            0            0            0            0   

   oldpeak_4.0  oldpeak_4.2  oldpeak_4.4  oldpeak_5.6  oldpeak_6.2  
0            0            0            0            0            0  
1       

Step 3 - Association Rule Mining (ARM)

In [4]:
from model.ARM import find_association_rules

# Support / confidence thresholds shared by all three runs
min_support = 0.4
min_confidence = 0.6

# Mine association rules from each encoded dataset variant via the project helper
original_resulting_rules = find_association_rules(
    arm_original_df_encoded, min_support, min_confidence
)
z_score_resulting_rules = find_association_rules(
    arm_z_score_df_encoded, min_support, min_confidence
)
KNN_resulting_rules = find_association_rules(
    arm_KNN_df_encoded, min_support, min_confidence
)

# Print the leading rules of every result set under its own heading
for heading, rules in (
    ("ARM for original data:", original_resulting_rules),
    ("ARM for z score data:", z_score_resulting_rules),
    ("ARM for KNN data:", KNN_resulting_rules),
):
    print(heading)
    print(rules.head())

  antecedents consequents  antecedent support  consequent support   support  \
1    (output)     (caa_0)            0.544554            0.577558  0.429043   
3    (output)   (thall_2)            0.544554            0.547855  0.429043   
2   (thall_2)    (output)            0.547855            0.544554  0.429043   
0     (caa_0)    (output)            0.577558            0.544554  0.429043   

   confidence      lift  leverage  conviction  zhangs_metric  
1    0.787879  1.364156  0.114531    1.991513       0.586120  
3    0.787879  1.438116  0.130706    2.131542       0.668896  
2    0.783133  1.438116  0.130706    2.100110       0.673779  
0    0.742857  1.364156  0.114531    1.771177       0.631911  


