In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
import yaml

### General understanding

In [2]:
# Raise pandas' display limits so wide and long frames are shown without truncation.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', 500)

In [3]:
# Load the raw dataset. The path uses a forward slash because that form
# resolves on Windows as well as on POSIX systems, whereas a backslash
# separator only works on Windows.
data = pd.read_csv("data/main.csv")
print(data.shape)
data.head(2)

(279730, 32)


Unnamed: 0,Age,Gender,Cholesterol,BloodPressure,HeartRate,BMI,Smoker,Diabetes,Hypertension,FamilyHistory,PhysicalActivity,AlcoholConsumption,Diet,StressLevel,Ethnicity,Income,EducationLevel,Medication,ChestPainType,ECGResults,MaxHeartRate,ST_Depression,ExerciseInducedAngina,Slope,NumberOfMajorVessels,Thalassemia,PreviousHeartAttack,StrokeHistory,Residence,EmploymentStatus,MaritalStatus,Outcome
0,49,Female,163,158,116,31.2,0,1,0,0,4,1,Unhealthy,5,White,132600,High School,No,Typical,ST-T abnormality,199,2.24,Yes,Flat,3,Fixed defect,0,1,Rural,Employed,Widowed,Heart Attack
1,69,Male,274,160,88,20.7,0,1,0,1,6,0,Moderate,1,Asian,179223,Postgraduate,Yes,Non-anginal,Normal,122,2.76,No,Downsloping,1,Fixed defect,0,0,Suburban,Unemployed,Divorced,Heart Attack


In [4]:
# Row count per class of the target column.
data["Outcome"].value_counts()

Outcome
No Heart Attack    139993
Heart Attack       139737
Name: count, dtype: int64

In [5]:
# Column dtypes and non-null counts of the raw frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 279730 entries, 0 to 279729
Data columns (total 32 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   Age                    279730 non-null  int64  
 1   Gender                 279730 non-null  object 
 2   Cholesterol            279730 non-null  int64  
 3   BloodPressure          279730 non-null  int64  
 4   HeartRate              279730 non-null  int64  
 5   BMI                    279730 non-null  float64
 6   Smoker                 279730 non-null  int64  
 7   Diabetes               279730 non-null  int64  
 8   Hypertension           279730 non-null  int64  
 9   FamilyHistory          279730 non-null  int64  
 10  PhysicalActivity       279730 non-null  int64  
 11  AlcoholConsumption     279730 non-null  int64  
 12  Diet                   279730 non-null  object 
 13  StressLevel            279730 non-null  int64  
 14  Ethnicity              279730 non-nu

In [6]:
# Missing-value count per column, returned as a frame of (column name, count).
data.isna().sum().reset_index()

Unnamed: 0,index,0
0,Age,0
1,Gender,0
2,Cholesterol,0
3,BloodPressure,0
4,HeartRate,0
5,BMI,0
6,Smoker,0
7,Diabetes,0
8,Hypertension,0
9,FamilyHistory,0


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

Unnamed: 0,Age,Cholesterol,BloodPressure,HeartRate,BMI,Smoker,Diabetes,Hypertension,FamilyHistory,PhysicalActivity,AlcoholConsumption,StressLevel,Income,MaxHeartRate,ST_Depression,NumberOfMajorVessels,PreviousHeartAttack,StrokeHistory
count,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0,279730.0
mean,56.980985,199.487338,134.532821,89.474386,29.009728,0.49867,0.502084,0.501094,0.499918,3.000383,1.99877,4.99842,109978.796704,149.474318,2.503019,1.498852,0.496372,0.501712
std,15.865561,57.660891,25.9802,17.33841,6.344983,0.499999,0.499997,0.5,0.500001,2.002371,1.41554,2.582351,51986.352592,28.857388,1.442849,1.118355,0.499988,0.499998
min,30.0,100.0,90.0,60.0,18.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,20000.0,100.0,0.0,0.0,0.0,0.0
25%,43.0,150.0,112.0,74.0,23.5,0.0,0.0,0.0,0.0,1.0,1.0,3.0,64864.0,124.0,1.26,0.0,0.0,0.0
50%,57.0,199.0,135.0,89.0,29.0,0.0,1.0,1.0,0.0,3.0,2.0,5.0,109963.5,149.0,2.51,1.0,0.0,1.0
75%,71.0,249.0,157.0,105.0,34.5,1.0,1.0,1.0,1.0,5.0,3.0,7.0,154997.0,174.0,3.75,2.0,1.0,1.0
max,84.0,299.0,179.0,119.0,40.0,1.0,1.0,1.0,1.0,6.0,4.0,9.0,199997.0,199.0,5.0,3.0,1.0,1.0


# Find the two-level non-numeric columns and draft an integer encoding for
# each of them. The candidate columns are derived from `data` right here, so
# the cell does not depend on a `cat_col` list that is only defined further
# down the notebook.
non_numeric_cols = data.select_dtypes(exclude="number").columns
cat_col_num = [col for col in non_numeric_cols if data[col].nunique() == 2]
print(len(cat_col_num))
print(cat_col_num)

# Codes follow value_counts() order (most frequent label -> 0), so this draft
# depends on the label frequencies found in the data.
feature_mapping_1 = {
    col: {label: code for code, label in enumerate(data[col].value_counts().keys())}
    for col in cat_col_num
}
print(feature_mapping_1)
        

In [8]:
# Start with an empty column -> {label: code} table; later cells fill it in.
feature_mapping = {}
feature_mapping

{}

In [9]:
# Two-level columns (the target 'Outcome' included): each label becomes 0 or 1.
feature_mapping_1 = {
    'Gender': {'Male': 0, 'Female': 1},
    'Medication': {'Yes': 1, 'No': 0},
    'ExerciseInducedAngina': {'No': 0, 'Yes': 1},
    'Outcome': {'No Heart Attack': 0, 'Heart Attack': 1},
}
feature_mapping.update(feature_mapping_1)
feature_mapping

{'Gender': {'Male': 0, 'Female': 1},
 'Medication': {'Yes': 1, 'No': 0},
 'ExerciseInducedAngina': {'No': 0, 'Yes': 1},
 'Outcome': {'No Heart Attack': 0, 'Heart Attack': 1}}

In [10]:
# Three-level columns: the labels are placed at 0, 0.5 and 1.
feature_mapping_2 = {
    'Diet': {'Unhealthy': 0, 'Moderate': 0.5, 'Healthy': 1},
    'EducationLevel': {'High School': 0, 'College': 0.5, 'Postgraduate': 1},
    'Slope': {'Upsloping': 0, 'Flat': 0.5, 'Downsloping': 1},
}
feature_mapping.update(feature_mapping_2)
feature_mapping

{'Gender': {'Male': 0, 'Female': 1},
 'Medication': {'Yes': 1, 'No': 0},
 'ExerciseInducedAngina': {'No': 0, 'Yes': 1},
 'Outcome': {'No Heart Attack': 0, 'Heart Attack': 1},
 'Diet': {'Unhealthy': 0, 'Moderate': 0.5, 'Healthy': 1},
 'EducationLevel': {'High School': 0, 'College': 0.5, 'Postgraduate': 1},
 'Slope': {'Upsloping': 0, 'Flat': 0.5, 'Downsloping': 1}}

In [11]:
# Columns without a hand-written mapping: Ethnicity, ChestPainType, ECGResults,
# Thalassemia, Residence, EmploymentStatus and MaritalStatus (inspected here).
print(data["MaritalStatus"].value_counts())

MaritalStatus
Single      70093
Widowed     70059
Divorced    69807
Married     69771
Name: count, dtype: int64


In [12]:
# Encode every mapped column of `data` with its {label: code} table.
# Series.map turns any value that is missing from the table into NaN, so a
# plain mapping would silently blank a column when the cell is run a second
# time (the codes are not keys of the table) or when the data contains an
# unexpected label. Both cases are guarded against below.
for column, mapping in feature_mapping.items():
    present = data[column].dropna()
    if present.isin(list(mapping.values())).all():
        continue  # column already holds codes, e.g. the cell was re-run
    unknown = present[~present.isin(list(mapping))].unique()
    if len(unknown) > 0:
        raise ValueError(f"{column}: no code defined for labels {list(unknown)}")
    data[column] = data[column].map(mapping)

In [13]:
# List the numeric columns straight from the dtypes; this avoids computing
# describe()'s summary statistics over the whole frame only to read the
# column labels of the result.
# NOTE(review): the encoded target 'Outcome' is numeric, so it is part of this
# list — exclude it before treating num_col as model features.
num_col = list(data.select_dtypes(include="number").columns)
print(len(num_col))
print(num_col)

25
['Age', 'Gender', 'Cholesterol', 'BloodPressure', 'HeartRate', 'BMI', 'Smoker', 'Diabetes', 'Hypertension', 'FamilyHistory', 'PhysicalActivity', 'AlcoholConsumption', 'Diet', 'StressLevel', 'Income', 'EducationLevel', 'Medication', 'MaxHeartRate', 'ST_Depression', 'ExerciseInducedAngina', 'Slope', 'NumberOfMajorVessels', 'PreviousHeartAttack', 'StrokeHistory', 'Outcome']


In [14]:
# Every column that is not in num_col is treated as categorical, in frame order.
cat_col = list(filter(lambda name: name not in num_col, data.columns))
print(len(cat_col))
print(cat_col)

7
['Ethnicity', 'ChestPainType', 'ECGResults', 'Thalassemia', 'Residence', 'EmploymentStatus', 'MaritalStatus']


In [15]:
# Number of distinct levels in each categorical column.
for column, n_levels in data[cat_col].nunique().items():
    print(column, n_levels)

Ethnicity 5
ChestPainType 4
ECGResults 3
Thalassemia 3
Residence 3
EmploymentStatus 3
MaritalStatus 4


In [16]:
# Show the complete column -> {label: code} table before it is written to features.yaml.
feature_mapping

{'Gender': {'Male': 0, 'Female': 1},
 'Medication': {'Yes': 1, 'No': 0},
 'ExerciseInducedAngina': {'No': 0, 'Yes': 1},
 'Outcome': {'No Heart Attack': 0, 'Heart Attack': 1},
 'Diet': {'Unhealthy': 0, 'Moderate': 0.5, 'Healthy': 1},
 'EducationLevel': {'High School': 0, 'College': 0.5, 'Postgraduate': 1},
 'Slope': {'Upsloping': 0, 'Flat': 0.5, 'Downsloping': 1}}

In [17]:
# Write the label encodings to features.yaml.
with open('features.yaml', mode='w') as mapping_file:
    yaml.dump(feature_mapping, stream=mapping_file)

In [21]:
# Write the numeric / categorical column lists to features2.yaml.
column_groups = {'num_cols': num_col, 'cat_cols': cat_col}
with open('features2.yaml', mode='w') as columns_file:
    yaml.dump(column_groups, stream=columns_file)

In [20]:
# Show the column grouping that is stored in features2.yaml.
{'num_cols': num_col, 'cat_cols': cat_col}

{'num_cols': ['Age',
  'Gender',
  'Cholesterol',
  'BloodPressure',
  'HeartRate',
  'BMI',
  'Smoker',
  'Diabetes',
  'Hypertension',
  'FamilyHistory',
  'PhysicalActivity',
  'AlcoholConsumption',
  'Diet',
  'StressLevel',
  'Income',
  'EducationLevel',
  'Medication',
  'MaxHeartRate',
  'ST_Depression',
  'ExerciseInducedAngina',
  'Slope',
  'NumberOfMajorVessels',
  'PreviousHeartAttack',
  'StrokeHistory',
  'Outcome'],
 'cat_cols': ['Ethnicity',
  'ChestPainType',
  'ECGResults',
  'Thalassemia',
  'Residence',
  'EmploymentStatus',
  'MaritalStatus']}

### Data handling   --main

In [16]:
# Reload the raw CSV so this section starts from the unencoded columns (the
# exploration cells above overwrite columns of `data` in place). The path uses
# a forward slash because that form resolves on Windows as well as on POSIX
# systems, whereas a backslash separator only works on Windows.
data = pd.read_csv("data/main.csv")
print(data.shape)
data.head(2)

(279730, 32)


Unnamed: 0,Age,Gender,Cholesterol,BloodPressure,HeartRate,BMI,Smoker,Diabetes,Hypertension,FamilyHistory,PhysicalActivity,AlcoholConsumption,Diet,StressLevel,Ethnicity,Income,EducationLevel,Medication,ChestPainType,ECGResults,MaxHeartRate,ST_Depression,ExerciseInducedAngina,Slope,NumberOfMajorVessels,Thalassemia,PreviousHeartAttack,StrokeHistory,Residence,EmploymentStatus,MaritalStatus,Outcome
0,49,Female,163,158,116,31.2,0,1,0,0,4,1,Unhealthy,5,White,132600,High School,No,Typical,ST-T abnormality,199,2.24,Yes,Flat,3,Fixed defect,0,1,Rural,Employed,Widowed,Heart Attack
1,69,Male,274,160,88,20.7,0,1,0,1,6,0,Moderate,1,Asian,179223,Postgraduate,Yes,Non-anginal,Normal,122,2.76,No,Downsloping,1,Fixed defect,0,0,Suburban,Unemployed,Divorced,Heart Attack


#### Train-Test split

In [17]:
# Hold out 20% of the rows for testing. Stratifying on the target keeps the
# class proportions the same in both parts; the fixed seed makes the split
# repeatable.
df_train, df_test = train_test_split(
    data,
    test_size=0.2,
    stratify=data['Outcome'],
    random_state=42,
)

#### Pre-processing

In [18]:
# Two-level columns (the target 'Outcome' included): each label becomes 0 or 1.
feature_mapping_1 = {
    'Gender': {'Male': 0, 'Female': 1},
    'Medication': {'Yes': 1, 'No': 0},
    'ExerciseInducedAngina': {'No': 0, 'Yes': 1},
    'Outcome': {'No Heart Attack': 0, 'Heart Attack': 1},
}

# Three-level columns: the labels are placed at 0, 0.5 and 1.
feature_mapping_2 = {
    'Diet': {'Unhealthy': 0, 'Moderate': 0.5, 'Healthy': 1},
    'EducationLevel': {'High School': 0, 'College': 0.5, 'Postgraduate': 1},
    'Slope': {'Upsloping': 0, 'Flat': 0.5, 'Downsloping': 1},
}

# Combined column -> {label: code} table, binary columns first.
feature_mapping = {**feature_mapping_1, **feature_mapping_2}
feature_mapping

{'Gender': {'Male': 0, 'Female': 1},
 'Medication': {'Yes': 1, 'No': 0},
 'ExerciseInducedAngina': {'No': 0, 'Yes': 1},
 'Outcome': {'No Heart Attack': 0, 'Heart Attack': 1},
 'Diet': {'Unhealthy': 0, 'Moderate': 0.5, 'Healthy': 1},
 'EducationLevel': {'High School': 0, 'College': 0.5, 'Postgraduate': 1},
 'Slope': {'Upsloping': 0, 'Flat': 0.5, 'Downsloping': 1}}

In [19]:
# Apply the label encodings to the training split. The result is rebound to
# df_train instead of using inplace=True, so the frame returned by
# train_test_split is not mutated in place. Re-running the cell is harmless:
# values that are not keys of the mapping are left unchanged by replace().
# NOTE(review): only df_train is encoded in this cell — confirm df_test gets
# the same mapping before it is used.
df_train = df_train.replace(feature_mapping)

  df_train.replace(feature_mapping, inplace=True)


In [20]:
# List the numeric columns straight from the dtypes; this avoids computing
# describe()'s summary statistics over the whole training frame only to read
# the column labels of the result.
# NOTE(review): the encoded target 'Outcome' is numeric, so it is part of
# num_col — exclude it before treating num_col as model features.
num_col = list(df_train.select_dtypes(include="number").columns)
print(len(num_col))
print(num_col)

# Every remaining column is treated as categorical, in frame order.
cat_col = [name for name in df_train.columns if name not in num_col]
print(len(cat_col))
print(cat_col)

25
['Age', 'Gender', 'Cholesterol', 'BloodPressure', 'HeartRate', 'BMI', 'Smoker', 'Diabetes', 'Hypertension', 'FamilyHistory', 'PhysicalActivity', 'AlcoholConsumption', 'Diet', 'StressLevel', 'Income', 'EducationLevel', 'Medication', 'MaxHeartRate', 'ST_Depression', 'ExerciseInducedAngina', 'Slope', 'NumberOfMajorVessels', 'PreviousHeartAttack', 'StrokeHistory', 'Outcome']
7
['Ethnicity', 'ChestPainType', 'ECGResults', 'Thalassemia', 'Residence', 'EmploymentStatus', 'MaritalStatus']


In [54]:
# One-hot encode the categorical columns and place them to the right of the
# numeric ones.
encoder = OneHotEncoder()
cat_dense = encoder.fit_transform(df_train[cat_col]).toarray()
cat_frame = pd.DataFrame(cat_dense, columns=encoder.get_feature_names_out())
num_frame = df_train[num_col].reset_index(drop=True)
# Both parts carry a fresh 0..n-1 index, so concat lines them up row by row.
df_train_ohe = pd.concat([num_frame, cat_frame], axis=1)
df_train_ohe

Unnamed: 0,Age,Gender,Cholesterol,BloodPressure,HeartRate,BMI,Smoker,Diabetes,Hypertension,FamilyHistory,PhysicalActivity,AlcoholConsumption,Diet,StressLevel,Income,EducationLevel,Medication,MaxHeartRate,ST_Depression,ExerciseInducedAngina,Slope,NumberOfMajorVessels,PreviousHeartAttack,StrokeHistory,Outcome,Ethnicity_Asian,Ethnicity_Black,Ethnicity_Hispanic,Ethnicity_Other,Ethnicity_White,ChestPainType_Asymptomatic,ChestPainType_Atypical,ChestPainType_Non-anginal,ChestPainType_Typical,ECGResults_LV hypertrophy,ECGResults_Normal,ECGResults_ST-T abnormality,Thalassemia_Fixed defect,Thalassemia_Normal,Thalassemia_Reversible defect,Residence_Rural,Residence_Suburban,Residence_Urban,EmploymentStatus_Employed,EmploymentStatus_Retired,EmploymentStatus_Unemployed,MaritalStatus_Divorced,MaritalStatus_Married,MaritalStatus_Single,MaritalStatus_Widowed
0,84,1,295,174,63,39.2,0,1,1,1,0,4,0.5,2,99877,1.0,0,189,1.38,1,0.0,2,1,0,0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
1,83,0,151,114,119,32.6,0,1,1,0,1,1,0.0,3,176759,0.0,0,129,2.31,0,0.5,0,1,1,1,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,34,1,123,120,81,25.1,0,1,0,1,2,0,0.5,8,197155,0.5,1,141,0.45,0,0.0,1,0,1,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,72,0,124,125,106,28.5,0,0,0,0,6,4,0.0,9,50324,0.0,0,168,3.34,0,1.0,2,1,0,1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,55,1,137,143,77,18.9,1,0,0,0,1,1,1.0,2,99018,0.5,1,121,3.24,1,0.5,3,1,1,0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223779,78,1,254,172,60,20.2,1,0,1,0,1,2,1.0,7,171657,0.0,0,144,1.24,0,1.0,1,1,1,0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
223780,30,0,134,130,95,31.7,1,0,1,1,6,3,0.0,9,58014,1.0,0,166,1.45,1,0.5,2,0,1,1,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
223781,30,0,199,107,82,20.8,1,1,1,1,6,1,0.5,4,101305,1.0,1,143,3.44,1,0.0,1,0,1,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
223782,78,1,172,139,73,31.8,0,0,1,1,2,4,0.5,5,33670,0.5,0,126,0.31,0,0.5,2,1,0,1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


# One-hot encoding of the same categorical columns via pandas; running this overwrites df_train_ohe.
# NOTE(review): this cell has no execution count or output — presumably kept as an unused alternative to the OneHotEncoder cell; confirm before relying on it.
df_train_ohe = pd.get_dummies(df_train, columns = cat_col, drop_first=False)
df_train_ohe

In [55]:
# Fit a min-max scaler on the one-hot training frame and rescale every column.
# NOTE(review): df_train_ohe still contains the 'Outcome' target column, so the
# target is part of the scaled array — confirm it is split off before modelling.
scaler = MinMaxScaler()
scaler.fit(df_train_ohe)
df_train_scaled = scaler.transform(df_train_ohe)
df_train_scaled

array([[1.        , 1.        , 0.9798995 , ..., 0.        , 1.        ,
        0.        ],
       [0.98148148, 0.        , 0.25628141, ..., 0.        , 0.        ,
        1.        ],
       [0.07407407, 1.        , 0.11557789, ..., 0.        , 0.        ,
        1.        ],
       ...,
       [0.        , 0.        , 0.49748744, ..., 0.        , 0.        ,
        1.        ],
       [0.88888889, 1.        , 0.36180905, ..., 0.        , 0.        ,
        1.        ],
       [0.90740741, 0.        , 0.40703518, ..., 0.        , 0.        ,
        1.        ]], shape=(223784, 50))

In [56]:
# Names of the columns the scaler was fitted on.
features = scaler.get_feature_names_out()
# The original, misspelled name is kept as an alias so existing references keep working.
featuers = features
features

array(['Age', 'Gender', 'Cholesterol', 'BloodPressure', 'HeartRate',
       'BMI', 'Smoker', 'Diabetes', 'Hypertension', 'FamilyHistory',
       'PhysicalActivity', 'AlcoholConsumption', 'Diet', 'StressLevel',
       'Income', 'EducationLevel', 'Medication', 'MaxHeartRate',
       'ST_Depression', 'ExerciseInducedAngina', 'Slope',
       'NumberOfMajorVessels', 'PreviousHeartAttack', 'StrokeHistory',
       'Outcome', 'Ethnicity_Asian', 'Ethnicity_Black',
       'Ethnicity_Hispanic', 'Ethnicity_Other', 'Ethnicity_White',
       'ChestPainType_Asymptomatic', 'ChestPainType_Atypical',
       'ChestPainType_Non-anginal', 'ChestPainType_Typical',
       'ECGResults_LV hypertrophy', 'ECGResults_Normal',
       'ECGResults_ST-T abnormality', 'Thalassemia_Fixed defect',
       'Thalassemia_Normal', 'Thalassemia_Reversible defect',
       'Residence_Rural', 'Residence_Suburban', 'Residence_Urban',
       'EmploymentStatus_Employed', 'EmploymentStatus_Retired',
       'EmploymentStatus_Unemploy

### Experiment 1

##### Preprocessing

In [15]:
# One-hot encode the categorical columns with pandas; the result is only displayed, not stored.
# NOTE(review): the output below contains Gender_*, Medication_*, ExerciseInducedAngina_* and Outcome_* dummies, so this ran with a different `cat_col` than the 7-column list printed earlier — confirm which definition this experiment expects.
pd.get_dummies(data=data, columns=cat_col, drop_first=False)

Unnamed: 0,Age,Cholesterol,BloodPressure,HeartRate,BMI,Smoker,Diabetes,Hypertension,FamilyHistory,PhysicalActivity,AlcoholConsumption,Diet,StressLevel,Income,EducationLevel,MaxHeartRate,ST_Depression,Slope,NumberOfMajorVessels,PreviousHeartAttack,StrokeHistory,Gender_Female,Gender_Male,Ethnicity_Asian,Ethnicity_Black,Ethnicity_Hispanic,Ethnicity_Other,Ethnicity_White,Medication_No,Medication_Yes,ChestPainType_Asymptomatic,ChestPainType_Atypical,ChestPainType_Non-anginal,ChestPainType_Typical,ECGResults_LV hypertrophy,ECGResults_Normal,ECGResults_ST-T abnormality,ExerciseInducedAngina_No,ExerciseInducedAngina_Yes,Thalassemia_Fixed defect,Thalassemia_Normal,Thalassemia_Reversible defect,Residence_Rural,Residence_Suburban,Residence_Urban,EmploymentStatus_Employed,EmploymentStatus_Retired,EmploymentStatus_Unemployed,MaritalStatus_Divorced,MaritalStatus_Married,MaritalStatus_Single,MaritalStatus_Widowed,Outcome_Heart Attack,Outcome_No Heart Attack
0,82,107,157,93,28.7,0,1,1,0,3,4,0.0,1,123643,0.0,150,0.38,0.5,3,0,1,False,True,True,False,False,False,False,True,False,True,False,False,False,True,False,False,True,False,True,False,False,False,True,False,False,True,False,False,False,True,False,True,False
1,30,179,123,87,27.3,1,1,1,1,3,3,0.5,9,86850,0.5,131,1.59,0.0,0,0,1,False,True,False,False,False,True,False,False,True,False,False,False,True,False,False,True,False,True,False,False,True,True,False,False,False,False,True,False,True,False,False,True,False
2,78,225,142,60,25.0,1,1,0,1,3,0,1.0,6,91219,1.0,115,2.66,0.5,1,0,0,True,False,False,False,False,True,False,True,False,False,False,True,False,True,False,False,True,False,False,False,True,False,False,True,False,False,True,False,False,False,True,False,True
3,78,284,160,115,18.4,0,1,1,1,4,0,1.0,8,40831,0.0,142,1.40,1.0,2,1,0,False,True,False,True,False,False,False,False,True,False,False,False,True,False,False,True,True,False,True,False,False,False,False,True,False,True,False,False,False,True,False,False,True
4,75,120,117,73,30.5,1,1,1,0,3,3,1.0,2,162215,1.0,188,3.74,0.5,2,1,0,True,False,True,False,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,True,True,False,False,False,False,True,False,False,False,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261076,57,238,123,115,23.1,1,1,1,0,0,2,1.0,6,68166,0.0,152,1.31,1.0,3,1,1,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,True,False,True,False,False,True,True,False,False,True,False,False,False,False,False,True,True,False
261077,71,178,98,94,29.2,0,0,1,1,1,0,1.0,8,29019,0.5,101,2.26,0.5,2,1,1,True,False,False,False,True,False,False,False,True,False,False,False,True,True,False,False,True,False,True,False,False,False,True,False,False,False,True,True,False,False,False,True,False
261078,59,252,145,107,28.5,0,0,1,0,3,4,0.0,9,44775,0.5,198,4.33,0.0,2,1,1,True,False,False,False,False,True,False,False,True,False,False,False,True,False,False,True,False,True,False,False,True,True,False,False,True,False,False,False,False,True,False,False,True
261079,46,270,131,66,36.2,0,0,1,1,1,0,1.0,5,153284,0.0,168,1.70,0.0,3,0,1,False,True,False,True,False,False,False,True,False,True,False,False,False,False,False,True,True,False,True,False,False,False,True,False,False,False,True,False,True,False,False,True,False


In [12]:
# Print the level frequencies of every categorical column with more than two levels.
multi_level_cols = [name for name in cat_col if data[name].nunique() > 2]
for name in multi_level_cols:
    print(data[name].value_counts())

Diet
Moderate     87524
Healthy      86962
Unhealthy    86595
Name: count, dtype: int64
Ethnicity
Asian       52520
White       52475
Black       52223
Hispanic    52019
Other       51844
Name: count, dtype: int64
EducationLevel
High School     87355
Postgraduate    86980
College         86746
Name: count, dtype: int64
ChestPainType
Asymptomatic    65695
Atypical        65260
Non-anginal     65165
Typical         64961
Name: count, dtype: int64
ECGResults
LV hypertrophy      87265
ST-T abnormality    86931
Normal              86885
Name: count, dtype: int64
Slope
Flat           87172
Downsloping    87020
Upsloping      86889
Name: count, dtype: int64
Thalassemia
Normal               87360
Fixed defect         87064
Reversible defect    86657
Name: count, dtype: int64
Residence
Suburban    87267
Rural       86982
Urban       86832
Name: count, dtype: int64
EmploymentStatus
Retired       87209
Unemployed    87170
Employed      86702
Name: count, dtype: int64
MaritalStatus
Single      654