# Heart Disease Classification

In this notebook, we will look at the inference part of the heart disease classification solution.

In [1]:
# Import the Modules

import os

import joblib
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score

## Get Inference Data

In [8]:
# Select the project root directory so the relative data/model paths below resolve.
# The location can be overridden with the HEART_PROJECT_DIR environment variable, which
# keeps the notebook from being tied to a single machine; when the variable is not set,
# the original hard-coded path is used unchanged.
PROJECT_DIR = os.environ.get(
    'HEART_PROJECT_DIR',
    'd:\\Niranjan\\Personal Docs\\UpGrad\\Deployment\\Docker+FastAPI\\Heart Disease Prediction\\Niranjan_Heart_dieases_calssification\\',
)
os.chdir(PROJECT_DIR)

In [20]:
# in real-time use cases, this method should be replaced with live flowing data
def get_inference_data(file_path, frac=0.1, random_state=2, target_col='target'):
    """Load a CSV file and return a reproducible random sample split into features and labels.

    Args:
        file_path: Path of the CSV file to read.
        frac: Fraction of the de-duplicated rows to sample (default 0.1).
        random_state: Seed passed to ``DataFrame.sample`` so the sample is repeatable
            (default 2).
        target_col: Name of the label column (default ``'target'``).

    Returns:
        A ``(features, labels)`` tuple: the sampled rows without ``target_col`` as a
        DataFrame, and the matching ``target_col`` values as a Series. Both keep the
        row index of the source file.

    Raises:
        KeyError: If ``target_col`` is not a column of the file.
    """
    # Drop exact duplicate rows without mutating in place; the previous
    # `data.duplicated().any()` statement computed a value that was never used.
    data = pd.read_csv(file_path).drop_duplicates()
    inference_df = data.sample(frac=frac, random_state=random_state)
    return inference_df.drop(columns=target_col), inference_df[target_col]

# Location of the CSV file, relative to the current working directory.
PATH = r"Data/heart.csv"
# Sampled feature rows and their matching target values.
inference_data, labels = get_inference_data(PATH)

In [21]:
inference_data.columns

Index(['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral',
       'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope',
       'num_major_vessels', 'thal'],
      dtype='object')

## Apply Same Pre-Processing Steps

In [57]:
# apply same pre-processing and feature engineering techniques as applied during the training process
def encode_features(df, features, expected_columns=None):
    """One-hot encode ``features`` and align the result to a fixed column layout.

    A small inference batch may not contain every category, so the dummy columns
    produced by ``pd.get_dummies`` can differ from what the model expects. The result
    is therefore re-indexed to ``expected_columns`` to prevent a dimension mismatch
    at prediction time.

    Args:
        df: DataFrame holding the raw inference features.
        features: Names of the categorical columns to one-hot encode.
        expected_columns: Ordered column names of the returned frame. Defaults to the
            19 columns hard-coded for this notebook's model.

    Returns:
        A new DataFrame with exactly ``expected_columns``, in that order, indexed like
        ``df``. Expected columns absent after encoding are filled with 0, columns not
        in ``expected_columns`` are dropped, and remaining missing values are replaced
        with 0.

    Raises:
        KeyError: If a name in ``features`` is not a column of ``df``.
    """
    if expected_columns is None:
        expected_columns = [
            'age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar',
            'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_1',
            'thal_2', 'thal_3', 'slope_1', 'slope_2', 'chest_pain_type_1',
            'chest_pain_type_2', 'chest_pain_type_3', 'restecg_1', 'restecg_2',
        ]

    dummies = pd.get_dummies(data=df, columns=features, dtype=int)

    # reindex adds absent columns as integer zeros (instead of object-dtype NaN that
    # must be filled afterwards), drops unexpected columns and fixes the column order.
    aligned = dummies.reindex(columns=list(expected_columns), fill_value=0)

    # Missing values inside existing columns are still replaced by 0, as before.
    return aligned.fillna(0)

def normalize_data(df, features, scaler=None):
    """Scale the ``features`` columns of ``df`` and return the result as a new DataFrame.

    Args:
        df: DataFrame containing the columns to scale.
        features: Names of the numeric columns to scale.
        scaler: Optional already-fitted scaler exposing ``transform``. When given, it
            is applied as-is so inference uses the same scaling as training. When
            omitted, a new ``MinMaxScaler`` is fitted on ``df`` itself (the original
            behaviour).

    Returns:
        A copy of ``df`` whose ``features`` columns hold the scaled values; the input
        frame is left unmodified.
    """
    # Work on a copy so the caller's frame is not silently changed.
    normalized = df.copy()

    # Nothing to scale; also avoids fitting a scaler on zero columns.
    if len(features) == 0:
        return normalized

    if scaler is None:
        # NOTE(review): fitting on the inference batch makes the scaled values depend
        # on which rows happen to be in the batch. Prefer passing the scaler that was
        # fitted during training via `scaler=`.
        normalized[features] = preprocessing.MinMaxScaler().fit_transform(normalized[features])
    else:
        normalized[features] = scaler.transform(normalized[features])
    return normalized

def apply_pre_processing(data, **kwargs):
    """Run the two pre-processing stages on ``data``: encoding, then scaling.

    Keyword Args:
        features_encoding: Column names handed to ``encode_features``.
        features_scaling: Column names handed to ``normalize_data``.

    Returns:
        The frame returned by ``normalize_data`` for the encoded data.

    Raises:
        KeyError: If either keyword argument is missing.
    """
    one_hot = encode_features(data, kwargs["features_encoding"])
    return normalize_data(one_hot, kwargs["features_scaling"])
       


In [63]:
# Column groups consumed by the pre-processing pipeline:
#   features_encoding -> categorical columns that are one-hot encoded
#   features_scaling  -> numeric columns that are min-max scaled
features_encoding = ['thal', 'slope', 'chest_pain_type', 'restecg']
features_scaling = [
    'age',
    'resting_bp',
    'cholestoral',
    'max_hr',
    'oldpeak',
    'num_major_vessels',
]

processed_inference_data = apply_pre_processing(
    inference_data,
    features_encoding=features_encoding,
    features_scaling=features_scaling,
)
# Preview the first rows of the processed frame.
processed_inference_data.head()

Unnamed: 0,age,sex,resting_bp,cholestoral,fasting_blood_sugar,max_hr,exang,oldpeak,num_major_vessels,thal_1,thal_2,thal_3,slope_1,slope_2,chest_pain_type_1,chest_pain_type_2,chest_pain_type_3,restecg_1,restecg_2
99,0.514286,1,0.428571,0.39548,1,0.902174,0,0.0,1.0,0,1,0,0,1,0,1,0,0,0
296,0.8,0,0.342857,0.118644,0,0.5,1,0.0,0.0,0,1,0,1,0,0,0,0,1,0
89,0.657143,0,0.0,0.40678,0,0.347826,0,0.344828,0.0,0,1,0,1,0,0,0,0,0,0
30,0.171429,0,0.071429,0.124294,0,0.847826,0,0.0,0.333333,0,1,0,0,1,1,0,0,1,0
297,0.685714,1,0.914286,0.0,1,0.0,0,0.344828,0.666667,1,0,0,1,0,0,0,0,0,0


## Load Saved Model

In [64]:
# Load the previously saved model from the models/ directory (relative to the working directory).
# NOTE(review): joblib artifacts are pickle-based, so only load files that come from a trusted source.
model = joblib.load(r"models/Niranjan_Model_01_adaboost.joblib")
# Bare expression: displays the loaded object as the cell output.
model

## Prediction on Inference data

In [66]:
# Predict a label for every pre-processed inference row with the loaded model.
labels_pred = model.predict(processed_inference_data)
# Bare expression: displays the predictions as the cell output.
labels_pred

## Score Checking on Prediction

In [67]:
# Fraction of inference rows whose predicted label matches the true label.
# accuracy_score is imported with the other dependencies in the import cell at the top
# of the notebook, so every dependency is visible in one place.
accuracy_score(labels, labels_pred)

0.9333333333333333