# **Heart Disease Prediction (Classification)**

# Importing Libraries and Dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
%matplotlib inline


import os
# List every file available under the Kaggle input directory.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Load the heart-disease table and preview its first rows.
DATA_PATH = '/kaggle/input/heart-disease/heart.csv'
df = pd.read_csv(DATA_PATH)
df.head()

# Exploratory Data Analysis

In [None]:
# Summary statistics (count, mean, std, min/max, quartiles) of the numeric columns.
df.describe()

In [None]:
# Column dtypes and non-null counts; used here to check for missing values.
df.info()

So, we can see that there are no null values in our dataset.

### Number of people having Heart disease

In [None]:
# Class balance of the target column.
# The column is passed by name through the `x` keyword: seaborn >= 0.12 takes
# `data` as its first positional argument, so a positional Series would clash
# with `data=df`.
sns.countplot(data=df, x='HeartDisease')

### Number of Males and Females having Heart disease

In [None]:
# Heart-disease counts split by sex.
# Columns are referenced by name via keywords: seaborn >= 0.12 takes `data` as
# its first positional argument, so a positional Series would clash with
# `data=df`.
sns.countplot(data=df, x='HeartDisease', hue='Sex')

### Number of people in each age group

In [None]:
plt.figure(figsize=(7, 7))
# Histogram of Age in 15 bins. `histplot` is the maintained replacement for
# the deprecated `distplot(..., kde=False)`.
sns.histplot(data=df, x='Age', bins=15)

### Pairplot

In [None]:
# Pairwise relationships between the columns of the frame.
sns.pairplot(data=df)

### Age vs ExerciseAngina

In [None]:
# Bar height is seaborn's default estimate (the mean) of Age for each
# ExerciseAngina group.
sns.barplot(data=df, x='ExerciseAngina', y='Age')

### Boxplot of Age percentiles by Sex

In [None]:
# Distribution of Age (median, quartiles, outliers) for each Sex category.
sns.boxplot(data=df, x='Sex', y='Age')

### Correlation between variables

In [None]:
# Correlate only the numeric/boolean columns: at this point the frame still
# holds string-valued categorical columns (they are encoded further down),
# and DataFrame.corr() raises on non-numeric data in pandas >= 2.0.
numeric_df = df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_df.corr(), annot=True, cmap='viridis')

### People with fasting blood sugar level and heart disease

In [None]:
# Heart-disease counts split by the FastingBS column.
# Columns are referenced by name via keywords: seaborn >= 0.12 takes `data` as
# its first positional argument, so a positional Series would clash with
# `data=df`.
sns.countplot(data=df, x='HeartDisease', hue='FastingBS')

# Feature Engineering

In [None]:
# Preview the rows before the categorical columns are encoded.
df.head()

In [None]:
def edit(col):
    """Encode a Sex value as an integer: 'M' -> 0, any other value -> 1."""
    return 0 if col == 'M' else 1
    

# Replace the Sex labels with their integer codes.
# NOTE: not idempotent - re-running this cell on the already-encoded column
# maps every value to 1, because no value equals 'M' any more.
df['Sex'] = df['Sex'].map(edit)
df.head()

In [None]:
# Inspect the distinct ChestPainType values before encoding them.
df['ChestPainType'].unique()

#### As there are only 4 categories, I am going to write a function to convert categorical values to numerical values

In [None]:
def convert(col):
    """Encode a ChestPainType label as an integer.

    'ATA' -> 0, 'NAP' -> 1, 'ASY' -> 2; every other value -> 3.
    """
    for code, label in enumerate(('ATA', 'NAP', 'ASY')):
        if col == label:
            return code
    return 3

# Replace the ChestPainType labels with their integer codes.
# NOTE: not idempotent - re-running this cell on the already-encoded column
# maps every value to 3 (the fallback branch of `convert`).
df['ChestPainType'] = df['ChestPainType'].map(convert)
df.head()

In [None]:
# Inspect the distinct RestingECG values before encoding them.
df['RestingECG'].unique()

In [None]:
def change(col):
    """Encode a RestingECG label as an integer.

    'Normal' -> 0, 'ST' -> 1; every other value -> 2.
    """
    for code, label in enumerate(('Normal', 'ST')):
        if col == label:
            return code
    return 2
    
# Replace the RestingECG labels with their integer codes.
# NOTE: not idempotent - re-running this cell on the already-encoded column
# maps every value to 2 (the fallback branch of `change`).
df['RestingECG'] = df['RestingECG'].map(change)
df.head()

In [None]:
# Inspect the distinct ExerciseAngina values before encoding them.
df['ExerciseAngina'].unique()

In [None]:
# One-hot encode ExerciseAngina and keep the single indicator column left
# after `drop_first=True`.
# get_dummies returns a DataFrame (bool dtype on pandas >= 2.0), so squeeze it
# to a Series and cast to int: the column is then a plain 0/1 integer feature
# like the other encoded columns, whatever the pandas version.
df['ExerciseAngina'] = (
    pd.get_dummies(df['ExerciseAngina'], drop_first=True)
    .squeeze(axis=1)
    .astype(int)
)
df.head()

In [None]:
# Inspect the distinct ST_Slope values before encoding them.
df['ST_Slope'].unique()

In [None]:
def slope_change(col):
    """Encode an ST_Slope label as an integer.

    'Up' -> 0, 'Flat' -> 1; every other value -> 2.
    """
    for code, label in enumerate(('Up', 'Flat')):
        if col == label:
            return code
    return 2
    
# Replace the ST_Slope labels with their integer codes.
# NOTE: not idempotent - re-running this cell on the already-encoded column
# maps every value to 2 (the fallback branch of `slope_change`).
df['ST_Slope'] = df['ST_Slope'].map(slope_change)
df.head()

In [None]:
# Re-check the column dtypes now that the categorical columns have been encoded.
df.info()

### Now our dataset is ready for modeling

# Training Model

In [None]:
# Feature matrix: every column except the last one, as a NumPy array.
X = df.iloc[:, :-1].to_numpy()
print(X)

In [None]:
# Target vector: the last column of the frame, as a NumPy array.
y = df.iloc[:, -1].to_numpy()
print(y)

### Splitting into Training and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics (mean / std) from the training set only, then
# apply that same transformation to the test set. Calling fit_transform on the
# test set would refit the scaler on test data, which leaks test information
# and puts the two sets on different scales.
# NOTE: this cell overwrites X_train / X_test, so re-running it without
# re-running the split cell rescales already-scaled data.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

### Training the Kernel SVM model on the Training set

In [None]:
from sklearn.svm import SVC

# Baseline RBF-kernel SVM with the library's default C and gamma.
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

### Predicting the Test set results

In [None]:
y_pred = classifier.predict(X_test)
# Print predictions (left column) side by side with the true labels (right column).
print(np.column_stack((y_pred, y_test)))

### Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix of the baseline model on the held-out test set
# (scikit-learn convention: rows are true classes, columns are predictions).
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Fraction of test samples classified correctly (shown as the cell output).
accuracy_score(y_test, y_pred)

### Applying K-Fold Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the classifier on the training set.
# NOTE(review): X_train was standardised before this call, so each fold's
# validation part has already influenced the scaling statistics.
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)
mean_pct = accuracies.mean() * 100
std_pct = accuracies.std() * 100
print('Accuracy: {:.2f} %'.format(mean_pct))
print('Standard deviation: {:.2f} %'.format(std_pct))

### Grid Search CV to find the best parameters

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over C and gamma on a log-spaced grid (10^-3 .. 10^5 in
# half-decade steps), scored with 10-fold cross-validation.
# `degree` is deliberately not searched: SVC only uses it for the 'poly'
# kernel, so with kernel='rbf' it would multiply the number of fits without
# changing any score.
param_grid = {'C': np.logspace(-3, 5, 17),
              'gamma': np.logspace(-3, 5, 17)}
# refit=True retrains the best configuration on the whole training set.
grid = GridSearchCV(classifier, param_grid, refit=True, verbose=3, cv=10)
grid.fit(X_train, y_train)

In [None]:
# Best hyper-parameter combination found by the grid search
print(grid.best_params_)
# Estimator refitted on the full training set with those hyper-parameters
print(grid.best_estimator_)

### Training the model with the best parameters

In [None]:
from sklearn.svm import SVC

# Build the final model from the hyper-parameters the grid search selected,
# instead of copying them in by hand, so this cell stays in sync with the
# search whenever the data or the grid changes.
classifier = SVC(kernel='rbf', random_state=0, **grid.best_params_)
classifier.fit(X_train, y_train)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Evaluate the retrained classifier on the held-out test set.
y_pred = classifier.predict(X_test)
# Confusion matrix (scikit-learn convention: rows are true classes, columns are predictions).
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Fraction of test samples classified correctly (shown as the cell output).
accuracy_score(y_test, y_pred)

## Our Model is Ready