In [None]:
import numpy as np
import pandas as pd
from google.colab import files
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score,plot_confusion_matrix,classification_report
from sklearn.model_selection import KFold,GridSearchCV,cross_val_score
from sklearn import preprocessing

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import svm
import xgboost as xgb
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier,RandomForestClassifier
from sklearn.linear_model  import LogisticRegression




# uploaded = files.upload()

In [None]:
# Load the heart-disease data set (CSV expected in the working directory).
df=pd.read_csv('datasets_33180_43520_heart.csv')

# Continuous columns that are rescaled to the [0, 1] range.
NUMERIC_COLS=['age','trestbps','chol','thalach']

x=df[NUMERIC_COLS]
scaler=preprocessing.MinMaxScaler()
# NOTE(review): the scaler is fitted on the full data set before any
# cross-validation split, so fold statistics leak into training folds.
# Consider moving the scaler into a Pipeline if unbiased CV scores matter.
# Build the scaled frame with a flat column index (the original passed a
# nested list, which silently produced a one-level MultiIndex) and reuse the
# source index so the assignment below lines up row by row.
x_normalized=pd.DataFrame(scaler.fit_transform(x),columns=NUMERIC_COLS,index=df.index)
df[NUMERIC_COLS]=x_normalized

# Split the target column off; from here on `df` holds features only.
label=df['target']
df=df.drop('target', axis=1)

# Result table: one row is added per evaluated model.
table=pd.DataFrame(columns=['Method','Training Accuracy','Cross-Val Accuracy','Sensitivity','Specificity'])

# print(df)

In [None]:
# Heatmap

# plt.figure(figsize=(18,15))
# sns.heatmap(df.corr(), robust=True, fmt="f", cmap='RdBu_r', vmin=-1, vmax=1,annot=True)
# cor=df.corr()
# temp=abs(cor['target'])
# print(temp[temp>0.3])



In [None]:
# Grid search for Hyperparameter Tuning 

def gridsearch(clf,param_grid):
  """Tune `clf` with an exhaustive grid search and return the best estimator.

  The search is fitted on the notebook-level feature frame `df` and target
  series `label`, using GridSearchCV's default cross-validation and scoring.

  Parameters
  ----------
  clf : estimator
      Unfitted scikit-learn compatible estimator to tune.
  param_grid : dict
      Mapping of parameter name to the candidate values to try.

  Returns
  -------
  estimator
      `best_estimator_` of the fitted search (also printed).
  """
  grid=GridSearchCV(clf,param_grid)
  # Fit against `label`: the original passed `y`, which is never defined at
  # notebook scope and raised NameError on a fresh kernel.
  grid.fit(df,label)
  print(grid.best_estimator_)
  return grid.best_estimator_

In [None]:
#  Performance Evaluation

def perform(clf,a,df=df,label=label):
  """Fit `clf` and summarise its performance as one result-table row.

  Parameters
  ----------
  clf : estimator
      Classifier to fit; it is fitted in place on `df` / `label`.
  a : str
      Display name stored under the 'Method' key.
  df : DataFrame, optional
      Feature matrix. Defaults to the notebook-level `df` as it existed when
      this function was defined.
  label : Series, optional
      Target values. Defaults to the notebook-level `label` as it existed when
      this function was defined.

  Returns
  -------
  dict
      Keys 'Method', 'Training Accuracy', 'Cross-Val Accuracy' (both in
      percent), 'Sensitivity' and 'Specificity' (both as fractions).
  """
  # Fit on every row, then predict those same rows back.
  clf.fit(df,label)
  fitted_pred=clf.predict(df)

  # Mean accuracy over 10 cross-validation folds, as a percentage.
  fold_scores=cross_val_score(estimator=clf, X=df, y=label, cv=10, scoring='accuracy')
  cv_pct=fold_scores.mean()*100

  # Accuracy on the training data itself (not a held-out test set).
  train_pct=accuracy_score(label,fitted_pred)*100

  # Row 1 of the matrix is treated as the positive class, row 0 as negative.
  cm=confusion_matrix(label,fitted_pred)
  neg_row,pos_row=cm[0],cm[1]
  sensitivity=pos_row[1]/(pos_row[1]+pos_row[0])
  specificity=neg_row[0]/(neg_row[0]+neg_row[1])

  return {
      'Method':a,
      'Training Accuracy':train_pct,
      'Cross-Val Accuracy':cv_pct,
      'Sensitivity':sensitivity,
      'Specificity':specificity,
  }

In [None]:
# SVM

# Candidate hyperparameters for the support vector classifier.
params = {"C":(0.1, 0.5, 1, 2, 5, 10, 20,100), 
          "gamma":(0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1), 
          "kernel":('linear', 'poly', 'rbf')}

clf=gridsearch(svm.SVC(),params)
# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
table=pd.concat([table,pd.DataFrame([perform(clf,'SVC')])],ignore_index=True)

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
83.49462365591398
85.47854785478548


In [None]:
# Logistic Regression

# 20 log-spaced regularisation strengths from 1e-4 to 1e4, two solvers.
params = {"C": np.logspace(-4, 4, 20),
          "solver": ['liblinear','lbfgs']}


clf=gridsearch(LogisticRegression(),params)
# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
table=pd.concat([table,pd.DataFrame([perform(clf,'Logistic Regression')])],ignore_index=True)
table

LogisticRegression(C=0.23357214690901212, class_weight=None, dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)
82.80645161290323
85.47854785478548


Unnamed: 0,Method,Training Accuracy,Cross-Val Accuracy,Sensitivity,Specificity
0,SVC,85.478548,83.494624,0.939394,0.753623
1,Logistic Regression,85.478548,82.806452,0.921212,0.775362


In [None]:
# Decision Tree

# Candidate hyperparameters for the decision tree.
params = {"criterion":("gini", "entropy"), 
          "splitter":("best", "random"), 
          "max_depth":range(1, 20), 
          "min_samples_split":[2, 3, 4], 
          "min_samples_leaf":range(1, 20)
          }


clf=gridsearch(DecisionTreeClassifier(),params)
# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
table=pd.concat([table,pd.DataFrame([perform(clf,'Decision Tree')])],ignore_index=True)
table

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=16, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=13, min_samples_split=4,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='random')
78.87096774193549
82.83828382838284


Unnamed: 0,Method,Training Accuracy,Cross-Val Accuracy,Sensitivity,Specificity
0,SVC,85.478548,83.494624,0.939394,0.753623
1,Logistic Regression,85.478548,82.806452,0.921212,0.775362
2,Decision Tree,82.838284,78.870968,0.848485,0.804348


In [None]:
# Random Forest

# Candidate hyperparameters for the random forest.
# 'auto' was dropped from max_features: for classifiers it was an alias of
# 'sqrt' (so it only doubled the grid) and newer scikit-learn rejects it.
params = {'n_estimators': [50,500,800,1500,2000], 
          'max_features': ['sqrt'],
          'max_depth': [10,20,30,40,50], 
          'min_samples_split': [2, 5, 10],
          'min_samples_leaf': [1, 2, 4,15], 
          'bootstrap': [True, False]
          }


clf=gridsearch(RandomForestClassifier(),params)
# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
table=pd.concat([table,pd.DataFrame([perform(clf,'Random Forest')])],ignore_index=True)
table

In [None]:
# XGBoost

# Candidate hyperparameters for the gradient-boosted classifier.
params = {'n_estimators': [100, 500, 900, 1100, 1500],
          'max_depth': [2, 3, 5, 10, 15],
          'learning_rate' : [0.05, 0.1, 0.15, 0.20], 
          'min_child_weight' : [1, 2, 3, 4], 
          'booster' : ['gbtree', 'gblinear'],
          'base_score' : [0.25, 0.5, 0.75, 0.99]
          }

# `xgb` is the imported xgboost module, so calling it raised TypeError;
# the scikit-learn compatible estimator is xgb.XGBClassifier.
clf=gridsearch(xgb.XGBClassifier(),params)
# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
table=pd.concat([table,pd.DataFrame([perform(clf,'XGBoost')])],ignore_index=True)
table