In [105]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier 
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report 
from sklearn.metrics import confusion_matrix


In [15]:
df = pd.read_csv('Datasets/heart.csv')

In [17]:
df.sample(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
69,62,0,0,124,209,0,1,163,0,0.0,2,0,2,1
125,34,0,1,118,210,0,1,192,0,0.7,2,0,2,1
77,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
103,42,1,2,120,240,1,1,194,0,0.8,0,0,3,1
154,39,0,2,138,220,0,1,152,0,0.0,1,0,2,1


In [33]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['target']), df['target'], test_size = 0.2, random_state = 40)

In [37]:
df.shape

(303, 14)

In [35]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((242, 13), (61, 13), (242,), (61,))

In [39]:
lor = LogisticRegression()
lor.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [45]:
y_predict_lor = lor.predict(X_test)
a_s_lor = accuracy_score(y_predict_lor, y_test)
print(f" Accuracy Score of Logistic Regression : {a_s_lor}")

 Accuracy Score of Logistic Regression : 0.9016393442622951


In [47]:
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)

In [49]:
y_predict_rfc = rfc.predict(X_test)
a_s_rfc = accuracy_score(y_predict_rfc, y_test)
print(f" Accuracy Score of Random Forest: {a_s_rfc}")

 Accuracy Score of Random Forest: 0.819672131147541


In [51]:
 gbc = GradientBoostingClassifier()
gbc.fit(X_train, y_train)

In [87]:
y_predict_gbc = gbc.predict(X_test)
a_s_gbc = accuracy_score(y_predict_gbc, y_test)
c_m_gbc = confusion_matrix(y_predict_gbc, y_test)
print(f" Accuracy Score of Gradient Boosting: {a_s_gbc}")
print(f" Confusion Matrix of Gradient Boosting: {pd.DataFrame(c_m_gbc)}")

 Accuracy Score of Gradient Boosting: 0.7868852459016393
 Confusion Matrix of Gradient Boosting:     0   1
0  21   8
1   5  27


In [57]:
svc = SVC ()
svc.fit(X_train, y_train)

In [61]:
y_predict_svc = svc.predict(X_test)
a_s_svc = accuracy_score(y_predict_svc, y_test)
print(f" Accuracy Score of SVC: {a_s_svc}")

 Accuracy Score of SVC: 0.6557377049180327


In [69]:
np.mean(cross_val_score(RandomForestClassifier(), X_train, y_train, cv = 5, scoring= 'accuracy'))

0.8183673469387754

In [79]:
# np.mean(cross_val_score(LogisticRegression(), X_train, y_train, cv = 5, scoring= 'accuracy'))

In [81]:
np.mean(cross_val_score(GradientBoostingClassifier(), X_train, y_train, cv = 5, scoring= 'accuracy'))

0.8058673469387756

HyperParameter Tuning - GridSearchCV

In [93]:
param_grid = {'n_estimators' : [5, 10 , 50, 100], 
              'max_features' : [0.2, 0.3, 0.4, 0.6, 1],
              'max_depth' : [2, 4, 6] ,
              'max_samples' : [0.2, 0.5, 0.7, 1.0]}
              

In [95]:
grid_search = GridSearchCV(RandomForestClassifier(), param_grid=param_grid, cv = 10, verbose=2, n_jobs=-1)

In [97]:
grid_search.fit(X_train, y_train)

Fitting 10 folds for each of 240 candidates, totalling 2400 fits
[CV] END max_depth=2, max_features=0.2, max_samples=0.2, n_estimators=5; total time=   0.0s
[CV] END max_depth=2, max_features=0.2, max_samples=0.2, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=0.2, max_samples=0.2, n_estimators=50; total time=   0.1s
[CV] END max_depth=2, max_features=0.2, max_samples=0.2, n_estimators=50; total time=   0.1s
[CV] END max_depth=2, max_features=0.2, max_samples=0.2, n_estimators=100; total time=   0.3s
[CV] END max_depth=2, max_features=0.2, max_samples=0.5, n_estimators=10; total time=   0.0s
[CV] END max_depth=2, max_features=0.2, max_samples=0.5, n_estimators=50; total time=   0.1s
[CV] END max_depth=2, max_features=0.2, max_samples=0.5, n_estimators=50; total time=   0.1s
[CV] END max_depth=2, max_features=0.2, max_samples=0.7, n_estimators=5; total time=   0.0s
[CV] END max_depth=2, max_features=0.2, max_samples=0.7, n_estimators=5; total time=   0.0s
[CV] EN

In [99]:
grid_search.best_params_

{'max_depth': 2, 'max_features': 0.3, 'max_samples': 0.5, 'n_estimators': 10}

In [101]:
grid_search.best_score_

0.8596666666666668

Hyperparameter tuning - Randomized CV

In [111]:
grid_search_ramdomized = RandomizedSearchCV(RandomForestClassifier(), param_distributions=param_grid, cv = 10, verbose = 2, n_jobs=-1)

In [113]:
grid_search_ramdomized.fit(X_train, y_train)

Fitting 10 folds for each of 10 candidates, totalling 100 fits

[CV] END max_depth=4, max_features=0.6, max_samples=0.2, n_estimators=50; total time=   0.2s
[CV] END max_depth=4, max_features=0.6, max_samples=0.2, n_estimators=50; total time=   0.2s
[CV] END max_depth=4, max_features=0.6, max_samples=0.5, n_estimators=5; total time=   0.0s
[CV] END max_depth=4, max_features=0.6, max_samples=0.5, n_estimators=5; total time=   0.0s
[CV] END max_depth=4, max_features=0.6, max_samples=0.5, n_estimators=5; total time=   0.0s
[CV] END max_depth=4, max_features=0.6, max_samples=0.5, n_estimators=5; total time=   0.0s
[CV] END max_depth=4, max_features=0.6, max_samples=0.5, n_estimators=5; total time=   0.0s
[CV] END max_depth=4, max_features=0.6, max_samples=0.5, n_estimators=5; total time=   0.0s
[CV] END max_depth=4, max_features=0.6, max_samples=0.5, n_estimators=5; total time=   0.0s
[CV] END max_depth=4, max_features=0.6, max_samples=0.5, n_estimators=5; total time=   0.0s
[CV] END max_d

In [115]:
grid_search_ramdomized.best_params_

{'n_estimators': 100, 'max_samples': 0.2, 'max_features': 0.2, 'max_depth': 6}

In [117]:
grid_search_ramdomized.best_score_

0.8431666666666666