In [1]:
# NeededLibraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler


In [2]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trtbps    303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalachh  303 non-null    int64  
 8   exng      303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slp       303 non-null    int64  
 11  caa       303 non-null    int64  
 12  thall     303 non-null    int64  
 13  output    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [4]:
df.columns

Index(['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall', 'output'],
      dtype='object')

In [5]:
df['output']

0      1
1      1
2      1
3      1
4      1
      ..
298    0
299    0
300    0
301    0
302    0
Name: output, Length: 303, dtype: int64

In [6]:
df.drop_duplicates(keep='first',inplace=True)

## Splitting the data

In [7]:
X,y = df.drop(['output'], axis=1), df['output']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)

In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Logistic Regression

In [10]:
logmodel = LogisticRegression()
logmodel.fit(X_train_scaled,y_train)

LogisticRegression()

In [11]:
pred_log = logmodel.predict(X_test_scaled)

In [12]:
print(classification_report(y_test,pred_log))

              precision    recall  f1-score   support

           0       0.86      0.86      0.86        28
           1       0.88      0.88      0.88        33

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61



In [13]:
r2_score = logmodel.score(X_test_scaled,y_test)
print(r2_score)

0.8688524590163934


In [14]:
cm = confusion_matrix(y_test, pred_log)
print(cm)

[[24  4]
 [ 4 29]]


In [15]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
grid={'C': [100, 10, 1.0, 0.1, 0.01], 
 'penalty': ['l2'],
 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
  }
logreg=LogisticRegression()
logreg_cv=GridSearchCV(logreg,grid,cv=10)
logreg_cv.fit(X_train_scaled,y_train)

print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)

tuned hpyerparameters :(best parameters)  {'C': 0.1, 'penalty': 'l2', 'solver': 'newton-cg'}


In [16]:
logreg2=LogisticRegression(C= 0.01, penalty='l2', solver='liblinear')
logreg2.fit(X_train_scaled,y_train)
print("score",logreg2.score(X_test_scaled,y_test))

score 0.8688524590163934


## LightGBM Classifier

In [17]:
mod = LGBMClassifier()
mod.fit(X_train_scaled, y_train)

LGBMClassifier()

In [18]:
pred_LGBM = mod.predict(X_test_scaled)

In [19]:
print(classification_report(y_test,pred_LGBM))

              precision    recall  f1-score   support

           0       0.80      0.71      0.75        28
           1       0.78      0.85      0.81        33

    accuracy                           0.79        61
   macro avg       0.79      0.78      0.78        61
weighted avg       0.79      0.79      0.79        61



In [20]:
round(accuracy_score(y_test, pred_LGBM), 2)

0.79

In [21]:
cm = confusion_matrix(y_test, pred_LGBM)
print(cm)

[[20  8]
 [ 5 28]]


In [22]:
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf', 'poly', 'sigmoid']}

In [24]:
from sklearn.svm import SVC
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf', 'poly', 'sigmoid']}
 
grids = GridSearchCV(SVC(), param_grid, refit = True, verbose = 10)
 
# fitting the model for grid search
grids.fit(X_train_scaled, y_train)
print("tuned hpyerparameters :(best parameters) ",grids.best_params_)

Fitting 5 folds for each of 75 candidates, totalling 375 fits
[CV 1/5; 1/75] START C=0.1, gamma=1, kernel=rbf.................................
[CV 1/5; 1/75] END ..C=0.1, gamma=1, kernel=rbf;, score=0.551 total time=   0.0s
[CV 2/5; 1/75] START C=0.1, gamma=1, kernel=rbf.................................
[CV 2/5; 1/75] END ..C=0.1, gamma=1, kernel=rbf;, score=0.542 total time=   0.0s
[CV 3/5; 1/75] START C=0.1, gamma=1, kernel=rbf.................................
[CV 3/5; 1/75] END ..C=0.1, gamma=1, kernel=rbf;, score=0.542 total time=   0.0s
[CV 4/5; 1/75] START C=0.1, gamma=1, kernel=rbf.................................
[CV 4/5; 1/75] END ..C=0.1, gamma=1, kernel=rbf;, score=0.542 total time=   0.0s
[CV 5/5; 1/75] START C=0.1, gamma=1, kernel=rbf.................................
[CV 5/5; 1/75] END ..C=0.1, gamma=1, kernel=rbf;, score=0.542 total time=   0.0s
[CV 1/5; 2/75] START C=0.1, gamma=1, kernel=poly................................
[CV 1/5; 2/75] END .C=0.1, gamma=1, kernel=poly

In [25]:
from sklearn.pipeline import make_pipeline

svc = SVC(C= 1000, gamma= 0.001, kernel= 'sigmoid')
svc.fit(X_train_scaled,y_train)
predictions1 = svc.predict(X_test_scaled)
scores1 = accuracy_score(y_test,predictions1)
scores1


0.8688524590163934

In [26]:
svc = SVC()
svc.fit(X_train_scaled,y_train)
predictions1 = svc.predict(X_test_scaled)
scores1 = accuracy_score(y_test,predictions1)
scores1

0.8524590163934426

In [None]:
# model = LGBMClassifier()
# grid = {
#     'n_estimators': [400, 700, 1000],
#     'colsample_bytree': [0.7, 0.8],
#     'max_depth': [15,20,25],
#     'num_leaves': [50, 100, 200],
#     'reg_alpha': [1.1, 1.2, 1.3],
#     'reg_lambda': [1.1, 1.2, 1.3],
#     'min_split_gain': [0.3, 0.4],
#     'subsample': [0.7, 0.8, 0.9],
#     'subsample_freq': [20]
# }
# mod_l_cv=GridSearchCV(model,grid,cv=10)
# mod_l_cv.fit(X_train_scaled,y_train)
# print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)

KeyboardInterrupt: 

In [None]:
mod_l_cv2=LogisticRegression(C= 0.01, penalty='l2', solver='liblinear')
mod_l_cv.fit(X_train_scaled,y_train)
print("score",mod_l_cv.score(X_test_scaled,y_test))

In [None]:
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']}
 
grids = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)
 
# fitting the model for grid search
grids.fit(X_train_scaled, y_train)
print("tuned hpyerparameters :(best parameters) ",grids.best_params_)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.561 total time=   0.0s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.537 total time=   0.0s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.537 total time=   0.0s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.550 total time=   0.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.550 total time=   0.0s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.854 total time=   0.0s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.707 total time=   0.0s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.780 total time=   0.0s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.725 total time=   0.0s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.700 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.732 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf