<h3 style='color:pink'>K-Fold Cross Validation Implementation</h3>

In [1]:
# importing dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
import warnings
# Ignore every warning from this point on (until the filters are changed).
# NOTE(review): this also hides warnings raised by the libraries used below, not only cosmetic ones.
warnings.simplefilter('ignore')

In [2]:
# importing models 
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
# data collection and data preprocessing:
# read heart.csv (a relative path) into the DataFrame `df` used by the cells below
df = pd.read_csv('heart.csv')

In [4]:
# preview the first rows of df (head() defaults to five rows)
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
# (number of rows, number of columns) of the loaded frame
df.shape

(303, 14)

In [6]:
# count of missing values in each column
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [7]:
# number of rows that exactly repeat an earlier row
# NOTE(review): the recorded output is 1 and no later visible cell drops that row — confirm keeping it is intended
df.duplicated().sum()

1

In [8]:
# how many rows fall into each value of the 'target' column
df['target'].value_counts()

1    165
0    138
Name: target, dtype: int64

In [9]:
# separate the feature columns (X) from the label column (y)
X = df.drop(columns=['target'])
y = df['target']

In [10]:
# hold out 20% of the rows for testing; stratify on y so both splits keep
# the same class proportions, and fix the seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=3,
)

In [11]:
# candidate classifiers keyed by a short name
# NOTE(review): no later visible cell reads `models`; the `algorithm` list is used instead
models = {
    'logreg': LogisticRegression(),
    'svc': SVC(kernel='linear'),
    'knearest': KNeighborsClassifier(),
    'randforest': RandomForestClassifier(),
}

In [12]:
# estimators compared by compare_models_performance(), in evaluation order
algorithm = [
    LogisticRegression(max_iter=100),
    SVC(kernel='linear'),
    KNeighborsClassifier(),
    RandomForestClassifier(),
]

In [13]:
def compare_models_performance():
    """Fit each estimator in ``algorithm`` on the training split and print its test accuracy.

    Reads the notebook-level names ``algorithm``, ``X_train``, ``y_train``,
    ``X_test`` and ``y_test``. Each estimator is fitted in place; nothing is
    returned. The accuracy is printed as a percentage.
    """
    for estimator in algorithm:
        # train on the training split, then score predictions on the held-out split
        estimator.fit(X_train, y_train)
        accuracy = accuracy_score(y_test, estimator.predict(X_test))
        print(f'Accuracy Score of model {estimator} =', accuracy * 100)

In [14]:
# single train/test split comparison: fits every estimator in `algorithm` and prints its accuracy
compare_models_performance()

Accuracy Score of model LogisticRegression() = 80.32786885245902
Accuracy Score of model SVC(kernel='linear') = 77.04918032786885
Accuracy Score of model KNeighborsClassifier() = 65.57377049180327
Accuracy Score of model RandomForestClassifier() = 78.68852459016394


<h3 style='color:lightblue'>Computing Cross-Validation Scores</h3>

#### Logistic Regression Model

In [22]:
# 5-fold cross-validation of logistic regression on the full data set;
# the result holds one score per fold
cv_score_lr = cross_val_score(
    LogisticRegression(max_iter=1000),
    X,
    y,
    cv=5,
)
cv_score_lr

array([0.80327869, 0.86885246, 0.85245902, 0.86666667, 0.75      ])

In [24]:
# average of the per-fold scores
cv_score_lr.mean()

0.8282513661202187

In [25]:
# Manual mean of the per-fold scores (the same quantity as cv_score_lr.mean()).
# The accumulator is deliberately not named `sum`: assigning to `sum` at
# notebook level would shadow the built-in sum() for every later cell.
fold_total = 0
for fold_score in cv_score_lr:
    fold_total += fold_score
mean_accuracy = fold_total / len(cv_score_lr)

In [32]:
# mean accuracy as a percentage, rounded to two decimals
round(mean_accuracy * 100,2)

82.83

In [52]:
# Estimators evaluated by model_train_cv(), in evaluation order.
# NOTE(review): the standalone logistic-regression cell uses max_iter=1000 and
# records a different mean score than this list's max_iter=100 — confirm which
# setting is intended.
algorithm = [
    LogisticRegression(max_iter=100),
    SVC(kernel='linear'),
    KNeighborsClassifier(),
    RandomForestClassifier(),
]
# Mean CV accuracy (percent, two decimals) per estimator, in `algorithm` order.
score_list = []
def model_train_cv():
    """Print the 5-fold cross-validation accuracy of every estimator and the best one.

    For each estimator in the notebook-level ``algorithm`` list, runs
    ``cross_val_score`` on ``X``/``y`` with ``cv=5``, converts the mean fold
    score to a percentage rounded to two decimals, stores it in the
    notebook-level ``score_list`` and prints it. Finally prints the highest
    stored score.

    ``score_list`` is emptied at the start of every call, so calling the
    function repeatedly leaves exactly one score per estimator instead of
    accumulating results from earlier calls.

    Returns:
        None. Results are printed and left in ``score_list``.
    """
    # Reset in place (rather than rebinding) so the notebook-level list
    # object that other cells may reference is the one being updated.
    score_list.clear()
    for model in algorithm:
        cv_score = cross_val_score(model, X, y, cv=5)
        mean_accuracy = round(cv_score.mean() * 100, 2)
        score_list.append(mean_accuracy)
        print(f'Cross validation accuracy {model}: {mean_accuracy}')
    if not score_list:
        # Nothing was evaluated; there is no best score to report.
        print('No models to evaluate')
        return
    # Use the built-in max() instead of a hand-written search stored in a
    # local variable that shadowed it.
    best_score = max(score_list)
    print(f'Best Score is {best_score}')

In [53]:
# cross-validated comparison: prints the mean 5-fold accuracy of every estimator and the best score
model_train_cv()

Cross validation accuracy LogisticRegression(): 83.15
Cross validation accuracy SVC(kernel='linear'): 82.83
Cross validation accuracy KNeighborsClassifier(): 64.39
Cross validation accuracy RandomForestClassifier(): 81.49
Best Score is 83.15
