<a href="https://colab.research.google.com/github/Ismail-Amodu/Cross-Validation-In-Machine-Learning/blob/main/Cross_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**COMPARING DIFFERENT MODELS**

In [None]:
# Importing the dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

In [None]:
# Importing models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

**DATA COLLECTION AND PROCESSING**

In [None]:
# Load the heart-disease CSV dataset into a pandas DataFrame.
# NOTE(review): the path is absolute under /content — presumably the Colab
# working directory; confirm/adjust before running in another environment.
heart_data = pd.read_csv('/content/heart_disease.csv')

In [None]:
# Display the first five rows of the dataframe (head() defaults to 5 rows)
heart_data.head()

In [None]:
# Display the last five rows of the dataframe (tail() defaults to 5 rows)
heart_data.tail()

In [None]:
# Get the number of rows and columns in the dataframe as a (rows, columns) tuple
heart_data.shape

In [None]:
# Print a concise summary of the dataframe (columns, non-null counts, dtypes)
heart_data.info()

In [None]:
# Count the missing values in each column
heart_data.isnull().sum()

In [None]:
# Obtain summary statistics (count, mean, std, quartiles, ...) of the data
heart_data.describe()

In [None]:
# Check the distribution of the target variable to see whether the two
# classes are roughly balanced
heart_data['HeartDisease'].value_counts()

Note: 1 denotes a defective heart and 0 denotes a healthy heart.

In [None]:
# Separate the feature columns (X) from the target column (Y)
X = heart_data.drop(columns='HeartDisease')
Y = heart_data['HeartDisease']
# Show the features first, then the target, one after the other
print(X, Y, sep='\n')

In [None]:
# Hold out 20% of the rows as a test set. Stratifying on Y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=3,
    stratify=Y,
)
# Shapes of the full, training and test feature sets
print(*(frame.shape for frame in (X, X_train, X_test)))

**MODEL TRAINING: Comparing the models' performance**

In [None]:
# Candidate classifiers to compare, one estimator per line
models = [
    LogisticRegression(max_iter=1000),
    SVC(kernel='linear'),
    KNeighborsClassifier(),
    RandomForestClassifier(),
]
def compare_models_train_test():
    """Fit each model in ``models`` on the training split and print its test accuracy.

    Reads the notebook-level names ``models``, ``X_train``, ``Y_train``,
    ``X_test`` and ``Y_test``. Each model is fitted in place, so the entries
    of ``models`` are left trained after the call.

    Returns:
        None. One line per model is printed to stdout.
    """
    for model in models:
        # Models Training
        model.fit(X_train, Y_train)

        # Models Evaluation: first predict labels for the held-out rows.
        # accuracy_score needs both the true labels and the predictions, so it
        # cannot be called on the features alone.
        test_data_prediction = model.predict(X_test)

        accuracy = accuracy_score(Y_test, test_data_prediction)

        print('Accuracy score of the ', model, '=', accuracy)

In [None]:
# Fit every model in `models` on the training split and print its test accuracy
compare_models_train_test()

**CROSS VALIDATION**

In [None]:
# Logistic regression: score the model with 5-fold cross-validation
cv_score_lr = cross_val_score(LogisticRegression(max_iter=1000), X, Y, cv=5)
print(cv_score_lr)

# Average the fold scores and express the mean as a percentage with two decimals
mean_accuracy_lr = round(sum(cv_score_lr) / len(cv_score_lr) * 100, 2)
print(mean_accuracy_lr)

In [None]:
# Support Vector Classifier: score the model with 5-fold cross-validation.
# The estimator must be an SVC (linear kernel, as in the `models` list);
# cross-validating LogisticRegression here would only repeat the previous cell.
cv_score_svc = cross_val_score(SVC(kernel='linear'), X, Y, cv=5)
print(cv_score_svc)

# Average the fold scores and express the mean as a percentage with two decimals
mean_accuracy_svc = sum(cv_score_svc)/len(cv_score_svc)
mean_accuracy_svc = mean_accuracy_svc*100
mean_accuracy_svc = round(mean_accuracy_svc, 2)
print(mean_accuracy_svc)