In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb



### Load The Data

In [2]:
# Read the dataset from 'framingham.csv' (path is relative to the notebook's
# working directory) into a DataFrame.
df = pd.read_csv('framingham.csv')

### EDA

In [3]:
# Preview the first rows to check that the columns loaded as expected.
df.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [4]:
# Count missing values per column to decide how they should be handled.
df.isna().sum()

male                 0
age                  0
education          105
currentSmoker        0
cigsPerDay          29
BPMeds              53
prevalentStroke      0
prevalentHyp         0
diabetes             0
totChol             50
sysBP                0
diaBP                0
BMI                 19
heartRate            1
glucose            388
TenYearCHD           0
dtype: int64

In [5]:
# Keep only complete rows: any row with at least one missing value is dropped.
# NOTE: this rebinds `df`, so the raw frame can only be recovered by re-running
# the load cell.
df = df.dropna()

In [6]:
# Sanity check: every column should now report zero missing values.
df.isna().sum()

male               0
age                0
education          0
currentSmoker      0
cigsPerDay         0
BPMeds             0
prevalentStroke    0
prevalentHyp       0
diabetes           0
totChol            0
sysBP              0
diaBP              0
BMI                0
heartRate          0
glucose            0
TenYearCHD         0
dtype: int64

In [7]:
# Pairwise correlation between all columns, including the TenYearCHD target.
# NOTE(review): presumably used to choose the predictors below - confirm.
df.corr()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
male,1.0,-0.024412,0.017729,0.206114,0.330322,-0.052124,-0.002312,0.001424,0.013819,-0.070321,-0.045358,0.051872,0.073111,-0.115285,0.00294,0.091688
age,-0.024412,1.0,-0.159499,-0.210771,-0.189295,0.134732,0.050893,0.306239,0.109092,0.268252,0.388267,0.208283,0.137511,-0.002722,0.118349,0.233983
education,0.017729,-0.159499,1.0,0.025251,0.01379,-0.013739,-0.030386,-0.078828,-0.039627,-0.013522,-0.12436,-0.058059,-0.137555,-0.064214,-0.031998,-0.063279
currentSmoker,0.206114,-0.210771,0.025251,1.0,0.773913,-0.051923,-0.03815,-0.108078,-0.041849,-0.051034,-0.134428,-0.115955,-0.159821,0.050841,-0.053242,0.019165
cigsPerDay,0.330322,-0.189295,0.01379,0.773913,1.0,-0.046504,-0.036286,-0.07046,-0.036961,-0.0304,-0.094781,-0.056746,-0.087395,0.06403,-0.053726,0.052014
BPMeds,-0.052124,0.134732,-0.013739,-0.051923,-0.046504,1.0,0.113125,0.26291,0.049066,0.094083,0.271263,0.19963,0.105642,0.012889,0.054232,0.089152
prevalentStroke,-0.002312,0.050893,-0.030386,-0.03815,-0.036286,0.113125,1.0,0.066057,0.009625,0.012736,0.06107,0.055834,0.036496,-0.017019,0.016061,0.048366
prevalentHyp,0.001424,0.306239,-0.078828,-0.108078,-0.07046,0.26291,0.066057,1.0,0.080556,0.166655,0.697675,0.617734,0.302949,0.146818,0.086942,0.181387
diabetes,0.013819,0.109092,-0.039627,-0.041849,-0.036961,0.049066,0.009625,0.080556,1.0,0.048451,0.102552,0.050686,0.089009,0.060984,0.61482,0.093431
totChol,-0.070321,0.268252,-0.013522,-0.051034,-0.0304,0.094083,0.012736,0.166655,0.048451,1.0,0.219925,0.174422,0.121056,0.093053,0.049884,0.091338


### split data into x and y

In [8]:
# Predictors: a fixed subset of the cleaned frame's columns.
x = df.loc[:, [
    'age',
    'prevalentHyp',
    'sysBP',
    'diaBP',
    'glucose',
]]
# Target: the TenYearCHD column.
y = df.loc[:, 'TenYearCHD']

### Split data into train and test datasets

In [9]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation. No test_size is passed, so the
# library default split ratio applies; random_state is fixed so the split is
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=123456,
)

### Create models

In [10]:
def create_logistic_regression_model():
    """Fit a cross-validated logistic regression on the training split.

    Reads the module-level ``x_train`` and ``y_train``.

    Returns:
        tuple: ``(fitted LogisticRegressionCV, 'Logistic Regression')``.
    """
    from sklearn.linear_model import LogisticRegressionCV

    # max_iter is set explicitly to 1000 - presumably to give the solver room
    # to converge on the unscaled features (TODO confirm).
    classifier = LogisticRegressionCV(max_iter=1000)
    # fit() returns the estimator itself, so it can be returned directly.
    return classifier.fit(x_train, y_train), 'Logistic Regression'

In [11]:
def create_naive_bayes_model():
    """Fit a Gaussian naive Bayes classifier on the training split.

    Reads the module-level ``x_train`` and ``y_train``.

    Returns:
        tuple: ``(fitted GaussianNB, 'Naive Bayes')``.
    """
    from sklearn.naive_bayes import GaussianNB

    classifier = GaussianNB()
    # fit() returns the estimator itself, so it can be returned directly.
    return classifier.fit(x_train, y_train), 'Naive Bayes'

In [12]:
def create_knn_model():
    """Fit a k-nearest-neighbours classifier (library defaults) on the training split.

    Reads the module-level ``x_train`` and ``y_train``.

    Returns:
        tuple: ``(fitted KNeighborsClassifier, 'KNN')``.
    """
    from sklearn.neighbors import KNeighborsClassifier

    # NOTE(review): the features are passed unscaled; KNN is distance-based,
    # so consider standardising them first - confirm whether this is intended.
    classifier = KNeighborsClassifier()
    # fit() returns the estimator itself, so it can be returned directly.
    return classifier.fit(x_train, y_train), 'KNN'

In [13]:
def create_svm_model():
    """Fit a support vector classifier (library defaults) on the training split.

    Reads the module-level ``x_train`` and ``y_train``.

    Returns:
        tuple: ``(fitted SVC, 'SVM')``.
    """
    from sklearn.svm import SVC

    # NOTE(review): the features are passed unscaled; SVMs are sensitive to
    # feature scale, so consider standardising them first - confirm intent.
    classifier = SVC()
    # fit() returns the estimator itself, so it can be returned directly.
    return classifier.fit(x_train, y_train), 'SVM'

In [14]:
# Train every candidate classifier once. Each factory returns a
# (fitted_model, display_name) pair; the order here is the row order of the
# results table.
models = [
    create_logistic_regression_model(),
    create_naive_bayes_model(),
    create_knn_model(),
    create_svm_model(),
]

### model evaluation

In [15]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
def model_1(mode, model_name):
    """Score one fitted classifier on the held-out test split.

    Args:
        mode: Fitted estimator exposing ``predict``. The parameter name is
            kept as-is for backward compatibility; it holds the model.
        model_name: Display label copied into the returned row.

    Returns:
        tuple: ``(model_name, accuracy, precision, recall, f1)`` computed
        against the module-level ``x_test`` and ``y_test``.
    """
    # Predict with the estimator that was passed in. The previous body called
    # `model.predict`, silently ignoring the argument and reading a
    # module-level `model` instead, which only worked when the caller's loop
    # variable happened to have that name.
    y_pred = mode.predict(x_test)
    y_true = y_test

    return (
        model_name,
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        f1_score(y_true, y_pred),
    )

In [16]:
# Evaluate each fitted model on the test split, collecting one metrics row
# per algorithm.
evaluate_rows = []
for (model, model_name) in models:
    evaluate_rows.append(model_1(model, model_name))

# Column order must match the tuple returned by model_1:
# (name, accuracy, precision, recall, f1).
df_result = pd.DataFrame(evaluate_rows, columns=['Algorithm','Accuracy','Precision','Recall', 'f1'])
df_result

Unnamed: 0,Algorithm,Accuracy,Precision,Recall,f1
0,Logistic Regression,0.852459,0.6,0.022059,0.042553
1,Naive Bayes,0.827322,0.385417,0.272059,0.318966
2,KNN,0.836066,0.325,0.095588,0.147727
3,SVM,0.851366,0.5,0.007353,0.014493
