In [None]:
import pandas as pd

# Load the raw dataset from the CSV file that sits next to the notebook.
dfn = pd.read_csv(filepath_or_buffer="framingham.csv")

In [None]:
# Summary statistics of the raw frame, computed before any rows are dropped.
dfn.describe()

In [None]:
# (rows, columns) of the raw frame, before any rows are dropped.
dfn.shape

In [None]:
# Drop every row that contains at least one missing value, then show how many
# non-null entries remain in each column. `count` must be *called*: the bare
# attribute only displays the bound-method repr, not the counts.
df = dfn.dropna()
df.count()

In [None]:
# Split the cleaned data into the label column and the feature frame.
# Both are derived from `dfn` instead of from `df` itself, so this cell can be
# re-run safely: reading 'TenYearCHD' from a `df` that has already had the
# column dropped would raise a KeyError.
complete_rows = dfn.dropna()
target = complete_rows['TenYearCHD']
df = complete_rows.drop('TenYearCHD', axis=1)
# Preview only the first rows rather than the whole frame.
df.head()

## Preprocessing

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
split = train_test_split(df, target, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split

In [None]:
# Report the shapes of the feature and label partitions.
features_msg = f"Features => train  {x_train.shape} -- test {x_test.shape}"
labels_msg = f"Label =>  train {y_train.shape} -- test {y_test.shape}"
print(features_msg)
print(labels_msg)

In [None]:
# Shapes of the train and test feature sets, displayed as a pair.
(x_train.shape, x_test.shape)

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Rescale every feature to the [0, 1] range. The scaler learns its min/max
# from the training split only and then applies the same mapping to both splits.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Metric

In [None]:
from sklearn.metrics import recall_score,precision_score,accuracy_score

def calculate_metrics(y_train,y_test,y_pred_train,y_pred_test):
    """Print accuracy, precision and recall for both splits and return a summary.

    Parameters
    ----------
    y_train, y_test
        True labels of the training and test splits.
    y_pred_train, y_pred_test
        Labels predicted by the model for the training and test splits.

    Returns
    -------
    tuple
        ``(acc_train, acc_test, prec_test, rec_test)``. Train accuracy comes
        first so the tuple matches how the notebook unpacks it
        (``*_train_acc, *_test_acc, *_perc, *_rec``). Train precision and
        train recall are printed but not returned.
    """
    acc_train = accuracy_score(y_train, y_pred_train)
    acc_test = accuracy_score(y_test, y_pred_test)

    prec_train = precision_score(y_train, y_pred_train)
    prec_test = precision_score(y_test, y_pred_test)

    rec_train = recall_score(y_train, y_pred_train)
    rec_test = recall_score(y_test, y_pred_test)

    print(f"train acc : {acc_train} test acc : {acc_test}")
    print(f"train prec : {prec_train} test prec : {prec_test}")
    print(f"train rec : {rec_train} test rec : {rec_test}")
    # Order matters: callers unpack train accuracy first, then test accuracy.
    return acc_train, acc_test, prec_test, rec_test

### KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier voting over the 10 closest training points.
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X=x_train, y=y_train)

In [None]:
# Predict on both splits with the fitted KNN model and record its scores.
y_train_pred, y_test_pred = knn.predict(x_train), knn.predict(x_test)

knn_scores = calculate_metrics(
    y_train=y_train,
    y_test=y_test,
    y_pred_train=y_train_pred,
    y_pred_test=y_test_pred,
)
knn_train_acc, knn_test_acc, knn_perc, knn_rec = knn_scores


# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree; the split/leaf minimums and the seed are
# collected in one place so they are easy to tune.
tree_params = {
    "max_depth": 4,
    "min_samples_split": 20,
    "min_samples_leaf": 10,
    "random_state": 42,
}
dt = DecisionTreeClassifier(**tree_params)

dt.fit(X=x_train, y=y_train)

In [None]:
# Predict on both splits with the fitted decision tree and record its scores.
y_train_pred, y_test_pred = dt.predict(x_train), dt.predict(x_test)

dt_scores = calculate_metrics(
    y_train=y_train,
    y_test=y_test,
    y_pred_train=y_train_pred,
    y_pred_test=y_test_pred,
)
dt_train_acc, dt_test_acc, dt_perc, dt_rec = dt_scores


## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 500 trees. `random_state` is pinned (same seed as the
# train/test split and the decision tree) because bootstrap sampling and
# feature selection are random; without it every run yields different scores.
rf = RandomForestClassifier(
    n_estimators=500,
    max_depth=64,
    min_samples_split=8,
    random_state=42,
)

rf.fit(x_train, y_train)

In [None]:
# Predict on both splits with the fitted random forest and record its scores.
y_train_pred, y_test_pred = rf.predict(x_train), rf.predict(x_test)
rf_scores = calculate_metrics(
    y_train=y_train,
    y_test=y_test,
    y_pred_train=y_train_pred,
    y_pred_test=y_test_pred,
)
rf_train_acc, rf_test_acc, rf_perc, rf_rec = rf_scores

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with scikit-learn's default settings.
lr = LogisticRegression()
lr.fit(X=x_train, y=y_train)

In [None]:
# Predict on both splits with the fitted logistic regression and record its scores.
y_train_pred, y_test_pred = lr.predict(x_train), lr.predict(x_test)
lr_scores = calculate_metrics(
    y_train=y_train,
    y_test=y_test,
    y_pred_train=y_train_pred,
    y_pred_test=y_test_pred,
)
lr_train_acc, lr_test_acc, lr_perc, lr_rec = lr_scores

## ANN

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with a single hidden layer of 1024 units.
# `hidden_layer_sizes` is written as a tuple (one entry per hidden layer), and
# `random_state` is pinned (same seed as the split and the decision tree)
# because weight initialisation and batch shuffling are random; without it the
# scores change on every run.
ann = MLPClassifier(
    hidden_layer_sizes=(1024,),
    max_iter=1024,
    activation='relu',
    solver='adam',
    batch_size=64,
    random_state=42,
)

ann.fit(x_train, y_train)

In [None]:
# Predict on both splits with the fitted neural network and record its scores.
y_train_pred, y_test_pred = ann.predict(x_train), ann.predict(x_test)
ann_scores = calculate_metrics(
    y_train=y_train,
    y_test=y_test,
    y_pred_train=y_train_pred,
    y_pred_test=y_test_pred,
)
ann_train_acc, ann_test_acc, ann_perc, ann_rec = ann_scores

## Comparison

In [None]:
import matplotlib.pyplot as plt
# Bar chart comparing the models on the `*_train_acc` values, one bar per model.
title = ["KNN", "DT", "RF", "LR", "ANN"]
colors = ["black", 'red', 'yellow', 'blue', 'pink']
acc_train = [
    knn_train_acc,
    dt_train_acc,
    rf_train_acc,
    lr_train_acc,
    ann_train_acc,
]

plt.grid()
plt.bar(x=title, height=acc_train, color=colors)

In [None]:
import matplotlib.pyplot as plt
# Bar chart comparing the models on *test* accuracy. The list must be built
# from the `*_test_acc` variables; using the `*_train_acc` ones here would just
# redraw the train-accuracy chart under a different name.
acc_test = [knn_test_acc, dt_test_acc, rf_test_acc, lr_test_acc, ann_test_acc]
title = ["KNN","DT","RF","LR","ANN"]

colors = ["black",'red','yellow','blue','pink']
plt.grid()
plt.bar(title,acc_test,color=colors)
# Label the figure so it can be told apart from the train-accuracy chart.
plt.title("Test accuracy")
plt.ylabel("accuracy")

In [None]:
# Bar chart of the precision values returned by calculate_metrics
# (the value it returns is the test-split precision), one bar per model.
title = ["KNN", "DT", "RF", "LR", "ANN"]
colors = ["black", 'red', 'green', 'blue', 'pink']
p = [
    knn_perc,
    dt_perc,
    rf_perc,
    lr_perc,
    ann_perc,
]

plt.grid()
plt.bar(x=title, height=p, color=colors)

In [None]:
# Bar chart of the recall values returned by calculate_metrics
# (the value it returns is the test-split recall), one bar per model.
title = ["KNN", "DT", "RF", "LR", "ANN"]
colors = ['yellow', 'orange', 'green', 'blue', 'pink']
r = [
    knn_rec,
    dt_rec,
    rf_rec,
    lr_rec,
    ann_rec,
]

plt.grid()
plt.bar(x=title, height=r, color=colors)

# ANN is the winner