## Importing Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the pre-split dataset. Each split lives in its own folder under ../data
# and is stored as a features CSV (X_*) plus a matching labels CSV (y_*).
X_train, y_train = (
    pd.read_csv("../data/train-data/X_train.csv"),
    pd.read_csv("../data/train-data/y_train.csv"),
)
X_test, y_test = (
    pd.read_csv("../data/test-data/X_test.csv"),
    pd.read_csv("../data/test-data/y_test.csv"),
)
X_validation, y_validation = (
    pd.read_csv("../data/validation-data/X_validation.csv"),
    pd.read_csv("../data/validation-data/y_validation.csv"),
)

In [3]:
# Sanity check: the training features and labels should have the same number of rows.
X_train.shape, y_train.shape

((3576, 10), (3576, 1))

In [4]:
# Sanity check: the validation features and labels should have the same number of rows.
X_validation.shape, y_validation.shape

((766, 10), (766, 1))

In [5]:
# Short list of column names (presumably a subset of X_train's columns — TODO confirm).
# NOTE(review): no cell shown here reads this list; every model below is fitted
# on X_train exactly as loaded. Confirm whether it is still needed.
feature_columns = ['hypertension','heart_disease','ever_married','work_type','age','avg_glucose_level']

## Logistic Regression

In [6]:
from sklearn.linear_model import LogisticRegression

In [7]:
# Baseline linear classifier: fit on the training split, then report the mean
# accuracy on the validation split.
logreg = LogisticRegression(max_iter=1000, solver='lbfgs').fit(
    X_train,
    y_train.to_numpy().ravel(),  # flatten the single-column label frame to 1-D
)
lg_score = logreg.score(X_validation, y_validation)
print(f"Logistic Regression Accuracy = {lg_score*100:.4f} %")

Logistic Regression Accuracy = 94.7781 %


## Decision Tree

In [8]:
from sklearn.tree import DecisionTreeClassifier

In [9]:
# Shallow decision tree (depth capped at 4), scored on the validation split.
# random_state is pinned so that rerunning the notebook rebuilds the same tree.
dtree = DecisionTreeClassifier(max_depth=4, random_state=1)
# Flatten the single-column label frame to 1-D, as the other classifier cells do.
dtree.fit(X_train, y_train.values.ravel())
dt_score = dtree.score(X_validation, y_validation)
print(f"Decision Tree Accuracy = {dt_score*100:.4f} %")

Decision Tree Accuracy = 94.7781 %


## Random Forest & AdaBoost

In [10]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

In [11]:
# Two tree ensembles trained on the same data and scored on the validation split.
# Both draw random numbers while fitting, so random_state is pinned to make the
# reported accuracies reproducible across reruns.
ranfor = RandomForestClassifier(max_depth=4, n_estimators=100, max_features=6, random_state=1)
adaboo = AdaBoostClassifier(n_estimators=10, random_state=1)
# Labels are flattened from a single-column frame to the 1-D array fit() expects.
ranfor.fit(X_train, y_train.values.ravel())
adaboo.fit(X_train, y_train.values.ravel())
rf_score = ranfor.score(X_validation, y_validation)
ab_score = adaboo.score(X_validation, y_validation)
print(f"Random Forest Accuracy = {rf_score*100:.4f} %")
print(f"AdaBoost Accuracy = {ab_score*100:.4f} %")

Random Forest Accuracy = 94.7781 %
AdaBoost Accuracy = 94.7781 %


## K Neighbors

In [12]:
from sklearn.neighbors import KNeighborsClassifier

In [13]:
# K-nearest-neighbours classifier (k = 10).
# It is fitted on the raw NumPy values rather than the DataFrame, so the
# estimator stores no column names; ValuePredictor later feeds it a plain array.
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train.values, y_train.values.ravel())
# Score on raw values too: passing a DataFrame to an estimator fitted without
# feature names makes scikit-learn emit a feature-name mismatch warning.
kn_score = knn.score(X_validation.values, y_validation)
print(f"K Neighbors Accuracy = {kn_score*100:.4f} %")

K Neighbors Accuracy = 94.7781 %




## Support Vector Machine

In [14]:
from sklearn.svm import SVC 

In [15]:
# Support vector classifier with scikit-learn's default settings, evaluated by
# mean accuracy on the validation split.
sv = SVC().fit(
    X_train,
    y_train.to_numpy().ravel(),  # flatten the single-column label frame to 1-D
)
sv_score = sv.score(X_validation, y_validation)
print(f"SVM Accuracy = {sv_score*100:.4f} %")

SVM Accuracy = 94.7781 %


## Lasso Regression

In [16]:
from numpy import mean, std, absolute
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import Lasso, LassoCV

# Lasso is a regressor, so it is judged with regression metrics (MAE and R^2)
# rather than classification accuracy.
lasso = Lasso(alpha=1.0)
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
scores = cross_val_score(
    lasso,
    X_train,
    y_train.values.ravel(),  # 1-D labels, same as the fit() call below
    scoring='neg_mean_absolute_error',
    cv=cv,
    n_jobs=-1,
)
# The scorer returns negated MAE (higher is better); take the absolute value to
# get plain MAE back.
scores = absolute(scores)
print(f'Mean MAE: {mean(scores):.3f} {std(scores):.3f}')

lasso.fit(X_train, y_train.values.ravel())
# A regressor's score() is the coefficient of determination R^2, not an
# accuracy, so it is reported under that name and not as a percentage.
lasso_score = lasso.score(X_validation, y_validation)
print(f"Lasso R^2 = {lasso_score:.4f}")

Mean MAE: 0.089 0.007
Lasso Accuracy = 1.6026 %


## Saving the Final Model

In [17]:
import pickle
# Persist the fitted K-Neighbors classifier to disk. The context manager closes
# the file handle (flushing the pickle) as soon as the dump finishes.
with open("../models and app/final_model.pkl", 'wb') as model_file:
    pickle.dump(knn, model_file)
# pickle.dump() returns None, so `model = pickle.dump(...)` used to leave `model`
# bound to None; bind it to the estimator that was just saved instead.
model = knn

## Prediction

In [18]:
def ValuePredictor(to_predict_list, model_path="../models and app/final_model.pkl"):
    """Predict the label for a single sample using the pickled model.

    Parameters
    ----------
    to_predict_list : sequence of numbers
        Feature values for one sample, in the column order the model was
        trained on.
    model_path : str, optional
        Location of the pickled estimator. Defaults to the file this notebook
        saves the final model to.

    Returns
    -------
    The first (and only) element of the model's ``predict`` output.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while loading; only point
    ``model_path`` at files you created yourself.
    """
    # One row with as many columns as values were supplied. A fitted
    # scikit-learn estimator checks the feature count itself, so no width is
    # hard-coded here.
    to_predict = np.array(to_predict_list).reshape(1, -1)
    # Context manager so the file handle is closed even if unpickling fails.
    with open(model_path, "rb") as model_file:
        loaded_model = pickle.load(model_file)
    result = loaded_model.predict(to_predict)
    return result[0]

In [19]:
# Two hand-written samples (10 feature values each) used to smoke-test the
# saved model; their predictions are printed side by side.
a = [
    [1, 78, 0, 0, 0, 1, 1, 109.4, 30.8, 1],
    [1, 31, 0, 0, 1, 1, 0, 62.68, 35.8, 1],
]
print(*(ValuePredictor(sample) for sample in a))

1 0
