In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
import seaborn as sns
from sklearn.metrics import accuracy_score,mean_squared_error,classification_report,confusion_matrix,precision_score,recall_score,roc_curve,auc
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.ensemble import ExtraTreesClassifier,RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC 
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Import data
data=pd.read_csv('health care diabetes.csv')
data

In [None]:
data['Outcome'].value_counts()

In [None]:
x=data.drop(['Outcome','BloodPressure','SkinThickness'],axis=1)
x.head()

In [None]:
y=data['Outcome']

In [None]:
trainx,testx,trainy,testy=train_test_split(x,y,test_size=0.20,random_state=44)

In [None]:
#print("Before OverSampling, counts of label '1': {}".format(sum(trainy == 1)))
#print("Before OverSampling, counts of label '0': {} \n".format(sum(trainy == 0)))

# Oversample the training split with SMOTE; only trainx/trainy are passed in,
# so the held-out test split is left as it is.
from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state =63)
# Series.ravel() is deprecated since pandas 2.2 (and the warning is hidden by
# the global warnings filter). to_numpy() yields the same 1-D ndarray, so
# trainy_res keeps the type it had before.
trainx_res,trainy_res = sm.fit_resample(trainx,trainy.to_numpy())
#print('After OverSampling, the shape of train_X: {}'.format(trainx_res.shape))
#print('After OverSampling, the shape of train_y: {} \n'.format(trainy_res.shape))


#print("After OverSampling, counts of label '1': {}".format(sum(trainy_res == 1)))
#print("After OverSampling, counts of label '0': {}".format(sum(trainy_res == 0)))

## LogisticRegression

In [None]:
logreg=LogisticRegression()

In [None]:
logreg.fit(trainx_res,trainy_res)

In [None]:
logreg_test_pred=logreg.predict(testx)
logreg_train_pred=logreg.predict(trainx)

In [None]:
print(accuracy_score(testy,logreg_test_pred))
print(accuracy_score(trainy,logreg_train_pred))

In [None]:
confusion_matrix(testy,logreg_test_pred)

In [None]:
print(classification_report(testy,logreg_test_pred))

## RandomForestClassifier

In [None]:
rf=RandomForestClassifier(random_state=70)

In [None]:
rf.fit(trainx_res,trainy_res)

In [None]:
rf_test_pred=rf.predict(testx)
rf_train_pred=rf.predict(trainx)

In [None]:
print(accuracy_score(testy,rf_test_pred))
print(accuracy_score(trainy,rf_train_pred))

In [None]:
trainx_res.shape

In [None]:
rf_grid=RandomForestClassifier(criterion= 'gini',max_depth=2,max_leaf_nodes=2,max_samples=3,min_samples_leaf= 1,
                               min_samples_split=3,n_estimators=400,random_state=46)

In [None]:
rf_grid.fit(trainx_res,trainy_res)

In [None]:
rf_grid_test_pred=rf_grid.predict(testx)
rf_grid_train_pred=rf_grid.predict(trainx)

In [None]:
print(accuracy_score(testy,rf_grid_test_pred))
print(accuracy_score(trainy,rf_grid_train_pred))

In [None]:
confusion_matrix(testy,rf_grid_test_pred)

In [None]:
print(classification_report(testy,rf_grid_test_pred))

## DecisionTreeClassifier

In [None]:
dc=DecisionTreeClassifier(random_state=42)

In [None]:
dc.fit(trainx_res,trainy_res)

In [None]:
dc_test_pred=dc.predict(testx)
dc_train_pred=dc.predict(trainx)

In [None]:
print(accuracy_score(testy,dc_test_pred))
print(accuracy_score(trainy,dc_train_pred))

In [None]:
grid_dc=DecisionTreeClassifier(criterion= 'entropy', max_depth=1,max_leaf_nodes=2,min_samples_leaf= 1,
min_samples_split= 2,splitter='best',random_state=42)

In [None]:
grid_dc.fit(trainx_res,trainy_res)

In [None]:
dc_test_pred=grid_dc.predict(testx)
dc_train_pred=grid_dc.predict(trainx)

In [None]:
print(accuracy_score(testy,dc_test_pred))
print(accuracy_score(trainy,dc_train_pred))

## XGBClassifier

In [None]:
from xgboost import XGBClassifier

In [None]:
xgb=XGBClassifier()

In [None]:
xgb.fit(trainx_res,trainy_res)

In [None]:
xgb_test_pred=xgb.predict(testx)
xgb_train_pred=xgb.predict(trainx)

In [None]:
print(accuracy_score(testy,xgb_test_pred))
print(accuracy_score(trainy,xgb_train_pred))

## Support Vector Machines

In [None]:
svc=SVC(random_state=42,probability=True)

svc.fit(trainx_res,trainy_res)

In [None]:
svc_test_pred=svc.predict(testx)
svc_train_pred=svc.predict(trainx)

In [None]:
print(accuracy_score(testy,svc_test_pred))
print(accuracy_score(trainy,svc_train_pred))

In [None]:
# (Stray expression removed: the bare name `l` was never defined, so this cell
# raised NameError and stopped a Restart & Run All before the model was saved.)

In [None]:
import pickle

In [None]:
# Serialise the tuned random forest to disk. The context manager guarantees
# the handle is flushed and closed even if pickling fails; previously the file
# was opened in one cell and never closed.
with open('diabetic_model.pkl','wb') as file:
    pickle.dump(rf_grid,file)