In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
# Read the diabetes CSV into a DataFrame and preview its first ten rows.
data = pd.read_csv(filepath_or_buffer='../input/diabetes-data-set/diabetes.csv')
data.head(n=10)

In [None]:
# Per-feature histograms, split by Outcome via the hue colour.
featureList=['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age']
# A 2x4 grid gives exactly one panel per feature; the previous 4x4 grid left
# half of the figure empty.
fig, axes = plt.subplots(2, 4, figsize=(20, 10), facecolor='white')
for ax, feature in zip(axes.ravel(), featureList):
    sns.histplot(x=feature, data=data, hue='Outcome', ax=ax)
    ax.set_xlabel(feature)
    # The y axis of a histogram is the number of rows per bin, not the Outcome.
    ax.set_ylabel('Count')
fig.tight_layout()

In [None]:
# Frequency of each distinct Glucose reading.
data['Glucose'].value_counts()

In [None]:
# Per-feature strip plots; points are coloured by Outcome through `hue`.
featureList=['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age']
# A 2x4 grid gives exactly one panel per feature; the previous 4x4 grid left
# half of the figure empty.
fig, axes = plt.subplots(2, 4, figsize=(20, 10), facecolor='white')
for ax, feature in zip(axes.ravel(), featureList):
    sns.stripplot(x=feature, data=data, hue='Outcome', ax=ax)
    ax.set_xlabel(feature)
    # No variable is mapped to y (Outcome is shown by colour), so the axis is
    # left unlabelled rather than mislabelled as 'Outcome'.
    ax.set_ylabel('')
fig.tight_layout()

In [None]:
# Per-feature box plots, grouped by Outcome through `hue`.
featureList=['Pregnancies','Glucose','BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age']
# A 2x4 grid gives exactly one panel per feature; the previous 4x4 grid left
# half of the figure empty.
fig, axes = plt.subplots(2, 4, figsize=(20, 10), facecolor='white')
for ax, feature in zip(axes.ravel(), featureList):
    sns.boxplot(x=feature, data=data, hue='Outcome', ax=ax)
    ax.set_xlabel(feature)
    ax.set_ylabel('Outcome')
fig.tight_layout()

In [None]:
# This notebook treats a stored 0 in these columns as a placeholder for a
# missing measurement and replaces it with the column median.
zero_as_missing=['Glucose','BloodPressure','DiabetesPedigreeFunction','BMI','Insulin','SkinThickness']
for col in zero_as_missing:
    # Work in float so a fractional median can be stored without an implicit
    # dtype change on an integer column.
    values = data[col].astype(float)
    missing = values == 0
    # The median is taken over the observed (non-zero) readings only: including
    # the placeholder zeros would drag the fill value towards zero.
    # Columns with no zeros, or with nothing but zeros, are left unchanged.
    if missing.any() and not missing.all():
        values[missing] = values[~missing].median()
    data[col] = values

In [None]:
# Inspect the first twenty rows after the zero placeholders were replaced.
data.head(n=20)

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Scale every predictor (all columns except the 'Outcome' label) into [0, 1].
# Previously 'Pregnancies' and 'Age' were left on their raw scale, which puts
# them on a different range from the other features for scale-sensitive models
# (logistic regression, SVM, k-NN).
temp1=[col for col in data.columns if col!='Outcome']
scaling=MinMaxScaler()
# Assigning through data[cols] replaces the columns outright, so integer
# columns become float cleanly instead of being written into element-wise.
data[temp1]=scaling.fit_transform(data[temp1])
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test rows influence the min/max used for scaling. Consider fitting
# on the training split only.

In [None]:
# Inspect the first ten rows after scaling.
data.head(n=10)

In [None]:
# Pairwise Pearson correlation between all columns, drawn as an annotated heatmap.
heat = data.corr(method='pearson')
sns.heatmap(data=heat, annot=True)

## Logistic Regression

In [None]:
from sklearn.model_selection import train_test_split
# Select predictors by name instead of by position, so the label can never
# end up in `x` if the column order of the CSV changes.
x=data.drop(columns='Outcome')
y=data['Outcome']
# Hold out 20% for testing. stratify=y keeps the class ratio of 'Outcome' the
# same in both splits, which matters because the classes are imbalanced
# (the training set is rebalanced with SMOTE further down).
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=12,stratify=y)

In [None]:
# Class balance of the training labels before resampling.
y_train.value_counts(sort=True)

In [None]:
from imblearn.over_sampling import SMOTE
# Oversample the minority class of the training split only; the test split is
# left untouched. The seed makes the synthetic samples, and therefore every
# model trained on them, reproducible across runs.
smote=SMOTE(random_state=12)
x_smote,y_smote=smote.fit_resample(x_train,y_train)

In [None]:
# Class balance of the training labels after SMOTE resampling.
y_smote.value_counts(sort=True)

In [None]:
                                                                            #Building Model
from sklearn.linear_model import LogisticRegression
LR=LogisticRegression()
LR.fit(x_smote,y_smote)
logisticY_predict=LR.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score,classification_report
# scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, but with
# the arguments swapped classification_report exchanges precision and recall
# and reports the support of the predictions instead of the true labels.
print("Logistic Reggresion----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,logisticY_predict))
print("classification_report---->")
print(classification_report(y_test,logisticY_predict))

## Support Vector Machine

In [None]:
from sklearn.svm import SVC
# Default-parameter support vector classifier trained on the resampled data.
# fit() returns the estimator itself, so construction and fitting are chained.
model1 = SVC().fit(x_smote, y_smote)
scmY_predit = model1.predict(x_test)


In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("Support Vector Machine----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,scmY_predit))
print("classification_report---->")
print(classification_report(y_test,scmY_predit))

## Using a Decision Tree Algorithm Without Hyperparameter Tuning

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Baseline decision tree with default hyperparameters. The seed fixes the
# tie-breaking between equally good splits so the reported score is reproducible.
model2=DecisionTreeClassifier(random_state=12)
model2.fit(x_smote,y_smote)
decisionwithotY_predict=model2.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("Decision Tree without hyperpara----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,decisionwithotY_predict))
print("classification_report---->")
print(classification_report(y_test,decisionwithotY_predict))

##

## Using a Decision Tree Algorithm With Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV
# Exhaustive search over split criterion and splitter strategy
# (GridSearchCV's default 5-fold cross-validation).
parameter={'criterion':['gini', 'entropy', 'log_loss'],'splitter':['best', 'random']}
# Search over a seeded estimator: with splitter='random' an unseeded tree gives
# a different winner on every run, which makes best_params_ meaningless.
gscv=GridSearchCV(DecisionTreeClassifier(random_state=12),parameter,verbose=2)
# NOTE(review): the folds are cut from the SMOTE-resampled data, so validation
# folds contain synthetic points derived from training-fold rows. Consider an
# imblearn Pipeline so resampling happens inside each fold.
gscv.fit(x_smote,y_smote)

In [None]:
# Display the parameter combination the decision-tree grid search selected.
gscv.best_params_

In [None]:
# Build the tuned tree from the search result instead of hard-coding values
# copied from an earlier run, which go stale as soon as the search is re-run.
model3=DecisionTreeClassifier(**gscv.best_params_,random_state=12)
model3.fit(x_smote,y_smote)
decisionY_predict=model3.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("Decision Tree with hyperpara----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,decisionY_predict))
print("classification_report---->")
print(classification_report(y_test,decisionY_predict))

## Using a Random Forest Algorithm Without Hyperparameter Tuning

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Baseline random forest with default hyperparameters. The seed fixes the
# bootstrap samples and feature subsets so the reported score is reproducible.
model4=RandomForestClassifier(random_state=12)
model4.fit(x_smote,y_smote)
randomwithotY_predict=model4.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("Random Forest without hyperpara----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,randomwithotY_predict))
print("classification_report---->")
print(classification_report(y_test,randomwithotY_predict))

## **Using a Random Forest Algorithm With Hyperparameter Tuning**

In [None]:
from sklearn.model_selection import GridSearchCV
# Search over split criterion and forest size (10, 15, ..., 125 trees).
parameter={'criterion':['gini', 'entropy', 'log_loss'],'n_estimators':list(range(10,130,5))}
# A seeded estimator makes the search result reproducible; n_jobs=-1 spreads
# the 72 candidates x 5 folds over all available cores.
gscv2=GridSearchCV(RandomForestClassifier(random_state=12),parameter,n_jobs=-1)
# NOTE(review): the folds are cut from the SMOTE-resampled data, so validation
# folds contain synthetic points derived from training-fold rows.
gscv2.fit(x_smote,y_smote)

In [None]:
# Display the parameter combination the random-forest grid search selected.
gscv2.best_params_

In [None]:
                                                                    ## with hyperparameter
model5=RandomForestClassifier(criterion='log_loss',n_estimators=125)
model5.fit(x_smote,y_smote)
randomY_predict=model5.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("Random Forest hyperpara----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,randomY_predict))
print("classification_report---->")
print(classification_report(y_test,randomY_predict))

## k-Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Default-parameter k-nearest-neighbours classifier trained on the resampled data.
# fit() returns the estimator itself, so construction and fitting are chained.
model6 = KNeighborsClassifier().fit(x_smote, y_smote)
neighbourY_predict = model6.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("KNeighbours----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,neighbourY_predict))
print("classification_report---->")
print(classification_report(y_test,neighbourY_predict))

## Using AdaBoost Algorithm Without Hyperparameter Tuning

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# Baseline AdaBoost with default hyperparameters, seeded so the reported
# score is reproducible across runs.
model7=AdaBoostClassifier(random_state=12)
model7.fit(x_smote,y_smote)
adaY_predict=model7.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("AdaBoost without----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,adaY_predict))
print("classification_report---->")
print(classification_report(y_test,adaY_predict))

## Using AdaBoost Algorithm With Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV
# Search over the number of boosting rounds (10, 15, ..., 125).
parameter={'n_estimators':list(range(10,130,5))}
# A seeded estimator makes the search result reproducible; n_jobs=-1 runs the
# candidate fits in parallel.
gscv3=GridSearchCV(AdaBoostClassifier(random_state=12),parameter,n_jobs=-1)
# NOTE(review): the folds are cut from the SMOTE-resampled data, so validation
# folds contain synthetic points derived from training-fold rows.
gscv3.fit(x_smote,y_smote)

In [None]:
# Display the parameter combination the AdaBoost grid search selected.
gscv3.best_params_

In [None]:
# Build the tuned AdaBoost model from the search result instead of hard-coding
# a value copied from an earlier run.
model10=AdaBoostClassifier(**gscv3.best_params_,random_state=12)
model10.fit(x_smote,y_smote)
adawithY_predict=model10.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("AdaBoost with para----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,adawithY_predict))
print("classification_report---->")
print(classification_report(y_test,adawithY_predict))

## Using Gradient Boosting Algorithm Without Hyperparameter Tuning

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Baseline gradient boosting with default hyperparameters, seeded so the
# reported score is reproducible across runs.
model8=GradientBoostingClassifier(random_state=12)
model8.fit(x_smote,y_smote)
gradientY_predict=model8.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("GadientBOOST----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,gradientY_predict))
print("classification_report---->")
print(classification_report(y_test,gradientY_predict))

## Using Gradient Boosting Algorithm With Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV
# Search over loss function, split criterion and number of boosting stages
# (10, 15, ..., 125).
parameter={'loss':['exponential','log_loss'],'n_estimators':list(range(10,130,5)),'criterion':['friedman_mse','squared_error']}
# A seeded estimator makes the search result reproducible; n_jobs=-1 spreads
# the 96 candidates x 5 folds over all available cores.
gscv4=GridSearchCV(GradientBoostingClassifier(random_state=12),parameter,n_jobs=-1)
# NOTE(review): the folds are cut from the SMOTE-resampled data, so validation
# folds contain synthetic points derived from training-fold rows.
gscv4.fit(x_smote,y_smote)

In [None]:
# Display the parameter combination the gradient-boosting grid search selected.
gscv4.best_params_

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Build the tuned gradient-boosting model from the search result instead of
# hard-coding values copied from an earlier run.
# NOTE(review): `model10` is also the name of the tuned AdaBoost model defined
# earlier in the notebook; this assignment rebinds it. Kept for compatibility.
model10=GradientBoostingClassifier(**gscv4.best_params_,random_state=12)
model10.fit(x_smote,y_smote)
gradientYwith_predict=model10.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("GadientBOOST With parameter----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,gradientYwith_predict))
print("classification_report---->")
print(classification_report(y_test,gradientYwith_predict))

## Using XGBoost Algorithm Without Hyperparameter Tuning

In [None]:
from xgboost import XGBClassifier
# Default-parameter XGBoost classifier trained on the resampled data.
# fit() returns the estimator itself, so construction and fitting are chained.
model9 = XGBClassifier().fit(x_smote, y_smote)
XGY_predict = model9.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("XGBOOST----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,XGY_predict))
print("classification_report---->")
print(classification_report(y_test,XGY_predict))

## Using XGBoost Algorithm With Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV
# Search space for XGBoost: 2 x 5 x 4 x 3 x 3 = 360 candidates.
parameter = {
    'learning_rate': [0.01, 0.1],
    'n_estimators': [10, 50, 100, 200, 300],
    'max_depth': [3, 4, 5, 6],
    'min_child_weight': [1, 2, 3],
    'subsample': [0.8, 0.9, 1.0]
}
# subsample < 1 draws random row subsets, so the estimator is seeded to keep
# the search result reproducible; n_jobs=-1 runs the candidate fits in parallel.
gscv5=GridSearchCV(XGBClassifier(random_state=12),parameter,n_jobs=-1)
# NOTE(review): the folds are cut from the SMOTE-resampled data, so validation
# folds contain synthetic points derived from training-fold rows.
gscv5.fit(x_smote,y_smote)

In [None]:
# Display the parameter combination the XGBoost grid search selected.
gscv5.best_params_

In [None]:
# Build the tuned XGBoost model from the search result instead of hard-coding
# values copied from an earlier run.
model11=XGBClassifier(**gscv5.best_params_,random_state=12)
model11.fit(x_smote,y_smote)
XGYwith_predict=model11.predict(x_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges the
# precision and recall columns of the classification report.
print("XGBOOST with parameter tunning----->")
print("Accuracy Score--->",end='')
print(accuracy_score(y_test,XGYwith_predict))
print("classification_report---->")
print(classification_report(y_test,XGYwith_predict))

## Achieving the Highest Accuracy with Random Forest Algorithm Through Hyperparameter Tuning

In [None]:
# One patient's feature values, listed in the column order of the training frame.
# NOTE(review): each value must be on the same scale the corresponding column
# had when the model was trained. TODO confirm every entry was preprocessed the
# same way as `data`.
input_data = [0.35, 0.67, 0.48, 0.3, 0.01, 0.314, 0.234, 0.483]
# Label the values with the training column names so each one is matched to
# the feature the model was fitted on. A bare list carries no names, and the
# resulting scikit-learn warning is hidden by the global warning filter.
sample = pd.DataFrame([input_data], columns=x_train.columns)
testing = model5.predict(sample)

# Outcome 1 is reported as suspected diabetes, 0 as healthy.
if testing[0] == 1:
    print("\033[1;31mUrgent Warning: Suspected Diabetes - Immediate Consultation with a Doctor\033[m")
else:
    print("You Are Fit (:")
