In [None]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ConfusionMatrix
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE

In [None]:
# Read the diabetes CSV from the Kaggle input directory and preview the first rows.
csv_path = "/kaggle/input/predict-diabities/diabetes.csv"
df = pd.read_csv(csv_path)

df.head(n=5)

In [None]:
# Summary statistics per column; the quartiles listed here are the pandas defaults, spelled out.
df.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Number of missing entries in each column (isna is the same method as isnull).
df.isna().sum()

In [None]:
# Bar chart of how many rows fall into each Outcome class.
sns.countplot(data=df, x='Outcome')

In [None]:
# Histogram of the Pregnancies column. Drawn on an explicit Axes so the figure
# carries a title and axis labels; plt.show() keeps the raw (counts, bins, patches)
# tuple out of the cell output.
fig, ax = plt.subplots()
ax.hist(df['Pregnancies'])
ax.set(title='Distribution of Pregnancies', xlabel='Pregnancies', ylabel='Count')
plt.show()

In [None]:
# Histogram of the Age column. Drawn on an explicit Axes so the figure
# carries a title and axis labels; plt.show() keeps the raw (counts, bins, patches)
# tuple out of the cell output.
fig, ax = plt.subplots()
ax.hist(df['Age'])
ax.set(title='Distribution of Age', xlabel='Age', ylabel='Count')
plt.show()

In [None]:
# Histogram of the BMI column. Drawn on an explicit Axes so the figure
# carries a title and axis labels; plt.show() keeps the raw (counts, bins, patches)
# tuple out of the cell output.
fig, ax = plt.subplots()
ax.hist(df['BMI'])
ax.set(title='Distribution of BMI', xlabel='BMI', ylabel='Count')
plt.show()

In [None]:
# Optional EDA step, currently disabled: pairwise plots of every column, coloured by Outcome.
# sns.pairplot(df, hue='Outcome')


In [None]:
# Pairwise correlations between the numeric columns, shown as an annotated heatmap.
corr_matrix = df.corr(numeric_only=True)
dataplot = sns.heatmap(corr_matrix, annot=True, cmap='BuPu')
# sns.heatmap returns the Axes it drew on, so the title can be set on it directly.
dataplot.set_title('Data correlation', fontsize=18)
plt.show()

In [None]:
Y = df['Outcome']
X = df.drop(['Outcome'], axis=1)

In [None]:
X.columns

In [None]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
X[0]

In [None]:
X_training, X_test, Y_training, Y_test = train_test_split(X, Y, test_size= 0.2, random_state=0)

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
naive_bayer = GaussianNB()
naive_bayer.fit(X_training, Y_training)

In [None]:
cm = ConfusionMatrix(naive_bayer)
cm.fit(X_training, Y_training)
cm.score(X_test, Y_test)

In [None]:
smote = SMOTE(sampling_strategy='minority')
X_over, Y_over = smote.fit_resample(X, Y)

In [None]:
X_over.shape, X.shape

In [None]:
X_training_o, X_test_o, Y_training_o, Y_test_o = train_test_split(X_over, Y_over, test_size = 0.2, stratify=Y_over)

In [None]:
naive_bayer_o = GaussianNB()
naive_bayer_o.fit(X_training_o, Y_training_o)

In [None]:
cm = ConfusionMatrix(naive_bayer_o)
cm.fit(X_training_o, Y_training_o)
cm.score(X_test_o, Y_test_o)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
decision_tree = DecisionTreeClassifier(criterion='entropy', random_state = 0)
decision_tree.fit(X_training, Y_training)

In [None]:
cm = ConfusionMatrix(decision_tree)
cm.fit(X_training, Y_training)
cm.score(X_test, Y_test)

In [None]:
decision_tree_o = DecisionTreeClassifier(criterion='entropy', random_state = 0)
decision_tree_o.fit(X_training_o, Y_training_o)

In [None]:
cm = ConfusionMatrix(decision_tree_o)
cm.fit(X_training_o, Y_training_o)
cm.score(X_test_o, Y_test_o)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
random_forest = RandomForestClassifier(n_estimators=100, criterion='entropy', random_state =0)
random_forest.fit(X_training, Y_training)

In [None]:
cm = ConfusionMatrix(random_forest)
cm.fit(X_training, Y_training)
cm.score(X_test, Y_test)

In [None]:
random_forest_o = RandomForestClassifier(n_estimators=200, criterion='entropy', random_state =0)
random_forest_o.fit(X_training_o, Y_training_o)

In [None]:
cm = ConfusionMatrix(random_forest_o)
cm.fit(X_training_o, Y_training_o)
cm.score(X_test_o, Y_test_o)