In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.metrics import classification_report

# plt.style.use('fivethirtyeight')

import warnings
warnings.filterwarnings('ignore')

In [None]:
data = pd.read_csv('./diabetes.csv')

## Explore Data

In [None]:
data.head()

In [None]:
data.info()

In [None]:
data.describe()

In [None]:
data.duplicated().sum()

## Analysis

In [None]:
data.corr()

In [None]:
sns.heatmap(data.corr(), annot=True, fmt='0.1f', linewidth=.5)

In [None]:
sns.countplot(x='Outcome', data=data, palette=['g', 'r'])

In [None]:
# Three side-by-side views of the Pregnancies column: value counts,
# density histogram with a KDE overlay, and a box plot.
fig, axes = plt.subplots(1, 3, figsize=(20, 6))

axes[0].set_title("Counter Plot")
sns.countplot(x="Pregnancies", data=data, ax=axes[0])

# sns.distplot is deprecated; histplot with kde=True and stat="density" is
# the documented replacement (cut=3 mirrors distplot's KDE tail extension).
axes[1].set_title("Distribution Plot")
sns.histplot(
    data["Pregnancies"],
    kde=True,
    stat="density",
    kde_kws={"cut": 3},
    ax=axes[1],
)

axes[2].set_title("Box Plot")
sns.boxplot(y=data["Pregnancies"], ax=axes[2])

plt.show()

In [None]:
# Pass the column by keyword: a bare positional argument is interpreted as
# `x` by older seaborn releases and as `data` by newer ones, which flips the
# plot orientation. `y=` matches the Pregnancies box plot drawn earlier.
sns.boxplot(y=data["Age"])

In [None]:
# Target vector is the "Outcome" column; every other column is a predictor.
y = data.Outcome
x = data.drop(columns=["Outcome"])

In [None]:
# Hold out 20% of the rows for evaluation. The fixed seed makes the split,
# and therefore every metric computed from it, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.2, random_state=42
)

In [None]:
# Candidate classifiers to be compared on the same train/test split.
# - LogisticRegression: max_iter is raised above the default of 100 so the
#   solver has room to converge; a ConvergenceWarning would otherwise be
#   hidden by the global warnings filter set in the import cell.
# - RandomForest / GradientBoosting: seeded so repeated runs give the same
#   scores.
# NOTE: `Model_four` keeps its capitalised name because a later cell calls
# calc(Model_four).
model_one = LogisticRegression(max_iter=1000)
model_two = SVC()
model_three = RandomForestClassifier(random_state=42)
Model_four = GradientBoostingClassifier(n_estimators=1000, random_state=42)

In [None]:
# Display names for the evaluated algorithms.
columns = [
    "LogisticRegression",
    "SVC",
    "RandomForestClassifier",
    "GradientBoostingClassifier",
]

# Per-metric accumulators filled by calc():
# result1 -> accuracy, result2 -> recall, result3 -> F1.
result1, result2, result3 = [], [], []

In [None]:
def calc(model):
    """Fit ``model`` on the training split and report its test-set metrics.

    The function reads the module-level ``x_train``, ``y_train``, ``x_test``
    and ``y_test`` objects, appends the accuracy, recall and F1 scores to the
    module-level lists ``result1``, ``result2`` and ``result3`` respectively,
    draws the confusion matrix as a seaborn heatmap, and prints the model
    followed by the three scores.

    Parameters
    ----------
    model : estimator
        Any object exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    None
    """
    model.fit(x_train, y_train)
    prediction = model.predict(x_test)

    # The locals must NOT reuse the names of the imported metric functions:
    # assigning to e.g. ``accuracy_score`` inside this function makes that
    # name local and raises UnboundLocalError on the very call that
    # computes it.
    # scikit-learn metrics take (y_true, y_pred). With the arguments swapped,
    # "recall" is actually precision and the confusion matrix is transposed.
    accuracy = accuracy_score(y_test, prediction)
    recall = recall_score(y_test, prediction)
    f1 = f1_score(y_test, prediction)

    result1.append(accuracy)
    result2.append(recall)
    result3.append(f1)

    # Rows are the true labels and columns the predicted labels; fmt="d"
    # shows the cell counts as plain integers.
    ax = sns.heatmap(confusion_matrix(y_test, prediction), annot=True, fmt="d")
    ax.set(xlabel="Predicted", ylabel="Actual")

    print(model)
    print("accuracy is : ", accuracy, "recall is : ", recall, "f1 is : ", f1)

In [None]:
calc(model_one)

In [None]:
calc(model_two)

In [None]:
calc(model_three)

In [None]:
calc(Model_four)

In [None]:
# Summary table: one row per algorithm, one column per collected metric.
metric_columns = {"Accuracies": result1, "Recall": result2, "F1": result3}
df = pd.DataFrame({"Algorithms": columns, **metric_columns})

In [None]:
# Compare the three metrics across algorithms on a single chart.
fig, ax = plt.subplots(figsize=(20, 5))

# Draw on the axes created above and read every series from `df`, so the
# chart always matches the summary table.
ax.plot(df["Algorithms"], df["Accuracies"], marker="o", label="Accuracy")
ax.plot(df["Algorithms"], df["Recall"], marker="o", label="Recall")
ax.plot(df["Algorithms"], df["F1"], marker="o", label="F1")

ax.set_xlabel("Algorithm")
ax.set_ylabel("Score")
ax.set_title("Model comparison")
ax.legend()
plt.show()