In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Read the diabetes CSV from the Kaggle input directory and preview the first rows.
csv_path = "/kaggle/input/pima-indians-diabetes-dataset/diabetes.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72.0,35,169.5,33.6,0.627,50,1
1,1,85,66.0,29,102.5,26.6,0.351,31,0
2,8,183,64.0,32,169.5,23.3,0.672,32,1
3,1,89,66.0,23,94.0,28.1,0.167,21,0
4,0,137,40.0,35,168.0,43.1,2.288,33,1


In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Number of missing entries in each column.
df.isna().sum()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

In [None]:
# Class frequencies, most common first; the bars below are drawn in this order.
value_counts = df["Outcome"].value_counts()

# Bar chart of how many rows fall into each Outcome class.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x="Outcome", order=value_counts.index, ax=ax)
ax.set_xlabel("Outcome")
ax.set_ylabel("Count")
plt.show()


In [None]:
# Share of each Outcome class as a pie chart with percentage labels.
# explode has two entries, so this expects exactly two classes; the first
# (most frequent) wedge is offset from the centre.
outcome_counts = df["Outcome"].value_counts()
outcome_counts.plot.pie(autopct="%1.1f%%", explode=[0.1, 0])

In [None]:
# Annotated heatmap of the pairwise column correlations.
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", linewidths=0.1, ax=ax)

In [None]:
# One histogram per numeric column, all drawn on a single 10x6 figure.
df.hist(figsize=(10, 6))
# Tighten the spacing of the figure that df.hist just created.
plt.gcf().tight_layout()
plt.show()

In [None]:
# Show the column labels of the frame.
df.columns

In [None]:
sns.set(style="whitegrid")  # apply the plot style before the figure is created

# Size the subplot grid from the number of columns instead of assuming nine:
# with a fixed 3x3 layout plt.subplot raises ValueError on a tenth column.
n_grid_cols = 3
n_grid_rows = max(1, -(-len(df.columns) // n_grid_cols))  # ceiling division

# Keep the original 15x8 size for three rows and grow the height with extra rows.
plt.figure(figsize=(15, 8 * n_grid_rows / 3))

# Histogram with a KDE overlay for every column. enumerate supplies the
# 1-based subplot slot directly, which also stays an integer when column
# labels are duplicated (Index.get_loc would not).
for position, column in enumerate(df.columns, start=1):
    plt.subplot(n_grid_rows, n_grid_cols, position)
    sns.histplot(data=df, x=column, kde=True)

plt.tight_layout()
plt.show()

In [None]:
sns.set(style="whitegrid")

# Select the features by name rather than by position, so the target is
# excluded even if "Outcome" is not the last column.
feature_columns = df.columns.drop("Outcome")

# Size the grid from the number of features instead of assuming a 3x3 layout.
n_grid_cols = 3
n_grid_rows = max(1, -(-len(feature_columns) // n_grid_cols))  # ceiling division

# Keep the original 12x8 size for three rows and grow the height with extra rows.
plt.figure(figsize=(12, 8 * n_grid_rows / 3))

# Per-class density of every feature. common_norm=False normalises each
# Outcome class separately so the two curves are comparable in shape.
for position, column in enumerate(feature_columns, start=1):
    plt.subplot(n_grid_rows, n_grid_cols, position)
    sns.kdeplot(data=df, x=column, hue="Outcome", fill=True, common_norm=False)
    plt.title(column)

plt.tight_layout()
plt.show()

In [None]:
sns.set(style="whitegrid")

# Pairwise scatter plots coloured by Outcome, with KDE curves on the diagonal.
# pairplot creates and manages its own figure, so no plt.figure() call is made
# here: a figure opened beforehand is never drawn on and only shows up as an
# extra blank output. Use pairplot's `height`/`aspect` arguments to resize.
sns.pairplot(df, hue="Outcome", diag_kind="kde")

plt.show()

<h3>Train test split</h3>

In [None]:
# Feature matrix: every column except the target.
X = df.drop(columns="Outcome")
# Target vector: the Outcome label.
y = df["Outcome"]

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column (StandardScaler defaults: zero mean, unit variance).
# NOTE(review): the scaling statistics here are estimated from every row of X,
# i.e. without regard to any train/test partition.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_scaled[:3]  # preview the first three standardised rows

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the raw features first (80/20, fixed seed), then learn the scaling
# statistics from the training rows only. Standardising on the full data
# before splitting lets the test rows influence the mean/variance used to
# transform the training data, which leaks test information into the model.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=10
)

scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)  # reuse the training statistics

In [None]:
# (rows, features) of the training split.
X_train.shape

In [None]:
# (rows, features) of the test split.
X_test.shape

In [None]:
# Class balance of the training labels.
y_train.value_counts()

In [None]:
# Class balance of the test labels.
y_test.value_counts()

<h3>Train using Random Forest</h3>

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 200 trees. The seed is fixed (same value as the train/test
# split) so the bootstrap samples and feature sub-sampling, and therefore every
# score reported below, are reproducible across kernel restarts.
rfc = RandomForestClassifier(n_estimators=200, random_state=10)
rfc.fit(X_train, y_train)

In [None]:
from sklearn import metrics

# Score the forest on the same rows it was fitted on (training accuracy).
rfc_train = rfc.predict(X_train)
train_accuracy = metrics.accuracy_score(y_train, rfc_train)

print("Accuracy_Score =", format(train_accuracy))

In [None]:
# Predict labels for the test split with the fitted random forest.
y_pred = rfc.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test predictions: rows are true labels, columns are
# predicted labels.
cm = confusion_matrix(y_test, y_pred)

# fmt="d" prints the cells as plain integers; the default ".2g" format would
# render any count of 100 or more in scientific notation (e.g. 1e+02).
ax = sns.heatmap(cm, annot=True, fmt="d")
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Overall accuracy of the predictions currently held in y_pred.
accuracy = accuracy_score(y_test, y_pred)

# Per-class precision / recall / F1 summary as formatted text.
report = classification_report(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Classification Report:\n", report)):
    print(label, value)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Baseline classifiers to compare, each with its default hyper-parameters.
classifiers = [
    ("Logistic Regression", LogisticRegression()),
    ("Support Vector Machine", SVC()),
    ("Naive Bayes", GaussianNB()),
]

# Fit each classifier on the training split and score it on the test split.
# One record is collected per model and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# re-copies it on every iteration.
# NOTE: y_pred and accuracy are rebound on every pass, so after this cell they
# hold the values of the last classifier in the list.
records = []
for algorithm, clf in classifiers:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    records.append({"Algorithm": algorithm, "Accuracy": accuracy})

results_df = pd.DataFrame(records, columns=["Algorithm", "Accuracy"])

# Print the comparison table
print(results_df)
