# Diabetes Classification using Logistic Regression

This notebook demonstrates how to predict whether a patient has diabetes by training a logistic regression classifier on the Pima Indians Diabetes dataset.

## Importing libraries

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


## Load dataset

In [8]:
# Load the diabetes records. The relative path is resolved against the kernel's
# working directory, so keep diabetes.csv in the same folder as this notebook.
data = pd.read_csv(filepath_or_buffer="diabetes.csv")

# Preview the first five rows as the cell output.
data.head(n=5)


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,129,71,16,37,26.3,0.182,34,0
1,14,113,70,26,49,24.2,-0.249,56,0
2,10,144,75,14,93,33.8,0.56,37,1
3,7,53,84,18,71,33.5,0.683,41,0
4,6,127,81,20,-20,37.5,0.579,66,1


## Descriptive Statistics

In [24]:
# Per-column summary statistics (count, mean, std, min, quartiles, max),
# displayed as the cell output.
summary_stats = data.describe()
summary_stats

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,7.747396,121.826823,69.942708,19.726562,79.322917,31.498177,0.496798,44.141927,0.39974
std,5.089934,29.625167,11.972343,9.662126,47.064298,6.936294,0.297456,14.194198,0.490164
min,0.0,33.0,33.0,-6.0,-54.0,10.9,-0.453,21.0,0.0
25%,3.0,102.0,62.0,13.0,47.0,26.975,0.3,32.0,0.0
50%,7.0,122.0,70.0,20.0,80.0,31.6,0.504,43.0,0.0
75%,12.0,141.0,78.0,26.0,111.25,35.925,0.7,56.0,1.0
max,16.0,198.0,108.0,51.0,256.0,54.7,1.434,69.0,1.0


## Features and Target

In [12]:
# Separate the label from the predictors: `y` is the "Outcome" column and
# `X` is every remaining column. `data` itself is left unmodified.
target_column = "Outcome"
y = data[target_column]
X = data.drop(columns=[target_column])


## Split the data

In [15]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts


## Train logistic regression model

In [18]:
# Build the classifier with the solver capped at 1000 iterations and fit it on
# the training split. `fit` returns the estimator itself, so `model` is the
# trained classifier.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Display the fitted estimator as the cell output.
model


## Predict and Evaluate

In [21]:
# Predict labels for the held-out rows and compare them with the true labels.
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

# Print each multi-line result on its own line after its heading. Passing the
# heading and the object to a single print() call would insert print's default
# space separator after the newline, shifting the first line of the matrix and
# of the report one column to the right of the lines below it.
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.9935064935064936
Confusion Matrix:
 [[94  0]
 [ 1 59]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99        94
           1       1.00      0.98      0.99        60

    accuracy                           0.99       154
   macro avg       0.99      0.99      0.99       154
weighted avg       0.99      0.99      0.99       154

