# In [None]:
# 📌 Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 📌 Step 2: Load Dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    "Pregnancies",
    "Glucose",
    "BloodPressure",
    "SkinThickness",
    "Insulin",
    "BMI",
    "DiabetesPedigreeFunction",
    "Age",
    "Outcome",
]
# Passing `names=` makes pandas treat the first line of the file as data
# rather than as a header row.
data = pd.read_csv(url, names=columns)

print("✅ Dataset loaded successfully")
# A bare `data.head()` is only rendered when it is the last expression of a
# notebook cell; print it so the preview is shown wherever this code runs.
print(data.head())

# 📌 Step 3: Check for missing values
print("\nMissing values per column:")
print(data.isnull().sum())

# NOTE: the Pima Indians Diabetes data is documented to record missing
# measurements as 0 rather than NaN, so isnull() alone under-reports them.
# Count the zeros in the columns where 0 is not a plausible reading so the
# gap is visible before modelling.
zero_as_missing_columns = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
print("\nZero entries in columns where 0 likely means 'missing':")
print((data[zero_as_missing_columns] == 0).sum())

# 📌 Step 4: Split dataset into features & target
# "Outcome" is the label; every remaining column is used as a predictor.
y = data["Outcome"]
X = data.drop(columns=["Outcome"])

# 📌 Step 5: Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# 📌 Step 6: Build Logistic Regression Model
# The solver is allowed up to 1000 iterations; fit() returns the estimator
# itself, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# 📌 Step 7: Make Predictions
# Predict class labels for the held-out rows only.
y_pred = model.predict(X_test)

# 📌 Step 8: Evaluate Model
# Overall fraction of held-out rows that were classified correctly.
accuracy = accuracy_score(y_test, y_pred)
# The `%` format spec multiplies by 100 and appends the percent sign.
print(f"🎯 Model Accuracy: {accuracy:.2%}")

# Per-class precision / recall / F1 for the same predictions.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# 📌 Step 9: Confusion Matrix Visualization
# Rows of the matrix are the true labels, columns are the predicted labels.
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(5, 4))
# heatmap() draws on the current axes and returns them, so the labels can be
# set directly on that object.
ax = sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()
