In [None]:
# 1. Import libraries

import pandas as pd # Load and handle the data 
from sklearn.model_selection import train_test_split # Break data into training and testing sets
from sklearn.linear_model import LogisticRegression # Create a logistic regression model
from sklearn.metrics import accuracy_score # Measure the model's accuracy (How well it performed)


In [None]:
# 2. Load the dataset

# Remote CSV; the last column ('Outcome') is the target variable.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

# Column names are supplied by hand and passed to read_csv via `names`.
# The first eight are health measurements; 'Outcome' records whether the
# person has diabetes (1) or not (0).
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

# header=None spells out what passing `names` already implies: every row of
# the file is treated as data, none as a header.
# NOTE(review): this dataset is commonly reported to use 0 as a placeholder
# for missing measurements in some columns — confirm before relying on them.
data = pd.read_csv(url, header=None, names=columns)

# Preview the first rows; kept as the cell's last expression so the notebook
# renders the table.
data.head()


In [None]:
# 3. Split the dataset into features and target

# Features: every column except 'Outcome'.
x = data.drop(columns='Outcome')

# Target: the 'Outcome' column (1 = has diabetes, 0 = does not).
y = data['Outcome']

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows ended up in each partition.
print(f"Training set size: {len(x_train)}")
print(f"Testing set size: {len(x_test)}")

In [None]:
# 4. Train the model

# Logistic regression classifier, allowed up to 1000 solver iterations.
model = LogisticRegression(max_iter=1000)

# Fit on the training partition only, so the model learns the relationship
# between the features (x) and the diabetes outcome (y).
# Left as a bare expression so the notebook displays the fitted estimator.
model.fit(X=x_train, y=y_train)


In [6]:
# 5. Evaluate the model

# Predict labels for the held-out test features.
y_pred = model.predict(x_test)

# Compare predictions against the true test labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the score as a percentage with two decimals.
print(f"Model Accuracy: {accuracy * 100:.2f}%")

Model Accuracy: 74.68%
