In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``z``; a scalar for scalar input, otherwise
        an array with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp is only ever evaluated on non-positive values, so it stays within
    # (0, 1] and cannot overflow the way exp(-z) does for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays as they are.
    return result[()]

def initialize_weights(n_features):
    """Return the starting parameters for training.

    Parameters
    ----------
    n_features : int
        Length of the weight vector to create.

    Returns
    -------
    tuple
        ``(weights, bias)`` where ``weights`` is a zero-filled array of
        length ``n_features`` and ``bias`` is ``0``.
    """
    return np.zeros(n_features), 0

def compute_loss(y, y_pred, eps=1e-15):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Parameters
    ----------
    y : array-like
        True labels (0 or 1).
    y_pred : array-like
        Predicted probabilities, same length as ``y``.
    eps : float, optional
        Predictions are clipped into ``[eps, 1 - eps]`` before taking logs so
        that a probability of exactly 0 or 1 cannot produce ``log(0)``.

    Returns
    -------
    float
        ``-(1/m) * sum(y*log(p) + (1-y)*log(1-p))`` with ``m = len(y)``.
    """
    y = np.asarray(y)
    # Without clipping, a saturated prediction gives log(0) = -inf, and the
    # product 0 * -inf turns the whole loss into nan.
    y_pred = np.clip(y_pred, eps, 1 - eps)
    m = len(y)
    loss = -(1 / m) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))
    return loss

def gradient_descent(X, y, weights, bias, learning_rate):
    """Apply a single gradient-descent update to ``weights`` and ``bias``.

    Parameters
    ----------
    X : ndarray
        Feature matrix; ``X.shape[0]`` is used as the sample count.
    y : ndarray
        Target values, subtracted element-wise from the predictions.
    weights : ndarray
        Current weight vector. It is not modified; a new array is returned.
    bias : float
        Current bias term.
    learning_rate : float
        Step size multiplying the gradients.

    Returns
    -------
    tuple
        ``(weights, bias)`` after the update.
    """
    m = X.shape[0]
    y_pred = sigmoid(np.dot(X, weights) + bias)
    residual = y_pred - y
    dw = (1 / m) * np.dot(X.T, residual)
    db = (1 / m) * np.sum(residual)
    # Rebind instead of using `-=`: the in-place form would overwrite the
    # caller's array and raises a casting error for integer-dtype weights.
    weights = weights - learning_rate * dw
    bias = bias - learning_rate * db
    return weights, bias

def train_logistic_regression(X, y, learning_rate=0.01, num_iterations=1000):
    """Fit weights and bias by repeated gradient-descent updates.

    Starts from ``initialize_weights(X.shape[1])`` and calls
    ``gradient_descent`` ``num_iterations`` times. On every iteration whose
    index is a multiple of 100, the loss of the just-updated parameters is
    printed.

    Parameters
    ----------
    X : ndarray
        Feature matrix; ``X.shape[1]`` gives the number of weights.
    y : ndarray
        Target values.
    learning_rate : float, optional
        Step size passed to ``gradient_descent``.
    num_iterations : int, optional
        Number of update steps to run.

    Returns
    -------
    tuple
        The final ``(weights, bias)``.
    """
    weights, bias = initialize_weights(X.shape[1])
    for i in range(num_iterations):
        weights, bias = gradient_descent(X, y, weights, bias, learning_rate)
        if i % 100 != 0:
            continue
        # Progress report: evaluate the loss with the freshly updated parameters.
        loss = compute_loss(y, sigmoid(np.dot(X, weights) + bias))
        print(f"Iteration {i}: Loss = {loss}")
    return weights, bias

def predict(X, weights, bias, threshold=0.5):
    """Turn model probabilities into 0/1 class labels.

    Parameters
    ----------
    X : ndarray
        Feature matrix.
    weights : ndarray
        Weight vector.
    bias : float
        Bias term.
    threshold : float, optional
        Probabilities greater than or equal to this value map to 1.

    Returns
    -------
    ndarray of int
        1 where ``sigmoid(X . weights + bias) >= threshold``, otherwise 0.
    """
    probabilities = sigmoid(np.dot(X, weights) + bias)
    is_positive = probabilities >= threshold
    return is_positive.astype(int)

# Example usage with the Pima Indians Diabetes Dataset
if __name__ == "__main__":
    # The CSV is read without a header row, so column names are supplied here;
    # the last column is used as the label below.
    column_names = [
        "Pregnancies",
        "Glucose",
        "BloodPressure",
        "SkinThickness",
        "Insulin",
        "BMI",
        "DiabetesPedigreeFunction",
        "Age",
        "Outcome",
    ]
    url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
    data = pd.read_csv(url, names=column_names, header=None)

    # Every column except the last is a feature; the last one is the target
    X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

    # Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=42, test_size=0.2
    )

    # Fit the scaler on the training rows only, then apply it to both splits
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Fit the from-scratch logistic regression on the standardized training set
    weights, bias = train_logistic_regression(
        X_train, y_train, learning_rate=0.01, num_iterations=1000
    )

    # Label the held-out rows
    predictions = predict(X_test, weights, bias)
    print("Predictions:", predictions)

    # Score the labels against the held-out targets
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)


Iteration 0: Loss = 0.6918518025427111
Iteration 100: Loss = 0.6024190680269129
Iteration 200: Loss = 0.5577096461682262
Iteration 300: Loss = 0.5320479416198856
Iteration 400: Loss = 0.5157481920456496
Iteration 500: Loss = 0.5046523378009493
Iteration 600: Loss = 0.49672732065551306
Iteration 700: Loss = 0.49086889863693856
Iteration 800: Loss = 0.48642609641017864
Iteration 900: Loss = 0.48298999274245497
Predictions: [0 0 0 0 1 0 0 1 1 1 0 1 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 0 0 0 1 1 1 1 1 1 1
 0 1 1 0 1 1 0 0 1 1 0 0 1 0 1 1 0 0 0 1 0 0 1 1 0 0 0 0 1 0 1 0 1 1 0 0 0
 0 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 1 1 0 0 1 0 1 0
 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0
 0 1 0 1 0 0]
Accuracy: 0.7337662337662337
