## IMPORT LIBRARIES

In [1]:
# Import NumPy for numerical operations and array handling
import numpy as np

# Import the pandas library for data manipulation and analysis
import pandas as pd

# Import the Diabetes dataset loader from scikit-learn
from sklearn.datasets import load_diabetes

# Import train_test_split to split the dataset into training and testing sets
from sklearn.model_selection import train_test_split

## LOAD THE DATASET

In [2]:
# Load the Diabetes dataset bundled with scikit-learn
# (load_diabetes is imported in the imports cell at the top of the notebook)
data = load_diabetes()

# Create a DataFrame for the features, one column per feature name
X = pd.DataFrame(data.data, columns=data.feature_names)

# Create a Series for the target variable
Y = pd.Series(data.target, name='Target')

# Combine the features and target into a single DataFrame
df = pd.concat([X, Y], axis=1)

# Sanity check: same number of rows as the features, plus one extra column for the target
assert df.shape == (X.shape[0], X.shape[1] + 1), "unexpected shape after combining features and target"

# Save the dataset to a CSV file in the current working directory (row index not written)
df.to_csv('diabetes_dataset.csv', index=False)

## DISPLAY

In [3]:
# Preview the top five rows of the combined features/target table
df.head(n=5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,Target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


## PREDICTION

In [4]:
# Predict the target value for a single example (one patient) using model parameters
def predict(example, theta):
    """Return the linear-model prediction for a single example.

    The prediction is ``theta[0] + sum(example[i] * theta[i + 1])``.

    Parameters
    ----------
    example : array-like
        Feature values of one example, one entry per feature.
    theta : array-like
        Model parameters: ``theta[0]`` is the bias term and ``theta[1:]``
        holds one weight per feature.

    Returns
    -------
    The predicted target value.

    Raises
    ------
    ValueError
        If the number of features differs from the number of weights.
    """
    # Accept plain Python sequences as well as NumPy arrays
    example = np.asarray(example)
    theta = np.asarray(theta)
    # Check if the number of features matches the size of theta (excluding bias)
    if example.shape[0] != theta.shape[0] - 1:
        raise ValueError("The number of features and parameters must match.")
    # Weighted sum of the features as one dot product, then add the bias term theta[0]
    return np.dot(theta[1:], example) + theta[0]

## COST 

In [5]:
# Compute the cost for the given dataset
def compute_cost(features, labels, theta):
    """Return the halved mean squared error of the linear model on a dataset.

    ``cost = sum((labels - predictions) ** 2) / (2 * n_examples)`` where
    ``predictions = theta[0] + features @ theta[1:]``.

    Parameters
    ----------
    features : array-like
        2-D array with one row per example and one column per feature.
    labels : array-like
        Target values, one per row of ``features``.
    theta : array-like
        Model parameters: ``theta[0]`` is the bias, ``theta[1:]`` the weights.

    Returns
    -------
    The cost as a scalar.

    Raises
    ------
    ValueError
        If the dataset is empty, or if the number of columns / labels does not
        match the number of weights / examples.
    """
    features = np.asarray(features)
    theta = np.asarray(theta)
    labels = np.asarray(labels).reshape(-1)
    n_examples = features.shape[0]
    # Fail with a clear message instead of dividing by zero below
    if n_examples == 0:
        raise ValueError("Cannot compute the cost of an empty dataset.")
    if features.shape[1] != theta.shape[0] - 1:
        raise ValueError("The number of features and parameters must match.")
    if labels.shape[0] != n_examples:
        raise ValueError("The number of labels and examples must match.")
    # Residual of every example at once: label minus (weighted features + bias)
    residuals = labels - (features @ theta[1:] + theta[0])
    # Sum of squared residuals, averaged and halved
    return np.dot(residuals, residuals) / (2 * n_examples)

## PRINT

In [6]:
def print_progress(cost, theta, feature_names=None):
    """Print the current cost and parameters on a single, self-overwriting line.

    Parameters
    ----------
    cost : float
        Current value of the cost function.
    theta : sequence of float
        Model parameters: ``theta[0]`` is the bias and ``theta[i + 1]`` is the
        weight of the i-th feature.
    feature_names : sequence of str, optional
        Label for each weight, in order. Defaults to the ten feature names
        'age', 'sex', 'bmi', 'bp', 's1' ... 's6'.
    """
    if feature_names is None:
        feature_names = ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
    # theta is offset by one relative to the names because theta[0] is the bias
    weights_str = ", ".join(
        f"{name}={theta[position]:.1f}"
        for position, name in enumerate(feature_names, start=1)
    )
    line = f"Cost={cost:.1f} | bias={theta[0]:.1f}, {weights_str}"
    # end="\r" moves the cursor back to the line start so the next call overwrites this line
    print(line, end="\r")

## GRADIENT

In [11]:
# Compute the gradient of the cost function with respect to the bias (theta 0)
def compute_bias_gradient(features, labels, theta):
    """Return the partial derivative of the cost with respect to the bias.

    This is the mean prediction error, ``mean(predictions - labels)``, where
    ``predictions = theta[0] + features @ theta[1:]``.

    Parameters
    ----------
    features : array-like
        2-D array with one row per example and one column per feature.
    labels : array-like
        Target values, one per row of ``features``.
    theta : array-like
        Model parameters: ``theta[0]`` is the bias, ``theta[1:]`` the weights.

    Returns
    -------
    The bias gradient as a scalar.

    Raises
    ------
    ValueError
        If the dataset is empty, or if the number of columns / labels does not
        match the number of weights / examples.
    """
    features = np.asarray(features)
    theta = np.asarray(theta)
    labels = np.asarray(labels).reshape(-1)
    n_examples = features.shape[0]
    # Fail with a clear message instead of dividing by zero below
    if n_examples == 0:
        raise ValueError("Cannot compute the gradient of an empty dataset.")
    if features.shape[1] != theta.shape[0] - 1:
        raise ValueError("The number of features and parameters must match.")
    if labels.shape[0] != n_examples:
        raise ValueError("The number of labels and examples must match.")
    # Prediction error of every example at once
    errors = features @ theta[1:] + theta[0] - labels
    # Average the errors to get the bias gradient
    return errors.sum() / n_examples

# Compute the gradient of the cost function with respect to the feature weights (theta 1..n)
def compute_theta_gradient(features, labels, theta):
    """Return the partial derivatives of the cost with respect to each weight.

    Entry ``j`` of the result is ``mean((predictions - labels) * features[:, j])``
    where ``predictions = theta[0] + features @ theta[1:]``. The bias gradient
    is not included.

    Parameters
    ----------
    features : array-like
        2-D array with one row per example and one column per feature.
    labels : array-like
        Target values, one per row of ``features``.
    theta : array-like
        Model parameters: ``theta[0]`` is the bias, ``theta[1:]`` the weights.

    Returns
    -------
    list
        One gradient value per feature, in column order.

    Raises
    ------
    ValueError
        If the dataset is empty, or if the number of columns / labels does not
        match the number of weights / examples.
    """
    features = np.asarray(features)
    theta = np.asarray(theta)
    labels = np.asarray(labels).reshape(-1)
    n_examples = features.shape[0]
    # Fail with a clear message instead of dividing by zero below
    if n_examples == 0:
        raise ValueError("Cannot compute the gradient of an empty dataset.")
    if features.shape[1] != theta.shape[0] - 1:
        raise ValueError("The number of features and parameters must match.")
    if labels.shape[0] != n_examples:
        raise ValueError("The number of labels and examples must match.")
    # The prediction errors do not depend on the feature index, so compute them
    # once instead of re-predicting every example for every feature
    errors = features @ theta[1:] + theta[0] - labels
    # Error-weighted sum of each feature column, averaged over the examples
    gradients = features.T @ errors / n_examples
    # Return a list, as callers index the result per feature
    return list(gradients)

# Perform gradient descent and track cost at each iteration
def gradient_descent(features, labels, theta, learning_rate,
                     cost_threshold=1400, max_iterations=10_000):
    """Run batch gradient descent until the cost is low enough or the budget runs out.

    Parameters
    ----------
    features : array-like
        2-D array with one row per example and one column per feature.
    labels : array-like
        Target values, one per row of ``features``.
    theta : array-like
        Initial parameters: ``theta[0]`` is the bias, ``theta[1:]`` the weights.
        The input is copied and never modified.
    learning_rate : float
        Step size applied to every gradient.
    cost_threshold : float, optional
        Iteration stops as soon as the cost is no longer above this value.
        Defaults to 1400.
    max_iterations : int or None, optional
        Upper bound on the number of updates, so the loop ends even when
        ``cost_threshold`` cannot be reached. Pass ``None`` to iterate without
        a bound.

    Returns
    -------
    tuple
        ``(theta, cost_history, iteration_history)``: the updated parameters,
        the cost after each update, and the matching 0-based iteration numbers.
    """
    # Work on a float copy: the caller's initial parameters stay untouched and
    # integer-typed inputs cannot truncate the fractional updates
    theta = np.array(theta, dtype=float)
    # List to store cost values over time
    cost_history = []
    # List to store iteration numbers
    iteration_history = []
    # Initialize cost and iteration counter
    cost = compute_cost(features, labels, theta)
    iteration = 0
    # Repeat until the cost is small enough or the iteration budget is used up.
    # A NaN cost compares False and therefore also ends the loop.
    while cost > cost_threshold and (max_iterations is None or iteration < max_iterations):
        # Compute both gradients from the same theta before changing any parameter
        bias_grad = compute_bias_gradient(features, labels, theta)
        theta_grads = compute_theta_gradient(features, labels, theta)
        # Update the bias and all weights
        theta[0] -= learning_rate * bias_grad
        theta[1:] -= learning_rate * np.asarray(theta_grads)
        # Recompute cost
        cost = compute_cost(features, labels, theta)
        # Print current cost and theta
        print_progress(cost, theta)
        # Save cost and iteration
        cost_history.append(cost)
        iteration_history.append(iteration)
        # Increment iteration counter
        iteration += 1
    # Tell the caller when the budget ended the loop rather than the threshold
    if cost > cost_threshold:
        import warnings
        warnings.warn(
            f"gradient_descent stopped after {iteration} iterations with "
            f"cost {cost:.4f} still above the threshold {cost_threshold}.",
            RuntimeWarning,
        )
    # Return updated parameters and history
    return theta, cost_history, iteration_history

## TRAIN

In [None]:
# Define initial weights (bias + weights) from your current best result
theta_init = np.array([
    151.3,   # bias
     38.7,   # age
   -241.7,   # sex
    545.2,   # bmi
    346.8,   # bp
   -787.5,   # s1
    405.1,   # s2
     98.8,   # s3
    255.5,   # s4
    681.0,   # s5
     49.7    # s6
])

# Learning rate (step size) applied to every gradient update
learning_rate = 0.01

# Hold out 20% of the rows for testing; the fixed random_state makes the split reproducible
X_train, X_test, Y_train, Y_test = train_test_split(X.values, Y.values, test_size=0.2, random_state=42)

# Run gradient descent starting from the initial weights.
# A copy is passed because gradient_descent may update the array it receives in place,
# which would otherwise overwrite theta_init with the final weights.
theta_final, cost_history, iteration_history = gradient_descent(X_train, Y_train, theta_init.copy(), learning_rate=learning_rate)