In [5]:
import numpy as np
import pandas as pd

# Read the Boston housing table and discard the 'Unnamed: 0' column that comes
# with the CSV, without mutating the frame in place.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data/boston.csv').drop(columns='Unnamed: 0')

# Split into the feature matrix (every column except PRICE) and the target.
x = df.drop(columns='PRICE')
y = df['PRICE']
x.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [6]:
# Find the attribute whose linear relationship with the output price is strongest:
# correlate every feature column with y and keep the one with the largest
# absolute correlation (the sign is irrelevant for the strength of the fit).
correlation = x.corrwith(y)
best_attribute = correlation.abs().idxmax()
print(f"\nAttribute with the highest correlation to price: {best_attribute}")


Attribute with the highest correlation to price: LSTAT


In [7]:
# Calculating regression coefficients
def estimate_coef(x, y):
    """Fit the least-squares line ``y = b_0 + b_1 * x`` in closed form.

    Parameters
    ----------
    x : array-like
        Feature values, one per observation.
    y : array-like
        Target values, same number of observations as ``x``.

    Returns
    -------
    tuple
        ``(b_0, b_1)`` -- the intercept and the slope.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in length, or ``x`` is constant
        (the slope is undefined when ``x`` has zero variance).
    """
    # Coerce to flat float arrays so lists, Series and (n, 1) columns all
    # behave the same and the arithmetic below is purely positional.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()

    if x.size == 0 or x.size != y.size:
        raise ValueError("x and y must be non-empty and have the same length")

    m_x, m_y = x.mean(), y.mean()
    dev_x = x - m_x
    dev_y = y - m_y

    # Cross-deviation and squared deviation of x about its mean.
    SS_xy = np.sum(dev_x * dev_y)
    SS_xx = np.sum(dev_x ** 2)

    if SS_xx == 0:
        # Avoid silently returning nan/inf from a 0/0 or c/0 division.
        raise ValueError("x has zero variance; the slope is undefined")

    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1 * m_x

    return (b_0, b_1)
# Regress PRICE on the single best-correlated attribute.
# NOTE: this rebinds `x` from the feature DataFrame to a 1-D array, so the
# correlation cell (which calls x.corrwith) cannot be re-run after this one
# without first re-running the data-loading cell.
x = df[best_attribute].to_numpy()
b0, b1 = estimate_coef(x, y)
print(f'Intercept: {b0}', f'Coefficient: {b1}', sep='\n')

Intercept: 34.5538408793831
Coefficient: -0.9500493537579907


In [9]:
# Prediction
def predict(x, b0, b1):
    """Evaluate the fitted line at ``x``.

    ``x`` may be a scalar or an array; ``b0`` is the intercept and ``b1``
    the slope. Returns ``b0 + b1 * x``.
    """
    slope_term = b1 * x
    return b0 + slope_term
# Fitted prices from the closed-form coefficients computed above.
y_pred = predict(x, b0=b0, b1=b1)

# Sum of Squared Errors (SSE)
def calculate_sse(y_true, y_pred):
    """Return the sum of squared differences between ``y_true`` and ``y_pred``."""
    residuals = y_true - y_pred
    return np.sum(residuals ** 2)

# Coefficient of Determination (R²)
def calculate_r2(y_true, y_pred):
    """Return R² = 1 - SSE / SST for the given predictions.

    SSE comes from ``calculate_sse``; SST is the sum of squared deviations
    of ``y_true`` from its own mean.
    """
    ss_error = calculate_sse(y_true, y_pred)
    centered = y_true - np.mean(y_true)
    ss_total = np.sum(centered ** 2)
    return 1 - (ss_error / ss_total)

In [10]:
# Goodness of fit of the closed-form line on the raw (unscaled) prices.
print(
    f'SSE: {calculate_sse(y, y_pred)}',
    f'R²: {calculate_r2(y, y_pred)}',
    sep='\n',
)

SSE: 19472.38141832644
R²: 0.5441462975864797


In [23]:
# Normalize the feature
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
# Standardize the feature with scikit-learn's StandardScaler; it expects a 2-D
# column, so reshape on the way in and go back to 1-D on the way out.
scaler = StandardScaler()
LSTAT_scaled = scaler.fit_transform(x.reshape(-1, 1)).ravel()

# Standardize the target y as well (optional, but may help gradient descent converge).
y_scaled = (y - np.mean(y)) / np.std(y)

# Gradient descent function
def full_batch_gradient_descent(x, y, alpha=0.001, iterations=1000):
    """Fit ``y = b0 + b1 * x`` by gradient descent on the mean squared error.

    Every iteration uses all observations to compute the gradient.

    Parameters
    ----------
    x, y : array-like
        Feature and target values of equal length.
    alpha : float
        Learning rate applied to each gradient step.
    iterations : int
        Number of full passes (gradient steps) to perform.

    Returns
    -------
    tuple
        ``(b0, b1)`` -- intercept and slope after the final step.
    """
    intercept, slope = 0, 0
    n_obs = np.size(x)

    for _ in range(iterations):
        # Residuals of the current line over the whole dataset.
        residual = y - (intercept + slope * x)

        # Partial derivatives of the mean squared error w.r.t. each coefficient.
        grad_intercept = -2 * np.sum(residual) / n_obs
        grad_slope = -2 * np.sum(residual * x) / n_obs

        # Step against the gradient.
        intercept = intercept - alpha * grad_intercept
        slope = slope - alpha * grad_slope

    return intercept, slope

# Calculate coefficients using gradient descent
# Fit on the standardized feature/target pair.
b0_full_gd, b1_full_gd = full_batch_gradient_descent(
    LSTAT_scaled,
    y_scaled,
    alpha=0.01,
    iterations=1000,
)

# Prediction function
def predict(x, b0, b1):
    """Return ``b0 + b1 * x`` (intercept ``b0``, slope ``b1``).

    NOTE(review): this repeats the ``predict`` already defined in the earlier
    "Prediction" cell with the same body; it is kept only so this cell runs
    on its own.
    """
    scaled = b1 * x
    return b0 + scaled

# Predict and evaluate
# Evaluate the gradient-descent fit on the standardized data.
y_full_gd = predict(LSTAT_scaled, b0=b0_full_gd, b1=b1_full_gd)
r2_full_gd = calculate_r2(y_scaled, y_full_gd)
# This SSE is measured on the standardized target, so it is not on the same
# scale as the SSE reported for the raw prices earlier in the notebook.
sse_full_gd = calculate_sse(y_scaled, y_full_gd)

print(f"b0: {b0_full_gd}, b1: {b1_full_gd}")
print(f"SSE: {sse_full_gd}, R²: {r2_full_gd}\n")


b0: -7.772438819035833e-16, b1: -0.7376627249325524
SSE: 230.66197342124124, R²: 0.5441462975864797



In [18]:
# Stochastic Gradient Descent
def stochastic_gradient_descent(x, y, alpha=0.01, iterations=1000, seed=None):
    """Fit ``y = b0 + b1 * x`` with stochastic gradient descent.

    Each epoch visits every observation once, in a freshly shuffled order,
    and updates both coefficients after every single observation.

    Parameters
    ----------
    x, y : array-like
        Feature and target values with the same number of observations.
        pandas objects are accepted; they are read positionally.
    alpha : float
        Learning rate applied to each per-sample gradient step.
    iterations : int
        Number of epochs (full shuffled passes over the data).
    seed : int or None
        When given, shuffling uses a private ``numpy.random.default_rng(seed)``
        so the result is reproducible. When ``None`` (default), shuffling
        uses NumPy's global random state via ``np.random.shuffle``.

    Returns
    -------
    tuple
        ``(b0, b1)`` -- intercept and slope after the last update.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not contain the same number of observations.
    """
    # Plain 1-D arrays make ``y[idx]`` positional. On a pandas Series ``y[i]``
    # is a label lookup, which raises KeyError (or reads the wrong row) as soon
    # as the index is not exactly 0..n-1.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if x.size != y.size:
        raise ValueError("x and y must contain the same number of observations")

    b0 = 0.0
    b1 = 0.0
    n = x.size

    shuffle = np.random.shuffle if seed is None else np.random.default_rng(seed).shuffle

    for _ in range(iterations):
        # New random visiting order for this epoch.
        indices = np.arange(n)
        shuffle(indices)

        for idx in indices:
            # Residual of the current line on this one observation.
            error = y[idx] - (b0 + b1 * x[idx])

            # Gradients of the squared error for this observation.
            b0_gradient = -2 * error
            b1_gradient = -2 * error * x[idx]

            # Update coefficients
            b0 -= alpha * b0_gradient
            b1 -= alpha * b1_gradient

    return b0, b1

# Run Stochastic Gradient Descent
# SGD shuffles the observations with NumPy's global random state; fix the seed
# so the coefficients and metrics printed below are the same on every re-run.
np.random.seed(42)

print("\nStochastic Gradient Descent:")
b0_sgd, b1_sgd = stochastic_gradient_descent(LSTAT_scaled, y_scaled, alpha=0.01, iterations=1000)
print(f"Coefficients: b0 = {b0_sgd}, b1 = {b1_sgd}")


# Predict and evaluate for SGD with the same helpers used for the full-batch
# fit, so both methods are scored by identical code. LSTAT_scaled is already
# 1-D, so no extra flatten is needed.
y_pred_sgd = predict(LSTAT_scaled, b0_sgd, b1_sgd)
sse_sgd = calculate_sse(y_scaled, y_pred_sgd)
r2_sgd = calculate_r2(y_scaled, y_pred_sgd)

print(f"SGD - SSE: {sse_sgd}, R²: {r2_sgd}")


Stochastic Gradient Descent:
Coefficients: b0 = 0.017443592098574753, b1 = -0.6780374129236578
SGD - SSE: 232.6148586053066, R²: 0.540286840700975
