### 1. Implement Linear Regression and calculate sum of residual error on the following Datasets.


#### Used Analytic Method


In [2]:
import numpy as np

In [3]:
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])


In [4]:
def analytical_linear_regression(x, y):
    # Construct the design matrix X and np.ones is used for create colums
    X = np.column_stack((np.ones_like(x), x))
    # Calcuating the coeff using the nprmal form(beta=X^TX)^-1*X^Ty
    beta = np.linalg.inv(X.T @ X) @ X.T @ y # np.linalg.inv is used to inverse resulting squae matrix
    # calculate some of square errors
    SSE = np.sum((y - X @ beta) ** 2)
    # Calculating r-squared value
    R_squared = 1 - (SSE / np.sum((y - np.mean(y)) ** 2))
    return beta, SSE, R_squared


In [5]:
def gradient_descent(x, y, learning_rate, num_iterations, batch_size=None, stochastic=False, tolerance=1e-4):
    beta = np.zeros(x.shape[1])
    n = len(y)
    
    for _ in range(num_iterations):
        idx = np.random.choice(n, batch_size, replace=False) if stochastic else slice(None)
        x_batch, y_batch = x[idx], y[idx]
        
        y_pred = np.dot(x_batch, beta)
        error = y_batch - y_pred
        
        gradient = -2 * np.dot(x_batch.T, error) / n
        beta -= learning_rate * gradient
        
        if np.linalg.norm(gradient) < tolerance:
            break
    
    SSE = np.sum((y - np.dot(x, beta)) ** 2)
    SST = np.sum((y - np.mean(y)) ** 2)
    R_squared = 1 - (SSE / SST)
    
    return beta, SSE, R_squared


In [6]:
# Gradient Descent - Full-batch
beta_full_batch, SSE_full_batch, R_squared_full_batch = gradient_descent(np.column_stack((np.ones_like(x), x)), y, learning_rate=0.001, num_iterations=10000)
print("\nGradient Descent - Full-batch:")
print("Coefficients:", beta_full_batch)
print("Sum Squared Error (SSE):", SSE_full_batch)
print("R-squared Value:", R_squared_full_batch)


Gradient Descent - Full-batch:
Coefficients: [1.23275837 1.17027192]
Sum Squared Error (SSE): 5.6242800592066855
R-squared Value: 0.9525377210193529


In [7]:
# Gradient Descent - Stochastic
beta_stochastic, SSE_stochastic, R_squared_stochastic = gradient_descent(np.column_stack((np.ones_like(x), x)), y, learning_rate=0.001, num_iterations=10000, batch_size=1, stochastic=True)
print("\nGradient Descent - Stochastic:")
print("Coefficients:", beta_stochastic)
print("Sum Squared Error (SSE):", SSE_stochastic)
print("R-squared Value:", R_squared_stochastic)


Gradient Descent - Stochastic:
Coefficients: [0.66371023 1.26199779]
Sum Squared Error (SSE): 6.574528344960295
R-squared Value: 0.944518748143795


### 2 . Boston Housing


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Direct inputs
housing_data = pd.DataFrame({
    'median_income': [3.5, 7.8, 2.9, 6.1, 4.5],
    'median_house_value': [200000, 450000, 150000, 380000, 280000]
})

selected_attribute = 'median_income'
X = housing_data[selected_attribute].values.reshape(-1, 1)
y = housing_data['median_house_value'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_with_intercept = np.c_[np.ones(X_train.shape[0]), X_train]
X_test_with_intercept = np.c_[np.ones(X_test.shape[0]), X_test]

theta_analytic = np.linalg.inv(X_train_with_intercept.T.dot(X_train_with_intercept)).dot(X_train_with_intercept.T).dot(y_train)
print("Coefficients using Analytic Formulation:", theta_analytic)

def full_batch_gradient_descent(X, y, learning_rate, num_iterations):
    theta = np.zeros(X.shape[1])
    for _ in range(num_iterations):
        y_pred = X.dot(theta)
        theta -= (1/len(y)) * learning_rate * X.T.dot(y_pred - y)
    return theta

learning_rate = 0.01
num_iterations = 1000
theta_full_batch = full_batch_gradient_descent(X_train_with_intercept, y_train, learning_rate, num_iterations)
print("Coefficients using Full-batch Gradient Descent:", theta_full_batch)

def stochastic_gradient_descent(X, y, learning_rate, num_iterations):
    theta = np.zeros(X.shape[1])
    for _ in range(num_iterations):
        for i in range(len(y)):
            rand_index = np.random.randint(0, len(y))
            xi = X[rand_index]
            yi = y[rand_index]
            y_pred = np.dot(xi, theta)
            theta -= learning_rate * xi * (y_pred - yi)
    return theta

theta_stochastic = stochastic_gradient_descent(X_train_with_intercept, y_train, learning_rate, num_iterations)
print("Coefficients using Stochastic Gradient Descent:", theta_stochastic)


Coefficients using Analytic Formulation: [-51950.59625213  71635.43441227]
Coefficients using Full-batch Gradient Descent: [-20469.45491225  64759.44491078]
Coefficients using Stochastic Gradient Descent: [-48237.76935829  71569.28348572]


In [4]:
y_pred_analytic = X_test_with_intercept.dot(theta_analytic)
y_pred_full_batch = X_test_with_intercept.dot(theta_full_batch)
y_pred_stochastic = X_test_with_intercept.dot(theta_stochastic)

SSE_analytic = np.sum((y - y_pred_analytic) ** 2)
SSE_full_batch = np.sum((y - y_pred_full_batch) ** 2)
SSE_stochastic = np.sum((y - y_pred_stochastic) ** 2)

mean_y = np.mean(y)
SST = np.sum((y - mean_y) ** 2)

R_squared_analytic = 1 - (SSE_analytic / SST)
R_squared_full_batch = 1 - (SSE_full_batch / SST)
R_squared_stochastic = 1 - (SSE_stochastic / SST)

print("SSE and R-squared value:")
print("Analytic Formulation: SSE =", SSE_analytic, ", R-squared =", R_squared_analytic)
print("Full-batch Gradient Descent: SSE =", SSE_full_batch, ", R-squared =", R_squared_full_batch)
print("Stochastic Gradient Descent: SSE =", SSE_stochastic, ", R-squared =", R_squared_stochastic)


SSE and R-squared value:
Analytic Formulation: SSE = 292187641735.03864 , R-squared = -3.7525641141027757
Full-batch Gradient Descent: SSE = 247058233541.1697 , R-squared = -3.018513883233079
Stochastic Gradient Descent: SSE = 299105759224.98444 , R-squared = -3.86509042330814
