Q1

In [40]:
# Importing libraries
import numpy as np

# Given data
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# Number of observations
n = len(x)

In [41]:
# Compute regression coefficients
X_mean = np.mean(x)
Y_mean = np.mean(y)

In [42]:
# Calculating the coefficients
b1 = np.sum((x - X_mean) * (y - Y_mean)) / np.sum((x - X_mean) ** 2)
b0 = Y_mean - b1 * X_mean

In [43]:
# Predicting y values
y_pred = b0 + b1 * x

In [44]:
# Compute Sum of Squared Errors (SSE)
SSE = np.sum((y - y_pred) ** 2)

In [45]:
# Compute R^2 value
SS_total = np.sum((y - Y_mean) ** 2)
R2 = 1 - (SSE / SS_total)

In [46]:
# Results
print(f"Intercept (b0): {b0.round(2)}")
print(f"Slope (b1): {b1.round(2)}")
print(f"Sum of Squared Errors (SSE): {SSE.round(2)}")
print(f"R^2 value: {R2.round(2)}")

Intercept (b0): 1.24
Slope (b1): 1.17
Sum of Squared Errors (SSE): 5.62
R^2 value: 0.95


In [47]:
# Importing libraries and given datasets
from sklearn.linear_model import LinearRegression
import numpy as np

# Given data
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).reshape(-1, 1)  # Reshape to 2D array
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

In [48]:
# Full-batch Gradient Descent using LinearRegression from sklearn
model_full_batch = LinearRegression()
model_full_batch.fit(x, y)

In [49]:
# Predicted values
y_pred_full_batch = model_full_batch.predict(x)

In [50]:
# Intercept and coefficient
b0_full_batch_sklearn = model_full_batch.intercept_
b1_full_batch_sklearn = model_full_batch.coef_[0]
print(f"Intercept (b0): {b0_full_batch_sklearn}")
print(f"Coefficient (b1): {b1_full_batch_sklearn}")

Intercept (b0): 1.2363636363636399
Coefficient (b1): 1.169696969696969


In [51]:
# SSE from scratch for Full-batch Gradient Descent
SSE_full_batch_sklearn = np.sum((y - y_pred_full_batch) ** 2)
print(f"SSE: {SSE_full_batch_sklearn}")

SSE: 5.624242424242425


In [52]:
# R^2 score from scratch for Full-batch Gradient Descent
SS_tot = np.sum((y - np.mean(y)) ** 2)
R2_full_batch_sklearn = 1 - (np.sum((y - y_pred_full_batch) ** 2) / SS_tot)
print(f"R^2 Score: {R2_full_batch_sklearn}")

R^2 Score: 0.952538038613988


In [53]:
from sklearn.linear_model import SGDRegressor

In [54]:
# Stochastic Gradient Descent using SGDRegressor from sklearn
model_stochastic = SGDRegressor(max_iter=1000, tol=1e-6)
model_stochastic.fit(x, y)

In [55]:
# Predicted values for Stochastic Gradient Descent
y_pred_stochastic_sklearn = model_stochastic.predict(x)

In [56]:
# SSE from scratch for Stochastic Gradient Descent
SSE_stochastic_sklearn = np.sum((y - y_pred_stochastic_sklearn) ** 2)
print(f"SSE (SGD): {SSE_stochastic_sklearn}")

SSE (SGD): 5.953147698505601


In [57]:
# R^2 score from scratch for Stochastic Gradient Descent
R2_stochastic_sklearn = 1 - (np.sum((y - y_pred_stochastic_sklearn) ** 2) / SS_tot)
print(f"R^2 Score (SGD): {R2_stochastic_sklearn}")

R^2 Score (SGD): 0.9497624666792777


Q2

In [58]:
# Importing libraires
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor

In [59]:
# Define column names based on a typical housing dataset structure
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

In [60]:
# Load the dataset with column names and the correct delimiter
data = pd.read_csv(r"C:\Users\itanm\Downloads\housing.csv", delim_whitespace=True, names=column_names)

In [61]:
# Calculate the correlation matrix
correlation_matrix = data.corr()

In [62]:
# Extract the correlation of each attribute with 'MEDV'
correlation_with_medv = correlation_matrix['MEDV'].sort_values(ascending=False)

In [63]:
# Display the correlation of each attribute with 'MEDV'
print("Correlation of each attribute with MEDV (output price):")
print(correlation_with_medv)

Correlation of each attribute with MEDV (output price):
MEDV       1.000000
RM         0.695360
ZN         0.360445
B          0.333461
DIS        0.249929
CHAS       0.175260
AGE       -0.376955
RAD       -0.381626
CRIM      -0.388305
NOX       -0.427321
TAX       -0.468536
INDUS     -0.483725
PTRATIO   -0.507787
LSTAT     -0.737663
Name: MEDV, dtype: float64


In [64]:
# Find the attribute with the highest correlation with 'MEDV'
best_attribute = correlation_with_medv.idxmax()
best_correlation = correlation_with_medv.max()

In [65]:
print(f"\nThe attribute that best follows the linear relationship with MEDV is '{best_attribute}' with a correlation of {best_correlation:.2f}.")


The attribute that best follows the linear relationship with MEDV is 'MEDV' with a correlation of 1.00.


In [66]:
# Define the feature matrix (X) and the response vector (y)
X = data.drop('MEDV', axis=1).values  # Assuming 'MEDV' is the target variable
y = data['MEDV'].values

In [67]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1)

In [68]:
# Normalize the feature matrix X using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [69]:
# Add a column of ones for the intercept term
X_train_b = np.c_[np.ones((X_train_scaled.shape[0], 1)), X_train_scaled]
X_test_b = np.c_[np.ones((X_test_scaled.shape[0], 1)), X_test_scaled]

In [70]:
# Compute the closed-form solution using the normal equation
theta_analytic = np.linalg.inv(X_train_b.T.dot(X_train_b)).dot(X_train_b.T).dot(y_train)

In [71]:
# calculating gradient descent(Full-Batch)
# Set parameters for gradient descent
learning_rate = 0.001  # Lower learning rate to ensure convergence
n_iterations = 1000
m = X_train_b.shape[0]

In [72]:
# Initialize theta (coefficients) with zeros
theta_full_batch = np.zeros(X_train_b.shape[1])

In [73]:
# Perform full-batch gradient descent
for iteration in range(n_iterations):
    gradients = 2/m * X_train_b.T.dot(X_train_b.dot(theta_full_batch) - y_train)
    theta_full_batch -= learning_rate * gradients

In [74]:
# Initialize the SGDRegressor with a lower learning rate
sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3)

In [75]:
# Fit the model to the standardized training data
sgd_reg.fit(X_train_scaled, y_train)

In [76]:
# Get the coefficients (including the intercept)
theta_sgd = np.concatenate(([sgd_reg.intercept_[0]], sgd_reg.coef_))

In [77]:
# Creating columns and comparing results
# Comparison of results
df = pd.DataFrame({
    "Analytic Solution (Closed-Form)": theta_analytic,
    "Full-Batch Gradient Descent": theta_full_batch,
    "Stochastic Gradient Descent": theta_sgd
})

In [78]:
# Print the DataFrame
print("\nComparison of coefficients:")
print(df)


Comparison of coefficients:
    Analytic Solution (Closed-Form)  Full-Batch Gradient Descent  \
0                         22.112541                    19.125921   
1                         -0.796972                    -0.564164   
2                          1.593507                     0.630986   
3                          0.344417                    -0.456908   
4                          0.590303                     0.805394   
5                         -2.062413                    -0.638800   
6                          2.442161                     2.900539   
7                          0.058554                    -0.268819   
8                         -2.924856                    -1.017952   
9                          2.515060                     0.369254   
10                        -2.063689                    -0.265583   
11                        -1.819724                    -1.555737   
12                         0.858254                     0.781418   
13                 