<a href="https://colab.research.google.com/github/Abhiram-108/CSE-LAB-303/blob/main/gradientdescent2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt






In [None]:
# Download the Boston housing table from the CMU StatLib archive.
# The separator is written as a raw string so that "\s" reaches pandas as a
# regex token instead of being parsed as an (invalid) string escape, which
# emits a warning on recent Python versions.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Each record spans two consecutive rows of raw_df: the even rows carry the
# leading feature columns, the odd rows carry two more features followed by
# the target value in column 2.
housing_data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
housing_target = raw_df.values[1::2, 2]

In [None]:
# Assemble a labelled frame: the 13 feature columns first, then the target
# appended as the last column under the name PRICE.
feature_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
boston_df = pd.DataFrame(housing_data, columns=feature_names).assign(PRICE=housing_target)


In [None]:

# Pairwise correlations between every column of the frame; only the PRICE
# column (each attribute's correlation with the target) is displayed.
correlation_matrix = boston_df.corr()
price_column = correlation_matrix['PRICE']
print("Correlation Matrix:\n", price_column)

In [None]:
# Pick the single predictor that is most strongly linearly related to PRICE.
# PRICE must be dropped first: its correlation with itself is exactly 1.0, so
# an idxmax over the full column would always select the target as its own
# predictor. Ranking uses the absolute value because a strong negative
# correlation is just as useful for a linear fit as a strong positive one.
feature_correlations = correlation_matrix['PRICE'].drop('PRICE')
best_feature = feature_correlations.abs().idxmax()
print(f"The attribute with the highest correlation with PRICE is: {best_feature}")

In [None]:
# Single-column design matrix (kept 2-D via the double brackets) and the 1-D
# target vector, split 60/40 into train and test with a fixed seed.
X_feature = boston_df[[best_feature]].to_numpy()
y_price = boston_df['PRICE'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(
    X_feature,
    y_price,
    test_size=0.4,
    random_state=1,
)

In [None]:
# Standardize the feature using statistics learned from the training split
# only; the test split is transformed with those same statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:

# Closed-form simple linear regression:
#   slope     = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x) ** 2)
#   intercept = mean_y - slope * mean_x
#
# The scaled features are (n, 1) columns while the targets are (n,) vectors.
# Multiplying them directly broadcasts to an (n, n) matrix whose total is
# sum(x - mean_x) * sum(y - mean_y), i.e. approximately zero, which silently
# yields a slope of ~0. Flattening everything to 1-D keeps the products
# element-wise.
x_train_flat = np.ravel(X_train_scaled)
x_test_flat = np.ravel(X_test_scaled)
y_train_flat = np.ravel(y_train)

X_train_mean = np.mean(x_train_flat)
y_train_mean = np.mean(y_train_flat)
x_centered = x_train_flat - X_train_mean
numerator = np.sum(x_centered * (y_train_flat - y_train_mean))
denominator = np.sum(x_centered ** 2)
slope_analytic = numerator / denominator
intercept_analytic = y_train_mean - slope_analytic * X_train_mean

# 1-D predictions, aligned element-by-element with y_test.
y_pred_analytic = intercept_analytic + slope_analytic * x_test_flat

In [None]:
# Test-set error of the analytic fit.
# Predictions are flattened before subtracting: a (m,) target minus a (m, 1)
# prediction column would broadcast to an (m, m) matrix and the sum would run
# over every target/prediction pairing instead of the m matched residuals.
sse_analytic = np.sum((np.ravel(y_test) - np.ravel(y_pred_analytic)) ** 2)
# Total sum of squares around the test mean; shared by every R^2 below.
sst_total = np.sum((y_test - np.mean(y_test)) ** 2)
r2_analytic = 1 - sse_analytic / sst_total

In [None]:
# Report the closed-form coefficients and their test-set metrics.
analytic_report = [
    "Analytic Solution:",
    f"Intercept (b0): {intercept_analytic:.2f}",
    f"Slope (b1): {slope_analytic:.2f}",
    f"SSE: {sse_analytic:.2f}",
    f"R^2: {r2_analytic:.2f}",
]
print("\n".join(analytic_report))

In [None]:
# Full-batch gradient descent on the mean squared error of
# y = intercept_gd + slope_gd * x, starting from zero coefficients.
intercept_gd = 0.0
slope_gd = 0.0
alpha = 0.001   # learning rate
epochs = 1000   # number of full passes over the training set

# Work on 1-D views. With the (n, 1) feature column and the (n,) target the
# error term broadcasts to (n, n); the slope gradient then no longer depends
# on y at all (the slope never leaves 0) and the intercept gradient is
# inflated by a factor of n.
x_train_gd = np.ravel(X_train_scaled)
y_train_gd = np.ravel(y_train)
n_train = len(y_train_gd)

# NOTE: assuming X_train_scaled has zero mean and unit variance, each epoch
# shrinks the gap to the least-squares solution by a factor (1 - alpha), so
# 1000 epochs at alpha = 0.001 still leave roughly 37% of that gap. Raise
# alpha or epochs if the result should match the analytic fit closely.
for epoch in range(epochs):
    y_pred_gd = intercept_gd + slope_gd * x_train_gd
    error = y_pred_gd - y_train_gd
    # Both gradients are evaluated from the same error vector before either
    # coefficient is changed.
    intercept_gd -= alpha * np.sum(error) / n_train
    slope_gd -= alpha * np.sum(error * x_train_gd) / n_train

# 1-D test predictions, aligned element-by-element with y_test.
y_pred_gd_test = intercept_gd + slope_gd * np.ravel(X_test_scaled)


In [None]:
# Test-set error of the full-batch gradient-descent fit. Both operands are
# flattened so the subtraction pairs each target with its own prediction
# rather than broadcasting a (m,) vector against a (m, 1) column.
sse_gd = np.sum((np.ravel(y_test) - np.ravel(y_pred_gd_test)) ** 2)
r2_gd = 1 - sse_gd / sst_total

In [None]:


# Report the full-batch gradient-descent coefficients and test-set metrics.
gd_report = [
    "\nFull-batch Gradient Descent Solution:",
    f"Intercept (b0_gd): {intercept_gd:.2f}",
    f"Slope (b1_gd): {slope_gd:.2f}",
    f"SSE: {sse_gd:.2f}",
    f"R^2: {r2_gd:.2f}",
]
print("\n".join(gd_report))

In [None]:
# Starting point and hyperparameters for stochastic gradient descent:
# both coefficients begin at zero; alpha is the learning rate and epochs the
# number of passes over the training set.
intercept_sgd, slope_sgd = 0.0, 0.0
alpha, epochs = 0.001, 1000

In [None]:

# Stochastic gradient descent: update both coefficients after every single
# training sample, visiting the samples in their stored order each epoch.
#
# The feature column is flattened first. Indexing the (n, 1) array yields
# 1-element arrays, which would turn intercept_sgd and slope_sgd into
# shape-(1,) arrays; those cannot be formatted with ":.2f" and make later
# predictions 2-D.
x_train_sgd = np.ravel(X_train_scaled)
y_train_sgd = np.ravel(y_train)

for epoch in range(epochs):
    for i in range(len(x_train_sgd)):
        y_pred_sgd = intercept_sgd + slope_sgd * x_train_sgd[i]
        error = y_pred_sgd - y_train_sgd[i]
        # Both updates use the error computed before either coefficient moved.
        intercept_sgd -= alpha * error
        slope_sgd -= alpha * error * x_train_sgd[i]

In [None]:
# Test-set predictions of the SGD fit, produced as a 1-D vector so they line
# up element-by-element with y_test (a (m, 1) column would broadcast against
# the (m,) targets in the SSE computation).
y_pred_sgd_test = np.ravel(intercept_sgd + slope_sgd * np.ravel(X_test_scaled))


In [None]:
# Test-set error of the SGD fit. Both operands are flattened so the
# subtraction pairs each target with its own prediction rather than
# broadcasting a (m,) vector against a (m, 1) column.
sse_sgd = np.sum((np.ravel(y_test) - np.ravel(y_pred_sgd_test)) ** 2)
r2_sgd = 1 - sse_sgd / sst_total

In [None]:

# Report the SGD coefficients and test-set metrics.
# The coefficients are coerced to plain floats first: if they arrive as
# 1-element arrays, formatting them with ":.2f" raises a TypeError.
b0_sgd = float(np.squeeze(intercept_sgd))
b1_sgd = float(np.squeeze(slope_sgd))

print("\nStochastic Gradient Descent Solution:")
print(f"Intercept (b0_sgd): {b0_sgd:.2f}")
print(f"Slope (b1_sgd): {b1_sgd:.2f}")
print(f"SSE: {sse_sgd:.2f}")
print(f"R^2: {r2_sgd:.2f}")


# Overlay the three fitted lines on the held-out data so the solutions can be
# compared visually. Each entry pairs a prediction vector with the keyword
# arguments for its line.
plt.scatter(X_test_scaled, y_test, color='purple', label='Test Data')

fitted_lines = [
    (y_pred_analytic, dict(color='orange', label='Analytic Solution')),
    (y_pred_gd_test, dict(color='cyan', linestyle='--', label='Full-batch Gradient Descent Solution')),
    (y_pred_sgd_test, dict(color='green', linestyle='-.', label='Stochastic Gradient Descent Solution')),
]
for predictions, line_style in fitted_lines:
    plt.plot(X_test_scaled, predictions, **line_style)

plt.xlabel(best_feature)
plt.ylabel('PRICE')
plt.legend()
plt.title('Linear Regression: Analytic vs. Gradient Descent (Full-batch & Stochastic)')
plt.show()