<a href="https://colab.research.google.com/github/Tejaswidarsi/ML-3-Simple_linear/blob/main/Ass_1_BostonHousing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Import Libraries

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
dataset = "/content/BostonHousing.csv"
boston = pd.read_csv(dataset)

# Inspect the dataset
print(boston.head())
print(boston.info())



      crim    zn  indus  chas    nox     rm   age     dis  rad  tax  ptratio  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296     15.3   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242     17.8   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242     17.8   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222     18.7   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222     18.7   

        b  lstat  medv  
0  396.90   4.98  24.0  
1  396.90   9.14  21.6  
2  392.83   4.03  34.7  
3  394.63   2.94  33.4  
4  396.90   5.33  36.2  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   crim     506 non-null    float64
 1   zn       506 non-null    float64
 2   indus    506 non-null    float64
 3   chas     506 non-null    int64  
 4   nox      506 non-null    float6

###Analyze the input attributes and find the one with the best linear relationship with the output price

In [11]:

# Correlation matrix to identify best linear attribute
correlation_matrix = boston.corr()
print(correlation_matrix['medv'].sort_values(ascending=False))

# Best correlated feature
best_feature = correlation_matrix['medv'].sort_values(ascending=False).index[1]
print(f"The feature that best follows a linear relationship with price is: {best_feature}")


medv       1.000000
rm         0.695360
zn         0.360445
b          0.333461
dis        0.249929
chas       0.175260
age       -0.376955
rad       -0.381626
crim      -0.388305
nox       -0.427321
tax       -0.468536
indus     -0.483725
ptratio   -0.507787
lstat     -0.737663
Name: medv, dtype: float64
The feature that best follows a linear relationship with price is: rm


###Implement the Analytic Formulation

In [12]:
# Preparing the input and output for regression
X = boston[[best_feature]].values
y = boston['medv'].values

# Add a column of ones to X for the intercept term
X_b = np.c_[np.ones((X.shape[0], 1)), X]

# Compute the closed-form solution (Normal equation)
theta_analytic = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(f"Analytic solution coefficients: {theta_analytic}")


Analytic solution coefficients: [-34.67062078   9.10210898]


###Implement Gradient Descent (Full-batch)

In [13]:
# Full-batch gradient descent
def batch_gradient_descent(X, y, learning_rate=0.01, iterations=1000):
    m = len(y)
    X_b = np.c_[np.ones((X.shape[0], 1)), X]
    theta = np.zeros(2)
    for iteration in range(iterations):
        gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)
        theta = theta - learning_rate * gradients
    return theta

theta_batch = batch_gradient_descent(X, y)
print(f"Batch Gradient Descent coefficients: {theta_batch}")


Batch Gradient Descent coefficients: [-6.97094389  4.74757935]


###Implement Stochastic Gradient Descent

In [14]:
# Stochastic Gradient Descent
def stochastic_gradient_descent(X, y, learning_rate=0.01, epochs=50):
    m = len(y)
    X_b = np.c_[np.ones((X.shape[0], 1)), X]
    theta = np.zeros(2)
    for epoch in range(epochs):
        for i in range(m):
            random_index = np.random.randint(m)
            xi = X_b[random_index:random_index+1]
            yi = y[random_index:random_index+1]
            gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
            theta = theta - learning_rate * gradients
    return theta

theta_stochastic = stochastic_gradient_descent(X, y)
print(f"Stochastic Gradient Descent coefficients: {theta_stochastic}")



Stochastic Gradient Descent coefficients: [-35.21856853   9.48847488]


###Compare the results

In [15]:
print(f"Analytic solution coefficients: {theta_analytic}")
print(f"Batch Gradient Descent coefficients: {theta_batch}")
print(f"Stochastic Gradient Descent coefficients: {theta_stochastic}")


Analytic solution coefficients: [-34.67062078   9.10210898]
Batch Gradient Descent coefficients: [-6.97094389  4.74757935]
Stochastic Gradient Descent coefficients: [-35.21856853   9.48847488]
