## Gradient Descent

In [14]:
import numpy as np
import pandas as pd

In [15]:
df = pd.read_csv('housing_price_dataset.csv')

In [16]:
df.head()

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,Neighborhood,YearBuilt,Price
0,2126,4,1,Rural,1969,215355.283618
1,2459,3,2,Rural,1980,195014.221626
2,1860,2,1,Suburb,1970,306891.012076
3,2294,2,1,Urban,1996,206786.787153
4,2130,5,2,Suburb,2001,272436.239065


In [17]:
df.shape

(50000, 6)

In [18]:
from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

In [19]:
xtrain,xtest,ytrain,ytest = train_test_split(df.drop(columns='Price'),df['Price'],test_size=0.2,random_state=42)
xtrain.shape, xtest.shape, ytrain.shape, ytest.shape

((40000, 5), (10000, 5), (40000,), (10000,))

In [20]:
trf = ColumnTransformer(sparse_threshold=False, transformers= [
    ('num',pp.StandardScaler(),['SquareFeet','Bedrooms','Bathrooms','YearBuilt']),
    ('cat',pp.OneHotEncoder(),['Neighborhood']),
     ],remainder='passthrough')

In [21]:
xtrain = trf.fit_transform(xtrain)
xtest = trf.transform(xtest)

In [22]:
xtrain.shape, ytrain.shape

((40000, 7), (40000,))

In [23]:
class GDRegressor : 
    def __init__(self, learning_rate, epochs) :
        self.lr = learning_rate
        self.epochs = epochs
        self.m =None
        self.b = 0
        
    def fit(self, X,y):
        X = np.array(X)
        y = np.array(y)
        
        n_samples, n_features = X.shape
        self.m = np.zeros(n_features)
        self.bias = 0
        
        # Calculate b and m using GD 
        for i in range(self.epochs) :
            y_pred = np.dot(X, self.m) + self.bias
            error = y_pred - y
            
            
            db = (-2/n_samples) * np.sum(error)
            dm = (-2/n_samples) * np.dot(X.T, error)
            
           
            self.b -= (self.lr * db)
            self.m -= (self.lr * dm)
        print(f"b: {self.b}, m: {self.m}")
    
    def predict(self, X) :
        y_pred = np.dot(X, self.m) + self.bias
        return y_pred
           

In [24]:
gd = GDRegressor(learning_rate=0.01, epochs=5000)
gd.fit(xtrain, ytrain)


b: 9.075156399295958e+42, m: [-6.04205122e+47  1.97681691e+46  5.00589829e+46  1.89451327e+47
  2.26151783e+45  2.76070526e+45 -5.01314793e+45]


In [25]:
ypred = gd.predict(xtest)


In [26]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(xtrain, ytrain)
ypred_lr = lr.predict(xtest)

In [27]:
print(lr.coef_, lr.intercept_)
print("GD MSE: ", mean_squared_error(ytest, ypred_lr))

[57145.59775123  5831.92112322  2422.27156096  -118.06462444
  -273.55711806  -869.08798284  1142.6451009 ] 224797.2388804618
GD MSE:  2436249371.3072467


In [28]:
print("GD MSE: ", mean_squared_error(ytest, ypred))

GD MSE:  4.0394391678878184e+95


In [29]:
r2_score(ytest, ypred_lr)

0.5755628630306235

In [30]:
r2_score(ytest, ypred)

-7.037407646247245e+85