## Gradient Descent

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('housing_price_dataset.csv')

In [3]:
df.head()

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,Neighborhood,YearBuilt,Price
0,2126,4,1,Rural,1969,215355.283618
1,2459,3,2,Rural,1980,195014.221626
2,1860,2,1,Suburb,1970,306891.012076
3,2294,2,1,Urban,1996,206786.787153
4,2130,5,2,Suburb,2001,272436.239065


In [4]:
df.shape

(50000, 6)

In [90]:
from sklearn.datasets import load_diabetes

X_dia, y_dia = load_diabetes(return_X_y=True)

X_dia.shape, y_dia.shape

((442, 10), (442,))

In [5]:
from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

In [6]:
xtrain,xtest,ytrain,ytest = train_test_split(df.drop(columns='Price'),df['Price'],test_size=0.2,random_state=42)
xtrain.shape, xtest.shape, ytrain.shape, ytest.shape

((40000, 5), (10000, 5), (40000,), (10000,))

In [7]:
trf = ColumnTransformer(sparse_threshold=False, transformers= [
    ('num',pp.StandardScaler(),['SquareFeet','Bedrooms','Bathrooms','YearBuilt']),
    ('cat',pp.OneHotEncoder(),['Neighborhood']),
     ],remainder='passthrough')

In [8]:
xtrain = trf.fit_transform(xtrain)
xtest = trf.transform(xtest)

In [9]:
xtrain.shape, ytrain.shape

((40000, 7), (40000,))

In [30]:
class GDRegressor : 
    def __init__(self, learning_rate=0.01, epochs=100) :
        self.lr = learning_rate
        self.epochs = epochs
        self.m =None
        self.b = None
        
    def fit(self, X,y):
        #X = np.array(X)
       # y = np.array(y)
        
        n_samples, n_features = X.shape
        self.m = np.zeros(n_features)
        self.b = 0
        
        # Calculate b and m using GD 
        for i in range(self.epochs) :
            y_pred = np.dot(X, self.m) + self.b
            error = y_pred - y
            
            
            db = (-2/X.shape[0]) * np.sum(error)
           # dm = (-2/n_samples) * np.dot(X.T, error)
            dm = -2 * np.dot(error,X)/X.shape[0]
            
           
            self.b -= (self.lr * db)
            self.m -= (self.lr * dm)
        print(f"b: {self.b}, m: {self.m}")
    
    def predict(self, X) :
        y_pred = np.dot(X, self.m) + self.bias
        return y_pred
           

In [None]:
gd = GDRegressor(learning_rate=0.00001, epochs=10000)
gd.fit(xtrain, ytrain)

b: -51523.24514398115, m: [-12652.23538594  -1277.76838975   -523.19152995     68.17053328
 -17126.28393644 -17125.34414474 -17271.6170628 ]


In [12]:
ypred = gd.predict(xtest)


In [13]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(xtrain, ytrain)
ypred_lr = lr.predict(xtest)

In [14]:
print(lr.coef_, lr.intercept_)
print("GD MSE: ", mean_squared_error(ytest, ypred_lr))

[57145.59775123  5831.92112322  2422.27156096  -118.06462444
  -273.55711806  -869.08798284  1142.6451009 ] 224797.2388804618
GD MSE:  2436249371.3072467


In [15]:
print("GD MSE: ", mean_squared_error(ytest, ypred))

GD MSE:  4.0394391678878184e+95


In [16]:
r2_score(ytest, ypred_lr)

0.5755628630306235

In [17]:
r2_score(ytest, ypred)

-7.037407646247245e+85

In [91]:
Xtrain,Xtest,Ytrain,Ytest = train_test_split(X_dia,y_dia,test_size=0.2,random_state=42)
Xtrain.shape, Ytrain.shape, Xtest.shape, Ytest.shape                        

((353, 10), (353,), (89, 10), (89,))

In [105]:
class GDRegressors : 
    def __init__(self, learning_rate=0.01, epochs=100) :
        self.lr = learning_rate
        self.epochs = epochs
        self.m =None
        self.b = None
        
    def fit(self, Xtrain,Ytrain):
        #X = np.array(X)
        #y = np.array(y)
        
        n_samples, n_features = Xtrain.shape
        self.m = np.zeros(n_features)
        self.b = 0
        
        # Calculate b and m using GD 
        for i in range(self.epochs) :
            y_pred = np.dot(Xtrain, self.m) + self.b
            error = Ytrain - y_pred
            
            
            db = -2 * np.mean(error)
           # dm = (-2/n_samples) * np.dot(X.T, error)
            dm = -2 * np.dot(error,Xtrain)/Xtrain.shape[0]
            
           
            self.b -= (self.lr * db)
            self.m -= (self.lr * dm)
        print(f"b: {self.b}, m: {self.m}")
    
    def predict(self, X) :
        y_pred = np.dot(X, self.m) + self.b
        return y_pred
           

In [139]:
gds = GDRegressors(learning_rate=0.9, epochs=9000)
gds.fit(Xtrain,Ytrain)
ypred = gds.predict(Xtest)
r2_score(Ytest, ypred)

b: 151.31402329206674, m: [  41.36223135 -240.64461079  554.41800146  343.88293446 -312.36052847
   32.80752815 -114.84803922  189.03431081  499.18721216   53.09348668]


0.4552040165791882