## Stochastic Gradient Descent

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the housing-price data; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv('housing_price_dataset.csv')

In [3]:
# Preview the first rows to check the column names and value formats.
df.head()

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,Neighborhood,YearBuilt,Price
0,2126,4,1,Rural,1969,215355.283618
1,2459,3,2,Rural,1980,195014.221626
2,1860,2,1,Suburb,1970,306891.012076
3,2294,2,1,Urban,1996,206786.787153
4,2130,5,2,Suburb,2001,272436.239065


In [4]:
# (rows, columns) of the raw housing frame.
df.shape

(50000, 6)

In [5]:
from sklearn.datasets import load_diabetes

# return_X_y=True yields the (features, target) pair directly instead of a
# Bunch object.
X_dia, y_dia = load_diabetes(return_X_y=True)

# Display both shapes as one tuple.
(X_dia.shape, y_dia.shape)

((442, 10), (442,))

In [6]:
from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

In [7]:
# Hold out 20% of the housing rows for testing; the fixed random_state keeps
# the split repeatable across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    df.drop(columns='Price'),  # features: every column except the target
    df['Price'],               # target
    test_size=0.2,
    random_state=42,
)
tuple(part.shape for part in (xtrain, xtest, ytrain, ytest))

((40000, 5), (10000, 5), (40000,), (10000,))

In [8]:
# Preprocessing for the housing features:
#   * standardise the four numeric columns,
#   * one-hot encode the categorical 'Neighborhood' column.
# sparse_threshold is a float density threshold; 0 means the stacked output is
# always returned as a dense array (the original passed the bool False, which
# only worked because False == 0).
# remainder='passthrough' would forward any column not listed above unchanged.
trf = ColumnTransformer(
    sparse_threshold=0,
    transformers=[
        ('num', pp.StandardScaler(), ['SquareFeet', 'Bedrooms', 'Bathrooms', 'YearBuilt']),
        ('cat', pp.OneHotEncoder(), ['Neighborhood']),
    ],
    remainder='passthrough',
)

In [9]:
# Learn the preprocessing parameters from the training rows only, then apply
# the same fitted transform to the test rows.
# NOTE(review): both names are rebound to the transformed arrays, so this cell
# is not idempotent -- re-running it without first re-running the split cell
# feeds already-transformed data back into `trf`.
xtrain = trf.fit_transform(xtrain)
xtest = trf.transform(xtest)

In [10]:
# Check the transformed training matrix against the target length.
xtrain.shape, ytrain.shape

((40000, 7), (40000,))

In [11]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline on the transformed housing features.
# fit() returns the estimator itself, so construction and fitting are chained.
lr = LinearRegression().fit(xtrain, ytrain)
ypred_lr = lr.predict(xtest)

In [12]:
# 80/20 split of the diabetes data with a fixed seed for repeatability.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X_dia,
    y_dia,
    test_size=0.2,
    random_state=42,
)
tuple(part.shape for part in (Xtrain, Ytrain, Xtest, Ytest))

((353, 10), (353,), (89, 10), (89,))

In [13]:
class GDRegressors:
    """Linear regression fitted with stochastic gradient descent (SGD).

    The model is ``y_hat = X @ m + b``. Every update draws one training row
    uniformly at random (with replacement) and moves ``m`` and ``b`` along the
    negative gradient of that single row's squared error.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to each per-sample gradient.
    epochs : int, default=100
        Number of passes; each pass performs ``n_samples`` single-row updates.
    random_state : int or None, default=None
        Seed for the row sampler. ``None`` keeps drawing from NumPy's global
        generator (so ``np.random.seed`` still controls it); an integer makes
        ``fit`` reproducible on its own.

    Attributes
    ----------
    m : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until ``fit`` has run.
    b : float or None
        Learned intercept; ``None`` until ``fit`` has run.
    """

    def __init__(self, learning_rate=0.01, epochs=100, random_state=None):
        self.lr = learning_rate
        self.epochs = epochs
        self.random_state = random_state
        self.m = None
        self.b = None

    def fit(self, Xtrain, Ytrain):
        """Estimate ``m`` and ``b`` from the training data.

        Parameters
        ----------
        Xtrain : array-like of shape (n_samples, n_features)
            Training features (NumPy array, DataFrame, nested list, ...).
        Ytrain : array-like of shape (n_samples,)
            Training targets (NumPy array, Series, list, ...).

        Raises
        ------
        ValueError
            If ``Xtrain`` is not 2-D or the number of targets differs from the
            number of rows.
        """
        # Work on positional NumPy arrays. Integer indexing on a DataFrame or
        # Series is label-based, so after a shuffled train/test split
        # `Ytrain[i]` would raise KeyError or silently pick a different row.
        X = np.asarray(Xtrain, dtype=float)
        y = np.asarray(Ytrain, dtype=float).reshape(-1)

        if X.ndim != 2:
            raise ValueError(
                "Xtrain must be 2-D (n_samples, n_features); got %d-D input" % X.ndim
            )
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                "Xtrain has %d rows but Ytrain has %d values" % (n_samples, y.shape[0])
            )

        # With no seed, fall back to the module-level generator so existing
        # code that calls np.random.seed(...) before fit() behaves as before.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.m = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.epochs):
            for _ in range(n_samples):
                idx = rng.randint(0, n_samples)
                x_i = X[idx]

                error = y[idx] - (np.dot(x_i, self.m) + self.b)

                # Gradient of (y - (x.m + b))**2 with respect to b and m.
                db = -2 * error
                dm = -2 * error * x_i

                self.b -= self.lr * db
                self.m -= self.lr * dm

    def predict(self, X):
        """Return ``X @ m + b`` for each row of ``X``.

        ``fit`` must have been called first; ``m`` and ``b`` are ``None``
        before that.
        """
        y_pred = np.dot(X, self.m) + self.b
        return y_pred
           

In [19]:
# The regressor samples rows through NumPy's global generator, so seed it here
# to make the reported R^2 reproducible under Restart & Run All.
np.random.seed(42)

gds = GDRegressors(learning_rate=0.01, epochs=70)
gds.fit(Xtrain, Ytrain)
ypred = gds.predict(Xtest)

# R^2 on the held-out diabetes rows.
r2_score(Ytest, ypred)

0.4560990171102167