In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.base import BaseEstimator
from sklearn.metrics import mean_squared_error, log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

In [2]:
class SGDRegressor(BaseEstimator):
    """Linear regression trained with per-sample stochastic gradient descent.

    Samples are visited in the order they appear in ``X`` (no shuffling). After
    every single-sample update the weight vector and the mean squared error on
    the whole training set are recorded; once training finishes, the recorded
    weight vector with the lowest training MSE is kept for prediction.

    Parameters
    ----------
    eta : float, default=1e-3
        Step size applied to every update.
    n_epochs : int, default=3
        Number of full passes over the training data.

    Attributes
    ----------
    weights_ : list of ndarray
        Weight vector (intercept first) after each update of the latest fit.
    mse_ : list of float
        Training MSE after each update of the latest fit.
    w_ : ndarray
        Entry of ``weights_`` with the smallest value in ``mse_``.
        Only available after ``fit``.
    """

    def __init__(self, eta=1e-3, n_epochs=3):
        self.eta = eta
        self.n_epochs = n_epochs
        # Created here so the attributes exist before the first fit (as in the
        # original interface); fit() re-creates both on every call.
        self.mse_ = []
        self.weights_ = []

    def fit(self, X, y):
        """Run SGD over ``(X, y)`` and keep the lowest-MSE weight vector.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix; a column of ones is prepended for the intercept.
        y : array-like of shape (n_samples,)
            Targets. Converted to a flat NumPy array so that rows are matched
            by position even when ``y`` is a pandas Series with a
            non-default index.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` has no rows, ``n_epochs`` is smaller than 1, or ``X`` and
            ``y`` disagree on the number of samples.
        """
        X = np.asarray(X)
        # Positional targets: ``y[i]`` on a Series would be a label lookup.
        y = np.asarray(y).ravel()

        n_samples = X.shape[0]
        if n_samples == 0 or self.n_epochs < 1:
            raise ValueError(
                "fit requires at least one sample and n_epochs >= 1"
            )
        if y.shape[0] != n_samples:
            raise ValueError(
                "X and y have inconsistent numbers of samples: "
                "%d != %d" % (n_samples, y.shape[0])
            )

        # add a column of ones to the left of X (intercept term)
        X = np.hstack([np.ones([n_samples, 1]), X])

        # start from the zero vector, (d + 1)-dimensional
        w = np.zeros(X.shape[1])

        # Drop any history left over from a previous fit on this instance;
        # otherwise the argmin below could pick weights from an older run.
        self.weights_ = []
        self.mse_ = []

        for _ in tqdm(range(self.n_epochs)):
            for i in range(n_samples):
                x_i = X[i]
                residual = y[i] - w.dot(x_i)
                # Simultaneous update of w_0, ..., w_d. The intercept needs no
                # special case because x_i[0] == 1. A new array is created on
                # each step, so the vectors stored in weights_ never alias.
                w = w + self.eta * residual * x_i

                # store the current weight vector
                self.weights_.append(w)
                # store the current loss on the full training set
                self.mse_.append(mean_squared_error(y, X.dot(w)))

        # the "best" vector of weights: lowest recorded training MSE
        self.w_ = self.weights_[np.argmin(self.mse_)]

        return self

    def predict(self, X):
        """Return linear predictions ``[1, X] @ w_`` for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
        """
        X = np.asarray(X)
        # add a column of ones to the left of X (intercept term)
        X = np.hstack([np.ones([X.shape[0], 1]), X])
        # linear prediction
        return X.dot(self.w_)

In [None]:
# Read the demo CSV (path is relative to the notebook's working directory).
data_demo = pd.read_csv(
    filepath_or_buffer='../datasets/weights_heights.csv',
)