In [4]:
import numpy as np

In [3]:
# Instantiate the regressor with its default hyperparameters; the notebook echoes the object's repr below.
SGDRegressor()

<__main__.SGDRegressor at 0x1a332f8a390>

In [8]:
# Ragged nested list: the inner lists have lengths 2 and 3.
y = [ [1,2], [4,5,6]]

np.shape(y)

# The inner lists must all have the same length; here they differ, so np.shape raises the ValueError shown below.

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [9]:
# Rectangular nested list: both inner lists have 3 items.
y = [ [1,2,3], [4,5,6]]

np.shape(y)

# There are 2 lists and each list has 3 items, so the shape is (2, 3).

(2, 3)

In [29]:
# np.arange(7) builds the integers 0 through 6 (the stop value 7 itself is excluded, as the printed output shows).
a = np.arange(7)

print(a)

[0 1 2 3 4 5 6]


In [28]:
x = np.array([[1, 2], [3, 4]])  # 2 rows, 2 columns
y = np.array([[5, 6]])  # 1 row, 2 columns
# axis=0 joins along rows: y's single row is appended below x's rows.
a = np.concatenate((x, y), axis=0)

# def fit(self, X, y):
#     m, n = X.shape
#     return m

# Print both inputs and the concatenated result, separated by blank lines.
print(x)
print()
print(y)
print()
print(a)


[[1 2]
 [3 4]]

[[5 6]]

[[1 2]
 [3 4]
 [5 6]]


# Main Code

In [26]:
class SGDRegressor:
    """Linear regression trained with mini-batch stochastic gradient descent.

    The model is ``y ~ np.dot(X, weights) + bias``. Training minimizes the
    mean squared error of each mini-batch, optionally adding an L1 or L2
    penalty term on the weights (the bias is never penalized).
    """

    def __init__(self, learning_rate=0.01, epochs=100, batch_size=1, reg=None, reg_param=0.0):
        """
        Constructor for the SGDRegressor.

        Parameters:
        learning_rate (float): The step size used in each update.
        epochs (int): Number of passes over the training dataset.
        batch_size (int): Number of samples to be used in each batch.
        reg (str): Type of regularization ('l1' or 'l2'); None if no regularization.
        reg_param (float): Regularization parameter.

        The weights and bias are initialized as None and will be set during the fit method.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.reg = reg
        self.reg_param = reg_param
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """
        Fits the SGDRegressor to the training data.

        Parameters:
        X (array-like): Training data, shape (m_samples, n_features).
        y (array-like): Target values, shape (m_samples,); a column vector
            of shape (m_samples, 1) is flattened.

        Raises:
        ValueError: If X is not 2-dimensional, if X and y disagree on the
            number of samples, or if batch_size is smaller than 1.

        This method initializes the weights and bias to zero, and then updates
        them once per mini-batch over the configured number of epochs. The
        samples are reshuffled with np.random.permutation at every epoch.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y so a column vector cannot broadcast against the 1-D
        # predictions into a (batch, batch) matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (m_samples, n_features)")
        m, n = X.shape  # m is number of samples, n is number of features
        if y.shape[0] != m:
            raise ValueError("X and y must contain the same number of samples")
        if self.batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        self.weights = np.zeros(n)
        self.bias = 0.0

        for _ in range(self.epochs):
            order = np.random.permutation(m)
            X_shuffled = X[order]
            y_shuffled = y[order]

            for start in range(0, m, self.batch_size):
                stop = start + self.batch_size
                X_batch = X_shuffled[start:stop]
                y_batch = y_shuffled[start:stop]

                # The last batch of an epoch is shorter when m is not a
                # multiple of batch_size; average over the rows actually used.
                batch_len = X_batch.shape[0]

                # Compute the residual once and reuse it for both gradients.
                residual = y_batch - np.dot(X_batch, self.weights) - self.bias
                gradient_w = -2 * np.dot(X_batch.T, residual) / batch_len
                gradient_b = -2 * np.sum(residual) / batch_len

                if self.reg == 'l1':
                    gradient_w += self.reg_param * np.sign(self.weights)
                elif self.reg == 'l2':
                    gradient_w += self.reg_param * self.weights

                self.weights -= self.learning_rate * gradient_w
                self.bias -= self.learning_rate * gradient_b

    def predict(self, X):
        """
        Predicts the target values using the linear model.

        Parameters:
        X (numpy.ndarray): Data for which to predict target values.

        Returns:
        numpy.ndarray: Predicted target values, np.dot(X, weights) + bias.
        """
        return np.dot(X, self.weights) + self.bias

    def compute_loss(self, X, y):
        """
        Computes the loss of the model.

        The value is the square root of (mean squared error + regularization
        loss), so without regularization it equals the root mean squared error.

        Parameters:
        X (numpy.ndarray): The input data.
        y (numpy.ndarray): The true target values.

        Returns:
        float: The computed loss value.
        """
        mse = np.mean((y - self.predict(X)) ** 2)
        return (mse + self._get_regularization_loss()) ** 0.5

    def _get_regularization_loss(self):
        """
        Computes the regularization loss based on the regularization type.

        Returns:
        float: reg_param * sum(|weights|) for 'l1', reg_param * sum(weights ** 2)
        for 'l2', and 0 for any other value of reg.
        """
        if self.reg == 'l1':
            return self.reg_param * np.sum(np.abs(self.weights))
        elif self.reg == 'l2':
            return self.reg_param * np.sum(self.weights ** 2)
        else:
            return 0

    def get_weights(self):
        """
        Returns the weights of the model.

        Returns:
        numpy.ndarray: The weights of the linear model (None before fit is called).
        """
        return self.weights

## Step 1: Initialization 

```learning_rate``` - The step size used in updating the model (e.g. 0.01)

```epochs``` - The number of passes over the entire dataset.

```batch_size``` - The number of samples used in each batch for SGD

```reg``` - The type of regularization (either `'l1'` or `'l2'`; `None` if no regularization is used).

`reg_param` - The regularization parameter.

`weights` and  `bias` are set to `None` initially and will be initialized in the `fit` method.
 

In [2]:
    def __init__(self, learning_rate=0.01, epochs=100, batch_size=1, reg=None, reg_param=0.0):
        """
        Store the hyperparameters on the instance.

        Parameters:
        learning_rate (float): The step size used in each update.
        epochs (int): Number of passes over the training dataset.
        batch_size (int): Number of samples to be used in each batch.
        reg (str): Type of regularization ('l1' or 'l2'); None if no regularization.
        reg_param (float): Regularization parameter.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.reg = reg
        self.reg_param = reg_param
        # The model parameters do not exist yet; they stay None until fit() creates them.
        self.weights = None
        self.bias = None

## Step 2: Fit the Model

`fit` takes the arguments `X` and `y` and does the following:

1. Initializes `weights` as a zero vector whose length is the number of features (`n`).

2. Initializes the `bias` to zero.
3. Reads the shape of the array `X` (`X.shape`, equivalent to `numpy.shape(X)`) to get the number of samples `m` and the number of features `n`.
    Example:

In [None]:
def fit(self, X, y):
    """
    Fit the linear model to the training data with mini-batch SGD.

    Parameters:
    X (array-like): Training data, shape (m_samples, n_features).
    y (array-like): Target values, shape (m_samples,); a column vector of
        shape (m_samples, 1) is flattened.

    Raises:
    ValueError: If X is not 2-dimensional, if X and y disagree on the number
        of samples, or if batch_size is smaller than 1.

    The weights and bias are reset to zero and then updated once per
    mini-batch for self.epochs passes; samples are reshuffled every epoch.
    """
    X = np.asarray(X, dtype=float)
    # Flatten y so a column vector cannot broadcast against the 1-D
    # predictions into a (batch, batch) matrix.
    y = np.asarray(y, dtype=float).ravel()

    if X.ndim != 2:
        raise ValueError("X must be 2-dimensional (m_samples, n_features)")
    m, n = X.shape  # m is number of samples, n is number of features
    if y.shape[0] != m:
        raise ValueError("X and y must contain the same number of samples")
    if self.batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    self.weights = np.zeros(n)
    self.bias = 0.0

    for _ in range(self.epochs):
        order = np.random.permutation(m)
        X_shuffled = X[order]
        y_shuffled = y[order]

        for start in range(0, m, self.batch_size):
            stop = start + self.batch_size
            X_batch = X_shuffled[start:stop]
            y_batch = y_shuffled[start:stop]

            # The last batch of an epoch is shorter when m is not a multiple
            # of batch_size; average over the rows actually used.
            batch_len = X_batch.shape[0]

            # Compute the residual once and reuse it for both gradients.
            residual = y_batch - np.dot(X_batch, self.weights) - self.bias
            gradient_w = -2 * np.dot(X_batch.T, residual) / batch_len
            gradient_b = -2 * np.sum(residual) / batch_len

            if self.reg == 'l1':
                gradient_w += self.reg_param * np.sign(self.weights)
            elif self.reg == 'l2':
                gradient_w += self.reg_param * self.weights

            self.weights -= self.learning_rate * gradient_w
            self.bias -= self.learning_rate * gradient_b

## Step 3: Random Selection and Batches


In [None]:
# Once per epoch: draw a fresh random ordering of the m sample indices and
# apply it to X and y together, so each row stays paired with its target.
for _ in range(self.epochs):
    indices = np.random.permutation(m)
    X_shuffled = X[indices]
    y_shuffled = y[indices]

## Step 4: Compute the Gradient, Update the Parameters

In [None]:
# Residual of the batch: y_batch - (X_batch . weights + bias).
# gradient_w = -2 * X_batch^T . residual / batch_size   (squared-error gradient w.r.t. the weights)
# gradient_b = -2 * sum(residual) / batch_size          (squared-error gradient w.r.t. the bias)
# NOTE(review): the divisor is the configured batch_size, not the number of rows in X_batch;
# the two differ whenever a batch is shorter than batch_size.
gradient_w = -2 * np.dot(X_batch.T, (y_batch - np.dot(X_batch, self.weights) - self.bias)) / self.batch_size
gradient_b = -2 * np.sum(y_batch - np.dot(X_batch, self.weights) - self.bias) / self.batch_size

## Step 5: Repeat Steps Until Convergence

According to Stanford Prof. Andrew Ng, SGD rarely converges because it oscillates frequently -- the tester should stop when 1.) it is going to a negative value or 2.) it is decreasing.

In [32]:
def predict(self, X):
    """
    Predict target values with the linear model.

    Parameters:
    X (numpy.ndarray): Data for which to predict target values.

    Returns:
    numpy.ndarray: np.dot(X, self.weights) + self.bias.
    """
    linear_term = np.dot(X, self.weights)
    return linear_term + self.bias