# Stochastic Gradient Descent - Linear Regression

## Data for the linear regression model

In [2]:
import numpy as np

In [3]:
# Data points
data_amount = 15
max_num = 10
X = np.random.randint(max_num, size=(data_amount, 3))

# We generate them by "knowing" the output weights for this example (this is not the case for real data!)
final_weights = np.random.rand(X.shape[1])
final_weights = final_weights / np.sum(final_weights)

final_bias = 0.2

# Corresponding labels
random_noise = np.random.rand(X.shape[0]) / 7.5 # ranges from 0-1. We divide that by 7.5 to not get to much noise in here
y = np.dot(final_weights, X.T) + final_bias + random_noise

#print('data set X\n', X)
#print('labels y\n', y)

# Training and test data

In [4]:
train_len = int(data_amount * 0.75)

# We train with the following data
X_train = X[:train_len]
y_train = y[:train_len]

# We test / evaluate with the following data
X_test = X[train_len:]
y_test = y[train_len:]

## Information about the model

In [5]:
# We set the inital weights randomly
weights = np.random.rand(X.shape[1])

# The bias value is set to 1 initially
bias = np.array([1])

### Some more information

We know the regression equation:

$y_{pred}= w_1x_1 + w_2x_2 + \ldots + w_nx_n + b$

In [6]:
# What are the current results of the untrained model?
y_untrained = np.dot(weights, X_test.T) + bias
print('Outputs for our untrained model:', y_untrained)

# What are the results of the final model (that we want to achieve by updating the weights by the Stochastic gradient descent method)
y_final = np.dot(final_weights, X_test.T) + final_bias
print('Outputs for the final model:', y_final)

Outputs for our untrained model: [ 6.17202369 13.93490165  7.06279375  8.89593029]
Outputs for the final model: [6.15062032 6.60612773 2.3166546  4.77131621]


### Loss function

We want to use the mean squarred error to calculate the loss for the model outputs which is defined as follows:

$$MSE = \frac{1}{n}\sum_{i=1}^n (y_i-y_{i_{pred}})^2$$

In [7]:
mse = lambda y, y_pred: np.mean(np.sum((y-y_pred)**2))

In [8]:
# In our example the loss for our untrained model is:
loss_untrained = mse(y_test, y_untrained)
print('The loss of the untrained model is:', loss_untrained)

# Loss for the final model
loss_final = mse(y_test, y_final)
print('The loss of the final model is:', loss_final)

The loss of the untrained model is: 91.37580517566529
The loss of the final model is: 0.01966863103431316


## Your stochastic gradient descent implementation to optimize the weights of your model

In [9]:
# Summary on what we know so far:

# We know the loss function: Variable 'mse' (Mean squared error)
# We know the initial weights that we want to optimize: variable 'weights'
# We know the initial bias value: variable 'bias'

In [25]:
import numpy as np

class LinearRegression:
    def __init__(self, learning_rate=0.005, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None
        self.bias = None

    def fit(self, x, y):
        n_samples, n_features = x.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for i in range(self.iterations):
            # y_pred = (w1.x1) + b
            y_pred = np.dot(x, self.weights) + self.bias

            # gradient in terms of weight
            dw = (1/n_samples) * np.dot(x.T, (y_pred-y))
            db = (1/n_samples) * np.sum(y_pred-y)

            self.weights= self.weights - self.learning_rate * dw
            self.bias = self.bias - self.learning_rate * db

    def predict(self, X):
        y_pred = np.dot(X, self.weights) + self.bias
        return y_pred


reg = LinearRegression()
reg.fit(X_train, y_train)
predictions = reg.predict(X_test)

print(predictions)
print(y_test)

[6.21255851 6.68838397 2.27466833 4.78833736]
[6.22384471 6.63565508 2.40487691 4.84649453]


## Compare the results with the Test data

In [32]:
def mse(y_test, predictions):
    return np.mean((y_test-predictions)**2)

mse = mse(y_test, predictions)
print(f"The difference between test and the predicted value = {mse}")

The difference between test and the predicted value = 0.005811060839403353
