# Stochastic Gradient Descent - Linear Regression

## Data for the linear regression model

In [38]:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [39]:
# Data points
data_amount = 15
max_num = 10
X = np.random.randint(max_num, size=(data_amount, 3))

# We generate them by "knowing" the output weights for this example (this is not the case for real data!)
final_weights = np.random.rand(X.shape[1])
final_weights = final_weights / np.sum(final_weights)
#print(final_weights)

final_bias = 0.2

# Corresponding labels
random_noise = np.random.rand(X.shape[0]) / 7.5 # ranges from 0-1. We divide that by 7.5 to not get to much noise in here
y = np.dot(final_weights, X.T) + final_bias + random_noise

#print('data set X\n', X)
#print('labels y\n', y)

[0.40067454 0.47262097 0.12670449]
data set X
 [[5 3 3]
 [3 2 0]
 [6 8 8]
 [0 3 4]
 [0 0 9]
 [4 5 3]
 [6 3 0]
 [9 4 6]
 [7 5 7]
 [3 1 9]
 [0 5 2]
 [2 4 3]
 [6 6 4]
 [6 4 5]
 [1 5 6]]
labels y
 [4.03687902 2.39109502 7.52842956 2.22160481 1.3973937  4.60950238
 4.11109351 6.54301188 6.33446805 3.02737781 2.87385897 3.31139534
 5.9721888  5.22251394 3.74349917]


# Training and test data

In [40]:
train_len = int(data_amount * 0.75)

# We train with the following data
X_train = X[:train_len]
y_train = y[:train_len]

# We test / evaluate with the following data
X_test = X[train_len:]
y_test = y[train_len:]

#print(X_train)
#print(y_train)
#print(X_test)
#print(y_test)
#print(train_len)

[[5 3 3]
 [3 2 0]
 [6 8 8]
 [0 3 4]
 [0 0 9]
 [4 5 3]
 [6 3 0]
 [9 4 6]
 [7 5 7]
 [3 1 9]
 [0 5 2]]
[4.03687902 2.39109502 7.52842956 2.22160481 1.3973937  4.60950238
 4.11109351 6.54301188 6.33446805 3.02737781 2.87385897]
[[2 4 3]
 [6 6 4]
 [6 4 5]
 [1 5 6]]
[3.31139534 5.9721888  5.22251394 3.74349917]
11


## Information about the model

In [41]:
# We set the inital weights randomly
weights = np.random.rand(X.shape[1])

# The bias value is set to 1 initially
bias = np.array([1])
#print(weights)
#print(bias)

[0.18069874 0.62089145 0.47324743]
[1]


### Some more information

We know the regression equation:

$y_{pred}= w_1x_1 + w_2x_2 + \ldots + w_nx_n + b$

In [42]:
#print(X_test.T)
#print(weights)

[[2 6 6 1]
 [4 6 4 5]
 [3 4 5 6]]
[0.18069874 0.62089145 0.47324743]


In [43]:
# What are the current results of the untrained model?
y_untrained = np.dot(weights, X_test.T) + bias
#print('Outputs for our untrained model:', y_untrained)

# What are the results of the final model (that we want to achieve by updating the weights by the Stochastic gradient descent method)
y_final = np.dot(final_weights, X_test.T) + final_bias
#print('Outputs for the final model:', y_final)

Outputs for our untrained model: [5.26470558 7.70253086 6.93399538 7.12464059]
Outputs for the final model: [3.27194644 5.94659102 5.12805356 3.72400634]


### Loss function

We want to use the mean squarred error to calculate the loss for the model outputs which is defined as follows:

$$MSE = \frac{1}{n}\sum_{i=1}^n (y_i-y_{i_{pred}})^2$$

In [44]:
mse = lambda y, y_pred: np.mean(np.sum((y-y_pred)**2))
#print(y_test)
#print(y_untrained)

[3.31139534 5.9721888  5.22251394 3.74349917]
[5.26470558 7.70253086 6.93399538 7.12464059]


In [45]:
# In our example the loss for our untrained model is:
loss_untrained = mse(y_test, y_untrained)
#print('The loss of the untrained model is:', loss_untrained)

# Loss for the final model
loss_final = mse(y_test, y_final)
#print('The loss of the final model is:', loss_final)

The loss of the untrained model is: 21.170790532028754
The loss of the final model is: 0.011514195561751869


## Your stochastic gradient descent implementation to optimize the weights of your model

In [46]:
# Summary on what we know so far:

# We know the loss function: Variable 'mse' (Mean squared error)
# We know the initial weights that we want to optimize: variable 'weights'
# We know the initial bias value: variable 'bias'
#print("test for MSE:", mse(3,4))
#print(bias)

test for MSE: 1.0
[1]


In [63]:
# Use the training data to optimize the weights of the linear regression model
import random
# use these variables for your sgd implementation
learning_rate = 0.005
iterations = 1000

# YOUR CODE FOR THE STOCHASTIC GRADIENT DESCENT IMPLEMENTATION
class SGD:
    def __init__(self,y,z):
        self.iter = 1000
        self.learning_rate = 0.005
        self.bias = 1
        self.weights = y
        self.train_len = z
        self.sum = 0
        
        
    def fit(self,X_train,y_train):
        self.X_train = X_train
        self.y_train = y_train
        self.train = list(zip(self.X_train,self.y_train))
     
    def get_updated_y(self, x_value):
        predicted = self.bias
        for i in range(3):
            predicted += (self.weights[i] * x_value[i])
      
        return predicted
    
    def predict(self,X):
        for iterations in range(self.iter):
            for i in range(len(self.X_train[1])):    # 3 x values
                randint = random.randint(0,(train_len-1))
                test = self.train[randint]
                
                if i == 0:
                    # update bias here
                    a = 2 * ((self.get_updated_y(test[0])) - test[1])
                    step_size = self.learning_rate * a
                    self.bias = self.bias - step_size
                    continue
    
                
                y_new = self.get_updated_y(test[0])
                b = -2 * test[0][i] *(test[1] - y_new) # b = gradient, a = intercept/bias
                step_size = self.learning_rate * b
                self.weights[i] -= step_size

        
            

           
        #print("bias is",self.bias,".      ","weights are", self.weights)
        value = np.dot(self.weights, X.T) + self.bias
        
        return value
            
                
                
    
    
    
SGD1 = SGD(weights,train_len)
SGD1.fit(X_train,y_train)
#print(SGD1.predict(X_test[2]))
#print(y_test[2])
#print(mse(SGD1.predict(X_test[1]), y_test[0]))

bias is 0.4611143967264565 .       weights are [0.37262727 0.46325658 0.13920522]
5.24593044339942
5.222513942721454


## Compare the results with the Test data

In [11]:
from sklearn.linear_model import SGDRegressor
skSGD = SGDRegressor(learning_rate='constant',eta0=0.005)
skSGD.fit(X_train,y_train)
#print(skSGD.predict(X_test))
#print(y_test)
#print(X_test)
print("loaded SGD")

[4.52838931 3.01560947 2.41482485 3.65803938]
[4.65374608 3.08426877 2.48621388 3.74795434]
[[2 4 5]
 [1 4 2]
 [5 0 4]
 [3 0 7]]
