In [1]:
#Project Description

In [2]:
#Action items

In [3]:
'''
Sample code
'''
import time
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [4]:
def stochastic_gradient_descent(feature_array, target_array, to_predict, learn_rate_type="invscaling",
                                random_state=None):
    """Fit a linear regression with Stochastic Gradient Descent and report on it.

    Builds a pipeline that first applies Standard Scaling to the feature array and
    then fits an SGDRegressor (the estimator is always the last pipeline step).
    Prints the fitting runtime, the learning-rate algorithm, the model coefficients,
    the number of iterations, the rounded prediction and the R-squared.

    :param feature_array: array with all feature vectors used to train the model
    :param target_array: array with all target values used to train the model
    :param to_predict: array holding the feature vector(s) not contained in the training
        set; only the prediction for the first row is reported
    :param learn_rate_type: algorithm used to set the learning rate at each iteration
        (passed to SGDRegressor as learning_rate)
    :param random_state: optional seed passed to SGDRegressor so that repeated runs can be
        made reproducible; the default None keeps the previous unseeded behaviour
    :return: tuple (prediction, r_squared) - the predicted cooking time for the first row
        of to_predict rounded to a whole number, and the R-squared of the model on the
        training data rounded to two decimals
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring durations.
    start_time = time.perf_counter()
    linear_regression_pipeline = make_pipeline(
        StandardScaler(),
        SGDRegressor(learning_rate=learn_rate_type, random_state=random_state),
    )
    linear_regression_pipeline.fit(feature_array, target_array)
    stop_time = time.perf_counter()

    # The estimator is always the last step, so index from the end rather than
    # relying on the pipeline having exactly two steps.
    sgd_regressor = linear_regression_pipeline[-1]

    print("Total runtime: %.6fs" % (stop_time - start_time))
    print("Algorithm used to set the learning rate: " + learn_rate_type)
    print("Model Coeffiecients: " + str(sgd_regressor.coef_))
    print("Number of iterations: " + str(sgd_regressor.n_iter_))

    # Make a prediction for a feature vector not in the training set.
    prediction = float(np.round(linear_regression_pipeline.predict(to_predict)[0], 0))
    print("Predicted cooking time: " + str(prediction) + " minutes")

    # score() may return a plain Python float (which has no .reshape), so round the
    # scalar directly instead of reshaping it first.
    r_squared = float(np.round(linear_regression_pipeline.score(feature_array, target_array), 2))
    print("R-squared: " + str(r_squared))

    return prediction, r_squared
    


In [5]:
# Training features: nine observations with four numeric values each.
# NOTE(review): the meaning of the four columns is not documented here - TODO confirm.
feature_array = [
    [500, 80, 30, 10],
    [550, 75, 25, 0],
    [475, 90, 35, 20],
    [450, 80, 20, 25],
    [465, 75, 30, 0],
    [525, 65, 40, 15],
    [400, 85, 33, 0],
    [500, 60, 30, 30],
    [435, 45, 25, 0],
]

In [6]:
# Training targets, one value per row of feature_array.
# Presumably cooking times in minutes (the function prints its prediction in minutes) - TODO confirm.
target_array = [17, 11, 21, 23, 22, 15, 25, 18, 16]

In [7]:
# Single feature vector to predict, nested in an outer list (one row): the function
# passes it straight to the pipeline's predict() and reads the first prediction.
to_predict = [[510, 50, 35, 10]]


In [8]:
# Baseline run using the function's default learning-rate algorithm ("invscaling").
stochastic_gradient_descent(
    feature_array=feature_array,
    target_array=target_array,
    to_predict=to_predict,
)

Total runtime: 0.001998s
Algorithm used to set the learning rate: invscaling
Model Coeffiecients: [-3.44034236  1.64723444  0.28599174  1.10821407]
Number of iterations: 249
Predicted cooking time: 13.0 minutes
R-squared: 0.9


In [9]:
# Same call as the previous cell, repeated on purpose: the fit is stochastic, so the
# coefficients and the iteration count can differ slightly from run to run.
stochastic_gradient_descent(
    feature_array=feature_array,
    target_array=target_array,
    to_predict=to_predict,
)

Total runtime: 0.002000s
Algorithm used to set the learning rate: invscaling
Model Coeffiecients: [-3.44226981  1.64672679  0.2866046   1.10902962]
Number of iterations: 248
Predicted cooking time: 13.0 minutes
R-squared: 0.9


In [10]:
# Same data, but with the "adaptive" learning-rate algorithm for comparison.
stochastic_gradient_descent(
    feature_array=feature_array,
    target_array=target_array,
    to_predict=to_predict,
    learn_rate_type="adaptive",
)

Total runtime: 0.001999s
Algorithm used to set the learning rate: adaptive
Model Coeffiecients: [-3.49884884  1.64454487  0.3091154   1.15922034]
Number of iterations: 97
Predicted cooking time: 13.0 minutes
R-squared: 0.91


'''
With the limitations of Gradient Descent in mind, Stochastic Gradient Descent emerged as a way to tackle performance issues and speed up convergence on large datasets.

Stochastic Gradient Descent is a probabilistic approximation of Gradient Descent. It is an approximation because, at each step, the algorithm calculates the gradient for one observation picked at random, instead of calculating the gradient for the entire dataset.
'''