# Block 3: Gradient Descent

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

In [27]:
# Read the cars dataset from disk.
df = pd.read_json('../data/cars.json')

# Columns that must all be populated for a row to be kept.
required_cols = [
    'Miles_per_Gallon',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight_in_lbs',
    'Acceleration',
    'Origin',
]

# Boolean mask: True for rows with no missing value in any required column.
ix_included = df[required_cols].notna().all(axis=1)

# Report the shape before and after dropping the incomplete rows.
print("Before: ", df.shape)
df = df[ix_included]
print("After: ", df.shape)

Before:  (406, 9)
After:  (392, 9)


In [28]:
def optimize(x, y, eta, steps, seed=None):
    """Fit ``y ~ beta0 + beta1 * z(x)`` by batch gradient descent on the MSE.

    ``x`` is z-scored (``np.std`` with its default ``ddof=0``) before
    fitting, so the coefficients in the history refer to the standardized
    feature, not to the raw values of ``x``.

    Parameters
    ----------
    x : array-like of numbers
        Input feature; converted to a float array and standardized.
    y : array-like of numbers
        Target values; must broadcast against ``x``.
    eta : float
        Learning rate applied to both gradient components.
    steps : int
        Number of gradient-descent iterations to run.
    seed : int or None, optional
        Seed for the random initialisation of ``(beta0, beta1)``. With the
        default ``None`` the draw comes from NumPy's global random state
        (``np.random.randn``); pass an integer for a reproducible run that
        is independent of the global state.

    Returns
    -------
    list
        One ``[(beta0, beta1), mse]`` entry per iteration. Each entry is
        recorded *before* that iteration's update, so the last entry
        reflects ``steps - 1`` updates. The list is empty when ``steps``
        is 0.

    Raises
    ------
    ValueError
        If ``x`` is empty or has zero standard deviation (the z-score
        would divide by zero).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if x.size == 0:
        raise ValueError("x must contain at least one value")
    scale = np.std(x)
    if scale == 0:
        raise ValueError("x has zero variance and cannot be z-scored")

    # z-score
    x = (x - np.mean(x)) / scale

    # randomly initialize solution
    if seed is None:
        # Keep drawing from the global state so np.random.seed() still applies.
        beta0, beta1 = np.random.randn(2)
    else:
        beta0, beta1 = np.random.default_rng(seed).standard_normal(2)

    # iterate for steps
    history = []

    for _ in range(steps):
        # compute model predictions and their error
        yhat = x * beta1 + beta0
        residual = yhat - y
        mse = np.mean(np.square(residual))
        history.append([(beta0, beta1), mse])

        # gradient of the MSE with respect to each coefficient
        beta0_grad = np.mean(2 * residual)
        beta1_grad = np.mean(2 * residual * x)

        # update solution
        beta0 = beta0 - eta * beta0_grad
        beta1 = beta1 - eta * beta1_grad

    return history

# Fit MPG against Horsepower: learning rate 0.1, 100 gradient steps.
horsepower = df["Horsepower"].to_numpy()
mpg = df["Miles_per_Gallon"].to_numpy()
history = optimize(horsepower, mpg, 0.1, 100)

# The last history entry holds the most recently recorded coefficients and MSE.
final_p, final_mse = history[-1]
print(final_p)
print(final_mse)

(np.float64(23.445918361194206), np.float64(-6.067872401517799))
23.943662938603108


## Other Input Features

**WARNING**: Do not do this in the real world. You must not select the best feature using the whole dataset, because the comparison would then be made on the same data you later evaluate on. This is for illustration purposes only.

In [29]:
features = ['Cylinders', 'Displacement', 'Horsepower', 'Weight_in_lbs', 'Acceleration']

# The regression target is the same for every candidate feature, so extract it once.
mpg = df["Miles_per_Gallon"].to_numpy()

# Fit one single-feature model per candidate and report its last recorded MSE.
for feature in features:
    history = optimize(df[feature].to_numpy(), mpg, 0.1, 100)
    final_p, final_mse = history[-1]
    print(f"Using {feature}, the MSE is {final_mse}")


Using Cylinders, the MSE is 24.02017956815553
Using Displacement, the MSE is 21.37454494009465
Using Horsepower, the MSE is 23.943662938603108
Using Weight_in_lbs, the MSE is 18.6766165974193
Using Acceleration, the MSE is 49.87362732665226
