<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Load-in-Data-&amp;-Explore" data-toc-modified-id="Load-in-Data-&amp;-Explore-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load in Data &amp; Explore</a></span><ul class="toc-item"><li><span><a href="#Arbitrarily-choosing-a-feature-and-target" data-toc-modified-id="Arbitrarily-choosing-a-feature-and-target-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Arbitrarily choosing a feature and target</a></span><ul class="toc-item"><li><span><a href="#Removing-Outliers" data-toc-modified-id="Removing-Outliers-1.1.1"><span class="toc-item-num">1.1.1&nbsp;&nbsp;</span>Removing Outliers</a></span></li></ul></li></ul></li><li><span><a href="#Our-Linear-Regression-Line" data-toc-modified-id="Our-Linear-Regression-Line-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Our Linear Regression Line</a></span><ul class="toc-item"><li><span><a href="#Gradient-descent-one-step-at-a-time" data-toc-modified-id="Gradient-descent-one-step-at-a-time-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Gradient descent one step at a time</a></span><ul class="toc-item"><li><span><a href="#First-Iteration" data-toc-modified-id="First-Iteration-2.1.1"><span class="toc-item-num">2.1.1&nbsp;&nbsp;</span>First Iteration</a></span></li><li><span><a href="#Second-Iteration" data-toc-modified-id="Second-Iteration-2.1.2"><span class="toc-item-num">2.1.2&nbsp;&nbsp;</span>Second Iteration</a></span></li><li><span><a href="#Third-Iteration" data-toc-modified-id="Third-Iteration-2.1.3"><span class="toc-item-num">2.1.3&nbsp;&nbsp;</span>Third Iteration</a></span></li></ul></li><li><span><a href="#Multiple-iterations" data-toc-modified-id="Multiple-iterations-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Multiple iterations</a></span></li></ul></li><li><span><a href="#Using-Scikit-Learn" data-toc-modified-id="Using-Scikit-Learn-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Using Scikit-Learn</a></span></li></ul></div>

Lesson adapted from Jeff Herman: https://github.com/sik-flow/Study_Groups/blob/master/Gradient_Descent.ipynb

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load in Data & Explore

In [None]:
# CSV from https://www.kaggle.com/c/house-prices-advanced-regression-techniques
# The path is relative, so the file must exist under the notebook's working directory.
df = pd.read_csv('data/house-prices-advanced-regression-techniques/train.csv')

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# List the available columns to pick a feature and a target from.
df.columns

## Arbitrarily choosing a feature and target

We'll focus on `GrLivArea` (feature) vs. `SalePrice` (target).

In [None]:
# Feature (x) and target (y) columns used throughout the rest of the notebook.
x, y = df['GrLivArea'], df['SalePrice']

In [None]:
# Scatter of the chosen feature against the target, before any rows are removed.
plt.scatter(x, y)
plt.show()

Looks like there are some extreme values that might affect our linear regression

### Removing Outliers

In [None]:
# Show the rows with a very large living area (> 4000) but a low sale price (< 300000).
df[(df['GrLivArea'] > 4000) & (df['SalePrice'] < 300000)]

In [None]:
# Remove the large-area / low-price outliers with the same filter displayed in the
# previous cell, rather than by hard-coded row labels: dropping labels 523 and 1298
# after the index has been reset would remove different rows if this cell is re-run.
# NOTE(review): the original dropped labels 523 and 1298, presumably exactly the rows
# this filter returns — confirm against the previous cell's output.
is_outlier = (df['GrLivArea'] > 4000) & (df['SalePrice'] < 300000)

# drop=True discards the old index instead of inserting it as a new 'index' column,
# so the frame keeps its original columns and gets a clean 0..n-1 index.
df = df[~is_outlier].reset_index(drop=True)

In [None]:
# Re-select feature and target so they reflect the frame with the outliers removed.
x, y = df['GrLivArea'], df['SalePrice']

In [None]:
# Same scatter as before, now drawn from the re-selected x and y.
plt.scatter(x, y)
plt.show()

# Our Linear Regression Line

In [None]:
def regression_formula(x, slope, intercept):
    """Evaluate the line ``slope * x + intercept``.

    Parameters
    ----------
    x : scalar or array-like supporting ``*`` and ``+``
        Input value(s) at which the line is evaluated.
    slope : number
        Multiplier applied to ``x``.
    intercept : number
        Constant added after scaling.

    Returns
    -------
    Same kind of object as ``slope * x``: the predicted value(s).
    """
    scaled = slope * x
    return scaled + intercept

In [None]:
# Starting guess for the line: slope 0 and intercept 0.
my_slope, my_intercept = 0, 0

In [None]:
# Plot the data together with the line defined by the starting slope and intercept.
fig = plt.figure(figsize = (12, 8))
plt.scatter(x, y, label = 'Raw Data')
plt.plot(x, regression_formula(x, my_slope, my_intercept), color = 'k', label = 'Regression')
plt.legend()
plt.show()

In [None]:
def error(intercept, slope, x, y):
    """Mean squared error of the line ``slope * x + intercept`` against ``y``.

    Note the argument order: ``intercept`` comes first, then ``slope``.

    The inputs are converted to NumPy arrays and paired by position, so a
    pandas Series works regardless of what its index labels are.

    Parameters
    ----------
    intercept : number
        Intercept of the line.
    slope : number
        Slope of the line.
    x : array-like of numbers
        Feature values.
    y : array-like of numbers
        Observed target values; must have the same shape as ``x``.

    Returns
    -------
    float
        The mean of the squared residuals ``(y - (slope * x + intercept)) ** 2``.

    Raises
    ------
    ValueError
        If ``x`` is empty or ``x`` and ``y`` differ in shape.
    """
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)
    if x_arr.size == 0:
        raise ValueError('error() needs at least one data point')
    if x_arr.shape != y_arr.shape:
        raise ValueError('x and y must have the same shape')

    residuals = y_arr - (slope * x_arr + intercept)
    return np.mean(residuals ** 2)

In [None]:
# Mean squared error of the starting line (error() takes the intercept first, then the slope).
error(my_intercept, my_slope, x, y)

## Gradient descent one step at a time

We need the gradient of the cost function with respect to each of its two parameters (the intercept $b$ and the slope $a$). For a single data point:

$$ \frac{\partial}{\partial b} (y_i - (b + ax_i))^2 = -2 \cdot (y_i - (ax_i + b))$$

$$ \frac{\partial}{\partial a} (y_i - (b + ax_i))^2 = -2 \cdot x_i \cdot (y_i - (ax_i + b))$$

In [None]:
def partial_deriv(b, a, x_i, y_i, respect_to):
    """Un-scaled partial derivative term of the squared error for one data point.

    Returns the residual-based term only; the constant factor ``-2`` (and the
    ``1/N`` averaging) is applied by the caller, ``step_gradient``.

    Parameters
    ----------
    b : number
        Current intercept.
    a : number
        Current slope.
    x_i, y_i : number (or array of numbers)
        Feature and target value(s).
    respect_to : {'a', 'b'}
        Which parameter the derivative is taken with respect to.

    Returns
    -------
    ``y_i - (a * x_i + b)`` for ``'b'``, or that residual multiplied by
    ``x_i`` for ``'a'``.

    Raises
    ------
    ValueError
        If ``respect_to`` is neither ``'a'`` nor ``'b'``. (Previously this case
        printed a message and returned 0, which silently zeroed the gradient.)
    """
    residual = y_i - (a * x_i + b)
    if respect_to == 'b':
        return residual
    if respect_to == 'a':
        return x_i * residual
    raise ValueError(f"respect_to must be 'a' or 'b', got {respect_to!r}")

In [None]:
def step_gradient(b, a, x, y, learning_rate):
    """Perform one gradient-descent update of the intercept and slope.

    The gradient of the mean squared error is computed over all data points
    at once. Inputs are converted to NumPy arrays and paired by position, so
    a pandas Series works regardless of its index labels.

    Parameters
    ----------
    b : number
        Current intercept.
    a : number
        Current slope.
    x : array-like of numbers
        Feature values.
    y : array-like of numbers
        Target values, same length as ``x``.
    learning_rate : number
        Step size multiplied with the gradient.

    Returns
    -------
    tuple
        ``(new_b, new_a)`` — the updated intercept first, then the updated slope.

    Raises
    ------
    ValueError
        If ``x`` is empty.
    """
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)
    N = x_arr.size
    if N == 0:
        raise ValueError('step_gradient() needs at least one data point')

    # Partial derivatives of the loss/cost function with respect to b & a.
    # partial_deriv returns the un-scaled term; the factor -(2/N) turns the
    # summed terms into the gradient of the mean squared error.
    db = -(2 / N) * np.sum(partial_deriv(b, a, x_arr, y_arr, respect_to='b'))
    da = -(2 / N) * np.sum(partial_deriv(b, a, x_arr, y_arr, respect_to='a'))

    # Adjust the slope & intercept by moving against the gradient
    new_b = b - (learning_rate * db)
    new_a = a - (learning_rate * da)

    return (new_b, new_a)

### First Iteration

In [None]:
# One gradient step from the starting parameters with a learning rate of 0.1
# (far larger than the 1e-8 used in later cells). The result is (intercept, slope).
first_step = step_gradient(my_intercept, my_slope, x, y, learning_rate=0.1)

In [None]:
# Display the updated (intercept, slope) pair.
first_step

In [None]:
# Plot the line after the first step. first_step is (intercept, slope), so
# index 1 is passed as the slope and index 0 as the intercept.
fig = plt.figure(figsize = (12, 8))
plt.scatter(x, y, label = 'Raw Data')
plt.plot(x, regression_formula(x, first_step[1], first_step[0]), 
         color = 'k', label = 'Regression')
plt.legend()
plt.show()

In [None]:
# Error at the starting parameters, for comparison with the error after the step.
error(my_intercept, my_slope, x, y)

In [None]:
# Error at the parameters produced by the first step (index 0 = intercept, index 1 = slope).
error(first_step[0], first_step[1], x, y)

In [None]:
# Redo the first step from the same starting parameters with a much smaller
# learning rate (1e-8); this overwrites the earlier first_step.
first_step = step_gradient(my_intercept, my_slope, x, y, learning_rate=0.00000001)

# Error at the newly updated (intercept, slope).
error(first_step[0], first_step[1], x, y)

In [None]:
# Plot the line after iteration 1 and annotate the figure with the iteration
# number and the rounded error at those parameters.
fig = plt.figure(figsize = (12, 8))
plt.scatter(x, y, label = 'Raw Data')
plt.plot(x, regression_formula(x, first_step[1], first_step[0]), 
         color = 'k', label = 'Regression')
plt.legend()
# Text positions are given in data coordinates (x = 500; y = 650000 / 600000).
plt.text(500, 650000, 'Iteration Number 1')
plt.text(500, 600000, f'Error {round(error(first_step[0], first_step[1], x, y), 0)}')
plt.show()

### Second Iteration

In [None]:
# Second step: continue from the (intercept, slope) produced by the first step.
second_step = step_gradient(first_step[0], first_step[1], x, y, learning_rate=0.00000001)

In [None]:
# Plot the line after iteration 2, annotated with the iteration number and rounded error.
fig = plt.figure(figsize = (12, 8))
plt.scatter(x, y, label = 'Raw Data')
plt.plot(x, regression_formula(x, second_step[1], second_step[0]), 
         color = 'k', label = 'Regression')
plt.legend()
plt.text(500, 650000, 'Iteration Number 2')
plt.text(500, 600000, f'Error {round(error(second_step[0], second_step[1], x, y), 0)}')
plt.show()

### Third Iteration

In [None]:
# Third step: continue from the (intercept, slope) produced by the second step.
third_step = step_gradient(second_step[0], second_step[1], x, y, learning_rate=0.00000001)

In [None]:
# Plot the line after iteration 3, annotated with the iteration number and rounded error.
fig = plt.figure(figsize = (12, 8))
plt.scatter(x, y, label = 'Raw Data')
plt.plot(x, regression_formula(x, third_step[1], third_step[0]), 
         color = 'k', label = 'Regression')
plt.legend()
plt.text(500, 650000, 'Iteration Number 3')
plt.text(500, 600000, f'Error {round(error(third_step[0], third_step[1], x, y), 0)}')
plt.show()

## Multiple iterations

In [None]:
# Run 100 gradient-descent steps from (intercept, slope) = (0, 0), recording the
# error *before* each step so errs[k] is the error after k updates.
errs = []
b, m = 0, 0

for i in range(100):
    err = error(b, m, x, y)
    errs += [err]
    b, m = step_gradient(b, m, x, y, 0.00000001)

In [None]:
# Error per iteration. The x-axis is derived from the number of recorded errors
# instead of a hard-coded 100, so the plot stays valid if the loop length changes.
plt.plot(range(len(errs)), errs)
plt.show()

Looks like we didn't need the full 100 iterations, since the later ones barely improve the error. We can use **early stopping**: stop iterating once the error no longer changes much.

In [None]:
# Same loop as before, but stop early once the relative improvement between two
# consecutive recorded errors falls below 1%.
errs = []
b, m = 0, 0

for i in range(100):
    err = error(b, m, x, y)
    errs.append(err)
    b, m = step_gradient(b, m, x, y, 0.00000001)
    # Stop after errors don't change too much (needs at least two recorded errors)
    if len(errs) > 1 and (errs[-2] - errs[-1]) / errs[-2] < 0.01:
        print(i)
        break

In [None]:
# Error history of the early-stopped run; the length follows the number of recorded errors.
plt.plot(range(len(errs)), errs)
plt.show()

In [None]:
# Run 100 gradient-descent steps from (intercept, slope) = (0, 0) and, every 10th
# iteration, show the current fit next to the error history so far.
errs = []
b = 0
m = 0

for i in range(100):
    # Error of the current parameters, recorded before the update below.
    err = error(b, m, x, y)
    errs.append(err)
    # Plot at iterations 10, 20, ..., 90 (i > 1 excludes iteration 0).
    if i > 1 and i % 10 == 0:
        fig = plt.figure(figsize = (12, 8))
        ax1 = plt.subplot(1, 2, 1)
        ax1.scatter(x, y, label = 'Raw Data')
        ax1.plot(x, regression_formula(x, m, b),
                 color = 'k', label = 'Regression')
        ax1.legend()
        ax1.set_title(f'Iteration Number {i}')
        ax2 = plt.subplot(1, 2, 2)
        ax2.plot(range(i + 1), errs)
        # Reuse err: error() takes (intercept, slope, ...), and the previous
        # call here passed (m, b), i.e. slope and intercept swapped.
        ax2.set_title(f'Error {round(err, 0)}')
        plt.show()
    b, m = step_gradient(b, m, x, y, 0.00000001)

# Using Scikit-Learn

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# scikit-learn linear regression model with default settings.
linreg = LinearRegression()

In [None]:
# Both feature and target are reshaped to a single column (n, 1). Because the target is
# 2-D, later cells read the results as coef_[0][0] and intercept_[0].
linreg.fit(x.values.reshape(-1, 1), y.values.reshape(-1, 1))

In [None]:
# Fitted slope from scikit-learn.
linreg.coef_[0][0]

In [None]:
# Fitted intercept from scikit-learn.
linreg.intercept_[0]

In [None]:
# Gradient-descent result as (slope, intercept), to compare with the scikit-learn values.
m, b

In [None]:
# Side-by-side comparison of the gradient-descent line and the scikit-learn fit.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (12, 8))

ax1.scatter(x, y, label = 'Raw Data')
ax1.plot(x, regression_formula(x, m, b),
         color = 'k', label = 'Regression')
ax1.set_title('Gradient Descent')
ax1.set_xlabel('GrLivArea')
ax1.set_ylabel('SalePrice')
# The label= arguments only show up once a legend is drawn.
ax1.legend()

ax2.scatter(x, y, label = 'Raw Data')
ax2.plot(x, regression_formula(x, linreg.coef_[0][0], linreg.intercept_[0]),
         color = 'k', label = 'Regression')
ax2.set_title('Sklearn')
ax2.set_xlabel('GrLivArea')
ax2.set_ylabel('SalePrice')
ax2.legend()

# Render explicitly so the cell does not echo the repr of its last expression.
plt.show()

In [None]:
# Intercept found by gradient descent.
b

In [None]:
# RMSE of the gradient-descent line. error() takes the intercept first and the
# slope second, so the arguments are (b, m) rather than (m, b).
np.sqrt(error(b, m, x, y))

In [None]:
# RMSE of the scikit-learn line. error() takes the intercept first and the slope
# second, so intercept_ is passed before coef_.
np.sqrt(error(linreg.intercept_[0], linreg.coef_[0][0], x, y))

In [None]:
# MSE of the gradient-descent line; error() expects (intercept, slope, x, y).
error(b, m, x, y)