# SLU9 - Regression: Exercise notebook

In this notebook you will practice the following:

- Simple Linear Regression
- Gradient Descent
- Multiple Linear Regression
- Using scikit-learn linear regression implementations

In [None]:
# This cell creates the data and parameters that 
# you can use to test your implementations.

import numpy as np
import pandas as pd

# Load the dataset: 'MEDV' is the target column, every other
# column is used as an input feature.
data = pd.read_csv('data/boston (scaled).csv')

y = data['MEDV']
x = data.drop(columns=['MEDV'])

# Default parameters shared by the cells below. The global numpy RNG
# is seeded right before drawing so that the random betas are reproducible.
np.random.seed(59)
betas = pd.Series(np.random.rand(len(x.columns) + 1))
b0 = b1 = 1
learning_rate, epochs = 0.3, 100

## 1. Simple Linear Regression & Gradient Descent

#### 1.1 Simple Linear Regression formula

$$\hat{y} = \beta_0 + \beta_1 x$$

where $\hat{y}$ are the predictions, $\beta_0$ is the intercept, $\beta_1$ is the coefficient and $x$ is the input.

In [None]:
def simple_linear_regression(x, b0, b1):
    """
    Exercise stub for the simple linear regression prediction,
    y_hat = b0 + b1 * x. Raises NotImplementedError until the
    placeholder below is replaced with an implementation.

    Args:
        x : pandas.Series or numpy.array with shape (num_observations,)
            The input data to be used in y_hat.
        b0 : float
            The intercept in y_hat.
        b1 : float
            The coefficient in y_hat.
    
    Returns:
        y_hat : array-like with shape (num_observations,)
            The prediction made by the simple linear regression,
            one value per observation in x.
    """
    # y_hat = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return y_hat

In [None]:
# Predict for the first ten 'INDUS' observations with unit parameters.
b0 = b1 = 1

simple_linear_regression(x['INDUS'].iloc[:10], b0, b1)

Expected output:
```
0   -0.287909
1    0.406619
2    0.406619
3   -0.306878
4   -0.306878
5   -0.306878
6    0.523346
7    0.523346
8    0.523346
9    0.523346
Name: INDUS, dtype: float64
```

In [None]:
# Checks on plain numpy inputs: the expected values are b0 + b1 * x.
np.testing.assert_array_almost_equal(
    simple_linear_regression(x=np.arange(100, 110), b0=-12, b1=30),
    [2988, 3018, 3048, 3078, 3108, 3138, 3168, 3198, 3228, 3258],
)

np.testing.assert_array_almost_equal(
    simple_linear_regression(x=np.arange(100, 110), b0=0, b1=1),
    [100, 101, 102, 103, 104, 105, 106, 107, 108, 109],
)

np.testing.assert_array_almost_equal(
    simple_linear_regression(x=np.arange(-10, 0), b0=0, b1=-1.1),
    [11., 9.9, 8.8, 7.7, 6.6, 5.5, 4.4, 3.3, 2.2, 1.1],
)

#### 1.2 Simple Linear Regression cost function

$$J = \frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2 = \frac{1}{N} \sum_{n=1}^N (y_n - (\beta_0 + \beta_1 x_n))^2$$

In [None]:
def linear_regression_cost_function(y, y_hat):
    """
    Exercise stub for the linear regression cost: the mean of the
    squared differences between targets and predictions. Raises
    NotImplementedError until the placeholders below are replaced.

    Args: 
        y : pandas.Series or numpy.array with shape (num_observations,)
            The targets.
        y_hat : pandas.Series or numpy.array with shape (num_observations,)
            The predictions made by linear regression.
    
    Returns:
        cost : float
            The mean squared difference between y and y_hat.
    """
    # Perform the difference
    # e = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Now, square the difference
    # s = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Finally, take the mean.
    # m = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return m

In [None]:
# Cost of the unit-parameter model fitted on the 'INDUS' feature.
b0 = b1 = 1

y_hat = simple_linear_regression(x.loc[:, 'INDUS'], b0, b1)

linear_regression_cost_function(y, y_hat)

Expected output:
```
557.9702490579907
```

In [None]:
import math

# First case: two random vectors drawn after seeding with 109
# (a is drawn before b).
np.random.seed(109)
a, b = np.random.rand(5), np.random.rand(5)

assert math.isclose(linear_regression_cost_function(a, b), 0.04809326223470955)

# Second case: same procedure with seed 1.
np.random.seed(1)
a, b = np.random.rand(5), np.random.rand(5)

assert math.isclose(linear_regression_cost_function(a, b), 0.13452136234584286)

#### 1.3 Simple Linear Regression cost function partial derivatives

$$\frac{\partial J}{\partial \beta_0} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n) $$

$$\frac{\partial J}{\partial \beta_1} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n)x_n $$

In [None]:
def simple_linear_regression_partial_derivatives(x, y, b0, b1):
    """
    Exercise stub for the partial derivatives of the cost J with
    respect to the intercept b0 and the coefficient b1. Raises
    NotImplementedError until the placeholders below are replaced.

    Args:
        x : pandas.Series with shape (num_observations,)
            The input data to be used in y_hat. 
        y : pandas.Series with shape (num_observations,)
            The targets.
        b0 : float
            The intercept in y_hat.
        b1 : float
            The coefficient in y_hat.
    
    Returns:
        dJ_db0 : float
            Partial derivative of J with respect to b0.
        dJ_db1 : float
            Partial derivative of J with respect to b1.
    """
    # Get the predictions.
    # y_hat = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the difference between the targets and 
    # the predictions.
    # y_dif = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Perform the mean as in the formula.
    # dJ_db0 = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Same thing as 'dJ_db0' but this time you must 
    # account for the input 'x'.
    # dJ_db1 = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return dJ_db0, dJ_db1

In [None]:
# Gradient of the cost at b0 = b1 = 1 for the 'INDUS' feature.
b0 = b1 = 1

simple_linear_regression_partial_derivatives(x.loc[:, 'INDUS'], y, b0, b1)

Expected output:
```
(-43.065612648221354, 10.888944710164607)
```

In [None]:
import math

# Pin the parameters in this cell instead of relying on whatever the
# globals b0/b1 currently hold: other cells rebind them, so re-running
# this cell on its own would otherwise evaluate the gradients at the
# wrong point and fail spuriously. The expected values below are the
# gradients at b0 = b1 = 1.
b0 = 1
b1 = 1

dJ_db0, dJ_db1 = simple_linear_regression_partial_derivatives(x['AGE'], y, b0, b1)
assert math.isclose(dJ_db0, -43.065612648221354)
assert math.isclose(dJ_db1, 8.92692579061515)

dJ_db0, dJ_db1 = simple_linear_regression_partial_derivatives(x['INDUS'], y, b0, b1)
assert math.isclose(dJ_db0, -43.065612648221354)
assert math.isclose(dJ_db1, 10.888944710164607)

dJ_db0, dJ_db1 = simple_linear_regression_partial_derivatives(x['TAX'], y, b0, b1)
assert math.isclose(dJ_db0, -43.06561264822135)
assert math.isclose(dJ_db1, 10.60982713399667)

dJ_db0, dJ_db1 = simple_linear_regression_partial_derivatives(x['PTRATIO'], y, b0, b1)
assert math.isclose(dJ_db0, -43.06561264822137)
assert math.isclose(dJ_db1, 11.331099858515751)

#### 1.4 Adjusting Simple Linear Regression $\beta_0$ and $\beta_1$ parameters with batch gradient descent

1. _For epoch in 1...epochs:_
    1. $\beta_0 = \beta_0 - \alpha \frac{\partial J}{\partial \beta_0} = \beta_0 + \alpha \frac{1}{N} \sum_{n=1}^N 2 (y_n - \hat{y}_n)$ 
    2. _For i in 1..K (for simple linear regression, K = 1):_
        1. $\beta_i = \beta_i - \alpha \frac{\partial J}{\partial \beta_i} = \beta_i + \alpha \frac{1}{N} \sum_{n=1}^N 2 (y_n - \hat{y}_n) x_{i_n}$ 
        
It might look too complicated when you look at it the first time. But if you use one of the functions that you already implemented in this notebook, you will only need to fill 3 lines of code.

In [None]:
def simple_linear_regression_batch_gradient_descent(x, y, b0, b1, learning_rate, epochs): 
    """
    Exercise stub for batch gradient descent on the simple linear
    regression parameters. Raises NotImplementedError (on the first
    epoch) until the placeholders below are replaced.

    Args:
        x : pandas.Series
            The input features
        y : pandas.Series
            The target
        b0 : float
            The initial intercept
        b1 : float
            The initial coefficient
        learning_rate : float
            Learning rate
        epochs : integer
            Number of times the gradient computation and the
            parameter update are repeated.
    
    Returns:
        b0 : float
            The updated b0.
        b1 : float
            The updated b1.
    """
    # For a number of epochs...
    for epoch in range(epochs):
        # Get the partial derivatives for b0 and b1.
        # dJ_db0, dJ_db1 = ...
        # YOUR CODE HERE
        raise NotImplementedError()
        
        # Update b0 using the gradient descent update rule.
        # b0 = ...
        # YOUR CODE HERE
        raise NotImplementedError()
        
        # Update b1 using the gradient descent update rule.
        # b1 = ...
        # YOUR CODE HERE
        raise NotImplementedError()
        
    return b0, b1

In [None]:
# Run batch gradient descent on 'INDUS' starting from unit parameters.
b0 = b1 = 1
learning_rate, epochs = 0.3, 100

simple_linear_regression_batch_gradient_descent(
    x['INDUS'], y, b0, b1, learning_rate, epochs
)

Expected output:
```
(22.532806324110656, -4.444472355082235)
```

In [None]:
import math

# Each case starts from explicit initial parameters and checks the
# parameters returned after the given number of epochs on 'AGE'.
b0, b1 = simple_linear_regression_batch_gradient_descent(
    x['AGE'], y, b0=1, b1=1, learning_rate=0.1, epochs=2
)
assert math.isclose(b0, 8.751810276679842)
assert math.isclose(b1, -0.606846642310726)

b0, b1 = simple_linear_regression_batch_gradient_descent(
    x['AGE'], y, b0=1, b1=1, learning_rate=0.2, epochs=2
)
assert math.isclose(b0, 14.78099604743083)
assert math.isclose(b1, -1.8566162529968449)

b0, b1 = simple_linear_regression_batch_gradient_descent(
    x['AGE'], y, b0=1, b1=-2, learning_rate=0.1, epochs=10
)
assert math.isclose(b0, 20.220738850281734)
assert math.isclose(b1, -3.3063247634511757)

b0, b1 = simple_linear_regression_batch_gradient_descent(
    x['AGE'], y, b0=10, b1=1, learning_rate=0.1, epochs=100
)
assert math.isclose(b0, 22.53280632155769)
assert math.isclose(b1, -3.4634628943983317)

## 2. Multiple Linear Regression

#### 2.1 Multiple Linear Regression formula

$$\hat{y} = \beta_0 + \sum_{i=1}^K \beta_i x_i$$

You won't need to implement this one since (1) you already have too many things to do in this notebook and (2) we used the `numpy.dot` method to implement `multiple_linear_regression`. Since we have no learning units for numpy, this will serve as a small intro to numpy matrix operations. Also, you can use this function in the remaining exercises. :)

In [None]:
def multiple_linear_regression(x, betas):
    """
    Compute the multiple linear regression prediction
    y_hat = betas[0] + x.dot(betas[1:]).

    Args:
        x : pandas.DataFrame or 2-D array-like with shape
            (num_observations, num_features)
            The input features.
        betas : pandas.Series or array-like with shape (num_features + 1,)
            The intercept is in the first position.
            The remaining positions hold the coefficients, in the
            same order as the columns of x.
    
    Returns:
        y_hat : numpy.array with shape (num_observations,)
            The prediction made by the multiple linear regression.

    Raises:
        ValueError : if x is not 2-dimensional or if the number of
            betas is not num_features + 1.
    """
    # numpy.asarray extracts the underlying numpy array from a pandas
    # object (and also accepts plain arrays or lists). betas is then
    # reshaped into a column vector with shape (num_features+1, 1)
    # so that it can be used in a matrix product.
    betas = np.asarray(betas).reshape(-1, 1)
    
    # Extract the numpy array of x.
    x = np.asarray(x)
    
    # Fail early with a readable message instead of an obscure
    # shape error coming from the dot product or the final indexing.
    if x.ndim != 2:
        raise ValueError(
            'x must be 2-dimensional (num_observations, num_features), '
            'got {} dimension(s)'.format(x.ndim))
    if betas.shape[0] != x.shape[1] + 1:
        raise ValueError(
            'betas must have num_features + 1 = {} values, got {}'.format(
                x.shape[1] + 1, betas.shape[0]))
    
    # Perform the dot product between x and betas. 
    # Remember that the first position of betas contains 
    # the intercept, so only the remaining ones are used here.
    dot_product = x.dot(betas[1:])
    
    # Sum between the intercept and 
    # the result of numpy dot product.
    y_hat = betas[0] + dot_product
    
    # y_hat is, at this point, a numpy array with shape
    # (num_observations, 1). Drop the trailing axis so that the
    # caller gets a flat array with shape (num_observations,).
    return y_hat[:, 0]

In [None]:
# Draw reproducible random betas (intercept + one per feature)
# and show the first ten predictions.
np.random.seed(59)
betas = pd.Series(np.random.rand(len(x.columns) + 1))

multiple_linear_regression(x, betas)[:10]

#### 2.2 Multiple Linear Regression partial derivatives

$$\frac{\partial J}{\partial \beta_0} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n) $$

$$\frac{\partial J}{\partial \beta_1} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n)x_{1_n} $$

$$...$$

$$\frac{\partial J}{\partial \beta_K} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n)x_{K_n} $$

In [None]:
def multiple_linear_regression_partial_derivatives(x, y, betas):
    """
    Exercise stub for the partial derivatives of the cost J with
    respect to every beta (intercept first, then one per column of x).
    Raises NotImplementedError until the placeholders below are replaced.

    Args:
        x : pandas.DataFrame with shape (num_observations, num_features)
            The input features.
        y : pandas.Series with shape (num_observations,)
            The targets.
        betas : pandas.Series with shape (num_features + 1,)
            The intercept followed by one coefficient per feature.
    Returns:
        dJ_dbetas : pandas.Series shape (num_features + 1,)
            Position 0 holds the derivative for the intercept and
            position k+1 the derivative for the k-th column of x.
    """
    # Get the predictions.
    # y_hat = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the difference between the targets and 
    # the predictions.
    # y_dif = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Initialize the numpy array of partial 
    # derivatives dJ_dbetas as a column of zeros with
    # one row per beta (intercept + one per feature).
    dJ_dbetas = np.zeros((x.shape[1] + 1, 1))
    
    # Compute the partial derivative for b0.
    # dJ_dbetas[0] = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Extract the partial derivatives of 
    # the remaining betas by iterating 
    # through x columns. Do not forget 
    # that you already computed the 
    # difference between y and y_hat.
    for k, col in enumerate(x.columns): 
        # dJ_dbetas[k+1] = ...
        # YOUR CODE HERE
        raise NotImplementedError()
    
    # Return dJ_dbetas as a pandas Series (the single
    # column of the array, flattened).
    return pd.Series(dJ_dbetas[:, 0])

In [None]:
# Gradient of the cost at reproducible random betas.
np.random.seed(59)
betas = pd.Series(np.random.rand(len(x.columns) + 1))

dJ_dbetas = multiple_linear_regression_partial_derivatives(x, y, betas)
dJ_dbetas

Expected output: 
```
0    -43.217542
1      9.291666
2     -6.925295
3     11.225624
4     -2.165538
5     10.230802
6    -13.486925
7      9.110905
8     -6.241895
9     10.198382
10    11.840369
11    10.982597
12    -7.077173
13    15.731230
dtype: float64
```

In [None]:
# Case 1: betas drawn after seeding the global numpy RNG with 10.
np.random.seed(10)
betas = pd.Series(np.random.rand(len(x.columns) + 1))
dJ_dbetas = multiple_linear_regression_partial_derivatives(x, y, betas)
np.testing.assert_array_almost_equal(
    dJ_dbetas.values,
    [
        -43.52297136, 9.84995035, -8.61164594, 13.15861134,
        -2.3471559, 11.61824741, -14.66608858, 10.66169585,
        -7.75951691, 11.0529071, 13.01747345, 12.5651279,
        -8.23394387, 17.23850363,
    ],
)

# Case 2: same check with seed 31.
np.random.seed(31)
betas = pd.Series(np.random.rand(len(x.columns) + 1))
dJ_dbetas = multiple_linear_regression_partial_derivatives(x, y, betas)
np.testing.assert_array_almost_equal(
    dJ_dbetas.values,
    [
        -44.493505, 10.06408594, -6.90398047, 11.43074961,
        -2.75243775, 10.03880847, -12.62970789, 8.75624782,
        -6.36955005, 10.10041933, 11.68181673, 10.82887913,
        -6.80454959, 15.29274361,
    ],
)

#### 2.3 Adjusting Multiple Linear Regression $\beta_i, 0 \leq i \leq K$  parameters with batch gradient descent

It is almost the same thing as BGD for the simple linear regression. The difference is that we keep all gradients within `dJ_dbetas` instead of having a separate variable for each coefficient and for the intercept, as we had in `simple_linear_regression_batch_gradient_descent`.

In [None]:
def multiple_linear_regression_batch_gradient_descent(x, y, betas, learning_rate, epochs): 
    """
    Exercise stub for batch gradient descent on the multiple linear
    regression parameters. Raises NotImplementedError (on the first
    epoch) until the placeholders below are replaced.

    Args:
        x : pandas.DataFrame with shape (num_observations, num_features)
            The input features.
        y : pandas.Series with shape (num_observations,)
            The targets.
        betas : pandas.Series with shape (num_features + 1,)
            The initial intercept and coefficients. The caller's
            Series is copied and left unmodified.
        learning_rate : float
            Learning rate.
        epochs : integer
            Number of times the gradient computation and the
            parameter update are repeated.

    Returns:
        betas : pandas.Series
            The updated intercept and coefficients.
    """
    # Let's create a copy of betas in order to avoid modifying 
    # the original pandas series.
    betas = betas.copy()
    
    for epoch in range(epochs): 
        # Get the partial derivatives of the cost function 
        # in relation to betas
        # dJ_dbetas = ...
        # YOUR CODE HERE
        raise NotImplementedError()
        
        # Change betas with the partial derivatives 
        # using the update rule.
        # betas = ...
        # YOUR CODE HERE
        raise NotImplementedError()
    
    return betas

In [None]:
# Run batch gradient descent from reproducible random betas.
np.random.seed(59)
betas = pd.Series(np.random.rand(len(x.columns) + 1))
learning_rate, epochs = 0.1, 100

multiple_linear_regression_batch_gradient_descent(x, y, betas, learning_rate, epochs)

Expected output:
```
0     22.532806
1     -0.865834
2      0.986377
3     -0.091423
4      0.715177
5     -1.943218
6      2.732713
7     -0.026260
8     -3.057927
9      2.051976
10    -1.417653
11    -2.025991
12     0.856775
13    -3.720943
dtype: float64
```

In [None]:
# Case 1: seed 2, ten epochs.
np.random.seed(2)
betas = pd.Series(np.random.rand(len(x.columns) + 1))
learning_rate, epochs = 0.1, 10

betas_ = multiple_linear_regression_batch_gradient_descent(x, y, betas, learning_rate, epochs)
np.testing.assert_array_almost_equal(
    betas_.values,
    [
        20.16017926, -0.6730492, 0.6389492, -0.39083784, 0.94646059,
        -0.45076874, 3.38156555, 0.07365744, -1.11285391, 0.24015042,
        -0.2833516, -1.56644959, 0.88648266, -2.82315201,
    ],
    decimal=4,
)

# Case 2: seed 3812, a single epoch.
np.random.seed(3812)
betas = pd.Series(np.random.rand(len(x.columns) + 1))
learning_rate, epochs = 0.1, 1

betas_ = multiple_linear_regression_batch_gradient_descent(x, y, betas, learning_rate, epochs)
np.testing.assert_array_almost_equal(
    betas_.values,
    [
        4.59634601, -0.94864253, 1.74053584, -0.80294128, 0.5242436,
        -0.4184868, 2.25130406, -0.14264096, 1.04598222, -0.76944881,
        -0.85169055, -0.61287616, 0.90117628, -0.83743241,
    ],
    decimal=4,
)

## 3. Using scikit-learn linear regression implementations

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
def get_sklearn_close_form_linear_regression_details(x, y):
    """
    Exercise stub: fit a scikit-learn linear regression using the
    closed form method and return its coefficients (original and 
    normalized), intercept and R² score. Raises NotImplementedError
    until the placeholders below are replaced.
    
    Args: 
        x : pandas.DataFrame with shape (num_observations, num_features)
            The input features.
        y : pandas.Series with shape (num_observations,)
            The targets.
    Returns:
        coefs : numpy array with shape (num_features,)
            The fitted coefficients, one per column of x.
        normalized_coefs : numpy array with shape (num_features,)
            The coefficients rescaled with their signs kept. The
            expected outputs in this notebook are consistent with
            dividing each coefficient by the sum of the absolute
            values of all coefficients.
        intercept : float
            The fitted intercept (the checks in this notebook compare
            it against a scalar).
        score : float
            The R² score of the fitted model on x and y.
    """
    # Create the class instance.
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Fit the regressor.
    # lr...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Extract the coefficients and intercept.
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the R² score.
    # score = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Normalize coefficients. 
    # Don't forget to INCLUDE THE SIGN 
    # of each coefficient.
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return coefs, normalized_coefs, intercept, score

In [None]:
coefs, normalized_coefs, intercept, score = get_sklearn_close_form_linear_regression_details(x, y)

# Label each coefficient with the name of its feature column.
print('Feature coefficients: ')
print(pd.Series(coefs, index=x.columns))
print('\n')

print('Normalized feature coefficients: ')
print(pd.Series(normalized_coefs, index=x.columns))
print('\n')

print('Intercept: {0}'.format(intercept))
print('\n')

print('R² score: {0}'.format(score))

Expected output: 
```
Feature coefficients: 
CRIM      -0.920411
ZN         1.080981
INDUS      0.142967
CHAS       0.682203
NOX       -2.060092
RM         2.670641
AGE        0.021121
DIS       -3.104448
RAD        2.658787
TAX       -2.075898
PTRATIO   -2.062156
B          0.856640
LSTAT     -3.748680
dtype: float64


Normalized feature coefficients: 
CRIM      -0.041676
ZN         0.048946
INDUS      0.006473
CHAS       0.030890
NOX       -0.093280
RM         0.120925
AGE        0.000956
DIS       -0.140568
RAD        0.120389
TAX       -0.093996
PTRATIO   -0.093373
B          0.038788
LSTAT     -0.169739
dtype: float64


Intercept: 22.532806324110684


R² score: 0.7406077428649427
```

In [None]:
import math

from sklearn.datasets import load_diabetes

# Check the function against a second dataset bundled with scikit-learn.
data_ = load_diabetes()

x_ = pd.DataFrame(data=data_['data'])
y_ = pd.Series(data=data_['target'])

_c, _n, _i, _s = get_sklearn_close_form_linear_regression_details(x_, y_)

# Raw coefficients.
np.testing.assert_array_almost_equal(
    _c,
    [
        -10.01219782, -239.81908937, 519.83978679, 324.39042769,
        -792.18416163, 476.74583782, 101.04457032, 177.06417623,
        751.27932109, 67.62538639,
    ],
)

# Normalized coefficients.
np.testing.assert_array_almost_equal(
    _n,
    [
        -0.00289369, -0.06931178, 0.1502425, 0.09375432, -0.22895463,
        0.13778762, 0.02920359, 0.05117454, 0.21713244, 0.01954488,
    ],
)

# Intercept and R² score.
assert math.isclose(_i, 152.1334841628965)
assert math.isclose(_s, 0.5177494254132934)