# SLU9 - Regression: Exercise notebook

In [None]:
import pandas as pd
import numpy as np

In this notebook you will practice the following:
* Gradient Descent
* Simple Linear Regression
* Multiple Linear Regression
* Using scikit learn linear regression implementations

## Simple Linear Regression & Gradient Descent

#### Simple Linear Regression formula

$$\hat{y} = \beta_0 + \beta_1 x$$

In [None]:
def simple_linear_regression_output(x, b0, b1):
    """
    Evaluate the simple linear regression line ``b0 + b1 * x``.

    Args:
        x : pandas.Series
            The input values at which the line is evaluated.
        b0 : float
            The intercept of the line.
        b1 : float
            The slope (coefficient multiplying ``x``).

    Returns:
        y_hat :
            The element-wise predictions ``b0 + b1 * x``; the result has
            whatever type the arithmetic on ``x`` produces (a
            pandas.Series when ``x`` is a pandas.Series).
    """
    # Scale the inputs by the slope first, then shift by the intercept.
    slope_term = b1 * x
    return b0 + slope_term

#### Simple Linear Regression cost function

$$J = \frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2 = \frac{1}{N} \sum_{n=1}^N (y_n - (\beta_0 + \beta_1 x_n))^2$$

In [None]:
def simple_linear_regression_cost_function(y, y_hat):
    """
    Mean squared error between the targets and the predictions.

    Args:
        y : pandas.Series
            The targets.
        y_hat : pandas.Series
            The predictions made by a simple linear regression.

    Returns:
        cost :
            The mean of the squared differences ``(y - y_hat) ** 2``
            (a scalar when the inputs are one-dimensional).
    """
    # Squared residuals, averaged over all observations.
    squared_errors = (y - y_hat) ** 2
    return squared_errors.mean()

$$
\frac{\partial J}{\partial b_0} = 
\sum_{n=1}^N \frac{\partial J}{\partial \hat{y}_n} \frac{\partial \hat{y}_n}{\partial b_0} = 
-\frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n) $$

$$
\frac{\partial J}{\partial b_1} = 
\sum_{n=1}^N \frac{\partial J}{\partial \hat{y}_n} \frac{\partial \hat{y}_n}{\partial b_1} = 
-\frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n) x_n $$

In [None]:
# Partial derivative of the cost J with respect to b0: the negated mean of
# twice the residuals.
# NOTE(review): `y` and `y_hat` are not defined in any cell visible above;
# this cell only runs if they already exist in the kernel namespace.
dJ_b0 = -(2 * (y - y_hat)).mean()

In [None]:
# Partial derivative of the cost J with respect to b1: the negated mean of
# twice the residuals weighted by the input `x`.
# NOTE(review): `x`, `y` and `y_hat` are not defined in any cell visible
# above; this cell only runs if they already exist in the kernel namespace.
dJ_b1 = -(2 * (y - y_hat) * x).mean()

#### Simple Linear Regression cost function partial derivatives

$$\frac{\partial J}{\partial b_0} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n) $$

$$\frac{\partial J}{\partial b_1} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n)x_n $$

In [None]:
def simple_linear_regression_gradient(x, y, b0, b1):
    """
    Gradient of the mean squared error of a simple linear regression.

    Args:
        x : pandas.Series
            The input data used to compute the predictions.
        y : pandas.Series
            The targets.
        b0 : float
            The current intercept.
        b1 : float
            The current coefficient.

    Returns:
        dJ_db0 : float
            Partial derivative of J with respect to b0.
        dJ_db1 : float
            Partial derivative of J with respect to b1.
    """
    # Residuals between the targets and the current predictions.
    residuals = y - simple_linear_regression_output(x, b0, b1)

    # Both partial derivatives share the factor 2 * (y - y_hat).
    doubled_residuals = 2 * residuals

    # d/db0: negated mean of the doubled residuals.
    grad_intercept = -doubled_residuals.mean()

    # d/db1: same, but each residual is weighted by its input value.
    grad_slope = -(doubled_residuals * x).mean()

    return grad_intercept, grad_slope

#### Adjusting Simple Linear Regression $\beta_0$ and $\beta_1$ parameters with gradient descent

1. _For epoch in 1...epochs:_
    1. $d_{y_n} = (y_n - \hat{y}_n)$
    2. $\beta_0 = \beta_0 - \alpha \frac{\partial J}{\partial \beta_0} = \beta_0 + \alpha \frac{1}{N} \sum_{n=1}^N 2 d_{y_n}$ 
    3. _For i in 1..K:_
        1. $\beta_i = \beta_i - \alpha \frac{\partial J}{\partial \beta_i} = \beta_i + \alpha \frac{1}{N} \sum_{n=1}^N 2 d_{y_n} x_{i_n}$ 

In [None]:
def simple_linear_regression_gradient_descent(x, y, b0, b1, learning_rate, epochs): 
    """
    Fit a simple linear regression with batch gradient descent.

    Args:
        x : pandas.Series
            The input data.
        y : pandas.Series
            The targets.
        b0 : float
            Initial value of the intercept.
        b1 : float
            Initial value of the coefficient.
        learning_rate : float
            Factor multiplying each gradient before it is subtracted
            from the corresponding parameter.
        epochs : integer
            Number of update steps to perform.

    Returns
        b0 : float
            The intercept after the last update.
        b1 : float
            The coefficient after the last update.
    """
    for _ in range(epochs):
        # Gradients are evaluated at the current (b0, b1) over all the data.
        grad_b0, grad_b1 = simple_linear_regression_gradient(x, y, b0, b1)

        # Step both parameters against their gradients.
        b0, b1 = (
            b0 - learning_rate * grad_b0,
            b1 - learning_rate * grad_b1,
        )

    return b0, b1

#### Adjusting Simple Linear Regression $\beta_0$ and $\beta_1$ parameters with stochastic gradient descent

1. _For epoch in 1...epochs:_
    1. _X' = shuffle(X)_
    2. _For each $x_n$ in $X'$_:
        1. $b_0 = b_0 - \alpha \frac{\partial SE}{\partial b_0} = b_0 + 2 \alpha (y_n - \hat{y}_n)$
        2. $b_1 = b_1 - \alpha \frac{\partial SE}{\partial b_1} = b_1 + 2 \alpha (y_n - \hat{y}_n)x_n$

In [None]:
from sklearn.utils import check_random_state

def simple_linear_regression_stochastic_gradient_descent(x, y, b0, b1, learning_rate, epochs, random_state): 
    """
    Fit a simple linear regression with stochastic gradient descent.

    On every epoch the observations are shuffled and the parameters are
    updated once per observation, using the gradient of the squared error
    of that single observation.

    Args:
        x : pandas.Series
            The input data.
        y : pandas.Series
            The targets. It is concatenated column-wise with ``x``, so both
            are aligned on their index.
        b0 : float
            Initial value of the intercept.
        b1 : float
            Initial value of the coefficient.
        learning_rate : float
            Factor multiplying each gradient before it is subtracted
            from the corresponding parameter.
        epochs : integer
            Number of passes over the data.
        random_state : None, integer or numpy.random.RandomState
            Seed or generator, normalized with
            ``sklearn.utils.check_random_state`` and used for shuffling.

    Returns:
        b0 : float
            The intercept after the last update.
        b1 : float
            The coefficient after the last update.
    """
    # Turn the seed into a generator once, so that every epoch draws a new
    # permutation from the same random stream.
    rng = check_random_state(random_state)

    # Keep inputs and targets in one frame so they are shuffled together.
    data = pd.concat(
        (x.to_frame(), y.to_frame()),
        axis=1)
    data.columns = ['x', 'y']
    num_observations = data.shape[0]

    for epoch in range(epochs):
        # Shuffle the observations; the updates below must read from the
        # shuffled frame, otherwise every epoch visits the samples in the
        # same, original order.
        shuffled = data.sample(n=num_observations, random_state=rng)
        x_ = shuffled['x']
        y_ = shuffled['y']

        for n in range(num_observations):
            # `iloc[[n]]` keeps a length-1 Series, so the gradient helper
            # computes the mean over a single observation.
            dJ_db0, dJ_db1 = simple_linear_regression_gradient(x_.iloc[[n]], y_.iloc[[n]], b0, b1)

            b0 = b0 - learning_rate * dJ_db0

            b1 = b1 - learning_rate * dJ_db1

    return b0, b1

## Multiple Linear Regression

#### Multiple Linear Regression formula

$$\hat{y} = \beta_0 + \sum_{i=1}^K \beta_i x_i$$

In [None]:
def multiple_linear_regression_output(x, betas):
    """
    Compute the multiple linear regression predictions
    ``y_hat = beta_0 + sum_i beta_i * x_i``.

    Args:
        x : pandas.DataFrame with shape (num_observations, num_features)
            The input data, one column per feature.
        betas : pandas.Series (or any one-dimensional array-like)
            The model parameters. With ``num_features + 1`` values the
            first one is the intercept and the remaining ones are the
            coefficients, in the column order of ``x``. With exactly
            ``num_features`` values they are taken as the coefficients and
            the intercept is 0.

    Returns:
        y_hat : pandas.Series with shape (num_observations,)
            The predictions made by the multiple linear regression,
            indexed like ``x``.

    Raises:
        ValueError
            If the number of betas is neither ``num_features`` nor
            ``num_features + 1``.
    """
    params = np.asarray(betas, dtype=float).ravel()
    num_features = x.shape[1]

    # Split the parameters into intercept and per-feature coefficients.
    # The intercept must NOT take part in the dot product, otherwise it
    # would also be applied as the coefficient of the first feature.
    if params.shape[0] == num_features + 1:
        intercept, coefficients = params[0], params[1:]
    elif params.shape[0] == num_features:
        intercept, coefficients = 0.0, params
    else:
        raise ValueError(
            "betas must have num_features or num_features + 1 values, "
            "got {} for {} features".format(params.shape[0], num_features))

    # DataFrame.dot with a 1-D array yields one value per observation.
    y_hat = x.dot(coefficients) + intercept

    return y_hat

#### Multiple Linear Regression cost function

$$J = \frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2 = \frac{1}{N} \sum_{n=1}^N (y_n - (\beta_0 + \sum_{i=1}^K \beta_i x_{i_n}))^2$$

In [None]:
def multiple_linear_regression_cost_function(x, y, betas):
    """
    Mean squared error of a multiple linear regression.

    Args:
        x : pandas.DataFrame with shape (num_observations, num_features)
            The input data.
        y : pandas.Series with shape (num_observations,)
            The targets.
        betas : pandas.Series
            The model parameters, passed unchanged to
            ``multiple_linear_regression_output``.

    Returns:
        cost : float
            The mean of the squared differences between the targets and
            the predictions.
    """
    # 1) Get the predictions for the current parameters.
    y_hat = multiple_linear_regression_output(x, betas)

    # 2) Square the residuals and average them.
    squared_errors = (y - y_hat) ** 2

    return squared_errors.mean()

#### Multiple Linear Regression partial derivatives

$$\frac{\partial J}{\partial b_0} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n) $$

$$\frac{\partial J}{\partial b_1} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n)x_{1_n} $$

$$...$$

$$\frac{\partial J}{\partial b_K} = - \frac{1}{N} \sum_{n=1}^N 2(y_n - \hat{y}_n)x_{K_n} $$

In [None]:
def multiple_linear_regression_gradient(x, y, betas):
    """
    Gradient of the mean squared error of a multiple linear regression.

    Args:
        x : pandas.DataFrame with shape (num_observations, num_features)
            The input data, one column per feature.
        y : pandas.Series with shape (num_observations,)
            The targets.
        betas : pandas.Series
            The model parameters, passed unchanged to
            ``multiple_linear_regression_output``.

    Returns:
        dMSE_dbetas : pandas.Series shape (num_features + 1,)
            Position 0 holds the partial derivative with respect to the
            intercept; position k (k >= 1) holds the partial derivative
            with respect to the coefficient of the k-th column of ``x``.
    """
    # 1) Get the predictions.
    y_hat = multiple_linear_regression_output(x, betas)

    # 2) Compute the difference between the targets and
    #    the predictions.
    y_dif = y - y_hat

    # One slot for the intercept plus one per feature.
    dMSE_dbetas = np.zeros(x.shape[1] + 1)

    # 3) Intercept: negated mean of the doubled residuals.
    dMSE_dbetas[0] = -(2 * y_dif).mean()

    # 4) Coefficients: same, weighted by the matching feature column.
    #    Enumeration starts at 1 so the intercept slot is not overwritten.
    for k, col in enumerate(x.columns, start=1):
        dMSE_dbetas[k] = -((2 * y_dif) * x[col]).mean()

    return pd.Series(dMSE_dbetas)

In [None]:
# Ad-hoc check of the gradient function with randomly drawn betas, one per
# column of `x`.
# NOTE(review): `x`, `y` and `scaler` are not defined in any cell visible in
# this notebook, and no random seed is set in the visible cells, so this cell
# depends on kernel state and its output is not reproducible.
betas = pd.Series(np.random.rand(x.shape[1],))

# Re-wrap the output of `scaler.fit_transform` in a DataFrame so the original
# column names are kept.
xx = pd.DataFrame(scaler.fit_transform(x), columns=x.columns)
multiple_linear_regression_gradient(xx, y, betas)

#### Adjusting Multiple Linear Regression $\beta_i, 0 \leq i \leq K$  parameters with gradient descent

#### Adjusting Multiple Linear Regression $\beta_i, 0 \leq i \leq K$  parameters with stochastic gradient descent

## Using scikit learn linear regression implementations

In [19]:
from sklearn.linear_model import LinearRegression, SGDRegressor

In [1]:
def get_sklearn_stochastic_gradient_linear_regression_details(x, y, learning_rate, epochs, random_state): 
    """
    Exercise placeholder: every step below currently raises
    NotImplementedError, so calling this function fails at step 1 until the
    placeholders are replaced with an implementation.

    Args: 
        x : pandas.DataFrame with shape (num_observations, num_features)
        
        y : pandas.Series with shape (num_observations)
        
        learning_rate : float
        
        epochs : integer
        
        random_state : None, integer or numpy.random.RandomState
        
    Return:
        coefs : numpy array with shape (num_features,)

        normalized_coefs :
            Result of step 5 ("Normalize coefficients").
            NOTE(review): the normalization to apply is not specified in
            this notebook - confirm before implementing.
        
        intercept : numpy array with shape (1,)
        
        score : float
    """
    # 1) Create the class instance.
    raise NotImplementedError()
    
    # 2) Fit the regressor.
    raise NotImplementedError()
    
    # 3) Extract the coefficients and intercept.
    raise NotImplementedError()
    
    # 4) Compute the R² score.
    raise NotImplementedError()
    
    # 5) Normalize coefficients.
    raise NotImplementedError()
    
    # The names returned here must be assigned by the steps above.
    return coefs, normalized_coefs, intercept, score

In [2]:
def get_sklearn_close_form_linear_regression_details(x, y):
    """
    Exercise placeholder: every step below currently raises
    NotImplementedError, so calling this function fails at step 1 until the
    placeholders are replaced with an implementation.

    Args: 
        x : pandas.DataFrame with shape (num_observations, num_features)
        
        y : pandas.Series with shape (num_observations)
        
    Return:
        coefs : numpy array with shape (num_features,)

        normalized_coefs :
            Result of step 5 ("Normalize coefficients").
            NOTE(review): the normalization to apply is not specified in
            this notebook - confirm before implementing.
        
        intercept : numpy array with shape (1,)
        
        score : float
    """
    # 1) Create the class instance.
    raise NotImplementedError()
    
    # 2) Fit the regressor.
    raise NotImplementedError()
    
    # 3) Extract the coefficients and intercept.
    raise NotImplementedError()
    
    # 4) Compute the R² score.
    raise NotImplementedError()
    
    # 5) Normalize coefficients.
    raise NotImplementedError()
    
    # The names returned here must be assigned by the steps above.
    return coefs, normalized_coefs, intercept, score