# SLU12 - Validation metrics for regression: Exercise Notebook

In this notebook, you will implement:
    - Mean Absolute Error (MAE)
    - Mean Squared Error (MSE)
    - Root Mean Squared Error (RMSE)
    - Coefficient of Determination (R²)
    - Adjusted R²
    - Regularized Linear Regression loss
    - (BONUS) Partial derivatives for the Regularized Linear Regression loss

In [49]:
# This cell creates the data and parameters that 
# you can use to test your implementations.

import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression

# Seed NumPy's global random generator so any random draws made later
# in the notebook are reproducible.
np.random.seed(60)

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2 -- confirm the installed version still provides it.
data = load_boston()

# Features as a labelled DataFrame, targets as a Series.
x = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Fit an ordinary least-squares model on the full dataset and keep its
# in-sample predictions for testing the metric implementations.
lr = LinearRegression().fit(x, y)
y_hat = lr.predict(x)

# Model parameters: the intercept first, then one coefficient per feature.
betas = pd.Series([lr.intercept_, *lr.coef_])

## Mean Absolute Error

$$MAE = \frac{1}{N} \sum_{n=1}^N \left| y_n - \hat{y}_n \right|$$

In [50]:
def mean_absolute_error(y, y_hat):
    """Return the mean absolute error between targets and predictions.

    Targets and predictions are paired by position, not by index label.

    Args:
        y : pandas.Series with shape (num_observations,)
            The targets.

        y_hat : pandas.Series with shape (num_observations,)
            The predictions.

    Returns:
        mae : float
            The mean of |y_n - y_hat_n| over all observations.
    """
    # 1) Compute the difference. Converting to arrays first avoids
    #    pandas index alignment when both inputs are Series.
    e = np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)

    # 2) Compute the absolute value of the difference.
    a = np.abs(e)

    # 3) Compute the mean of the absolute value of the difference.
    mae = a.mean()

    return mae

## Mean Squared Error

$$MSE = \frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2$$

In [10]:
def mean_squared_error(y, y_hat):
    """Return the mean squared error between targets and predictions.

    Targets and predictions are paired by position, not by index label.

    Args:
        y : pandas.Series with shape (num_observations,)
            The targets.

        y_hat : pandas.Series with shape (num_observations,)
            The predictions.

    Returns:
        mse : float
            The mean of (y_n - y_hat_n)^2 over all observations.
    """
    # 1) Compute the difference. Converting to arrays first avoids
    #    pandas index alignment when both inputs are Series.
    e = np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)

    # 2) Compute the squares of the difference.
    s = e ** 2

    # 3) Compute the mean of the squares of the difference.
    mse = s.mean()

    return mse

## Root Mean Squared Error

$$RMSE = \sqrt{MSE}$$

In [11]:
def root_mean_squared_error(y, y_hat):
    """Return the root mean squared error between targets and predictions.

    Targets and predictions are paired by position, not by index label.

    Args:
        y : pandas.Series with shape (num_observations,)
            The targets.

        y_hat : pandas.Series with shape (num_observations,)
            The predictions.

    Returns:
        rmse : float
            The square root of the mean of (y_n - y_hat_n)^2.
    """
    # Compute the mean squared error.
    e = np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    mse = (e ** 2).mean()

    # Compute the square root.
    rmse = np.sqrt(mse)

    return rmse

## R² score

$$\bar{y} = \frac{1}{N} \sum_{n=1}^N y_n$$

$$R² = 1 - \frac{MSE(y, \hat{y})}{MSE(y, \bar{y})} 
= 1 - \frac{\frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2}{\frac{1}{N} \sum_{n=1}^N (y_n - \bar{y})^2}
= 1 - \frac{\sum_{n=1}^N (y_n - \hat{y}_n)^2}{\sum_{n=1}^N (y_n - \bar{y})^2}$$

In [12]:
def r_squared(y, y_hat):
    """Return the coefficient of determination (R²) of the predictions.

    R² = 1 - MSE(y, y_hat) / MSE(y, mean(y)). Targets and predictions
    are paired by position, not by index label.

    Args:
        y : pandas.Series with shape (num_observations,)
            The targets.

        y_hat : pandas.Series with shape (num_observations,)
            The predictions.

    Returns:
        r2 : float
            1.0 for perfect predictions and 0.0 when the predictions
            are no better than always predicting the target mean.
            If all targets are equal the denominator is zero and the
            result is not finite.
    """
    targets = np.asarray(y, dtype=float)
    predictions = np.asarray(y_hat, dtype=float)

    # Compute the mean squared error between
    # the target and the predictions.
    mse_top = ((targets - predictions) ** 2).mean()

    # Compute the mean squared error between
    # the target and the target mean.
    mse_bottom = ((targets - targets.mean()) ** 2).mean()

    # Now, take both mean square errors
    # and finish the computation of R².
    r2 = 1 - mse_top / mse_bottom

    return r2

## Adjusted R² score

$$\bar{R}^2 = 1 - \frac{N - 1}{N - K - 1} (1 - R^2)$$

where $N$ is the number of observations in the dataset used for training the model (i.e. the number of rows of the pandas DataFrame) and $K$ is the number of features used by your model (i.e. the number of columns of the pandas DataFrame).

In [13]:
def adjusted_r_squared(y, y_hat, K):
    """Return R² adjusted for the number of features used by the model.

    adj_R² = 1 - (N - 1) / (N - K - 1) * (1 - R²), where N is the
    number of observations.

    Args:
        y : pandas.Series with shape (num_observations,)
            The targets.

        y_hat : pandas.Series with shape (num_observations,)
            The predictions.

        K : integer
            Number of features used in the model
            that computed y_hat.

    Returns:
        adj_r2 : float
            The adjusted value of R².

    Raises:
        ValueError: if N - K - 1 is not positive, in which case the
            adjustment factor is undefined.
    """
    targets = np.asarray(y, dtype=float)
    predictions = np.asarray(y_hat, dtype=float)

    N = targets.shape[0]
    dof = N - K - 1
    if dof <= 0:
        raise ValueError(
            "adjusted R² needs more observations than features + 1 "
            "(got N={}, K={})".format(N, K)
        )

    # Compute R².
    mse_top = ((targets - predictions) ** 2).mean()
    mse_bottom = ((targets - targets.mean()) ** 2).mean()
    r2 = 1 - mse_top / mse_bottom

    # Adjust R².
    adj_r2 = 1 - (N - 1) / dof * (1 - r2)

    return adj_r2

## Regularization

#### Compute Regularized Linear Regression loss

$$L_{L_1} = \lambda_1 \|\beta\|_1^1 = \lambda_1 \sum_{k=1}^K \left|\beta_k\right|$$

$$L_{L_2} = \lambda_2 \|\beta\|_2^2 = \lambda_2 \sum_{k=1}^K \beta_k^2$$

$$L = \frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2 + L_{L_1} + L_{L_2}$$

In [25]:
def regularized_linear_regression_loss(y, y_hat, betas, lamb1, lamb2):
    """Return the mean squared error plus L1 and L2 penalties.

    L = mean((y_n - y_hat_n)^2)
        + lamb1 * sum_k |beta_k| + lamb2 * sum_k beta_k^2

    The intercept (the first entry of betas) is excluded from both
    penalties; only the feature coefficients are regularized.

    Args:
        y : pandas.Series with shape (num_observations,)
            The targets.
        y_hat : pandas.Series with shape (num_observations,)
            The predictions.
        betas : pandas.Series with shape (num_features+1,)
            The parameters of your regression model.
            The first value is the intercept and the
            remaining ones are the feature coefficients.
        lamb1 : float
            The strength of the L1 regularizer.
        lamb2 : float
            The strength of the L2 regularizer.

    Returns:
        loss : float
    """
    # Drop the intercept positionally so the result does not depend on
    # how the betas Series happens to be indexed.
    coefs = np.asarray(betas, dtype=float)[1:]

    # Compute the L1 part of
    # the general loss function.
    l1_loss = lamb1 * np.abs(coefs).sum()

    # Compute the L2 part of
    # the general loss function.
    l2_loss = lamb2 * (coefs ** 2).sum()

    # Compute the mean square loss part
    # of the general loss function.
    e = np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    mse = (e ** 2).mean()

    # Compute the total loss by
    # combining all 3 parts.
    L = mse + l1_loss + l2_loss

    return L

#### (BONUS) Compute Regularized Linear Regression partial derivatives

$$\frac{\partial L}{\partial \beta_0} = - \frac{1}{N} \sum_{n=1}^{N} 2 (y_n - \hat{y}_n)$$

$$\frac{\partial L}{\partial \beta_k}
= - \frac{1}{N} \sum_{n=1}^{N} 2 (y_n - \hat{y}_n) x_{k_n} + \lambda_1 \frac{\beta_k}{\left|\beta_k\right|} + 2 \lambda_2 \beta_k 
= - \frac{1}{N} \sum_{n=1}^{N} 2 (y_n - \hat{y}_n) x_{k_n} + \lambda_1 sign(\beta_k) + 2 \lambda_2 \beta_k $$

$$sign(\beta_k) = 
\begin{cases}
    +1,& \text{if } \beta_k > 0\\
    -1,& \text{if } \beta_k < 0\\
    0,& \text{if } \beta_k = 0
\end{cases}$$

In [87]:
def regularized_linear_regression_partial_derivatives(x, y, betas, lamb1, lamb2):
    """Return the gradient of the regularized linear regression loss.

    The loss being differentiated is

        L = mean((y_n - y_hat_n)^2)
            + lamb1 * sum_k |beta_k| + lamb2 * sum_k beta_k^2

    with y_hat = beta_0 + sum_k beta_k * x_k. Because the squared error
    is averaged over the N observations, its derivative carries a 1/N
    factor. The intercept beta_0 is not regularized.

    Args:
        x : pandas.DataFrame with shape (num_observations, num_features)
            The input features.
        y : pandas.Series with shape (num_observations,)
            The targets.
        betas : pandas.Series with shape (num_features+1,)
            The intercept at index 0.
            The coefficients in the remaining indexes.
        lamb1 : float
            The strength of the L1 regularizer.
        lamb2 : float
            The strength of the L2 regularizer.

    Returns:
        dL_dbetas : pandas.Series with shape (num_features+1,)
            Entry 0 is dL/dbeta_0; entry k is dL/dbeta_k. The L1 term
            uses sign(beta_k), which is 0 when beta_k is exactly 0.
    """
    # Work on plain arrays so everything is positional.
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float)
    params = np.asarray(betas, dtype=float)
    N = features.shape[0]

    # Prepend a column of ones so that the intercept is handled by the
    # same dot product as the feature coefficients.
    design = np.column_stack((np.ones(N), features))

    # Make predictions y_hat by using the dot product
    # between the design matrix and betas.
    y_hat = design.dot(params)

    # Compute the difference between the target and the
    # predictions, multiplied by 2.
    dy = 2 * (targets - y_hat)

    # MSE part of the derivative for every beta at once (note the minus
    # sign). Column 0 of the design matrix is all ones, so entry 0 is
    # exactly the intercept derivative.
    gradient = -design.T.dot(dy) / N

    # Add the L1 and L2 regularization parts, for k > 0 only.
    coefs = params[1:]
    gradient[1:] += lamb1 * np.sign(coefs)
    gradient[1:] += 2 * lamb2 * coefs

    dL_dbetas = pd.Series(gradient)

    return dL_dbetas