# SLU12 - Validation metrics for regression: Exercise Notebook

In this notebook, you will implement:

- Mean Absolute Error (MAE)
- Mean Squared Error (MSE)
- Root Mean Squared Error (RMSE)
- Coefficient of Determination (R²)
- Adjusted R²
- Regularized Linear Regression loss
- (BONUS) Partial derivatives for the Regularized Linear Regression loss

## 1 Metrics

In [None]:
# This cell creates the data and parameters that 
# you can use to test your implementations.

import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression

# Seed NumPy's global random state.
np.random.seed(60)

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this call (and its import above) presumably needs scikit-learn < 1.2
# to run -- confirm the version pinned in the course environment.
data = load_boston()

# Features as a DataFrame (one column per feature name), targets as a Series.
x = pd.DataFrame(data['data'], columns=data['feature_names'])
y = pd.Series(data['target'])

# Fit a linear regression on the full dataset.
lr = LinearRegression()
lr.fit(x, y)

# In-sample predictions: the model is scored on the same data it was fit on.
# NOTE(review): lr.predict presumably returns a NumPy array rather than a
# pandas.Series, unlike what the metric docstrings below describe -- confirm.
y_hat = lr.predict(x)

# Parameters packed as [intercept, coef_1, ..., coef_K].
betas = pd.Series([lr.intercept_] + list(lr.coef_))

### 1.1 Mean Absolute Error

$$MAE = \frac{1}{N} \sum_{n=1}^N \left| y_n - \hat{y}_n \right|$$

In [None]:
def mean_absolute_error(y, y_hat): 
    """
    Compute the Mean Absolute Error (MAE) between targets and predictions.

    Args: 
        y : pandas.Series with shape (num_observations,)
            The targets
        
        y_hat : pandas.Series with shape (num_observations,)
            The predictions
        
    Returns: 
        mae : float
            The mean of the absolute differences between y and y_hat.
    """
    # Exercise scaffold: replace each `raise NotImplementedError()` below
    # with the line of code described by the comment above it.

    # 1) Compute the difference.
    # e = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # 2) Compute the absolute value of the difference.
    # a = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # 3) Compute the mean of the absolute value of the difference.
    # mae = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return mae

In [None]:
# Report the MAE of the model fitted above, measured on the data it was fit on.
print("MAE: {}".format(mean_absolute_error(y, y_hat)))

Expected output:
```
MAE: 3.2729446379969387
```

In [None]:
import math

# Each case is (expected MAE, RandomState seed for y, RandomState seed for y_hat).
# Both series hold 10 draws from their own independently seeded generator.
for _expected, _seed_y, _seed_y_hat in [
    (0.33316349496726444, 10, 20),
    (0.39070816989559587, 30, 20),
    (0.2567117528634928, 30, 500),
]:
    _targets = pd.Series(np.random.RandomState(_seed_y).rand(10))
    _predictions = pd.Series(np.random.RandomState(_seed_y_hat).rand(10))
    assert math.isclose(_expected, mean_absolute_error(_targets, _predictions))

### 1.2 Mean Squared Error

$$MSE = \frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2$$

In [None]:
def mean_squared_error(y, y_hat):
    """
    Compute the Mean Squared Error (MSE) between targets and predictions.

    Args: 
        y : pandas.Series with shape (num_observations,)
            The targets
        
        y_hat : pandas.Series with shape (num_observations,)
            The predictions
        
    Returns: 
        mse : float
            The mean of the squared differences between y and y_hat.
    """
    # Exercise scaffold: replace each `raise NotImplementedError()` below
    # with the line of code described by the comment above it.

    # 1) Compute the difference.
    # e = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # 2) Compute the squares of the difference.
    # s = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # 3) Compute the mean of the squares of the difference.
    # mse = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return mse

In [None]:
# Report the MSE of the model fitted above, measured on the data it was fit on.
print("MSE: {}".format(mean_squared_error(y, y_hat)))

Expected output: 
```
MSE: 21.8977792176875
```

In [None]:
import math

# Each case is (expected MSE, RandomState seed for y, RandomState seed for y_hat).
# Both series hold 10 draws from their own independently seeded generator.
for _expected, _seed_y, _seed_y_hat in [
    (0.16469788257519086, 10, 20),
    (0.22325626250313846, 30, 20),
    (0.13478449093337383, 30, 500),
]:
    _targets = pd.Series(np.random.RandomState(_seed_y).rand(10))
    _predictions = pd.Series(np.random.RandomState(_seed_y_hat).rand(10))
    assert math.isclose(_expected, mean_squared_error(_targets, _predictions))

### 1.3 Root Mean Squared Error

$$RMSE = \sqrt{MSE}$$

In [None]:
def root_mean_squared_error(y, y_hat): 
    """
    Compute the Root Mean Squared Error (RMSE) between targets and predictions.

    Args: 
        y : pandas.Series with shape (num_observations,)
            The targets
        
        y_hat : pandas.Series with shape (num_observations,)
            The predictions
        
    Returns: 
        rmse : float
            The square root of the mean squared error.
    """
    # Exercise scaffold: replace each `raise NotImplementedError()` below
    # with the line of code described by the comment above it.

    # Compute the mean squared error.
    # mse = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the square root.
    # rmse = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return rmse

In [None]:
# Report the RMSE of the model fitted above, measured on the data it was fit on.
print("RMSE: {}".format(root_mean_squared_error(y, y_hat)))

Expected output:
```
RMSE: 4.679506300635516
```

In [None]:
import math

# Each case is (expected RMSE, RandomState seed for y, RandomState seed for y_hat).
# Both series hold 10 draws from their own independently seeded generator.
for _expected, _seed_y, _seed_y_hat in [
    (0.4058298690032448, 10, 20),
    (0.4725000132308342, 30, 20),
    (0.36713007358887645, 30, 500),
]:
    _targets = pd.Series(np.random.RandomState(_seed_y).rand(10))
    _predictions = pd.Series(np.random.RandomState(_seed_y_hat).rand(10))
    assert math.isclose(_expected, root_mean_squared_error(_targets, _predictions))

### 1.4 R² score

$$\bar{y} = \frac{1}{N} \sum_{n=1}^N y_n$$

$$R² = 1 - \frac{MSE(y, \hat{y})}{MSE(y, \bar{y})} 
= 1 - \frac{\frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2}{\frac{1}{N} \sum_{n=1}^N (y_n - \bar{y})^2}
= 1 - \frac{\sum_{n=1}^N (y_n - \hat{y}_n)^2}{\sum_{n=1}^N (y_n - \bar{y})^2}$$

In [None]:
def r_squared(y, y_hat): 
    """
    Compute the coefficient of determination (R²).

    Args: 
        y : pandas.Series with shape (num_observations,)
            The targets
        
        y_hat : pandas.Series with shape (num_observations,)
            The predictions
        
    Returns: 
        r2 : float
            1 - MSE(y, y_hat) / MSE(y, mean of y).
    """
    # Exercise scaffold: replace each `raise NotImplementedError()` below
    # with the line of code described by the comment above it.

    # Compute the mean squared error between 
    # the target and the predictions.
    # mse_top = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the mean squared error between 
    # the target and the target mean.
    # mse_bottom = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Now, take both mean squared errors
    # and finish the computation of R².
    # r2 = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return r2

In [None]:
# Report the R² of the model fitted above, measured on the data it was fit on.
print("R²: {}".format(r_squared(y, y_hat)))

Expected output: 
```
R²: 0.7406077428649427
```

In [None]:
import math

# Each case is (expected R², RandomState seed for y, RandomState seed for y_hat).
# The two series are unrelated random draws, hence the negative expected scores.
for _expected, _seed_y, _seed_y_hat in [
    (-1.012757734643532, 10, 20),
    (-2.075782802360925, 30, 20),
    (-0.8569146262224803, 30, 500),
]:
    _targets = pd.Series(np.random.RandomState(_seed_y).rand(10))
    _predictions = pd.Series(np.random.RandomState(_seed_y_hat).rand(10))
    assert math.isclose(_expected, r_squared(_targets, _predictions))

### 1.5 Adjusted R² score

$$\bar{R}^2 = 1 - \frac{N - 1}{N - K - 1} (1 - R^2)$$

where $N$ is the number of observations in the dataset used to train the model (i.e. the number of rows of the pandas DataFrame) and $K$ is the number of features used by the model (i.e. the number of columns of the pandas DataFrame).

In [None]:
def adjusted_r_squared(y, y_hat, K):
    """
    Compute the adjusted R², which penalizes R² for the number of features.

    Args: 
        y : pandas.Series with shape (num_observations,)
            The targets. Its length is the number of observations N.
            
        y_hat : pandas.Series with shape (num_observations,)
            The predictions.
            
        K : integer
            Number of features used in the model 
            that computed y_hat.
        
    Returns:
        adj_r2 : float
            The adjusted value of R².
    """
    # Exercise scaffold: replace each `raise NotImplementedError()` below
    # with the line of code described by the comment above it.

    # Compute R².
    # r2 = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Adjust R²
    # adj_r2 = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return adj_r2

In [None]:
# x.shape[1] is the number of feature columns, passed as K.
print("Adj. R²: {}".format(adjusted_r_squared(y, y_hat, x.shape[1])))

Expected output: 
```
Adj. R²: 0.7337538824121872
```

In [None]:
import math


# Each case is (expected adjusted R², seed for y, seed for y_hat, K).
# Every (y, y_hat) pair is checked with K = 2, 4 and 6 features.
for _expected, _seed_y, _seed_y_hat, _num_features in [
    (-1.891075622505615, 65, 10, 2),
    (-3.0475058715078607, 65, 10, 4),
    (-5.745843119179767, 65, 10, 6),
    (-2.138572002282621, 1, 42, 2),
    (-3.394000803195669, 1, 42, 4),
    (-6.323334671992782, 1, 42, 6),
    (-3.6986284467219077, 23, 13, 2),
    (-5.57807982541067, 23, 13, 4),
    (-9.96346637568445, 23, 13, 6),
]:
    _targets = pd.Series(np.random.RandomState(_seed_y).rand(10))
    _predictions = pd.Series(np.random.RandomState(_seed_y_hat).rand(10))
    assert math.isclose(_expected,
                        adjusted_r_squared(_targets, _predictions, _num_features))

## 2 Regularization

#### 2.1 Compute Regularized Linear Regression loss

$$L_{L_1} = \lambda_1 \|\beta\|_1^1 = \lambda_1 \sum_{k=1}^K \left|\beta_k\right|$$

$$L_{L_2} = \lambda_2 \|\beta\|_2^2 = \lambda_2 \sum_{k=1}^K \beta_k^2$$

$$L = \frac{1}{N} \sum_{n=1}^N (y_n - \hat{y}_n)^2 + L_{L_1} + L_{L_2}$$

In [None]:
def regularized_linear_regression_loss(y, y_hat, betas, lamb1, lamb2):
    """
    Compute the linear regression loss with L1 and L2 regularization:
    the mean squared error plus both penalty terms.

    Args: 
        y : pandas.Series with shape (num_observations,)
            The targets.
        y_hat : pandas.Series with shape (num_observations,)
            The predictions.
        betas : pandas.Series with shape (num_features+1,)
            The parameters of your regression model. 
            The first value is the intercept and the 
            remaining ones are the feature coefficients.
            The penalty sums in the formulas above run over
            k = 1..K, i.e. they do not include the intercept.
        lamb1 : float
            The strength of the L1 regularizer.
        lamb2 : float
            The strength of the L2 regularizer.
            
    Returns:
        loss : float
    """
    # Exercise scaffold: replace each `raise NotImplementedError()` below
    # with the line of code described by the comment above it.

    # Compute the L1 part of 
    # the general loss function.
    # l1_loss = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the L2 part of 
    # the general loss function.
    # l2_loss = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the mean squared error part 
    # of the general loss function.
    # mse = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the total loss by 
    # combining all 3 parts.
    # L = ...
    # YOUR CODE HERE
    raise NotImplementedError()
    
    return L

In [None]:
# Example strengths for the L1 (lamb1) and L2 (lamb2) penalties.
lamb1 = 0.5
lamb2 = 0.1
# Loss of the model fitted above, using its predictions and parameters.
L = regularized_linear_regression_loss(y, y_hat, betas, lamb1, lamb2)

print("Regularized loss for lin. reg.: {}".format(L))

Expected output:
```
Regularized loss for lin. reg.: 69.95737104987452
```

In [None]:
import math


# Each case is (lamb1, lamb2, expected loss) for the model fitted above.
# lamb1, lamb2 and L are rebound on every iteration, as in a cell that
# assigns them one case at a time.
for lamb1, lamb2, _expected_loss in [
    (1.5, 0.1, 97.70368652856253),
    (0.5, 1.1, 411.8217119783049),
    (0.5, 1.2, 446.0081460711479),
]:
    L = regularized_linear_regression_loss(y, y_hat, betas, lamb1, lamb2)
    assert math.isclose(L, _expected_loss)

#### (BONUS) 2.2 Compute Regularized Linear Regression partial derivatives

$$\frac{\partial L}{\partial \beta_0} = - \sum_{n=1}^{N} 2 (y_n - \hat{y}_n)$$

$$\frac{\partial L}{\partial \beta_k}
= - \sum_{n=1}^{N} 2 (y_n - \hat{y}_n) x_{k_n} + \lambda_1 \frac{\beta_k}{\left|\beta_k\right|} + 2 \lambda_2 \beta_k 
= - \sum_{n=1}^{N} 2 (y_n - \hat{y}_n) x_{k_n} + \lambda_1 sign(\beta_k) + 2 \lambda_2 \beta_k $$

$$sign(\beta_k) = 
\begin{cases}
    +1,& \text{if } \beta_k > 0\\
    -1,& \text{if } \beta_k < 0\\
    0,& \text{if } \beta_k = 0
\end{cases}$$

In [None]:
def regularized_linear_regression_partial_derivatives(x, y, betas, lamb1, lamb2):
    """
    Compute the partial derivatives of the regularized linear regression
    loss with respect to the betas.

    Args:
        x : pandas.DataFrame with shape (num_observations, num_features)
            The input features.
        y : pandas.Series with shape (num_observations,)
            The targets.
        betas : pandas.Series with shape (num_features+1,)
            The intercept at index 0.
            The coefficients in the remaining indexes.
        lamb1 : float
            The strength of the L1 regularizer.
        lamb2 : float
            The strength of the L2 regularizer.
    
    Returns: 
        dL_dbetas : pandas.Series with shape (num_features+1,)
            NOTE(review): the scaffold below allocates x.shape[1]
            entries, not num_features+1 -- see the note at the
            allocation.
    """
    # Exercise scaffold: replace each `raise NotImplementedError()` below
    # with the code described by the comment above it.

    # To make your life easier, extract 
    # the numpy array from x.
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Make predictions y_hat by using 
    # the dot product between x and betas. 
    # Don't forget to separate betas[0] 
    # from the remaining betas!
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Compute the difference between the 
    # target and the predictions.
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Initialize (with zeros) the pandas 
    # Series that will store the partial 
    # derivatives for the betas.
    # NOTE(review): this allocates K = num_features entries and the loop
    # below visits k = 1..K-1, while betas has num_features+1 entries, so
    # the last coefficient gets no derivative. The expected outputs in the
    # cells that follow appear to match this shorter length, so the
    # scaffold is left as-is -- confirm whether that is intended.
    K = x.shape[1]
    dL_dbetas = pd.Series(np.zeros(K))
    
    # Calculate the partial derivative 
    # for beta_0 (the intercept). Per the formula 
    # above, it has no regularization terms.
    # YOUR CODE HERE
    raise NotImplementedError()
    
    # Calculate the partial derivatives 
    # for each beta_k, for k > 0.
    for k in range(1, K):
        # Perform 3 sums in order to make 
        # things clear. 
        # 1) First, add the part corresponding to the 
        #    MSE derivative with respect to beta_k.
        #    Don't forget to put the minus sign at 
        #    the beginning! 
        #    TIP: to get the k column of x, use x[:, k].
        #    NOTE(review): betas[k] is the coefficient of feature 
        #    column k-1 (betas[0] is the intercept), so x[:, k] looks 
        #    off by one -- confirm against the reference solution.
        # YOUR CODE HERE
        raise NotImplementedError()
        
        # 2) Add the part corresponding to the L1 
        #    regularization. 
        #    TIP: the sign operation from the formula 
        #    is available as np.sign.
        # YOUR CODE HERE
        raise NotImplementedError()
        
        # 3) Finally, add the part corresponding 
        #    to the L2 regularization.
        # YOUR CODE HERE
        raise NotImplementedError()
    
    return dL_dbetas

In [None]:
# Example strengths for the L1 (lamb1) and L2 (lamb2) penalties.
lamb1 = 1
lamb2 = 1
# Derivatives evaluated at the parameters of the model fitted above.
print(regularized_linear_regression_partial_derivatives(x, y, betas, lamb1, lamb2))

Expected output: 
```
0    -5.684342e-13
1    -1.214341e+00
2     1.092790e+00
3     1.041720e+00
4     6.377123e+00
5    -3.659152e+01
6     8.609505e+00
7     1.001502e+00
8    -3.951518e+00
9     1.611310e+00
10   -1.024659e+00
11   -2.906927e+00
12    1.018785e+00
dtype: float64
```

In [None]:
import math


# Each case is (lamb1, lamb2, expected derivatives) evaluated at the
# parameters of the model fitted above. lamb1 and lamb2 are rebound on
# every iteration, as in a cell that assigns them one case at a time.
for lamb1, lamb2, _expected_grad in [
    (2, 2,
     np.array([-5.68434189e-13, -2.42868223e+00,  2.18558088e+00,  2.08344096e+00,
               1.27542456e+01, -7.31830346e+01,  1.72190098e+01,  2.00300425e+00,
               -7.90303519e+00,  3.22262015e+00, -2.04931739e+00, -5.81385422e+00,
               2.03757005e+00])),
    (0, 0,
     np.array([-5.68434189e-13, -1.81353244e-09,  1.40516931e-10,  1.13686838e-13,
               3.49587026e-12, -3.02406988e-11,  1.00408215e-09, -9.45306056e-11,
               -6.50288712e-11, -1.46974344e-09,  2.45563569e-11, -2.52475729e-09,
               1.82808435e-10])),
    (0, 3,
     np.array([-5.68434189e-13, -6.43023341e-01,  2.78371317e-01,  1.25161437e-01,
               1.61313684e+01, -1.06774552e+02,  2.28285148e+01,  4.50637013e-03,
               -8.85455278e+00,  1.83393023e+00, -7.39760778e-02, -5.72078133e+00,
               5.63550765e-02])),
]:
    np.testing.assert_array_almost_equal(
        regularized_linear_regression_partial_derivatives(x, y, betas, lamb1, lamb2).values,
        _expected_grad)