# Assignment 4 Group 16

## Import the necessary libraries

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import t
from scipy.stats import norm
from datetime import datetime
from statsmodels.multivariate.pca import PCA

In [None]:
# import our custom functions
from Assignment4_lib import HSMeasurements

## Load the data from the files

In [None]:
# Build the {ticker: name} lookup from the index list: the second
# comma-separated field of each line is the key, the third one (stripped of
# surrounding whitespace) is the value.
with open('data/_indexes.csv', 'r') as f:
    # skip the first line (presumably the header row)
    next(f, None)
    indexes = {}
    for line in f:
        fields = line.split(',')
        indexes[fields[1]] = fields[2].strip()

In [None]:
# Read the dataset and index it by its 'Date' column converted to a
# DatetimeIndex; `pop` removes the column from the data while handing it over.
EuroStoxx50 = pd.read_csv('data/EUROSTOXX50_Dataset.csv', sep=',')
EuroStoxx50.index = pd.DatetimeIndex(EuroStoxx50.pop('Date'))
EuroStoxx50.head()

In [None]:
# summarize the dataset: DataFrame.describe() reports per-column summary
# statistics, shown as the output of this cell
EuroStoxx50.describe()

In [None]:
# Clean-up in two chained steps: columns made up entirely of NaN are removed,
# then every remaining gap is filled with the last available value before it.
EuroStoxx50 = EuroStoxx50.dropna(axis='columns', how='all').ffill()

In [None]:
# Log-returns between consecutive rows: log of each value over the value in
# the previous row. Rows in which every column is NaN (such as the first one,
# which has no previous row) are discarded.
returns = np.log(EuroStoxx50 / EuroStoxx50.shift(periods=1)).dropna(
    axis='index', how='all'
)

returns.head()

# Point 0: Variance-Covariance method for VaR and ES in a linear portfolio

On the 20th of February 2020 we have an equally weighted portfolio made up of the following equities

- Adidas
- Allianz
- Munich Re
- L'Oréal

We compute the daily VaR and ES with a 5y estimation using a t-student distribution with 4 degrees
of freedom ($\nu$).
The notional of the portfolio is 15 million €. We take a confidence level of $\alpha = 99\%$.

Wherever we have missing data due to differing trading days for each stock we substitute the previous
day's value.

## Select the data 

In [None]:
# valuation date: 20th February 2020
valuation_date = datetime(2020, 2, 20)
# Keep the four portfolio tickers over the 5-year window that ends just
# before the valuation date (window start included, valuation date excluded).
# NOTE(review): the Point 1 setup further down includes the valuation date
# (<=) - confirm which convention is intended here.
df = returns.loc[
    (returns.index >= valuation_date - pd.DateOffset(years=5))
    & (returns.index < valuation_date),
    ['ADSGn.DE', 'ALVG.DE', 'MUVGn.DE', 'OREP.PA'],
]

## Set the parameters


In [None]:
# Student-t degrees of freedom, confidence level and portfolio notional (EUR)
nu, alpha = 4, 0.99
notional = 15_000_000

# equally weighted portfolio over the four selected stocks
weights = np.array([0.25] * 4)
# sample mean vector and sample covariance matrix of the selected returns
mean_df = df.mean()
Cov_df = df.cov()

## Daily VaR
We compute the daily VaR using the variance-covariance method. The daily VaR is given by:

$$
VaR_{\alpha} = \underbrace{\bar\mu \cdot \bar\omega}_{\mu} + \underbrace{ \sqrt{\bar\omega^T \Sigma \bar\omega}}_{\sigma} \cdot t^{-1}_{\nu} (\alpha)
$$

Where:

- $\bar\omega$ is the vector of weights of the portfolio
- $\bar\mu$ is the vector of expected returns of the portfolio
- $\Sigma$ is the variance-covariance matrix of the returns of the portfolio
- $t^{-1}_{\nu}(\alpha)$ is the $\alpha$-quantile of the t-student distribution with $\nu$ degrees of freedom

To compute the quantity $t^{-1}_{\nu}(\alpha)$ we use the `t.ppf` function from the `scipy.stats` module.

See [this stackoverflow answer](https://stackoverflow.com/questions/65468026/norm-ppf-vs-norm-cdf-in-pythons-scipy-stats)
and [this documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html#:~:text=ppf(q%2C%20df,cdf%20%E2%80%94%20percentiles).) for more information.


In [None]:
# alpha-quantile of the standard Student-t distribution with nu degrees of
# freedom
t_alpha = t.ppf(alpha, nu)

# VaR of the loss distribution, as a fraction of the portfolio value.
# The loss is the negative of the portfolio return, so the location term is
# MINUS the expected portfolio return; the scale term is the portfolio
# standard deviation sqrt(w' Sigma w) times the t quantile.
VaR = -(mean_df @ weights) + np.sqrt(weights @ Cov_df @ weights) * t_alpha

print(f'The daily VaR at 99% confidence level is {VaR:.2%}')
print(f'The daily VaR at 99% confidence level is {VaR * notional:.2f} EUR')

### Daily ES

We compute the daily ES using the variance-covariance method. The daily ES is given by:

$$
ES_{\alpha} = \bar\mu \cdot \bar\omega +
    \sqrt{\bar\omega^T \Sigma \bar\omega} \cdot
    \underbrace{
        \frac{\nu + ( t^{-1}_{\nu}(\alpha) )^2}{\nu - 1} \cdot \frac{ \phi_{\nu} (t^{-1}_{\nu}(\alpha)) }{1 - \alpha}
    }_{ES_{\alpha}^{std}}
$$

Where:

- $\bar\omega$ is the vector of weights of the portfolio
- $\bar\mu$ is the vector of expected returns of the portfolio
- $\Sigma$ is the variance-covariance matrix of the returns of the portfolio
- $t^{-1}_{\nu}(\alpha)$ is the $\alpha$-quantile of the t-student distribution with $\nu$ degrees of freedom
- $\phi_{\nu} (\cdot)$ is the density function of the t-student distribution with $\nu$ degrees of freedom


In [None]:
# Expected shortfall of the standard Student-t distribution with nu degrees
# of freedom at level alpha:
#   (nu + q^2) / (nu - 1) * pdf(q) / (1 - alpha),   with q = t_alpha
ES_std = (nu + t_alpha**2) / (nu - 1) * (t.pdf(t_alpha, nu) / (1 - alpha))

# ES of the loss distribution, as a fraction of the portfolio value.
# The loss is the negative of the portfolio return, so the location term is
# MINUS the expected portfolio return; the scale term is the portfolio
# standard deviation sqrt(w' Sigma w) times the standard ES.
ES = -(mean_df @ weights) + np.sqrt(weights @ Cov_df @ weights) * ES_std

print(f'The daily ES at 99% confidence level is {ES:.2%}')
print(f'The daily ES at 99% confidence level is {ES * notional:.2f} EUR')

# Point 1: Historical simulation, bootstrap and PCA for VaR and ES in a linear portfolio

On the 20th of March 2019 we must compute the following quantities with $\alpha = 95\%$:

- Portfolio 1: Total (25K shares), AXA (20K shares), Sanofi (20K shares), Volkswagen (10K shares).
    We compute the daily VaR and ES with a Historical Simulation and Bootstrap method (with 200 simulations) and a 5 years estimation.
- Portfolio 2: Adidas, Airbus, BBVA, BMW and Deutsche Telekom (all equally weighted).
    We compute the daily VaR and ES with a 5 year estimation using a Weighted Historical Simulation with $\lambda = 0.95$.
- Portfolio 3: An equally weighted portfolio with shares of the first 18 companies.
    We compute the 10 days VaR and ES with a 5 year estimation using a Gaussian parametric PCA approach using the first n principal components (with n = 1, 2, 3, 4, 5).

For each portfolio we also perform a Plausibility Check.


## Data setup
We set the parameters for the various models and select the data to use

In [None]:
# valuation date: 20th March 2019
valuation_date = datetime(2019, 3, 20)
# confidence level used for the Point 1 portfolios
alpha = 0.95
# presumably the decay factor of the weighted historical simulation; named
# `lmd` because `lambda` is a reserved keyword
# NOTE(review): the portfolio description above states lambda = 0.95 while
# this is 0.94 - confirm which value is intended.
lmd = 0.94

# returns in the 5 years up to and including the valuation date
df = returns[
    (returns.index <= valuation_date)
    & (returns.index >= valuation_date - pd.DateOffset(years=5))
]

## Point 1.1 Portfolio 1
First of all we set up the weights we will use to compute the various quantities.
Recall that portfolio 1 has the following quantities:
- Total (25K shares)
- AXA (20K shares)
- Sanofi (20K shares)
- Volkswagen (10K shares)

In [None]:
# returns of the four stocks held in portfolio 1
df_1_1 = df[['TTEF.PA', 'AXAF.PA', 'SASY.PA', 'VOWG_p.DE']]
# row of the dataset at the valuation date
prices_t = EuroStoxx50.loc[valuation_date]
# value of each position: number of shares times the value on that date
val_Total, val_AXA, val_Sanofi, val_VW = (
    n_shares * prices_t[ticker]
    for ticker, n_shares in zip(
        df_1_1.columns, (25_000, 20_000, 20_000, 10_000)
    )
)
# total portfolio value at the valuation date
V_t = val_Total + val_AXA + val_Sanofi + val_VW
# portfolio weights: each position's share of the total value
weights = np.array([val_Total, val_AXA, val_Sanofi, val_VW]) / V_t

### Historical Simulation approach

In order to apply the Historical Simulation approach we need to compute the losses for each day as:

$$
L_t = - V_t \cdot \omega \cdot r_t
$$

then we sort the losses in descending order (so that $L^{(1, N)}$ is the largest loss) and we locate
the $\alpha$-quantile of the historical losses at position $q_{\alpha, HS} = \lfloor (1 - \alpha) \cdot N \rfloor$, with
$N$ the number of observations.

Then we simply take:

$$
\begin{aligned}
VaR_{\alpha} &= L^{(q_{\alpha, HS}, N)} \\
ES_{\alpha} &= \frac{1}{q_{\alpha, HS}} \sum_{i=1}^{q_{\alpha, HS}} L^{(i, N)}
\end{aligned}
$$

In [None]:
# Historical-simulation risk measures for portfolio 1, computed by the project
# helper imported from Assignment4_lib. The result is unpacked as (ES, VaR),
# in that order.
# NOTE(review): the trailing 1 is presumably the horizon in days - confirm
# against the helper's signature.
ES, VaR = HSMeasurements(df_1_1, alpha, weights, V_t, 1)

# both figures are printed as EUR amounts
print(f"The daily VaR at 95% confidence level is {VaR:.2f} EUR")
print(f"The daily ES at 95% confidence level is {ES:.2f} EUR")

## Point 1.3: Portfolio 3

We compute the 10 days VaR and ES with a 5 year estimation using a Gaussian parametric PCA approach using the first n principal components (with n = 1, 2, 3, 4, 5).

### Data setup

We set up this portfolio by selecting the first 18 companies.

In [None]:
# Portfolio 3 universe: discard every column with at least one NaN in the
# estimation window, then keep the 18 left-most remaining columns.
stocks_1_3 = df.dropna(axis='columns', how='any').iloc[:, :18]
stocks_1_3.head()

### PCA approach

We compute the PCA on the returns for each n = 1, 2, 3, 4, 5 and use the first n principal components to compute the VaR and ES.

In [None]:
# one PCA model per number of retained components (1 to 5), each fitted on
# the portfolio 3 returns with normalization switched on
pcas = {
    n_comp: PCA(stocks_1_3, ncomp=n_comp, normalize=True)
    for n_comp in range(1, 6)
}
# the `coeff` attribute of every fitted model, keyed by number of components
coeffs = {n_comp: model.coeff for n_comp, model in pcas.items()}

In [None]:
# For every number of components, collapse the coefficient matrix along its
# first axis (presumably the component axis, leaving one value per ticker).
# NOTE(review): the portfolio is described as equally weighted; summing the
# PCA coefficients does not obviously yield equal weights projected onto the
# principal components - confirm the methodology.
weights = {
    n_comp: np.sum(coeff_n, axis=0)
    for n_comp, coeff_n in coeffs.items()
}

In [None]:
# sample mean vector and sample covariance matrix of the portfolio 3 returns
mu, cov = stocks_1_3.mean(), stocks_1_3.cov()

In [None]:
# mean for each n: scalar product of the weights and the mean vector
mus = {n_comp: np.dot(w, mu) for n_comp, w in weights.items()}

# quadratic form w' * cov * w for each n; despite the name this is a variance
# (the square root is taken where it is used)
sigmas = {
    n_comp: np.dot(np.dot(w, cov), w)
    for n_comp, w in weights.items()
}

In [None]:
# Holding period in days: the portfolio 3 measures are required over 10 days.
# Under the Gaussian parametric approach the daily mean scales with the
# horizon and the daily variance scales with the horizon as well (so the
# standard deviation scales with its square root).
horizon_days = 10

# alpha-quantile and expected shortfall of the standard Gaussian distribution
z_alpha = norm.ppf(alpha)
es_std = norm.pdf(z_alpha) / (1 - alpha)

# compute the VaR for each n using a standard Gaussian distribution.
# Losses are the negative of the returns (L_t = -V_t * w * r_t), so the
# location term is MINUS the expected return; `sigmas[n]` is a variance.
VaRs = {
    n: -horizon_days * mus[n] + np.sqrt(horizon_days * sigmas[n]) * z_alpha
    for n in mus
}

# compute the ES for each n using a standard Gaussian distribution
ESs = {
    n: -horizon_days * mus[n] + np.sqrt(horizon_days * sigmas[n]) * es_std
    for n in mus
}

In [None]:
# report VaR and ES, one line each, for every number of principal components
for n in VaRs:
    print(
        f'For n = {n}, the VaR at 95% confidence level is {VaRs[n]:.2%}',
        f'For n = {n}, the ES at 95% confidence level is {ESs[n]:.2%}',
        sep='\n',
    )