# Assignment 4 Group 16

## Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import t
from datetime import datetime

## Load the data from the files

In [2]:
# load the indexes as dictionary of {ticker: name}
with open('data/_indexes.csv', 'r') as f:
    # skip the first line
    indexes = {
        line.split(',')[1]: line.split(',')[2].strip()
        for line in f.readlines()[1:]
    }

In [3]:
# load the actual dataset as a dataframe
EuroStoxx50 = pd.read_csv('data/EUROSTOXX50_Dataset.csv', sep=',', index_col=0, parse_dates=True)
EuroStoxx50.head()

Unnamed: 0_level_0,ABI.BR,AD.AS,ADSGn.DE,ADYEN.AS,AIR.PA,AIRP.PA,ALVG.DE,ASML.AS,AXAF.PA,BASFn.DE,...,SAN.MC,SAPG.DE,SASY.PA,SCHN.PA,SGEF.PA,SIEGn.DE,STLAM.MI,TTEF.PA,VNAn.DE,VOWG_p.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,67.35,10.344663,67.48,,30.065,63.872086,108.3,50.17,13.81,73.18,...,4.642466,61.740795,72.619232,56.69,36.477679,73.202953,1.859794,39.975,,179.0
2013-01-03,66.54,10.344663,67.18,,30.2,63.433011,108.0,50.34,13.825,72.35,...,4.626329,61.661232,72.430223,56.45,36.636105,73.255365,1.886567,39.855,,178.9
2013-01-04,67.62,10.465124,66.72,,30.8,63.499538,108.2,49.755,13.865,72.47,...,4.6432,61.134128,73.285737,56.43,36.754925,73.316513,1.917165,39.82,,178.85
2013-01-07,66.93,10.530375,66.28,,30.75,62.973979,107.7,48.75,13.905,71.64,...,4.637332,60.616969,72.99725,56.1,36.403416,73.045715,1.919078,39.155,,175.95
2013-01-08,65.9,10.460105,66.36,,30.925,62.767747,107.05,48.22,13.575,71.14,...,4.650535,60.318608,73.36532,56.12,36.3044,72.55653,1.895173,39.385,,174.0


In [4]:
# summarize the dataset
EuroStoxx50.describe()

Unnamed: 0,ABI.BR,AD.AS,ADSGn.DE,ADYEN.AS,AIR.PA,AIRP.PA,ALVG.DE,ASML.AS,AXAF.PA,BASFn.DE,...,SAN.MC,SAPG.DE,SASY.PA,SCHN.PA,SGEF.PA,SIEGn.DE,STLAM.MI,TTEF.PA,VNAn.DE,VOWG_p.DE
count,2595.0,2595.0,2568.0,1204.0,2595.0,2595.0,2568.0,2595.0,2595.0,2568.0,...,2593.0,2568.0,2595.0,2595.0,2595.0,2568.0,2574.0,2595.0,2435.0,2568.0
mean,78.466609,20.222655,168.11796,1313.152865,79.464584,94.856556,169.402371,228.457549,21.497944,69.981291,...,4.117864,87.953312,80.714696,83.90896,72.686856,99.203473,8.653848,44.765356,34.876696,159.223306
std,22.017982,4.959865,78.600789,634.775495,27.85183,25.424532,34.043809,192.493032,3.408427,13.209816,...,1.239889,23.157071,7.999882,31.224451,18.878874,20.444696,4.366842,6.105825,11.245108,31.092819
min,30.97,10.344663,53.89,411.0,30.065,59.481338,101.75,47.2,12.494,38.85,...,1.460862,50.621882,62.880297,45.93,34.155742,54.880885,1.859794,21.795,15.654411,87.2
25%,56.97,17.04307,85.0,680.15,53.915,72.103553,138.45,85.51,18.907,62.605,...,3.176468,65.689103,74.653565,62.02,56.315,84.843162,4.683985,40.9175,26.520376,137.6
50%,76.3,20.175,172.775,1366.0,72.38,87.479356,174.655,148.5,22.165,69.815,...,4.009865,90.157344,79.801572,69.82,77.22,94.914723,7.764398,45.28,33.5,153.63
75%,98.465,23.4725,235.1375,1789.0,105.01,120.072733,198.46,317.45,23.9675,80.3775,...,5.202168,105.045614,86.635739,102.875,88.31,108.752134,12.067848,48.5725,43.469417,179.9275
max,123.25,31.09,336.25,2766.0,139.0,150.600015,232.0,770.5,28.83,97.67,...,6.771846,141.482691,105.665956,177.82,109.54,157.96,19.14,60.71,58.327952,255.2


In [5]:
# drop the columns that only contain NaN
EuroStoxx50 = EuroStoxx50.dropna(axis=1, how='all')
# for those who have NaN, fill them with the previous value
EuroStoxx50 = EuroStoxx50.ffill()


In [6]:
# create the log-returns dataframe
returns = np.log(EuroStoxx50/EuroStoxx50.shift(1))
returns = returns.dropna(axis=0, how='all')

returns.head()

Unnamed: 0_level_0,ABI.BR,AD.AS,ADSGn.DE,ADYEN.AS,AIR.PA,AIRP.PA,ALVG.DE,ASML.AS,AXAF.PA,BASFn.DE,...,SAN.MC,SAPG.DE,SASY.PA,SCHN.PA,SGEF.PA,SIEGn.DE,STLAM.MI,TTEF.PA,VNAn.DE,VOWG_p.DE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-03,-0.0121,0.0,-0.004456,,0.00448,-0.006898,-0.002774,0.003383,0.001086,-0.011407,...,-0.003482,-0.001289,-0.002606,-0.004243,0.004334,0.000716,0.014293,-0.003006,,-0.000559
2013-01-04,0.016101,0.011578,-0.006871,,0.019673,0.001048,0.00185,-0.011689,0.002889,0.001657,...,0.00364,-0.008585,0.011742,-0.000354,0.003238,0.000834,0.016089,-0.000879,,-0.00028
2013-01-07,-0.010257,0.006216,-0.006617,,-0.001625,-0.008311,-0.004632,-0.020406,0.002881,-0.011519,...,-0.001265,-0.008495,-0.003944,-0.005865,-0.00961,-0.0037,0.000997,-0.016841,,-0.016348
2013-01-08,-0.015509,-0.006695,0.001206,,0.005675,-0.00328,-0.006054,-0.010931,-0.024019,-0.007004,...,0.002843,-0.004934,0.00503,0.000356,-0.002724,-0.006719,-0.012535,0.005857,,-0.011145
2013-01-09,-0.018996,-0.005292,0.013174,,0.03807,-0.002653,-0.018384,0.012571,0.00551,0.003788,...,0.009575,0.006082,0.00149,0.001425,0.011794,0.002045,0.018991,0.003675,,-0.013889


# Point 0: Variance-Covariance method for VaR and ES in a linear portfolio

On the 20th of February 2020 we have an equally weighted portfolio made up of the following equities

- Adidas
- Allianz
- Munich Re
- L'Oréal

We compute the daily VaR and ES with a 5y estimation using a t-student distribution with 4 degrees
of freedom ($\nu$).
The notional of the portfolio is 15 million €. We take a significance level of $\alpha = 0.99\%$.

Wherever we have missing data due to differing trading days for each stock we substitute the previous
day's value.

## Select the data 

In [7]:
# create a dataframe with the relevant time series
df = returns[['ADSGn.DE', 'ALVG.DE', 'MUVGn.DE', 'OREP.PA']]
# set the date to 20th February 2020
valuation_date = datetime(2020, 2, 20)
# only use data prior to the valuation date
df = df[df.index < valuation_date]
# only use the last 5 years
df = df[df.index > valuation_date - pd.DateOffset(years=5)]


## Set the parameters


In [8]:
# set nu and alpha
nu = 4
alpha = 0.99
notional = 15 * 10**6

# estimate the mean vector
mean_df = df.mean()
# estimate the covariance matrix
Cov_df = df.cov()
# create the weights vector
weights = np.array([0.25, 0.25, 0.25, 0.25])

## Daily VaR
We compute the daily VaR using the variance-covariance method. The daily VaR is given by:

$$
VaR_{\alpha} = \underbrace{\bar\mu \cdot \bar\omega}_{\mu} + \underbrace{ \sqrt{\bar\omega^T \Sigma \bar\omega}}_{\sigma} \cdot t^{-1}_{\nu} (\alpha)
$$

Where:

- $\bar\omega$ is the vector of weights of the portfolio
- $\bar\mu$ is the vector of expected returns of the portfolio
- $\Sigma$ is the variance-covariance matrix of the returns of the portfolio
- $t^{-1}_{\nu}(\alpha)$ is the $\alpha$-quantile of the t-student distribution with $\nu$ degrees of freedom

To compute the quantity $t^{-1}_{\nu}(\alpha)$ we use the `t.ppf` function from the `scipy.stats` module.

See [this stackoverflow answer](https://stackoverflow.com/questions/65468026/norm-ppf-vs-norm-cdf-in-pythons-scipy-stats)
and [this documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html#:~:text=ppf(q%2C%20df,cdf%20%E2%80%94%20percentiles).) for more information.


In [9]:
# find the t_alpha quantile
t_alpha = t.ppf(alpha, nu)

# compute the VaR
VaR = mean_df @ weights + np.sqrt(weights @ Cov_df @ weights) * t_alpha

print(f'The daily VaR at 99% confidence level is {VaR:.2%}')
print(f'The daily VaR at 99% confidence level is {VaR * notional:.2f} EUR')

The daily VaR at 99% confidence level is 3.86%
The daily VaR at 99% confidence level is 579028.03 EUR


### Daily ES

We compute the daily ES using the variance-covariance method. The daily ES is given by:

$$
ES_{\alpha} = \bar\mu \cdot \bar\omega +
    \sqrt{\bar\omega^T \Sigma \bar\omega} \cdot
    \underbrace{
        \frac{\nu + ( t^{-1}_{\nu}(\alpha) )^2}{\nu - 1} \cdot \frac{ \phi_{\nu} (t^{-1}_{\nu}(\alpha)) }{1 - \alpha}
    }_{ES_{\alpha}^{std}}
$$

Where:

- $\bar\omega$ is the vector of weights of the portfolio
- $\bar\mu$ is the vector of expected returns of the portfolio
- $\Sigma$ is the variance-covariance matrix of the returns of the portfolio
- $t^{-1}_{\nu}(\alpha)$ is the $\alpha$-quantile of the t-student distribution with $\nu$ degrees of freedom
- $\phi_{\nu} (\cdot)$ is the density function of the t-student distribution with $\nu$ degrees of freedom


In [10]:
# compute the ES for the standard t-distribution
ES_std = (nu + t_alpha**2) / (nu - 1) * (t.pdf(t_alpha, nu) / (1 - alpha))

# compute the ES for the portfolio
ES = mean_df @ weights + np.sqrt(weights @ Cov_df @ weights) * ES_std

print(f'The daily ES at 99% confidence level is {ES:.2%}')
print(f'The daily ES at 99% confidence level is {ES * notional:.2f} EUR')

The daily ES at 99% confidence level is 5.36%
The daily ES at 99% confidence level is 803402.00 EUR


# Point 1: Historical simulation, bootstrap and PCA for VaR and ES in a linear portfolio

On the 20th of March 2019 we must compute the following quantitities with $\alpha = 0.95\%$:

- Portfolio 1: AXA (20K shares), Sanofi (20K shares), Volkswagen (10K shares).
    We compute the daily VaR and ES with a Historical Simulation and Bootstrap method (with 200 simulations) and a 5 years estimation.
- Portfolio 2: Adidas, Airbus, BBVA, BMW and Deutsche Telekom (all equally weighted).
    We compute the daily VaR and ES with a 5 year estimation using a Weighted Historical Simulation with $\lambda = 0.95$.
- Portfolio 3: An equally weighted portfolio with shares of the first 18 companies.
    We compute the 10 days VaR and ES with a 5 year estimation using a Gaussian parametric PCA approach using the first n principanl components (with n = 1, 2, 3, 4, 5).

For each portfolio we also check the Plausibility Check.


## Data setup
We set the parameters for the various models and select the data to use

In [11]:
# set the parameters
alpha = 0.95
lmd = 0.95 # lambda is a reserved keyword
# set the valuation date to 20th March 2019
valuation_date = datetime(2019, 3, 20)

# select only the relevant returns
df = returns[returns.index <= valuation_date]
# only use the last 5 years
df = df[df.index >= valuation_date - pd.DateOffset(years=5)]

## Point 1.1 Portfolio 1
First of all we set up the weights we will use to compute the various quantities.

In [12]:
tot_shares = 50
w_1 = np.array([20/tot_shares, 20/tot_shares, 10/tot_shares])
indexes.get('AXA')

### Historical Simulation
We freeze the portfolio to the valutaion date and write the loss function as a function of the weights and the returns.

In [13]:
# compute the value of the portfolio

## Point 1.B

In [14]:
# Build an equally weighted portfolio
w_2 = 0.20
# create the array with the weights
ptf_weights = np.array([w_2, w_2, w_2, w_2, w_2])
# normalization factor
C  = (1 - lmd) / (1 - lmd**len(df))
# compute the decreasing sequence of weights: w_s = C*lambda^(t-s)
weights = np.array([C * lmd**(len(df) - t) for t in range(len(df))])

# weights = sorted(weights, reverse=True)

# extract the log returns of the following companies Adidas, Airbus, BBVA, BMW, Deutsche 
tickers_of_interest = ['ADSGn.DE', 'AIR.PA', 'BBVA.MC', 'BMWG.DE', 'DTEGn.DE']

# Selecting columns for tickers of interest
selected_returns = df[tickers_of_interest]


In [15]:

# the loss distribution
loss  = - 1 * (selected_returns @ ptf_weights)

# sort the loss in ascending order
#loss_sorted = sorted(loss, reverse=True)

# Create pairs of (loss, weight) tuples
loss_weight_pairs = list(zip(loss, weights))

# Sort the pairs based on loss (descending order)
loss_weight_pairs.sort(key=lambda x: x[0], reverse=True)

# Unzip sorted pairs to get sorted loss and weights arrays
loss_sorted, weights_sorted = zip(*loss_weight_pairs)

# Convert back to numpy arrays if needed
loss_sorted = np.array(loss_sorted)
weights_sorted = np.array(weights_sorted)

print(loss_sorted)
print(weights_sorted)

[ 0.08033286  0.05352807  0.04785863 ... -0.03834153 -0.0383782
 -0.04373073]
[1.15064384e-17 1.63055966e-28 7.94567194e-20 ... 1.85813940e-23
 6.39955134e-22 1.96694605e-22]


In [25]:
VaR = [
    loss_sorted[i]
    for i in range(len(weights_sorted))
    if weights_sorted[:i].sum() <= 1 - alpha
]

i_star = len(VaR)
VaR = VaR[-1]

print(f'The daily VaR at 95% confidence level is {VaR:.2%}')

# compute the ES
Es = np.sum(loss_sorted[:i_star] * weights_sorted[:i_star]) / np.sum(weights_sorted[:i_star])

print(f'The daily ES at 95% confidence level is {Es:.2%}')


The daily VaR at 95% confidence level is 1.59%
The daily ES at 95% confidence level is 1.75%
