# Market Risk TD1

ESILV IF3

# Exercise 2

## Library Importation

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy.stats import gaussian_kde
from scipy.integrate import cumulative_trapezoid as cumtrapz

## Dataset

In [2]:
df = pd.read_csv("Lab1_Data.csv", sep = ";")
df

Unnamed: 0,date,value
0,02/01/2015,5621
1,05/01/2015,5424
2,06/01/2015,5329
3,07/01/2015,5224
4,08/01/2015,5453
...,...,...
1018,21/12/2018,4045
1019,24/12/2018,401
1020,27/12/2018,3938
1021,28/12/2018,4088


## Convert columns

In [3]:
df["date"] = pd.to_datetime(df["date"], format="%d/%m/%Y")
df.sort_values("date", inplace = True)

In [4]:
df["value"] = (df["value"].astype(str).str.replace(",", ".", regex=False))
df["value"] = pd.to_numeric(df["value"], errors="coerce")

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1023 entries, 0 to 1022
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1023 non-null   datetime64[ns]
 1   value   1023 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 16.1 KB


In [6]:
df

Unnamed: 0,date,value
0,2015-01-02,5.621
1,2015-01-05,5.424
2,2015-01-06,5.329
3,2015-01-07,5.224
4,2015-01-08,5.453
...,...,...
1018,2018-12-21,4.045
1019,2018-12-24,4.010
1020,2018-12-27,3.938
1021,2018-12-28,4.088


## Compute return

the return is equal to the actual return divided by the former return minus 1. 

In [7]:
df["return"] = df["value"] / df["value"].shift(1) - 1

In [8]:
df

Unnamed: 0,date,value,return
0,2015-01-02,5.621,
1,2015-01-05,5.424,-0.035047
2,2015-01-06,5.329,-0.017515
3,2015-01-07,5.224,-0.019704
4,2015-01-08,5.453,0.043836
...,...,...,...
1018,2018-12-21,4.045,-0.001481
1019,2018-12-24,4.010,-0.008653
1020,2018-12-27,3.938,-0.017955
1021,2018-12-28,4.088,0.038090


## Parameters

In [9]:
alpha = 0.99

In [10]:
## between 2015 and 2016
df_2015_2016 = df[df["date"] < "2017"].dropna().loc[:, "return"]
df_2015_2016

1     -0.035047
2     -0.017515
3     -0.019704
4      0.043836
5     -0.020723
         ...   
508   -0.008118
509    0.000744
510   -0.000186
511   -0.009481
512    0.006006
Name: return, Length: 512, dtype: float64

In [11]:
## between 2017 and 2018
df_2017_2018 = df[df["date"] >= "2017"].dropna().loc[:, "return"]
df_2017_2018

513     0.007463
514     0.040741
515     0.003737
516    -0.008155
517    -0.005719
          ...   
1018   -0.001481
1019   -0.008653
1020   -0.017955
1021    0.038090
1022    0.007583
Name: return, Length: 510, dtype: float64

## Question 1) VaR between 2015 and 2016

### Empirical VaR

In [12]:
def Empirical_VaR(df, alpha):
    return np.percentile(df, (1 - alpha) * 100, method="linear")

emp_var = Empirical_VaR(df_2015_2016, alpha)

print("The empirical VaR is for returns between 2015 and 2016 is : ", emp_var*100, "%")
print("There are", round(emp_var*100, 2), "% chances to lose", round((1 - alpha)*100, 2), "% of the portfolio's value")

The empirical VaR is for returns between 2015 and 2016 is :  -5.67754430082823 %
There are -5.68 % chances to lose 1.0 % of the portfolio's value


### VaR based on a parametric distribution (say Gaussian)

In [13]:
def Gaussian_VaR(df, alpha):
    mean = np.mean(df)
    std = np.std(df)
    z = norm.ppf(1 - alpha)
    gauss_var = mean + std*z
    return mean, std, z, gauss_var

mean, std, z, gauss_var = Gaussian_VaR(df_2015_2016, alpha)
print("Mean:", mean, "\nStandard deviation:", std, "\nZ:", z)


print("The gaussian VaR is for returns between 2015 and 2016 is : ", gauss_var*100, "%")
print("There are", round(gauss_var*100, 2), "% chances to lose", round((1 - alpha)*100, 2), "% of the portfolio's value")

Mean: 0.00019533353462882343 
Standard deviation: 0.023864825008581173 
Z: -2.3263478740408408
The gaussian VaR is for returns between 2015 and 2016 is :  -5.532255138844067 %
There are -5.53 % chances to lose 1.0 % of the portfolio's value


### VaR based on a non-parametric distribution (Gaussian Kernel)

In [14]:
def Kde_VaR(df, alpha, bw=None):
    # Fit KDE (Gaussian kernel)
    kde = gaussian_kde(df, bw_method=bw)  # 'scott' by default

    # Build a grid covering the tail well
    mu, s = np.mean(df), np.std(df, ddof=1)
    lo = min(df.min(), mu - 6*s)
    hi = max(df.max(), mu + 6*s)
    grid = np.linspace(lo, hi, 20001)

    # PDF on grid and CDF by numerical integration (trapezoid)
    pdf = kde(grid)
    cdf = cumtrapz(pdf, grid, initial=0.0)
    cdf = cdf / cdf[-1]  # normalize to 1

    # α-quantile by interpolation of the CDF
    q_alpha = np.interp(alpha, cdf, grid)

    return -q_alpha

kde_var = Kde_VaR(df_2015_2016, alpha)

print("The empirical VaR is for returns between 2015 and 2016 is : ", kde_var*100, "%")
print("There are", round(kde_var*100, 2), "% chances to lose", round((1 - alpha)*100, 2), "% of the portfolio's value")

The empirical VaR is for returns between 2015 and 2016 is :  -6.007630146624207 %
There are -6.01 % chances to lose 1.0 % of the portfolio's value


### Results

In [15]:
var = {"empirical":emp_var,
       "parametric (Gaussian)" : gauss_var,
       "non-parametric (Gaussian kernet)": kde_var}
for key, value in var.items():
    if(value != None):
        print(key, " : ", round(value*100, 3), "%")
    else:
        print(key, " : ", value)

empirical  :  -5.678 %
parametric (Gaussian)  :  -5.532 %
non-parametric (Gaussian kernet)  :  -6.008 %


## Question 2) Proportion of price returns between January 2015 and December 2016 that exceed the VaR threshold

In [16]:
def Proportion(df, var):
    prop = {}
    for key, value in var.items():
        prop[key] = df[df < value].count() / df.count()
    return prop

In [17]:
print("Proportion of price returns between january 2015 and december 2016 that exceed each VaR:\n")
prop_2015_2016 = Proportion(df_2015_2016, var)
for key, value in prop_2015_2016.items():
    print(key, " : ", round(value*100, 3), "%")

Proportion of price returns between january 2015 and december 2016 that exceed each VaR:

empirical  :  1.172 %
parametric (Gaussian)  :  1.172 %
non-parametric (Gaussian kernet)  :  0.781 %


## Question 2) Out of sample: which proportion of price returns between January 2017 and December 2018 exceed the VaR threshold? 

In [18]:
print("Proportion of price returns between january 2017 and december 2018 that exceed each VaR:\n")
prop_2017_2018 = Proportion(df_2017_2018, var)
for key, value in prop_2017_2018.items():
    print(key, " : ", round(value*100, 3), "%")

Proportion of price returns between january 2017 and december 2018 that exceed each VaR:

empirical  :  0.196 %
parametric (Gaussian)  :  0.392 %
non-parametric (Gaussian kernet)  :  0.196 %


### Discussion : Do you validate the choice of a historical VaR?

The proportion of the price return that exceed the VaR threshold is higher for the parametric VaR. So we prefer this one here.

## Question 3) between October and December 2016

In [19]:
## between October and December 2016
df_oct_dec = df[(df["date"] < "2017") & (df["date"] >= "2016-10")].dropna().loc[:, "return"]
df_oct_dec

449   -0.008677
450    0.026744
451    0.027705
452    0.016590
453    0.006346
         ...   
508   -0.008118
509    0.000744
510   -0.000186
511   -0.009481
512    0.006006
Name: return, Length: 64, dtype: float64

In [20]:
var_oct_dec = {"empirical":Empirical_VaR(df_oct_dec, alpha),
       "parametric (Gaussian)" : Gaussian_VaR(df_oct_dec, alpha)[3],
       "non-parametric (Gaussian kernet)": Kde_VaR(df_oct_dec, alpha)}

for key, value in var_oct_dec.items():
    if(value != None):
        print(key, " : ", round(value*100, 3), "%")
    else:
        print(key, " : ", value)

empirical  :  -3.044 %
parametric (Gaussian)  :  -3.454 %
non-parametric (Gaussian kernet)  :  -4.072 %


In [24]:
# proportion of exceed
print("Proportion of price returns between october and december 2016 that exceed each VaR:\n")
prop_oct_dec = Proportion(df_oct_dec, var)
for key, value in prop_oct_dec.items():
    print(key, " : ", round(value*100, 3), "%")

Proportion of price returns between october and december 2016 that exceed each VaR:

empirical  :  0.0 %
parametric (Gaussian)  :  0.0 %
non-parametric (Gaussian kernet)  :  0.0 %
