## Baseline Estimation

In [1]:
import pandas as pd
import numpy as np
from tick_loss import *
import requests

In [2]:
url = "https://raw.githubusercontent.com/TobiPfeiffersGitHub/BSExNovartis_Thesis/main/Data/monthly_data.csv"
response = requests.get(url)

if response.status_code == 200:
    with open("monthly_data.csv", "wb") as file:
        file.write(response.content)
    print("File downloaded successfully.")
else:
    print("Failed to download the file.")

# Load the file into a DataFrame
data = pd.read_csv("monthly_data.csv")

data

File downloaded successfully.


Unnamed: 0.1,Unnamed: 0,Date,unemployment_rate,inflation_rate,treasury_yield,Monthly Real GDP Index,GDP_growth,SP500_return,ABBV,AZN,BMY,JNJ,LLY,MRK,NVO,NVS,PFE,ROG
0,0,2000-01-01,4.0,169.300,,12870.47162,-1.066967,,,,,,,,,,,
1,1,2000-02-01,4.1,170.000,6.661000,12934.38121,0.496560,-1.522563,,-12.828964,-13.228004,-16.339821,-11.121498,-21.701151,2.220031,3.838386,-11.226228,54.440789
2,2,2000-03-01,4.0,171.000,6.519500,13000.50928,0.511258,9.413333,,22.264136,-0.218329,-2.079067,5.804243,0.913712,8.390897,6.420237,14.101954,6.922258
3,3,2000-04-01,3.8,170.900,6.256522,13173.13041,1.327803,-3.266805,,5.567379,-8.205683,17.437698,23.153694,12.400712,-0.097663,2.559423,15.213674,7.370518
4,4,2000-05-01,4.0,171.200,5.990526,13149.18201,-0.181797,-1.572223,,-0.148357,5.395746,8.484832,-1.296597,7.374072,20.863985,5.169310,5.638019,-8.163265
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278,278,2023-03-01,3.5,301.808,3.746842,,,3.313488,3.554254,8.035329,0.507544,1.862736,10.703390,0.141189,12.873250,9.367574,0.566924,11.025813
279,279,2023-04-01,3.4,302.918,3.663043,,,1.985238,-5.176628,5.489119,-3.664707,5.612908,15.269915,9.289214,5.836894,16.334413,-4.681371,-1.517467
280,280,2023-05-01,3.7,,3.460000,,,0.461619,-7.868480,-0.191204,-2.695194,-5.277949,8.487855,-4.382088,-3.967915,-6.161645,-2.237080,-2.162160
281,281,2023-06-01,,,3.573636,,,,,,,,,,,,,


We cut off the first observations and the last 5 to ensure the same amount of data.

In [3]:
data = data.drop(data.index[0])
data = data.drop(data.index[-5:])
data = data.reset_index(drop=True)
data = data.drop(columns=['Unnamed: 0'])
data

Unnamed: 0,Date,unemployment_rate,inflation_rate,treasury_yield,Monthly Real GDP Index,GDP_growth,SP500_return,ABBV,AZN,BMY,JNJ,LLY,MRK,NVO,NVS,PFE,ROG
0,2000-02-01,4.1,170.000,6.661000,12934.38121,0.496560,-1.522563,,-12.828964,-13.228004,-16.339821,-11.121498,-21.701151,2.220031,3.838386,-11.226228,54.440789
1,2000-03-01,4.0,171.000,6.519500,13000.50928,0.511258,9.413333,,22.264136,-0.218329,-2.079067,5.804243,0.913712,8.390897,6.420237,14.101954,6.922258
2,2000-04-01,3.8,170.900,6.256522,13173.13041,1.327803,-3.266805,,5.567379,-8.205683,17.437698,23.153694,12.400712,-0.097663,2.559423,15.213674,7.370518
3,2000-05-01,4.0,171.200,5.990526,13149.18201,-0.181797,-1.572223,,-0.148357,5.395746,8.484832,-1.296597,7.374072,20.863985,5.169310,5.638019,-8.163265
4,2000-06-01,4.0,172.200,6.440455,13189.36127,0.305565,1.728613,,10.549735,5.788826,14.239888,31.641749,3.078671,2.813690,8.474599,8.076012,13.131313
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272,2022-10-01,3.7,297.987,3.519048,20127.40070,0.280228,8.571722,9.082759,7.239243,8.974528,6.494844,11.980826,18.457777,9.244201,6.735944,6.375682,-2.707956
273,2022-11-01,3.6,298.598,3.983500,20222.02655,0.470134,5.559164,11.215906,15.575584,4.416367,2.316498,2.482801,8.814239,14.479974,10.390734,7.690670,-53.665066
274,2022-12-01,3.5,298.990,3.891000,20204.80519,-0.085161,-6.193593,0.266786,-0.250105,-10.376181,-0.117873,-1.137072,0.753719,8.619586,1.295221,3.090302,9.446071
275,2023-01-01,3.4,300.536,3.616190,20283.65522,0.390254,6.776820,-8.576205,-3.584079,0.972908,-7.489384,-5.928822,-2.549213,2.541749,-0.110227,-13.817335,16.968326


1. 1 month 

In [4]:
def baseline_performance(data, ticker, alpha, period):
    """
    arguements: DataFrame, Name of the Ticker, alpha (level of risk), period e.g. 12 (month)
    
    the function calculates the historical quantile and the tickloss for this historical quantile 
    for every month and averages it over the length of the hold out sample
    """
    cutoff = period
    sample_length = len(data)-cutoff
    hold_out = data.tail(cutoff)
    hold_out = hold_out.reset_index(drop=True)

    loss = 0

    for i in range(0, cutoff):
        sample = data[i:sample_length+i]
        var = get_historical_quantiles(sample, [str(ticker)], alpha)
        r = hold_out[str(ticker)][i]
        loss += tick_loss(alpha, r, var)

    loss = loss/period
    return loss

In [5]:
nvs_m_loss = baseline_performance(data, 'NVS', 0.05, 12)
print(f'NVS: {nvs_m_loss}')

abbv_m_loss = baseline_performance(data, 'ABBV', 0.05, 12)
print(f'ABBV: {abbv_m_loss}')

azn_m_loss = baseline_performance(data, 'AZN', 0.05, 12)
print(f'AZN: {azn_m_loss}')

bmy_m_loss = baseline_performance(data, 'BMY', 0.05, 12)
print(f'BMY: {bmy_m_loss}')

jnj_m_loss = baseline_performance(data, 'JNJ', 0.05, 12)
print(f'JNJ: {jnj_m_loss}')

lly_m_loss = baseline_performance(data, 'LLY', 0.05, 12)
print(f'LLY: {lly_m_loss}')

mrk_m_loss = baseline_performance(data, 'MRK', 0.05, 12)
print(f'MRK: {mrk_m_loss}')

nvo_m_loss = baseline_performance(data, 'NVO', 0.05, 12)
print(f'NVO: {nvo_m_loss}')

pfe_m_loss = baseline_performance(data, 'PFE', 0.05, 12)
print(f'PFE: {pfe_m_loss}')

rog_m_loss = baseline_performance(data, 'ROG', 0.05, 12)
print(f'ROG: {rog_m_loss}')

NVS: 0.4158943191553413
ABBV: 0.5866629369917314
AZN: 0.7253214925407804
BMY: 0.6198943173957252
JNJ: 0.5184315979380726
LLY: 0.6093893044352746
MRK: 0.7352374082006423
NVO: 0.588473192549298
PFE: 0.8987178577899045
ROG: 3.771761003369601


2. 3 months

In [6]:
def baseline_performance_3m(data, ticker, alpha):
    
    loss = 0

    for i in range(0, 9):
        hold_out = data.tail(12 - i)
        hold_out = hold_out.reset_index(drop=True)
        sample_length = len(data) - 12
        sample = data[i:sample_length + i + 1]
        hq = sample[str(ticker)].quantile(alpha)
        var = pd.Series([hq, hq, hq])
        r = hold_out[str(ticker)][0:3]
        tickloss = tick_loss(alpha, r, var)
    
        loss += tickloss
        
    loss = loss/8
    return loss

In [7]:
tickers = ['NVS', 'ABBV', 'AZN', 'BMY', 'JNJ', 'LLY', 'MRK', 'NVO', 'PFE', 'ROG']

for t in tickers:
    loss = baseline_performance_3m(data, t, 0.05)
    print(f'{t}: {loss}')

NVS: 1.4386873878432633
ABBV: 1.9780135305358089
AZN: 2.5853149588753355
BMY: 2.145963401966395
JNJ: 1.7295556590632721
LLY: 2.1029112887456303
MRK: 2.56292266384592
NVO: 1.9183361224603632
PFE: 2.462487251016617
ROG: 15.660599515512764


In [12]:
def baseline_performance_6m(data, ticker, alpha):
    
    loss = 0

    for i in range(0, 6):
        hold_out = data.tail(12 - i)
        hold_out = hold_out.reset_index(drop=True)
        sample_length = len(data) - 12
        sample = data[i:sample_length + i + 1]
        hq = sample[str(ticker)].quantile(alpha)
        var = pd.Series([hq]*6)
        r = hold_out[str(ticker)][0:6]
        tickloss = tick_loss(alpha, r, var)
    
        loss += tickloss
        
    loss = loss/5
    return loss

In [13]:
for t in tickers:
    loss = baseline_performance_6m(data, t, 0.05)
    print(f'{t}: {loss}')

NVS: 2.7313697124859315
ABBV: 3.9906687812865007
AZN: 5.7250053470248705
BMY: 4.406008748077153
JNJ: 3.8952356899441716
LLY: 4.285005451267841
MRK: 5.169917011030264
NVO: 3.6767918608413614
PFE: 4.775729014942063
ROG: 26.300419052696164


In [14]:
def baseline_performance_9m(data, ticker, alpha):
    
    loss = 0

    for i in range(0, 3):
        hold_out = data.tail(12 - i)
        hold_out = hold_out.reset_index(drop=True)
        sample_length = len(data) - 12
        sample = data[i:sample_length + i + 1]
        hq = sample[str(ticker)].quantile(alpha)
        var = pd.Series([hq]*9)
        r = hold_out[str(ticker)][0:9]
        tickloss = tick_loss(alpha, r, var)
    
        loss += tickloss
        
    loss = loss/2
    return loss

In [15]:
for t in tickers:
    loss = baseline_performance_9m(data, t, 0.05)
    print(f'{t}: {loss}')

NVS: 5.777242178164185
ABBV: 7.9544151377188355
AZN: 10.297687882244215
BMY: 8.647649020585185
JNJ: 6.935428395795205
LLY: 8.48231776210246
MRK: 10.259902957409135
NVO: 7.673344489841452
PFE: 9.876630635332761
ROG: 62.69013942523135


5. 12 months

In [10]:
hold_out = data.tail(12)
hold_out = hold_out.reset_index(drop=True)
sample_length = len(data) - 12
sample = data.head(sample_length)

In [11]:
for t in tickers:
    hq = sample[t].quantile(0.05)
    var = pd.Series([hq]*12)
    r = hold_out[t]
    loss = tick_loss(0.05, r, var)
    print(f'{t}: {loss}')

NVS: 5.274738256797579
ABBV: 7.108988010023979
AZN: 8.276013374178527
BMY: 7.669541652151122
JNJ: 5.859500317762217
LLY: 7.0805237481555805
MRK: 8.84731237179337
NVO: 7.066533880117722
PFE: 10.228839472052988
ROG: 45.24386835802569
