### Initial package imports

In [None]:
import pandas as pd
import scipy.stats as st
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from typing import Dict, List, Union

## Initial files being read in and dataset creation

In [None]:
# Render floats with six decimals in every displayed DataFrame
pd.set_option('display.float_format', '{:.6f}'.format)

# Index data: semicolon-separated file with day-first dates
main_df = pd.read_csv('Data/Cleaned_Indices_Assignment1.csv', sep=';')
main_df['Date'] = pd.to_datetime(main_df['Date'], format='%d-%m-%Y')

# Interest rate data: comma-separated file with ISO dates
interest_rate_bond_df = pd.read_csv('Data/ECB_Data_10yr_Treasury_bond.csv', sep=',')
interest_rate_bond_df['Date'] = pd.to_datetime(interest_rate_bond_df['Date'], format='%Y-%m-%d')

# One pipeline: left-join the yields onto the index data, drop days without a
# 10-year spot rate, keep 2012-01-04 onwards, and index chronologically by Date
main_df = (
    pd.merge(main_df, interest_rate_bond_df, on='Date', how='left')
    .dropna(axis=0, subset=['Yield curve spot rate, 10-year maturity - Government bond'])
    .loc[lambda merged: merged['Date'] >= '2012-01-04']
    .set_index('Date')
    .sort_index()
)

### government bond column

In [None]:
# Daily value, profit and loss of the bond position, accrued at a yield-based daily rate
days_per_annum = 365
interest_bond = 1500000

# Daily accrual rate per row: (10-year yield + 1.5 for the credit risk spread),
# divided by 365, scaled by 7/5, then converted from percent to a fraction
bond_yield = main_df['Yield curve spot rate, 10-year maturity - Government bond'].to_numpy()
daily_rates = (((bond_yield + 1.5) / (days_per_annum)) * (7/5)) / 100

# value[0] is the starting amount and value[i] = value[i-1] * (1 + rate[i]);
# putting the starting amount in slot 0 lets a running product express the recursion
growth_factors = 1 + daily_rates
growth_factors[0] = interest_bond
interest_bond_vector = np.cumprod(growth_factors)

# Day-over-day change is the profit; the loss is its negative. Day 0 stays at zero.
interest_bond_profit_vector = np.zeros(len(main_df))
interest_bond_loss_vector = np.zeros(len(main_df))
interest_bond_profit_vector[1:] = np.diff(interest_bond_vector)
interest_bond_loss_vector[1:] = -interest_bond_profit_vector[1:]

# Add vectors to the dataframe
main_df['Interest_Bond'] = interest_bond_vector
main_df['Interest_Bond_Profit'] = interest_bond_profit_vector
main_df['Interest_Bond_Loss'] = interest_bond_loss_vector
main_df['Interest_Bond_daily_rate'] = daily_rates

## Portfolio details

### details

#### Instruments:
- **S&P500**
- **DAX40**
- **NIKKEI**
- **EU Government Bond (10-year maturity, AAA-rated)**

#### Invested amount:
- **10,000,000 EURO**

#### Period:
- **01/01/2012 - 31/12/2022**

#### Weights:
- **S&P500**: 0.4  
- **DAX40**: 0.3  
- **NIKKEI**: 0.15  
- **EU Government Bond**: 0.15  

#### Measures:
- **Value at Risk (VaR)**: 1, 5, 10 days  
- **Expected Shortfall (ES)**  



### weights and currency correction


In [None]:
# Target weight of each instrument in the portfolio
weights_dict = {'S&P500': 0.4, 'DAX40': 0.3, 'NIKKEI': 0.15, 'EU-BOND': 0.15}
weights_array = np.array([weights_dict[name] for name in ('S&P500', 'DAX40', 'NIKKEI', 'EU-BOND')])

starting_investment = 10000000  # 10 million euros
starting_date = pd.to_datetime('2012-01-04')

# Market data on the starting date (main_df is indexed by Date)
starting_row = main_df.loc[starting_date]

# Exchange rates on the starting date
usd_to_eur = float(starting_row['USD/EUR'])
jpy_to_eur = float(starting_row['JPY/EUR'])

# Amount per instrument: total * weight, with the USD and JPY legs divided by their FX rate
invested_amount_SP500 = starting_investment * weights_dict['S&P500'] / usd_to_eur
invested_amount_NIKKEI = starting_investment * weights_dict['NIKKEI'] / jpy_to_eur
invested_amount_DAX40 = starting_investment * weights_dict['DAX40']
invested_amount_EU_BOND = starting_investment * weights_dict['EU-BOND']

invested_amounts = [
    invested_amount_SP500,    # in USD
    invested_amount_DAX40,    # in EUR
    invested_amount_NIKKEI,   # in JPY
    invested_amount_EU_BOND,  # in EUR
]

print(invested_amounts)

### Returns Portfolio 

In [None]:
# Track the value of each position over time, in the position's own currency.
# Each equity leg starts at its invested amount on the first row and compounds with the
# index's daily return: value[t] = value[t-1] * (1 + return[t]).
# The recursion is a running product, so it is computed with cumprod in one pass instead
# of thousands of scalar .loc reads/writes.
if main_df.index[0] != starting_date:
    raise ValueError("starting_date must be the first row of main_df")

equity_legs = {
    'SP500_Investment': (invested_amount_SP500, 'C_S&P500_Returns'),    # in USD
    'DAX40_Investment': (invested_amount_DAX40, 'C_Dax40_Returns'),     # in EUR
    'NIKKEI_Investment': (invested_amount_NIKKEI, 'C_Nikkei_Returns'),  # in JPY
}
for investment_col, (initial_amount, returns_col) in equity_legs.items():
    growth = 1.0 + main_df[returns_col]
    # The first row holds the invested amount itself; its return is not applied
    growth.iloc[0] = initial_amount
    # skipna=False: a missing return makes every later value NaN, as the recursion would
    main_df[investment_col] = growth.cumprod(skipna=False)

# EU Government Bond value is already calculated in the Interest_Bond column
main_df['EU_BOND_Investment'] = main_df['Interest_Bond']

# Methods input values 

### Portfolio change Daily

In [None]:
# Total portfolio value in EUR per day: the S&P500 and NIKKEI positions are multiplied by
# that day's 'USD/EUR' and 'JPY/EUR' columns; the DAX40 and bond positions are added as-is.
# Missing position values are treated as 0 in the sum.
main_df['Portfolio_Value_EUR'] = (
    main_df['SP500_Investment'].fillna(0) * main_df['USD/EUR'] +
    main_df['DAX40_Investment'].fillna(0) +
    main_df['NIKKEI_Investment'].fillna(0) * main_df['JPY/EUR'] +
    main_df['EU_BOND_Investment'].fillna(0)
)

# Pin the first day to the initial investment amount
main_df.loc[starting_date, 'Portfolio_Value_EUR'] = starting_investment

# Daily change in portfolio value (profit/loss); diff() leaves the first row NaN,
# so it is set to 0 explicitly
main_df['Portfolio_Change_EUR'] = main_df['Portfolio_Value_EUR'].diff()
main_df.loc[starting_date, 'Portfolio_Change_EUR'] = 0.0  # Set the first day's change to 0

# Loss convention used by the risk functions below: loss = -(daily change),
# so a positive number is money lost
main_df['Portfolio_loss'] = -main_df['Portfolio_Change_EUR']

# Set the first day's loss to 0 (there's no previous day to compare with)
main_df.loc[starting_date, 'Portfolio_loss'] = 0.0

# Daily portfolio returns as the relative change of the EUR value (used later);
# the first row has no predecessor and is set to 0
main_df['Portfolio_Daily_Returns'] = main_df['Portfolio_Value_EUR'].pct_change()
main_df.loc[starting_date, 'Portfolio_Daily_Returns'] = 0.0

# Show the first 20 rows of the position, FX and portfolio columns as a sanity check
display(main_df[['SP500_Investment', 'DAX40_Investment', 'NIKKEI_Investment', 
                'EU_BOND_Investment', 'USD/EUR', 'JPY/EUR', 'Portfolio_Value_EUR', 
                'Portfolio_Change_EUR', 'Portfolio_loss', 'Portfolio_Daily_Returns']].head(20))

In [None]:
# Daily portfolio losses as a plain array; NaN entries are ignored by the statistics below
loss_values = main_df['Portfolio_loss'].values

min_loss = np.nanmin(loss_values)
max_loss = np.nanmax(loss_values)
mean_loss = np.nanmean(loss_values)

# Report the extremes and the mean of the loss series
print(f"Portfolio Loss Statistics:")
for stat_name, stat_value in (("Minimum", min_loss), ("Maximum", max_loss), ("Mean", mean_loss)):
    print(f"{stat_name} Loss: {stat_value:.4f}")

# How many of the loss values are actual numbers (non-NaN)
valid_count = (~np.isnan(loss_values)).sum()
print(f"Number of valid loss values: {valid_count} out of {len(loss_values)}")

In [None]:
def calculate_daily_loss_variables(time_window, current_date):
    """
    Summarise the portfolio losses inside one estimation window.

    Parameters:
    - time_window: frame with a 'Portfolio_loss' column
    - current_date: label stored under 'Date' in the result

    Returns:
    - dict with 'Date', 'Portfolio_mean_loss' and 'Portfolio_std_loss'.
      NaN losses are ignored; the standard deviation is numpy's default
      (population, ddof=0) form.
    """
    window_losses = time_window['Portfolio_loss']
    return {
        "Date": current_date,
        "Portfolio_mean_loss": np.nanmean(window_losses),
        "Portfolio_std_loss": np.nanstd(window_losses),
    }

### Portfolio variance

## Value at Risk (VaR)

In [None]:
def VaR(alpha, r= 0, s= 1, df= 0):
    """
    Value at Risk of a normal or Student-t loss model.

    The model is for LOSSES, so VaR is the alpha-quantile of the loss
    distribution (a positive number is money lost).

    Parameters:
    - alpha: confidence level(s), e.g. 0.95 or 0.99
    - r: mean of the loss
    - s: standard deviation of the loss
    - df: 0 selects the normal model; any other value is the degrees of
      freedom of the Student-t model and must be greater than 2

    Returns:
    - r + s * z_alpha for the normal model, or r + c * t_alpha for the t model,
      where c rescales the t quantile so the distribution has standard deviation s

    Raises:
    - ValueError: if df is non-zero and <= 2 (the t variance df/(df-2) used for
      the rescaling is not finite there)
    """
    if df == 0:
        # Normal model: mean plus volatility times the standard normal quantile
        return r + s * st.norm.ppf(alpha)

    if df <= 2:
        raise ValueError(f"df must be 0 (normal) or greater than 2 for the t model, got {df}")

    # A standard t variable has variance df/(df-2); divide it out so the
    # scaled distribution has standard deviation s
    scale = s / np.sqrt(df / (df - 2))
    return r + scale * st.t.ppf(alpha, df=df)

## Expected Shortfall (ES) 

In [None]:
def ES(alpha, r= 0, s= 1, df= 0):
    """
    Expected Shortfall of a normal or Student-t loss model.

    The model is for LOSSES: ES is the expected loss given that the loss is at
    or beyond the alpha-quantile.

    Parameters:
    - alpha: confidence level(s), e.g. 0.95 or 0.99
    - r: mean of the loss
    - s: standard deviation of the loss
    - df: 0 selects the normal model; any other value is the degrees of
      freedom of the Student-t model and must be greater than 2

    Returns:
    - r + s * pdf(z_alpha) / (1 - alpha) for the normal model, or the
      corresponding Student-t expression rescaled to standard deviation s

    Raises:
    - ValueError: if df is non-zero and <= 2 (the variance rescaling df/(df-2)
      is not finite there)
    """
    if df == 0:
        # Normal model: tail expectation of a standard normal, shifted and scaled
        z_alpha = st.norm.ppf(alpha)
        return r + s * (st.norm.pdf(z_alpha) / (1 - alpha))

    if df <= 2:
        raise ValueError(f"df must be 0 (normal) or greater than 2 for the t model, got {df}")

    # Tail expectation of a standard t variable beyond its alpha-quantile
    t_alpha = st.t.ppf(alpha, df=df)
    tail_mean = st.t.pdf(t_alpha, df=df) * ((df + t_alpha**2) / (df - 1)) / (1 - alpha)

    # A standard t variable has variance df/(df-2); divide it out so the
    # scaled distribution has standard deviation s
    scale = s / np.sqrt(df / (df - 2))
    return r + scale * tail_mean

# performing different methods

**TODO:** Write the variance-covariance method so that the sample period is an input parameter, alongside the other parameters needed for the calculation.

## 1. var/cov multivar normal dist & T-distribution & Historical

Functions to calculate components of Var/cov method and Historical method.

### Main method

In [None]:
def calculate_var_cov(window, current_date, vAlpha, mean_loss, portfolio_std_loss, df=0):
    """
    Variance-covariance VaR and ES for LOSSES at every confidence level.

    Parameters:
    - window: not used by the calculation (kept in the signature for callers)
    - current_date: label stored under 'Date' in the result
    - vAlpha: iterable of confidence levels
    - mean_loss, portfolio_std_loss: mean and standard deviation of the loss
    - df: 0 for the normal model, otherwise the Student-t degrees of freedom

    Returns:
    - dict with 'Date' plus 'VaR <label>' and 'ES <label>' arrays (one entry per
      confidence level), where <label> is "Normal" or "T<df>"
    """
    dist_label = "Normal" if df == 0 else f"T{df}"

    var_per_level = [VaR(level, mean_loss, portfolio_std_loss, df=df) for level in vAlpha]
    es_per_level = [ES(level, mean_loss, portfolio_std_loss, df=df) for level in vAlpha]

    return {
        'Date': current_date,
        f'VaR {dist_label}': np.array(var_per_level),
        f'ES {dist_label}': np.array(es_per_level),
    }

def calculate_historical_var_es(window, current_date, vAlpha):
    """
    Historical-simulation VaR and ES for LOSSES.

    Parameters:
    - window: frame with a 'Portfolio_loss' column (NaN losses are dropped)
    - current_date: label stored under 'Date' in the result
    - vAlpha: confidence level(s) as fractions (e.g. [0.95, 0.99]); a list,
      tuple, array or single number is accepted

    Returns:
    - dict with 'Date', 'VaR Historical' and 'ES Historical'; the two arrays have
      one entry per confidence level. If the window holds no usable loss, both
      arrays are filled with NaN.
    """
    # Convert up front: multiplying a plain list by 100 would repeat the list
    # instead of turning fractions into percentages
    levels = np.atleast_1d(np.asarray(vAlpha, dtype=float))

    # Ascending order: larger losses are larger positive numbers
    sorted_losses = np.sort(window['Portfolio_loss'].dropna().to_numpy())

    if sorted_losses.size == 0:
        empty_result = np.full(levels.shape, np.nan)
        return {
            'Date': current_date,
            'VaR Historical': empty_result,
            'ES Historical': empty_result.copy()
        }

    # VaR is the empirical alpha-quantile of the losses
    var_hist = np.percentile(sorted_losses, levels * 100)

    # ES is the mean of the losses at or beyond the VaR of the same level
    es_hist = [sorted_losses[sorted_losses >= threshold].mean() for threshold in var_hist]

    return {
        'Date': current_date,
        'VaR Historical': np.array(var_hist),
        'ES Historical': np.array(es_hist)
    }

def calculate_multiday_risk(main_df_indexed, vAlpha, interval, sample_size):
    """
    Calculate multi-day VaR and ES using the historical simulation method.
    Returns VaR/ES for LOSSES.

    Two estimates are produced for every date:
    - "Reg": quantiles of the realised `interval`-day losses, on an expanding window
    - "Sqrt": the rolling 1-day historical VaR/ES scaled by sqrt(interval)

    Parameters:
    - main_df_indexed: DataFrame with DatetimeIndex and a 'Portfolio_loss' column
    - vAlpha: Confidence levels as fractions (array-like)
    - interval: Number of days for the multi-day calculation (e.g., 5 or 10)
    - sample_size: Rolling window size for the 1-day VaR/ES (used for the sqrt rule);
      the first `sample_size` rows are excluded from the analysis period

    Returns:
    - var_multi_df, es_multi_df: DataFrames indexed like the analysis period, with the
      '..._Hist_Reg' and '..._Hist_Sqrt' columns (each cell holds one value per
      confidence level) plus 'Actual_Loss_<interval>d'
    """
    # A plain list times 100 would repeat the list, so convert once
    levels = np.asarray(vAlpha, dtype=float)
    loss_col = f'Portfolio_loss_{interval}d'

    # Analysis period: everything after the initial estimation sample
    analysis_start_date = main_df_indexed.index[sample_size]
    time_window_multi = main_df_indexed[main_df_indexed.index >= analysis_start_date].copy()

    # Realised loss over `interval` consecutive days (rolling sum of daily losses)
    time_window_multi[loss_col] = time_window_multi['Portfolio_loss'].rolling(window=interval).sum()

    # Drop the first interval-1 rows where the rolling sum is undefined.
    # .copy() because columns are added below: assigning onto the dropna() result
    # directly triggers pandas' SettingWithCopy warning.
    multi_day_losses_df = time_window_multi.dropna(subset=[loss_col]).copy()

    # --- Historical Multi-Day VaR/ES (Regular Method) ---
    # Expanding window over the multi-day losses.
    # NOTE(review): the window for a row includes that row's own realised loss,
    # which matters if these columns are used for backtesting - confirm intent.
    var_reg_list = []
    es_reg_list = []
    for n_used in range(1, len(multi_day_losses_df) + 1):
        sorted_losses = np.sort(multi_day_losses_df[loss_col].iloc[:n_used])
        var_vals = np.percentile(sorted_losses, levels * 100)
        # ES: mean of the losses at or beyond the VaR of the same level
        es_vals = [sorted_losses[sorted_losses >= var_val].mean() for var_val in var_vals]
        var_reg_list.append(var_vals)
        es_reg_list.append(es_vals)

    multi_day_losses_df[f'VaR_{interval}d_Hist_Reg'] = var_reg_list
    multi_day_losses_df[f'ES_{interval}d_Hist_Reg'] = es_reg_list

    # --- Historical Multi-Day VaR/ES (Sqrt Rule) ---
    # Rolling 1-day historical VaR/ES from the `sample_size` rows before each date
    daily_var_hist = []
    daily_es_hist = []
    for i in range(sample_size, len(main_df_indexed)):
        window = main_df_indexed.iloc[i - sample_size:i]
        hist_res = calculate_historical_var_es(window, main_df_indexed.index[i], levels)
        daily_var_hist.append(hist_res['VaR Historical'])
        daily_es_hist.append(hist_res['ES Historical'])

    daily_risk_df = pd.DataFrame({
        'VaR_1d_Hist': daily_var_hist,
        'ES_1d_Hist': daily_es_hist
    }, index=main_df_indexed.index[sample_size:])

    # Attach the 1-day figures to the dates that have a multi-day loss
    multi_day_losses_df = multi_day_losses_df.merge(daily_risk_df, left_index=True, right_index=True, how='left')

    # Square-root-of-time rule: scale the 1-day figures by sqrt(interval)
    multi_day_losses_df[f'VaR_{interval}d_Hist_Sqrt'] = multi_day_losses_df['VaR_1d_Hist'] * np.sqrt(interval)
    multi_day_losses_df[f'ES_{interval}d_Hist_Sqrt'] = multi_day_losses_df['ES_1d_Hist'] * np.sqrt(interval)

    # Prepare output DataFrames
    var_cols = [f'VaR_{interval}d_Hist_Reg', f'VaR_{interval}d_Hist_Sqrt']
    es_cols = [f'ES_{interval}d_Hist_Reg', f'ES_{interval}d_Hist_Sqrt']

    var_multi_df = multi_day_losses_df[var_cols].copy()
    es_multi_df = multi_day_losses_df[es_cols].copy()
    # Add the actual loss column for plotting/backtesting
    var_multi_df[f'Actual_Loss_{interval}d'] = multi_day_losses_df[loss_col]
    es_multi_df[f'Actual_Loss_{interval}d'] = multi_day_losses_df[loss_col]

    return var_multi_df, es_multi_df

In [None]:
def main():
    """
    Run the rolling 1-day VaR/ES estimation and the multi-day VaR calculation.

    For every date after the first `sample_size` observations of the
    2012-01-05 .. 2021-12-31 window, the preceding `sample_size` rows are used to
    compute VaR and ES at 95% and 99% with the normal model, the Student-t model
    (3 to 6 degrees of freedom) and historical simulation.

    Returns:
    - var_results_df, es_results_df: one row per date with a 'Date' column and one
      column per method; each cell holds an array [95% value, 99% value]
    - var_5d, var_10d: whatever calculate_multiday_var returns for 5 and 10 days
    """
    # Initialize lists to store results
    VaR_results = []
    ES_results = []

    # main_df is indexed by Date earlier in the notebook, so main_df['Date'] raises a
    # KeyError; work on the index, but also accept a frame that still has a Date column
    dated_df = main_df.set_index('Date') if 'Date' in main_df.columns else main_df
    dates = dated_df.index
    time_window = dated_df[(dates >= '2012-01-05') & (dates <= '2021-12-31')]

    # Define confidence levels
    vAlpha = np.array([0.95, 0.99])

    # Define sample size and t-distribution degrees of freedom
    sample_size = 500
    degrees_of_freedom = [0, 3, 4, 5, 6]  # 0 represents normal distribution

    for i in range(sample_size, len(time_window)):
        # Estimation window: the sample_size rows strictly before the current date
        window = time_window.iloc[i - sample_size:i]
        current_date = time_window.index[i]

        # Calculate loss statistics
        loss_stats = calculate_daily_loss_variables(window, current_date)
        mean_loss = loss_stats["Portfolio_mean_loss"]
        portfolio_std_loss = loss_stats["Portfolio_std_loss"]

        # Initialize result dictionaries for this date
        var_row = {'Date': current_date}
        es_row = {'Date': current_date}

        # Calculate VaR and ES using various distributions
        for df in degrees_of_freedom:
            results = calculate_var_cov(window, current_date, vAlpha, mean_loss, portfolio_std_loss, df)

            # Same label convention as calculate_var_cov uses for its keys
            dist_label = "Normal" if df == 0 else f"T{df}"

            # Add results to the dictionaries
            var_row[f'VaR {dist_label}'] = results[f'VaR {dist_label}']
            es_row[f'ES {dist_label}'] = results[f'ES {dist_label}']

        # Calculate VaR and ES using historical simulation
        hist_results = calculate_historical_var_es(window, current_date, vAlpha)
        var_row['VaR Historical'] = hist_results['VaR Historical']
        es_row['ES Historical'] = hist_results['ES Historical']

        # Add results for this date
        VaR_results.append(var_row)
        ES_results.append(es_row)

    # Convert results to DataFrame
    var_results_df = pd.DataFrame(VaR_results)
    es_results_df = pd.DataFrame(ES_results)

    # NOTE(review): calculate_multiday_var is not defined in the part of the notebook
    # reviewed here; calculate_multiday_risk exists but has a different signature and
    # return value - confirm which function is intended.
    var_5d = calculate_multiday_var(vAlpha, 5, sample_size)
    var_10d = calculate_multiday_var(vAlpha, 10, sample_size)

    return var_results_df, es_results_df, var_5d, var_10d

In [None]:
var_results_df, es_results_df, var_5d, var_10d = main()

# Preview the first rows of every result table under its heading
for heading, result_frame in (
    ("VaR results", var_results_df),
    ("ES results", es_results_df),
    ("5-day VaR results", var_5d),
    ("10-day VaR results", var_10d),
):
    print(heading)
    display(result_frame.head())

In [None]:
# Plots of the 5-day and 10-day VaR over time and of their violations.
# The 5-day and 10-day panels differ only in the horizon, so each panel type is
# drawn by one helper instead of two copy-pasted code paths.
# NOTE(review): the helpers read the columns 'Date', 'Portfolio_loss',
# 'VaR_<n>d_reg' and 'VaR_<n>d_sqrt' from var_5d / var_10d - confirm that this is
# what the multi-day VaR function returns.

def _split_levels(var_df, column):
    """Split a column whose cells hold a [95%, 99%] pair into two plain lists."""
    pairs = var_df[column]
    return [pair[0] for pair in pairs], [pair[1] for pair in pairs]


def plot_multiday_var(ax, var_df, horizon, show_xlabel=False):
    """Draw the actual multi-day loss and the four VaR series for one horizon on `ax`."""
    reg_95, reg_99 = _split_levels(var_df, f'VaR_{horizon}d_reg')
    sqrt_95, sqrt_99 = _split_levels(var_df, f'VaR_{horizon}d_sqrt')

    # Plot actual portfolio losses
    ax.plot(var_df['Date'], var_df['Portfolio_loss'], 'k-', alpha=0.3, label=f'Actual {horizon}-day Loss')

    # Plot regular and sqrt VaR values
    ax.plot(var_df['Date'], reg_95, 'b-', label=f'{horizon}-day VaR 95% (Regular)')
    ax.plot(var_df['Date'], reg_99, 'r-', label=f'{horizon}-day VaR 99% (Regular)')
    ax.plot(var_df['Date'], sqrt_95, 'b--', label=f'{horizon}-day VaR 95% (Sqrt Rule)')
    ax.plot(var_df['Date'], sqrt_99, 'r--', label=f'{horizon}-day VaR 99% (Sqrt Rule)')

    ax.set_title(f'{horizon}-Day Value at Risk Over Time', fontsize=14)
    if show_xlabel:
        ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Value (EUR)', fontsize=12)
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)


def plot_multiday_violations(ax, var_df, horizon, show_xlabel=False):
    """Mark the dates on which the actual loss exceeded each VaR series for one horizon."""
    reg_95, reg_99 = _split_levels(var_df, f'VaR_{horizon}d_reg')
    sqrt_95, sqrt_99 = _split_levels(var_df, f'VaR_{horizon}d_sqrt')

    # (legend label, title label, tick label, VaR series, colour, marker), in row order 1..4
    series_specs = [
        ('95% Regular', '95% Regular', '95% Reg', reg_95, 'blue', 'o'),
        ('99% Regular', '99% Regular', '99% Reg', reg_99, 'red', 'o'),
        ('95% Sqrt Rule', '95% Sqrt', '95% Sqrt', sqrt_95, 'blue', 'x'),
        ('99% Sqrt Rule', '99% Sqrt', '99% Sqrt', sqrt_99, 'red', 'x'),
    ]

    summaries = []
    for row, (legend_label, title_label, _, var_series, colour, marker) in enumerate(series_specs, start=1):
        # A violation is a day on which the actual loss exceeds the VaR
        violations = var_df['Portfolio_loss'] > np.array(var_series)
        n_violations = violations.sum()
        ax.scatter(var_df['Date'][violations], [row] * n_violations,
                   color=colour, marker=marker, label=legend_label)
        summaries.append(f'{title_label}: {n_violations} ({n_violations/len(var_df)*100:.2f}%)')

    # Violation counts and rates go into the title
    ax.set_title(f'{horizon}-Day VaR Violations\n' +
                 f'{summaries[0]}, {summaries[1]}\n' +
                 f'{summaries[2]}, {summaries[3]}',
                 fontsize=12)

    ax.set_yticks([1, 2, 3, 4])
    ax.set_yticklabels([spec[2] for spec in series_specs])
    if show_xlabel:
        ax.set_xlabel('Date', fontsize=12)
    ax.legend(loc='upper right')
    ax.grid(True, axis='y', alpha=0.3)


# Figure 1: 5-day (top) and 10-day (bottom) VaR over time
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 12), sharex=False)
plot_multiday_var(ax1, var_5d, 5)
plot_multiday_var(ax2, var_10d, 10, show_xlabel=True)

# Adjust layout to prevent clipping of labels
plt.tight_layout()
plt.show()

# Figure 2: violations of the 5-day (top) and 10-day (bottom) VaR
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10), sharex=False)
plot_multiday_violations(ax1, var_5d, 5)
plot_multiday_violations(ax2, var_10d, 10, show_xlabel=True)

plt.tight_layout()
plt.show()

In [None]:
# Historical VaR and ES over time, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(12, 6))

# Each cell holds [95% value, 99% value]; split them into one series per level
var_95 = [pair[0] for pair in var_results_df['VaR Historical']]
var_99 = [pair[1] for pair in var_results_df['VaR Historical']]
es_95 = [pair[0] for pair in es_results_df['ES Historical']]
es_99 = [pair[1] for pair in es_results_df['ES Historical']]

# VaR as solid lines
for values, label, colour in ((var_95, 'VaR Historical 95%', 'blue'),
                              (var_99, 'VaR Historical 99%', 'red')):
    ax.plot(var_results_df['Date'], values, label=label, color=colour)

# ES as dashed lines
for values, label, colour in ((es_95, 'ES Historical 95%', 'green'),
                              (es_99, 'ES Historical 99%', 'orange')):
    ax.plot(es_results_df['Date'], values, label=label, color=colour, linestyle='--')

# Labels, title, legend and grid
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.set_title('Historical VaR and ES Over Time')
ax.legend()
ax.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Return distribution of every risk factor against fitted normal and Student-t densities.
# The four panels differ only in the data column and the title, so one helper draws them
# all. This also avoids the module-level loop variable `df` the copied loops left behind.

def plot_return_distribution(ax, returns, title, t_dofs=(3, 4, 5, 6)):
    """
    Draw a histogram of `returns` with matching normal and Student-t densities.

    Parameters:
    - ax: matplotlib Axes to draw on
    - returns: Series of returns (already free of NaN)
    - title: panel title
    - t_dofs: degrees of freedom of the Student-t curves; each curve is rescaled
      so that it has the same standard deviation as the sample
    """
    mu = returns.mean()
    sigma = returns.std()
    grid = np.linspace(mu - 4*sigma, mu + 4*sigma, 100)

    ax.hist(returns, bins=500, density=True, alpha=0.3, color='grey', label='Histogram')
    ax.plot(grid, st.norm.pdf(grid, mu, sigma), 'r-', lw=2, label='Normal')

    for dof in t_dofs:
        # A standard t variable has variance dof/(dof-2); rescale to the sample sigma
        scale = sigma / np.sqrt(dof/(dof-2))
        ax.plot(grid, st.t.pdf((grid-mu)/scale, dof)/scale, '--', lw=1, label=f't-dist (df={dof})')

    ax.set_title(title)
    ax.legend()
    ax.grid(True)


# One panel per risk factor, laid out row by row on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
panels = [
    ('C_S&P500_Returns', 'S&P500 Returns Distribution'),
    ('C_Dax40_Returns', 'DAX40 Returns Distribution'),
    ('C_Nikkei_Returns', 'NIKKEI Returns Distribution'),
    ('Interest_Bond_daily_rate', 'EU Bond Returns Distribution'),
]
for panel_ax, (column, panel_title) in zip(axes.flat, panels):
    plot_return_distribution(panel_ax, main_df[column].dropna(), panel_title)

plt.tight_layout()
plt.show()

## EWMA & Filtered Historical Simulation (FHS)

In [None]:
import pandas as pd
import numpy as np
from typing import Dict, List, Union

def compute_ewma_volatility(
    returns: pd.DataFrame, 
    lambdas: List[float] = [0.94, 0.97]
) -> Dict[str, pd.DataFrame]:
    """
    Compute EWMA volatility for each risk factor using different lambda values.

    The recursion is sigma_t^2 = lambda * sigma_{t-1}^2 + (1 - lambda) * r_{t-1}^2,
    so the volatility at time t only uses returns up to t-1. The first value of
    every column is seeded with that column's sample standard deviation.

    Parameters:
    ----------
    returns : pd.DataFrame
        DataFrame of returns (T x N); a 'Date' column, if present, is dropped
    lambdas : List[float], optional
        List of smoothing factors, by default [0.94, 0.97] (never modified)

    Returns:
    -------
    Dict[str, pd.DataFrame]
        Dictionary keyed 'lambda_<value>' of DataFrames containing the EWMA
        volatilities, with the same index and columns as `returns`

    Raises:
    ------
    TypeError
        If `returns` is not a pandas DataFrame
    ValueError
        If `returns` has fewer than two rows (no sample standard deviation)
    """
    # Input validation
    if not isinstance(returns, pd.DataFrame):
        raise TypeError("returns must be a pandas DataFrame")

    # Remove Date column if present
    if 'Date' in returns.columns:
        returns = returns.drop('Date', axis=1)

    n_obs, n_assets = returns.shape
    if n_obs < 2:
        raise ValueError("returns needs at least two rows to seed the EWMA recursion")

    # Pull the numbers out once; indexing the frame inside the loop is slow
    squared_returns = returns.to_numpy(dtype=float) ** 2

    # Seed: per-asset sample standard deviation over time. (Taking the std of the
    # first row would mix the assets and give every column the same start value.)
    initial_variance = returns.std(axis=0).to_numpy(dtype=float) ** 2

    results = {}

    for lambda_ in lambdas:
        ewma_variance = np.zeros((n_obs, n_assets))
        ewma_variance[0] = initial_variance

        # Apply the EWMA recursion in variance space, then take one square root
        for t in range(1, n_obs):
            ewma_variance[t] = lambda_ * ewma_variance[t-1] + (1 - lambda_) * squared_returns[t-1]

        # Store results in dictionary
        results[f'lambda_{lambda_}'] = pd.DataFrame(
            np.sqrt(ewma_variance),
            index=returns.index,
            columns=returns.columns
        )

    return results

# EWMA input: one return column per risk factor, indexed by Date.
# main_df is indexed by Date earlier in the notebook, so 'Date' cannot be selected as a
# column there; reset_index() makes the selection work whether Date is currently the
# index or a regular column.
risk_factor_columns = ['C_S&P500_Returns', 'C_Dax40_Returns',
                       'C_Nikkei_Returns', 'Interest_Bond_daily_rate']
returns_source = main_df if 'Date' in main_df.columns else main_df.reset_index()
returns_df = returns_source[['Date'] + risk_factor_columns].dropna().set_index('Date')

ewma_results = compute_ewma_volatility(returns_df)

# Preview the latest volatilities per lambda instead of dumping the whole dictionary
for lambda_key, vol_df in ewma_results.items():
    print(lambda_key)
    display(vol_df.tail())

In [None]:
def filter_returns(returns: pd.DataFrame, ewma_vol: dict) -> dict:
    """
    Standardize (filter) the returns by the EWMA volatility of every lambda.

    Parameters:
    - returns: DataFrame of raw returns
    - ewma_vol: Dictionary mapping a lambda key to a DataFrame of EWMA volatilities

    Returns:
    - Dictionary with the same keys; each value is `returns` divided element-wise
      by the volatility DataFrame of that lambda
    """
    return {lambda_key: returns / vol_df for lambda_key, vol_df in ewma_vol.items()}

# Standardize the returns with the EWMA volatilities computed above
# (requires returns_df and ewma_results to be defined)
filtered_returns_dict = filter_returns(returns_df, ewma_results)

# Preview the last rows for both lambda values
for lambda_value in ("0.94", "0.97"):
    print(f"\nFiltered Returns (lambda = {lambda_value}):")
    print(filtered_returns_dict[f"lambda_{lambda_value}"].tail())


In [None]:
def filtered_historical_simulation_multivariate(filtered_returns_dict: dict, ewma_vol_dict: dict,
                                              n_simulations: int = 10000, random_seed: int = None, 
                                              weights: np.ndarray = None) -> dict:
    """
    Perform Filtered Historical Simulation for a multi-asset portfolio for different lambda values.

    For every asset, standardized residuals are resampled with replacement and
    re-scaled by the asset's latest EWMA volatility.

    Parameters:
    - filtered_returns_dict: Dictionary of DataFrames of standardized residuals for each lambda
    - ewma_vol_dict: Dictionary of DataFrames of EWMA volatility for each lambda
    - n_simulations: number of simulated return vectors
    - random_seed: if given, seeds numpy's global random generator for reproducibility
    - weights: portfolio weights (numpy array of shape [n_assets])

    Returns:
    - Dictionary keyed by lambda: a Series of simulated portfolio returns when
      `weights` is given, otherwise a DataFrame of simulated returns per asset
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    results = {}

    # Process each lambda value
    for lambda_key, filtered_returns in filtered_returns_dict.items():
        ewma_vol = ewma_vol_dict[lambda_key]
        assets = filtered_returns.columns

        # Simulated return matrix (n_simulations x n_assets)
        sim_returns = np.zeros((n_simulations, len(assets)))

        # NOTE(review): residuals are drawn separately for every asset, so the
        # simulated vectors do not keep the same-day dependence between assets.
        for i, asset in enumerate(assets):
            if asset == 'Date':
                continue  # a Date column carries no returns; its simulated column stays 0
            z_asset = filtered_returns[asset].dropna().values
            z_star = np.random.choice(z_asset, size=n_simulations, replace=True)
            sigma_t = ewma_vol[asset].iloc[-1]  # latest volatility for asset
            sim_returns[:, i] = sigma_t * z_star  # re-scale

        if weights is not None:
            portfolio_simulated_returns = sim_returns @ weights
            results[lambda_key] = pd.Series(portfolio_simulated_returns, 
                                          name=f"Simulated_Portfolio_Returns_{lambda_key}")
        else:
            results[lambda_key] = pd.DataFrame(sim_returns, columns=assets)

    return results

# Portfolio weights, in the same order as the DataFrame columns
weights = np.array([0.4, 0.3, 0.15, 0.15])  # Example: S&P500, DAX, Nikkei, Bond rate

# Multivariate FHS simulation for both lambda values
simulated_returns = filtered_historical_simulation_multivariate(
    filtered_returns_dict, ewma_results, n_simulations=10000, random_seed=42, weights=weights
)

# VaR and ES at 95% and 99% for each lambda value, reported as they are computed
confidence_levels = [0.95, 0.99]
results = {}

for lambda_key, sim_returns in simulated_returns.items():
    print(f"\nResults for {lambda_key}:")
    for cl in confidence_levels:
        level_pct = int(cl * 100)
        # VaR: negated lower-tail quantile of the simulated returns
        var = -np.percentile(sim_returns, (1 - cl) * 100)
        # ES: negated mean of the simulated returns at or below -VaR
        es = -sim_returns[sim_returns <= -var].mean()
        results[f"{lambda_key}_VaR_{level_pct}"] = var
        results[f"{lambda_key}_ES_{level_pct}"] = es
        print(f"Portfolio 1-day VaR ({level_pct}%): {var:.5f}")
        print(f"Portfolio 1-day ES  ({level_pct}%): {es:.5f}")



In [None]:
def filtered_historical_simulation_multiday(
    filtered_returns_dict: dict,
    ewma_vol_dict: dict,
    lambda_key: str,
    n_days: int = 1,
    n_simulations: int = 10000,
    random_seed: int = None,
    weights: np.ndarray = None
) -> pd.Series:
    """
    Simulate N-day portfolio returns using Filtered Historical Simulation.

    For every asset, standardized residuals are resampled with replacement and
    re-scaled by the asset's most recent EWMA volatility. The daily weighted
    portfolio returns are then summed over ``n_days``.

    Parameters:
    - filtered_returns_dict: maps lambda keys to DataFrames of standardized
      (volatility-filtered) returns, one column per asset
    - ewma_vol_dict: maps lambda keys to DataFrames of EWMA volatilities with
      the same columns; only the last row is used
    - lambda_key: which entry of the two dictionaries to simulate from
    - n_days: number of days summed into each simulated path
    - n_simulations: number of simulated paths
    - random_seed: if given, seeds NumPy's global random state before sampling
    - weights: 1-D array of portfolio weights, one per asset column (required)

    Returns:
    - Simulated N-day portfolio return series (n_simulations,)

    Raises:
    - ValueError: if ``weights`` is missing or has the wrong length, or if an
      asset has no residuals or a NaN latest volatility.
    """
    filtered_returns = filtered_returns_dict[lambda_key]
    ewma_vol = ewma_vol_dict[lambda_key]

    assets = filtered_returns.columns
    n_assets = len(assets)

    # The signature defaults `weights` to None, but the aggregation below cannot
    # work without them; fail early with an actionable message.
    if weights is None:
        raise ValueError("weights must be provided: one portfolio weight per asset column.")
    weights = np.asarray(weights, dtype=float)
    if weights.shape != (n_assets,):
        raise ValueError(
            f"weights must be a 1-D array of length {n_assets}, got shape {weights.shape}."
        )

    if random_seed is not None:
        np.random.seed(random_seed)

    sim_returns = np.zeros((n_simulations, n_days, n_assets))

    # NOTE: every asset (and every day) draws its residuals independently, so the
    # cross-asset dependence of the historical residuals is not preserved, and the
    # volatility is held at its latest value over the whole horizon.
    for i, asset in enumerate(assets):
        z_asset = filtered_returns[asset].dropna().values
        sigma_t = ewma_vol[asset].iloc[-1]  # latest volatility for this asset

        if len(z_asset) == 0 or np.isnan(sigma_t):
            raise ValueError(f"Cannot simulate for asset '{asset}': empty or invalid data.")

        for day in range(n_days):
            z_star = np.random.choice(z_asset, size=n_simulations, replace=True)
            sim_returns[:, day, i] = sigma_t * z_star  # re-scale to return units

    # Weighted daily portfolio returns, summed over the horizon
    total_pnl = (sim_returns @ weights).sum(axis=1)

    return pd.Series(total_pnl, name=f"Simulated_{n_days}Day_Returns_{lambda_key}")


# Multi-day FHS for both lambda values: print VaR and ES per horizon
confidence_levels = [0.95, 0.99]
horizons = [1, 5, 10]
lambda_keys = ['lambda_0.94', 'lambda_0.97']

for lambda_key in lambda_keys:
    print(f"\n=== Results for {lambda_key} ===")

    for days in horizons:
        print(f"\n--- {days}-Day VaR & ES ---")

        # The same seed is used for every (lambda, horizon) combination
        sim_returns = filtered_historical_simulation_multiday(
            filtered_returns_dict,
            ewma_results,
            lambda_key,
            weights=weights,
            n_days=days,
            random_seed=42,
            n_simulations=10000,
        )

        for cl in confidence_levels:
            alpha = 1 - cl
            percentile = alpha * 100
            # Both measures are reported as positive losses
            var = -np.percentile(sim_returns, percentile)
            es = -sim_returns[sim_returns <= -var].mean()
            print(f"VaR ({int(cl*100)}%): {var:.5f} | ES ({int(cl*100)}%): {es:.5f}")


In [None]:
def rolling_fhs_multiday_var_es(
    returns_df: pd.DataFrame,
    weights: np.ndarray,
    window_size: int = 500,
    horizons: list = [1, 5, 10],
    confidence_levels: list = [0.95, 0.99],
    n_simulations: int = 1000,
    lambdas: list = [0.94, 0.97],
    random_seed: int = None
) -> tuple:
    """
    Rolling Filtered Historical Simulation for multi-day VaR & ES.

    For every horizon and every date in the fixed 2012-01-05..2021-12-31 sample,
    EWMA volatilities are estimated on the preceding ``window_size / horizon``
    observations, returns are standardized, and
    ``filtered_historical_simulation_multiday`` simulates horizon-day portfolio
    returns from which VaR and ES are read off.

    Parameters:
    - returns_df: date-indexed DataFrame holding the four asset return columns
    - weights: portfolio weights in the order of those columns
    - window_size: base estimation window; divided by the horizon per horizon
    - horizons: holding periods in days (the default list is never mutated)
    - confidence_levels: e.g. [0.95, 0.99]
    - n_simulations: simulated paths per window
    - lambdas: EWMA decay factors passed to ``compute_ewma_volatility``
    - random_seed: forwarded to the simulator; the same seed re-seeds every window

    Returns:
    - (var_df, es_df): DataFrames indexed by 'Date' with one row per date and
      columns ``VaR_<cl>_<lambda_key>_h<horizon>`` / ``ES_<cl>_<lambda_key>_h<horizon>``.
      Values are positive losses, the same convention as the other FHS cells and
      as expected by the plotting helper, which negates them. Dates not covered
      by a horizon hold NaN in that horizon's columns.
    """
    import warnings  # local import so the cell does not depend on another cell's imports

    asset_columns = [
        'C_S&P500_Returns', 'C_Dax40_Returns', 'C_Nikkei_Returns', 'Interest_Bond_daily_rate'
    ]
    # Selecting the columns up front makes a missing column fail loudly once,
    # instead of being swallowed for every single window.
    time_window = returns_df.loc['2012-01-05':'2021-12-31'][asset_columns]

    # One record per date; horizons and lambdas add columns to the same record.
    var_by_date = {}
    es_by_date = {}
    failed_windows = 0
    last_error = None

    for horizon in horizons:
        adjusted_window_size = int(window_size / horizon)

        for t in range(adjusted_window_size, len(time_window)):
            current_date = time_window.index[t]
            # Estimation sample: the observations strictly before `current_date`
            window = time_window.iloc[t - adjusted_window_size:t]

            try:
                # Compute EWMA vol
                ewma_results = compute_ewma_volatility(window, lambdas)

                # Filtered returns; zero volatilities are replaced by the last
                # non-zero value to avoid division by zero
                filtered_returns_dict = {
                    lambda_key: window / vol_df.replace(0, np.nan).ffill()
                    for lambda_key, vol_df in ewma_results.items()
                }

                var_row = {}
                es_row = {}
                for lambda_key in ewma_results.keys():
                    sim_returns = filtered_historical_simulation_multiday(
                        filtered_returns_dict,
                        ewma_results,
                        lambda_key,
                        n_days=horizon,
                        n_simulations=n_simulations,
                        weights=weights,
                        random_seed=random_seed
                    )

                    for cl in confidence_levels:
                        alpha = 1 - cl
                        # Positive-loss convention
                        var = -np.percentile(sim_returns, 100 * alpha)
                        es = -sim_returns[sim_returns <= -var].mean()

                        var_row[f"VaR_{int(cl * 100)}_{lambda_key}_h{horizon}"] = var
                        es_row[f"ES_{int(cl * 100)}_{lambda_key}_h{horizon}"] = es

            except Exception as e:
                # Best effort: skip windows that cannot be simulated, but keep
                # track of them so the failures are reported below.
                failed_windows += 1
                last_error = e
                continue

            # Store once per (date, horizon), only after every lambda succeeded
            var_by_date.setdefault(current_date, {}).update(var_row)
            es_by_date.setdefault(current_date, {}).update(es_row)

    if failed_windows:
        warnings.warn(
            f"rolling_fhs_multiday_var_es skipped {failed_windows} window(s); "
            f"last error: {last_error!r}"
        )

    def _to_frame(rows_by_date):
        # Also valid when no window succeeded: returns an empty, Date-indexed frame
        return pd.DataFrame(
            list(rows_by_date.values()),
            index=pd.Index(list(rows_by_date.keys()), name='Date')
        ).sort_index()

    return _to_frame(var_by_date), _to_frame(es_by_date)

In [None]:
import matplotlib.pyplot as plt

def plot_var_es_vs_actual_given_actuals(var_df, es_df, actual_returns_dict, lambdas, horizons, confidence_levels):
    """
    Draw one figure per (horizon, lambda, confidence level) combination showing
    the realised portfolio returns together with the negated VaR and ES series.

    Parameters:
    - var_df: DataFrame of rolling VaR values, columns named
      ``VaR_<cl>_lambda_<lambda>_h<horizon>``
    - es_df: DataFrame of rolling ES values, columns named
      ``ES_<cl>_lambda_<lambda>_h<horizon>``
    - actual_returns_dict: Dict of actual portfolio return Series keyed by horizon
    - lambdas: list of lambda values used (e.g. [0.94, 0.97])
    - horizons: list of horizon days (e.g. [1, 5, 10])
    - confidence_levels: list of confidence levels (e.g. [0.95, 0.99])
    """
    var_style = {'color': 'red', 'linestyle': '--'}
    es_style = {'color': 'orange', 'linestyle': ':'}

    for h in horizons:
        # Realised returns, aligned to the dates for which risk figures exist
        realised = actual_returns_dict[h].reindex(var_df.index)

        for lambda_ in lambdas:
            lambda_key = f"lambda_{lambda_}"

            for cl in confidence_levels:
                var_col = f"VaR_{int(cl * 100)}_{lambda_key}_h{h}"
                es_col = f"ES_{int(cl * 100)}_{lambda_key}_h{h}"

                plt.figure(figsize=(14, 5))
                plt.plot(realised, label="Actual Portfolio Return", alpha=0.6)
                # Risk figures are negated before being drawn next to the returns
                plt.plot(-var_df[var_col], label=f"VaR {int(cl * 100)}%", **var_style)
                plt.plot(-es_df[es_col], label=f"ES {int(cl * 100)}%", **es_style)
                plt.title(f"{h}-Day VaR and ES vs Actual Returns | λ={lambda_} | CL={int(cl*100)}%")
                plt.axhline(0, color='gray', linestyle='-')
                plt.legend()
                plt.grid(True)
                plt.tight_layout()
                plt.show()


In [None]:
def compute_actual_portfolio_returns(returns_df, weights, horizons=[1, 5, 10]):
    """
    Build the realised h-day portfolio return for every horizon.

    The weighted daily portfolio return is summed over a rolling window of
    ``h`` observations and shifted back by ``h - 1`` rows, so the value at a
    given row is the sum of that row and the following ``h - 1`` rows (the
    last ``h - 1`` rows are NaN).

    Parameters:
    - returns_df: DataFrame of asset returns, one column per asset
    - weights: portfolio weights in column order
    - horizons: holding periods in days (the default list is not modified)

    Returns:
    - dict mapping each horizon to a Series named ``Actual_<h>d``
    """
    portfolio_daily = returns_df @ weights

    realised_by_horizon = {}
    for horizon in horizons:
        forward_sum = portfolio_daily.rolling(window=horizon).sum().shift(1 - horizon)
        forward_sum.name = f"Actual_{horizon}d"
        realised_by_horizon[horizon] = forward_sum

    return realised_by_horizon

# Realised 1-, 5- and 10-day portfolio returns (default horizons), keyed by horizon.
# NOTE(review): `returns_df` is not defined in this cell; it must already exist in the kernel.
actual_returns_dict = compute_actual_portfolio_returns(returns_df, weights)
# Side-by-side table of the realised returns, one column per horizon
returns_dfe = pd.DataFrame(actual_returns_dict)
display(returns_dfe)

In [None]:
# Calculate VaR and ES using rolling FHS
var_df, es_df = rolling_fhs_multiday_var_es(
    returns_df=returns_df,  # DataFrame containing returns
    weights=weights,        # Portfolio weights
    window_size=500,       # Base window size
    horizons=[1, 5, 10],   # Horizons for VaR calculation
    confidence_levels=[0.95, 0.99],  # Confidence levels
    n_simulations=1000,    # Number of simulations
    lambdas=[0.94, 0.97]   # EWMA lambda values
)

# Plot the results
plot_var_es_vs_actual_given_actuals(
    var_df=var_df,
    es_df=es_df,
    actual_returns_dict=actual_returns_dict,
    lambdas=[0.94, 0.97],
    horizons=[1, 5, 10],
    confidence_levels=[0.95, 0.99]
)


## GARCH(1,1) with constant conditional correlation 

In [None]:
from scipy.optimize import minimize

#maximum likelihood estimation of GARCH(1,1) parameters
def garch_likelihood(params, returns):
    """
    Negative Gaussian log-likelihood of a zero-mean GARCH(1,1) model.

    The conditional variance follows
    ``var[t] = omega + alpha * returns[t-1]**2 + beta * var[t-1]`` and is
    initialised at the unconditional variance ``omega / (1 - alpha - beta)``.
    The likelihood contributions of observations 1..T-1 are summed; the value
    is meant to be minimised.

    Parameters:
    - params: sequence (omega, alpha, beta)
    - returns: 1-D array-like of returns (a pandas Series is accepted and is
      read positionally, regardless of its index)

    Returns:
    - the negative log-likelihood as a float (0.0 for empty input)
    """
    omega, alpha, beta = params
    # Positional access: a Series with a date index or with gaps after dropna()
    # must not be looked up by label.
    returns = np.asarray(returns, dtype=float)
    T = len(returns)
    if T == 0:
        return 0.0

    var = np.zeros(T)
    var[0] = omega / (1 - alpha - beta)
    ll = 0.0
    # Start at t = 1 so that var[0] actually seeds the recursion; starting at 2
    # would leave var[1] at zero and drop the initial variance.
    for t in range(1, T):
        var[t] = omega + alpha * returns[t-1]**2 + beta * var[t-1]
        ll += 0.5 * (np.log(2 * np.pi) + np.log(var[t]) + returns[t]**2 / var[t])
    return ll


In [None]:
# GARCH(1,1) parameter estimation using MLE
# This function estimates the parameters of a GARCH(1,1) model using maximum likelihood estimation (MLE).
def parameter_estimation_GARCH(returns):
    """
    Estimate GARCH(1,1) parameters (omega, alpha, beta) by minimising
    ``garch_likelihood`` with SLSQP.

    Parameters:
    - returns: 1-D array-like of returns; a pandas Series is accepted and is
      converted to a plain array so it is read by position

    Returns:
    - the ``scipy.optimize`` result object; the estimates are in ``result.x``
      and ``result.success`` / ``result.message`` describe convergence
    """
    # The likelihood indexes its input with integers. A Series that went through
    # dropna() (or has a date index) would be looked up by label instead.
    returns = np.asarray(returns, dtype=float)

    # Run the optimization
    result = minimize(
        garch_likelihood,
        x0=[0.02, 0.13, 0.86],
        args=(returns,),
        method='SLSQP',
        # omega > 0, alpha and beta within [0, 0.99]
        bounds=[(1e-6, None), (0, 0.99), (0, 0.99)],
        constraints=[
            # alpha + beta <= 0.999 keeps the unconditional variance finite
            {'type': 'ineq', 'fun': lambda x: 0.999 - x[1] - x[2]}
        ],
        options={'disp': True}
    )

    # Return the optimization result
    return result
# result = parameter_estimation_GARCH(main_df['Portfolio_Daily_Returns'].dropna())
# Check the optimization result
# if result.success:
#     print(f"Optimized parameters: omega={result.x[0]}, alpha={result.x[1]}, beta={result.x[2]}")
# else:
#     print("Optimization failed:", result.message)


In [None]:
# calculate the GARCH(1,1) volatility for returns t
def garch_volatility(returns, params=(0.000002, 0.13, 0.86)):
    """
    GARCH(1,1) conditional volatility path for a return series.

    The variance is initialised at the unconditional level
    ``omega / (1 - alpha - beta)`` and updated with
    ``var[t] = omega + alpha * returns[t-1]**2 + beta * var[t-1]``.

    Parameters:
    - returns: 1-D array-like of returns (read positionally; a pandas Series
      with any index is accepted)
    - params: (omega, alpha, beta); defaults to the fixed values this notebook
      uses instead of estimated ones

    Returns:
    - NumPy array of conditional volatilities (square roots of the variances),
      same length as ``returns``

    Raises:
    - ValueError: if ``alpha + beta >= 1`` (no finite unconditional variance)
    """
    omega, alpha, beta = params
    if alpha + beta >= 1:
        raise ValueError("alpha + beta must be below 1 for a finite unconditional variance.")

    returns = np.asarray(returns, dtype=float)
    T = len(returns)
    var = np.zeros(T)
    if T == 0:
        return var

    var[0] = omega / (1 - alpha - beta)
    # Start at t = 1: starting at 2 would leave var[1] at zero and discard the
    # initial variance from the recursion.
    for t in range(1, T):
        var[t] = omega + alpha * returns[t-1]**2 + beta * var[t-1]
    return np.sqrt(var)

In [None]:
# calculate correlation matrix of the 4 time series
# the method must take the returns as matrix of 4 vectors and return the correlation matrix
def correlation_matrix(returns):
    """
    Sample correlation matrix of the columns of ``returns``.

    Parameters:
    - returns: 2-D array-like with one row per observation and one column per
      series

    Returns:
    - square array whose (i, j) entry is the covariance of columns i and j
      divided by the product of their standard deviations
    """
    # np.cov expects one series per row, hence the transpose
    sigma = np.cov(returns.T)
    scale = np.sqrt(np.diag(sigma))
    return sigma / np.outer(scale, scale)


In [None]:
# calculate the diagonal matrix with the garch volatility of the 4 time series
def diagonal_matrix(returns):
    """
    Latest GARCH(1,1) volatility of every column of ``returns``.

    Despite the name, the result is a 1-D array (the would-be diagonal), not a
    diagonal matrix.

    Parameters:
    - returns: DataFrame with one return series per column

    Returns:
    - NumPy array holding, per column and in column order, the last element of
      that column's ``garch_volatility`` path
    """
    latest_vols = []
    for name in returns.columns:
        # Drop missing values and renumber from 0 so the series is read by position
        series = returns[name].dropna().reset_index(drop=True)
        latest_vols.append(garch_volatility(series)[-1])

    return np.array(latest_vols)

In [None]:
# calculate the covariance matrix of the 4 time series
def covariance_matrix(returns, corr_matrix):
    """
    Combine the latest GARCH volatilities with a fixed correlation matrix.

    Parameters:
    - returns: DataFrame of returns, one column per asset
    - corr_matrix: correlation matrix in the same asset order

    Returns:
    - matrix with entries ``vol_i * vol_j * corr_ij``
    """
    sigma = diagonal_matrix(returns)
    # Element-wise product of the volatility outer product and the correlations
    return np.outer(sigma, sigma) * corr_matrix

In [None]:
# method to calculate the portfolio variance and volatility
def portfolio_variance(weights, cov_matrix):
    """
    Portfolio variance and volatility implied by a covariance matrix.

    Parameters:
    - weights: array of portfolio weights
    - cov_matrix: covariance matrix in the same asset order

    Returns:
    - (variance, volatility), where variance is ``w' * Cov * w`` and volatility
      is its square root
    """
    variance = weights.T @ (cov_matrix @ weights)
    return variance, np.sqrt(variance)

In [None]:
# method to calculate the VaR and ES of the portfolio using the normal distribution
def portfolio_VaR_ES(weights, cov_matrix, alpha=0.99):
    """
    Zero-mean normal VaR and ES of the portfolio, expressed as returns.

    Both figures are negative numbers: they are return thresholds, not losses.

    Parameters:
    - weights: array of portfolio weights
    - cov_matrix: covariance matrix in the same asset order
    - alpha: confidence level (e.g. 0.99), not the tail probability

    Returns:
    - (VaR, ES)
    """
    sigma_p = portfolio_variance(weights, cov_matrix)[1]
    tail_prob = 1 - alpha

    # VaR: volatility times the alpha-quantile of the standard normal, negated
    VaR = -sigma_p * st.norm.ppf(alpha)

    # ES: volatility times the normal density at the tail quantile over the tail probability
    ES = -sigma_p * (st.norm.pdf(st.norm.ppf(tail_prob)) / tail_prob)

    return VaR, ES

In [None]:
def count_violations(returns, VaR, ES):
    """
    Share of observations below the VaR threshold and their average return.

    Parameters:
    - returns: array-like of realised returns
    - VaR: threshold(s) on the return scale; if a list is passed, only its
      first element is used
    - ES: accepted for signature compatibility; not used

    Returns:
    - (violation_rate, mean_violating_return): the fraction of returns strictly
      below VaR, and the mean of those returns (0 when there are none)
    """
    threshold = VaR[0] if isinstance(VaR, list) else VaR

    # Fraction of observations that fall strictly below the threshold
    violation_rate = np.sum(returns < threshold) / len(returns)

    # Average realised return on the violating observations
    breaches = returns[returns < threshold]
    mean_breach = 0 if len(breaches) == 0 else np.mean(breaches)

    return violation_rate, mean_breach

In [None]:
# main function to run the analysis
def main_analysis(time_window_size):
    """
    Rolling GARCH(1,1) / constant-correlation VaR and ES for the portfolio.

    For every date after the first ``time_window_size`` observations of the
    global ``main_df``, the covariance matrix is rebuilt from the preceding
    window, normal VaR and ES are derived from it, the series are plotted
    against the daily portfolio returns, and the violation rate is printed.

    Parameters:
    - time_window_size: number of observations in each rolling window

    Returns:
    - DataFrame with columns 'Date', 'Portfolio VaR', 'Portfolio ES' and
      'Portfolio Daily Returns', one row per evaluated date

    Notes:
    - Reads the global ``main_df``, which must contain the four asset return
      columns and 'Portfolio_Daily_Returns'.
    - The correlation matrix is estimated once on the whole dataset, so each
      window uses correlations that include later observations.
    """
    asset_columns = ['C_S&P500_Returns', 'C_Dax40_Returns', 'C_Nikkei_Returns', 'Interest_Bond_daily_rate']
    # Portfolio weights, in the order of `asset_columns`
    weights = np.array([0.4, 0.3, 0.15, 0.15])

    # The loading cell turns 'Date' into the index; accept both layouts instead
    # of assuming a 'Date' column exists.
    if 'Date' in main_df.columns:
        all_dates = list(main_df['Date'])
    else:
        all_dates = list(main_df.index)

    corr_matrix = correlation_matrix(main_df[asset_columns].dropna())

    portfolio_VaR_list = []
    portfolio_ES_list = []
    dates = []

    # Iterate through the dataset with a rolling time window
    for i in range(time_window_size, len(main_df)):
        # Returns of the `time_window_size` observations preceding row i
        returns = main_df.iloc[i - time_window_size:i][asset_columns].dropna()

        # Latest GARCH volatilities combined with the fixed correlations
        cov_matrix = covariance_matrix(returns, corr_matrix)

        VaR, ES = portfolio_VaR_ES(weights, cov_matrix)

        portfolio_VaR_list.append(VaR)
        portfolio_ES_list.append(ES)
        dates.append(all_dates[i])

    # Realised returns on the evaluated dates, renumbered from 0 so they line
    # up with the result lists
    portfolio_returns = main_df['Portfolio_Daily_Returns'].iloc[time_window_size:].reset_index(drop=True)

    results_df = pd.DataFrame({
        'Date': dates,
        'Portfolio VaR': portfolio_VaR_list,
        'Portfolio ES': portfolio_ES_list,
        'Portfolio Daily Returns': portfolio_returns
    })

    # Plot the results
    plt.figure(figsize=(12, 6))
    plt.plot(results_df['Date'], results_df['Portfolio VaR'], label='Portfolio VaR', color='red')
    plt.plot(results_df['Date'], results_df['Portfolio ES'], label='Portfolio ES', color='blue')
    plt.plot(all_dates, main_df['Portfolio_Daily_Returns'], label='Portfolio Daily Returns', color='green', alpha=0.5)
    plt.title('Portfolio VaR, ES, and Daily Returns Over Time')
    plt.xlabel('Date')
    plt.ylabel('Value')
    plt.legend()
    plt.grid()
    plt.show()

    # Compare each day's return with that day's VaR (both Series share a 0..n-1 index)
    VaR_violations, ES_violations = count_violations(portfolio_returns, results_df['Portfolio VaR'], results_df['Portfolio ES'])
    print(f"VaR Violations: {VaR_violations:.2%}")
    print(f"Mean return at violation: {ES_violations}")

    return results_df

# Number of observations in each rolling estimation window
time_window_size = 500

# Run the rolling GARCH analysis; the returned DataFrame is the cell output
main_analysis(time_window_size)

In [None]:
# Quick inspection of the main dataframe: its column names and its first 3 rows
print(main_df.columns)
print(main_df.head(3))


In [None]:
# Checking the assumption of taking mean = 0

# Return column of each portfolio constituent, keyed by the label of its weight
asset_return_columns = {
    'S&P500': 'C_S&P500_Returns',
    'DAX40': 'C_Dax40_Returns',
    'NIKKEI': 'C_Nikkei_Returns',
    'EU-BOND': 'Interest_Bond_daily_rate',
}

# `weights` is a positional NumPy array in the FHS cells (order: S&P500, DAX,
# Nikkei, bond), so looking it up by label fails on a fresh Run All. Accept
# both a label-keyed mapping and a positional array.
if isinstance(weights, dict):
    portfolio_weights = {label: weights[label] for label in asset_return_columns}
else:
    portfolio_weights = dict(zip(asset_return_columns, np.asarray(weights, dtype=float)))

# Calculate portfolio daily returns
main_df['Portfolio_Daily_Returns'] = sum(
    portfolio_weights[label] * main_df[column]
    for label, column in asset_return_columns.items()
)

# The loading cell turns 'Date' into the index; work with either layout
if 'Date' in main_df.columns:
    observation_dates = main_df['Date'].reset_index(drop=True)
else:
    observation_dates = pd.Series(main_df.index)

# make a graph of the portfolio daily returns
plt.figure(figsize=(12, 6))
plt.plot(observation_dates, main_df['Portfolio_Daily_Returns'], label='Portfolio Daily Returns', color='blue')
plt.title('Portfolio Daily Returns Over Time')
plt.xlabel('Date')
plt.ylabel('Daily Returns')
plt.legend()
plt.grid()
#plt.show()

# Mean return of every asset over a moving date range
# Define the sample size
sample_size = 500

# Output column name of the mean, per return column
mean_return_labels = {
    'C_S&P500_Returns': 'Mean S&P500 Returns',
    'C_Dax40_Returns': 'Mean DAX40 Returns',
    'C_Nikkei_Returns': 'Mean Nikkei Returns',
    'Interest_Bond_daily_rate': 'Mean EU Bond Returns',
}
rolling_mean_records = {column: [] for column in mean_return_labels}

# Loop through the data with a fixed sample size
for i in range(sample_size, len(main_df)):
    time_range_start = observation_dates.iloc[i - sample_size]
    time_range_end = observation_dates.iloc[i]
    # Both end points are included in the range
    in_range = ((observation_dates >= time_range_start) & (observation_dates <= time_range_end)).to_numpy()
    window_means = main_df.loc[in_range, list(mean_return_labels)].mean()

    for column, label in mean_return_labels.items():
        rolling_mean_records[column].append({
            'Start Date': time_range_start,
            'End Date': time_range_end,
            label: window_means[column]
        })

# Keep the per-asset names used elsewhere in the notebook
mean_sp500_returns_list = rolling_mean_records['C_S&P500_Returns']
mean_dax40_returns_list = rolling_mean_records['C_Dax40_Returns']
mean_nikkei_returns_list = rolling_mean_records['C_Nikkei_Returns']
mean_eu_bond_returns_list = rolling_mean_records['Interest_Bond_daily_rate']

# Convert the results to DataFrames for easier analysis
mean_sp500_returns_df = pd.DataFrame(mean_sp500_returns_list)
mean_dax40_returns_df = pd.DataFrame(mean_dax40_returns_list)
mean_nikkei_returns_df = pd.DataFrame(mean_nikkei_returns_list)
mean_eu_bond_returns_df = pd.DataFrame(mean_eu_bond_returns_list)

# Display the results
display(mean_sp500_returns_df.head())
display(mean_dax40_returns_df.head())
display(mean_nikkei_returns_df.head())
display(mean_eu_bond_returns_df.head())

# Plot the mean returns for each stock over time
plt.figure(figsize=(12, 6))
for frame, label, color in [
    (mean_sp500_returns_df, 'Mean S&P500 Returns', 'blue'),
    (mean_dax40_returns_df, 'Mean DAX40 Returns', 'orange'),
    (mean_nikkei_returns_df, 'Mean Nikkei Returns', 'green'),
    (mean_eu_bond_returns_df, 'Mean EU Bond Returns', 'red'),
]:
    plt.plot(frame['End Date'], frame[label], label=label, color=color)
plt.title('Mean Returns Over Time for Each Stock')
plt.xlabel('End Date')
plt.ylabel('Mean Returns')
plt.legend()
plt.grid()
plt.show()

# Backtesting VaR and ES

In this section, we perform backtesting on the calculated Value at Risk (VaR) and Expected Shortfall (ES) measures. Backtesting helps assess the accuracy and reliability of the risk models.

We will:
1.  **Calculate Violations:** Identify the days where the actual portfolio loss exceeded the predicted VaR.
2.  **Compare Actual vs. Expected Violations (VaR):** Group violations by year and compare the observed number of violations against the number expected based on the confidence level (alpha).
3.  **Compare Actual Shortfall vs. Predicted ES (ES):** For the days a violation occurred, compare the average actual loss (shortfall) against the predicted ES, grouped by year.
4.  **Visualize Violations:** Plot the occurrences of violations over time to visually inspect for clustering or patterns.

In [None]:
def calculate_violations(actual_losses, var_predictions):
    """Checks for VaR violations.

    Returns the element-wise result of ``actual_losses > var_predictions``:
    True where the loss is strictly greater than the VaR prediction.
    Both inputs are on the loss scale (larger means worse).
    """
    return actual_losses > var_predictions

def backtest_var(violations, alpha, dates):
    """Compares actual vs. expected VaR violations yearly.

    Parameters:
    - violations: boolean (or 0/1) indicator per observation; a list, array or
      pandas Series
    - alpha: confidence level of the VaR (e.g. 0.99); the expected violation
      rate is ``1 - alpha``
    - dates: dates of the observations, used as the index unless ``violations``
      is already a Series whose index is named 'Date'

    Returns:
    - DataFrame indexed by year with columns 'Actual Violations',
      'Expected Violations' and 'Total Observations'
    """
    if not isinstance(violations, pd.Series):
        violations = pd.Series(violations, index=pd.DatetimeIndex(dates, name='Date'))
    elif violations.index.name != 'Date':
        # A Series has no set_index(); rebuild it on the supplied dates instead
        violations = pd.Series(violations.to_numpy(), index=pd.DatetimeIndex(dates, name='Date'))

    violations_df = pd.DataFrame({'Violations': violations, 'Year': violations.index.year})
    yearly_violations = violations_df.groupby('Year')['Violations'].sum()
    yearly_counts = violations_df.groupby('Year')['Violations'].count()

    # Expected number of violations given the number of observations in the year
    expected_violations = yearly_counts * (1 - alpha)

    summary = pd.DataFrame({
        'Actual Violations': yearly_violations,
        'Expected Violations': expected_violations,
        'Total Observations': yearly_counts
    })
    return summary

def backtest_es(actual_losses, violations, es_predictions, dates):
    """Compares actual average shortfall vs. predicted ES yearly.

    Parameters:
    - actual_losses: realised losses per observation
    - violations: boolean indicator per observation (True = VaR violated)
    - es_predictions: predicted ES per observation
    - dates: dates of the observations; used as the index for every input that
      is not already a Series with an index named 'Date'

    Returns:
    - DataFrame indexed by year, restricted to violation days, with columns
      'Avg Actual Shortfall', 'Avg Predicted ES' and 'Violation Count'
    """
    def _on_dates(values):
        # Bring one input onto a 'Date' index. A Series has no set_index(), so
        # a Series on another index is rebuilt from its values.
        if isinstance(values, pd.Series):
            if values.index.name == 'Date':
                return values
            values = values.to_numpy()
        return pd.Series(values, index=pd.DatetimeIndex(dates, name='Date'))

    actual_losses = _on_dates(actual_losses)
    violations = _on_dates(violations)
    es_predictions = _on_dates(es_predictions)

    results_df = pd.DataFrame({
        'Actual_Loss': actual_losses,
        'Violation': violations,
        'Predicted_ES': es_predictions,
        'Year': actual_losses.index.year
    })

    # Keep only violation days; rows without a True flag are excluded
    violation_data = results_df[results_df['Violation'].eq(True)]

    # Calculate yearly averages
    by_year = violation_data.groupby('Year')
    summary = pd.DataFrame({
        'Avg Actual Shortfall': by_year['Actual_Loss'].mean(),
        'Avg Predicted ES': by_year['Predicted_ES'].mean(),
        'Violation Count': by_year.size()
    })
    return summary

def plot_violations(violations, dates, title):
    """Draw the violation indicator against time as red dots.

    Parameters:
    - violations: indicator per observation (1/True = violation)
    - dates: x-axis values, one per observation
    - title: figure title
    """
    marker_options = {'markersize': 4, 'alpha': 0.7, 'label': 'Violation'}

    plt.figure(figsize=(15, 4))
    plt.plot(dates, violations, 'ro', **marker_options)
    plt.title(title)
    plt.xlabel('Date')
    plt.ylabel('Violation (1=Yes, 0=No)')
    # Only the two indicator values are meaningful on the y-axis
    plt.yticks([0, 1])
    plt.grid(axis='y', linestyle='--')
    plt.show()

def run_backtesting(main_df, var_results_df, es_results_df):
    """Runs the backtesting process for VaR and ES models.

    For every model column of ``var_results_df`` and every confidence level in
    [0.95, 0.99], the function flags VaR violations, displays the yearly VaR
    and ES backtest summaries, and plots the violations over time.

    Parameters:
    - main_df: DataFrame with a 'Date' column and a 'Portfolio_loss' column
    - var_results_df: DataFrame with a 'Date' column plus one column per model;
      each cell is indexed as ``[pred_95, pred_99]``
    - es_results_df: same layout as ``var_results_df`` for the ES predictions

    Returns:
    - None; all results are printed, displayed or plotted.
    """
    # Align main_df with var/es results (which start after the initial window)
    # NOTE(review): 'Date' is read as a column here, while the loading cell sets
    # 'Date' as the index of main_df - confirm the layout of the frame passed in.
    backtest_dates = var_results_df['Date']
    backtest_data = main_df[main_df['Date'].isin(backtest_dates)].set_index('Date')
    actual_losses = backtest_data['Portfolio_loss']

    # Confidence levels used
    alphas = [0.95, 0.99]
    alpha_indices = {0.95: 0, 0.99: 1} # Position of each confidence level inside a prediction cell

    # Iterate through models (columns in var_results_df/es_results_df)
    var_model_cols = [col for col in var_results_df.columns if col != 'Date']
    es_model_cols = [col for col in es_results_df.columns if col != 'Date']

    for i, model_name in enumerate(var_model_cols):
        print(f"\n--- Backtesting for Model: {model_name} ---")
        
        # Extract predictions for this model
        # Need to handle the fact that predictions are stored as arrays [pred_95, pred_99]
        var_preds_list = var_results_df[model_name].tolist()
        # ES columns are paired with VaR columns by position, not by name
        if i < len(es_model_cols):
            es_preds_list = es_results_df[es_model_cols[i]].tolist()
        else:
            print(f"  Warning: No matching ES column found for {model_name}. Skipping ES backtest.")
            es_preds_list = None
        
        for alpha in alphas:
            alpha_idx = alpha_indices[alpha]
            print(f"\nConfidence Level: {alpha*100}%")
            
            # Extract predictions for the specific alpha
            # Cells that are not indexable (e.g. plain floats) raise TypeError, and
            # cells that are too short raise IndexError; both skip this alpha.
            try:
                var_predictions = pd.Series([p[alpha_idx] for p in var_preds_list], index=backtest_dates)
                if es_preds_list:
                    es_predictions = pd.Series([p[alpha_idx] for p in es_preds_list], index=backtest_dates)
                else:
                    es_predictions = None
            except (TypeError, IndexError) as e:
                print(f"  Error extracting predictions for alpha={alpha}: {e}. Skipping.")
                continue
                
            # 1. Calculate Violations
            # NOTE(review): this compares two Series; presumably every backtest date
            # must also be present in main_df for the indices to match - verify.
            violations = calculate_violations(actual_losses, var_predictions)
            
            # 2. Backtest VaR
            var_summary = backtest_var(violations, alpha, backtest_dates)
            print("\nVaR Backtest Summary (Yearly):")
            display(var_summary)
            
            # 3. Backtest ES
            if es_predictions is not None:
                es_summary = backtest_es(actual_losses, violations, es_predictions, backtest_dates)
                print("\nES Backtest Summary (Yearly):")
                display(es_summary)
            
            # 4. Plot Violations
            plot_violations(violations, backtest_dates, f'VaR Violations for {model_name} (alpha={alpha})')

In [None]:
# Call the backtesting function with the required dataframes
# NOTE(review): `var_results_df` and `es_results_df` are not created in the cells
# shown here (the rolling FHS cell produces `var_df` / `es_df`) - confirm where
# they are defined before running this cell on a fresh kernel.
run_backtesting(main_df, var_results_df, es_results_df)

### Backtesting Interpretation

Review the tables and plots above:

*   **VaR Backtest:** Compare 'Actual Violations' to 'Expected Violations' each year. Significant deviations might indicate issues with the VaR model's calibration. If actual violations consistently exceed expected, the model underestimates risk. If they are consistently lower, it might be too conservative.
*   **ES Backtest:** Compare 'Avg Actual Shortfall' to 'Avg Predicted ES'. If the actual average shortfall during violations is consistently higher than the predicted ES, the model underestimates the severity of tail losses.
*   **Violation Plots:** Look for patterns. Ideally, violations should be randomly distributed. Clustering of violations suggests the model fails to adapt quickly to changing market volatility (violation dependence).


# Stress Testing

For stress testing, the following extreme market moves are applied to measure their effect on the VaR and ES of the portfolio:

• Equity index values or stock prices changing by +/- 20% and +/- 40% of the current values

• Currencies moving by +/- 10% for major currencies and +/- 20% for other currencies

• Commodity prices changing by +/- 20% and +/- 40% of the current values

• Interest rates shifting by +/- 2% and +/- 3%

In [None]:
# Change 1: Equity index values or stock prices change by +/- 20% and +/- 40% of their current values.
# TODO: apply each change at random future dates, for at most 4 consecutive days, and repeat this 5 times.


In [None]:
# Change 2: Currencies move by +/- 10% for major currencies and +/- 20% for other currencies.
# TODO: apply each change at random future dates, for at most 4 consecutive days, and repeat this 5 times.

In [None]:
# Change 3: Commodity prices change by +/- 20% and +/- 40% of their current values.
# TODO: apply each change at random future dates, for at most 4 consecutive days, and repeat this 5 times.

In [None]:
# Change 4: Interest rates shift by +/- 2% and +/- 3%.
# TODO: apply each change at random future dates, for at most 4 consecutive days, and repeat this 5 times.