## Install dependencies

In [None]:
!pip install pandas matplotlib seaborn scipy statsmodels

## Load data

In [38]:
import pandas as pd

# Load the AEX price history, order it chronologically and append the simple
# daily return of the closing price. The first row has no previous close, so
# its 'Return' is NaN.
data = (
    pd.read_csv("./AEX_1995_2024.csv", parse_dates=['Date'])
    .sort_values('Date')
    .assign(Return=lambda prices: prices['Close'].pct_change())
)
data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Change,Return
0,1995-01-02,188.22,189.41,187.52,189.31,0,0.650000,
1,1995-01-03,189.08,189.80,188.85,189.15,0,-0.084517,-0.000845
2,1995-01-04,189.76,190.15,188.90,189.40,0,0.132170,0.001322
3,1995-01-05,189.30,189.76,187.94,188.17,0,-0.649419,-0.006494
4,1995-01-06,188.16,188.46,187.53,188.40,0,0.122230,0.001222
...,...,...,...,...,...,...,...,...
7655,2024-12-23,870.21,874.25,867.05,872.22,30458310,-0.367815,-0.003678
7656,2024-12-24,876.15,878.34,875.83,876.26,12311085,0.463186,0.004632
7657,2024-12-27,874.97,880.84,873.67,879.70,36976475,0.392578,0.003926
7658,2024-12-30,876.95,878.58,869.76,871.80,29741477,-0.898033,-0.008980


In [31]:
# Daily returns for the two calendar years under study (2005 and 2020),
# with missing values removed.
trade_year = data['Date'].dt.year
returns_2005 = data.loc[trade_year == 2005, 'Return'].dropna()
returns_2020 = data.loc[trade_year == 2020, 'Return'].dropna()

In [37]:
# Sanity check: number of 2005 return observations left after the year filter
# and dropna().
returns_2005.shape

(257,)

## 2g

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

# --- Histogram + KDE: both years overlaid on one density-scaled axis ---
fig_hist, ax_hist = plt.subplots(figsize=(12, 5))
hist_style = dict(kde=True, bins=40, stat='density', ax=ax_hist)
sns.histplot(returns_2005, color='blue', label='2005', **hist_style)
sns.histplot(returns_2020, color='red', label='2020', alpha=0.5, **hist_style)
ax_hist.legend()
ax_hist.set_title("AEX Daily Returns: 2005 vs 2020")
fig_hist.savefig("returns_histogram_kde.png", dpi=300, bbox_inches='tight')
plt.close(fig_hist)

# --- QQ plots against the normal distribution, one panel per year ---
fig_qq, qq_axes = plt.subplots(1, 2, figsize=(12, 5))
qq_panels = [("QQ plot 2005", returns_2005), ("QQ plot 2020", returns_2020)]
for ax_qq, (panel_title, sample) in zip(qq_axes, qq_panels):
    stats.probplot(sample, dist="norm", plot=ax_qq)
    # probplot sets its own default title; replace it with the per-year one.
    ax_qq.set_title(panel_title)
fig_qq.savefig("qq_plots.png", dpi=300, bbox_inches='tight')
plt.close(fig_qq)

# --- Boxplots: shown inline only (this figure is not written to disk) ---
fig_box, ax_box = plt.subplots()
df = pd.DataFrame({'2005': returns_2005, '2020': returns_2020})
sns.boxplot(data=df, palette=['blue', 'red'], ax=ax_box)
ax_box.set_title("Boxplot of Daily Returns")
plt.show()


## 2i

In [56]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import t

# Fit a Student-t distribution to each year's returns; scipy returns the
# parameters in the order (df, loc, scale).
params_2005, params_2020 = (t.fit(sample) for sample in (returns_2005, returns_2020))

print("2005 t-dist params (df, loc, scale):", params_2005)
print("2020 t-dist params (df, loc, scale):", params_2020)

# Evaluation grids spanning the observed range of each sample
x_2005 = np.linspace(returns_2005.min(), returns_2005.max(), 1000)
x_2020 = np.linspace(returns_2020.min(), returns_2020.max(), 1000)


def _draw_fit_panel(ax, sample, grid, fitted, n_bins, fill, line_fmt, tag, heading):
    """Draw a density histogram of `sample` with the fitted t pdf on top."""
    ax.hist(sample, bins=n_bins, density=True, alpha=0.5, color=fill, label=tag)
    ax.plot(grid, t.pdf(grid, *fitted), line_fmt, lw=2)
    ax.set_title(heading)
    ax.legend()


# Side-by-side panels sharing the y-axis so the two densities are comparable
fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharey=True)
_draw_fit_panel(axes[0], returns_2005, x_2005, params_2005, 35, 'blue', 'b-', '2005', "2005 Returns")
_draw_fit_panel(axes[1], returns_2020, x_2020, params_2020, 50, 'red', 'r-', '2020', "2020 Returns")

# Figure-level title, then write to disk and release the figure
fig.suptitle("Histogram with t-distribution Fit")
fig.savefig("t_distribution_fit_subplots.png", dpi=300, bbox_inches='tight')
plt.close(fig)

2005 t-dist params (df, loc, scale): (np.float64(7.191261640090092), np.float64(0.0009998928617651558), np.float64(0.005430937819900299))
2020 t-dist params (df, loc, scale): (np.float64(2.861398629214051), np.float64(0.0013893587640328854), np.float64(0.010678644084791956))


## 2j

In [58]:
# Left-tail probability for the Value-at-Risk (1%)
p = 0.01

# Freeze each fitted t-distribution, then take its p-quantile. The sign is
# flipped so that VaR is reported as a positive loss.
fitted_2005 = t(*params_2005)
fitted_2020 = t(*params_2020)
VaR_2005 = -fitted_2005.ppf(p)
VaR_2020 = -fitted_2020.ppf(p)

print("Parametric 1% VaR 2005:", VaR_2005)
print("Parametric 1% VaR 2020:", VaR_2020)



Parametric 1% VaR 2005: 0.015161856395033485
Parametric 1% VaR 2020: 0.0490281168179218


## 2k

In [35]:
# Empirical (non-parametric) VaR: the negated sample p-quantile of each year's
# returns, using the same tail probability `p` as the parametric estimate.
VaR_2005_np, VaR_2020_np = (
    -np.quantile(sample, p) for sample in (returns_2005, returns_2020)
)

print("Non-parametric 1% VaR 2005:", VaR_2005_np)
print("Non-parametric 1% VaR 2020:", VaR_2020_np)


Non-parametric 1% VaR 2005: 0.01625957251240904
Non-parametric 1% VaR 2020: 0.04270632365428852


## 2l

In [None]:
from scipy.stats import sem, t as t_dist

def mean_ci(data, alpha=0.05):
    """Return the sample mean and its two-sided t-based confidence interval.

    Parameters
    ----------
    data : sequence of numbers
        Sample whose mean is estimated.
    alpha : float, default 0.05
        Significance level; the interval has confidence level ``1 - alpha``.

    Returns
    -------
    tuple
        ``(mean, (lower, upper))`` where the bounds are
        ``mean -/+ t_{1 - alpha/2, n - 1} * standard_error``.
    """
    dof = len(data) - 1
    centre = np.mean(data)
    # Half-width: Student-t critical value times the standard error of the mean.
    half_width = sem(data) * t_dist.ppf(1 - alpha/2, dof)
    return centre, (centre - half_width, centre + half_width)

# 95% confidence interval (default alpha) for the mean daily return of each year;
# each result is (mean, (lower, upper)).
ci_2005, ci_2020 = map(mean_ci, (returns_2005, returns_2020))

print("2005 mean and 95% CI:", ci_2005)
print("2020 mean and 95% CI:", ci_2020)


2005 mean and 95% CI: (np.float64(0.0009038565513530295), (np.float64(0.0001213979207636696), np.float64(0.0016863151819423894)))
2020 mean and 95% CI: (np.float64(0.0002862756290058166), (np.float64(-0.0018991398256047488), np.float64(0.002471691083616382)))
