In [1]:
# Problem2
import pandas as pd
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import math
import numpy as np
from scipy.stats import norm, t
import scipy.stats as stats
import seaborn as sns
data = pd.read_csv("problem1.csv")
portfolio = pd.read_csv("portfolio.csv")
prices = pd.read_csv("DailyPrices.csv")

In [2]:
# Covariance Estimation
def multivariate_normal_simulation(covariance_matrix, n_samples, method='direct', mean = 0, explained_variance=1.0):
    if method == 'direct':      
        L = psd(covariance_matrix)
        normal_samples = np.random.normal(size=(covariance_matrix.shape[0], n_samples))       
        samples = np.transpose(np.dot(L, normal_samples) + mean)        
        return samples 
    elif method == 'pca':
        eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
        idx = eigenvalues > 1e-8
        eigenvalues = eigenvalues[idx]
        eigenvectors = eigenvectors[:, idx]
        idx = np.argsort(eigenvalues)[::-1]
        eigenvalues = eigenvalues[idx]
        eigenvectors = eigenvectors[:, idx]
        if explained_variance == 1.0:
            explained_variance = (np.cumsum(eigenvalues)/np.sum(eigenvalues))[-1]
        n_components = np.where((np.cumsum(eigenvalues)/np.sum(eigenvalues))>= explained_variance)[0][0] + 1
        eigenvectors = eigenvectors[:,:n_components]
        eigenvalues = eigenvalues[:n_components]
        normal_samples = np.random.normal(size=(n_components, n_samples))
        B = np.dot(eigenvectors, np.diag(np.sqrt(eigenvalues)))
        samples = np.transpose(np.dot(B, normal_samples))      
        return samples

In [3]:
# Non-PSD fixes for correlation matrix
def psd(a):
    n= a.shape[0]
    root = np.zeros((n,n))
    for j in range(n):
        s=0
        if j>0:
            s = root[j,:j].T @ root[j,:j]
        temp = a[j,j] - s
        if temp <= 0 and temp >= -1e-8:
            temp =0
        root[j,j] = math.sqrt(temp)
        if root[j,j] == 0:
            root[j+1:n,j] = 0
        else:
            ir = 1/root[j,j]
            for i in range(j+1,n):
                s = root[i,:j].T @ root[j,:j]
                root[i,j] = (a[i,j]-s)*ir
    return root

In [4]:
# Calculate Expected Shortfalls
def ES(a,alpha=0.05):
    a.sort()
    v= np.quantile(a,alpha)
    es = a[a<=v].mean()
    return -es

In [5]:
# Calculate Value at Risk
def VaR(a,alpha=0.05):
    a.sort()
    v= np.quantile(a,alpha)
    return -v

In [6]:

def MLE_T(params, returns):
    negLL = -1 * np.sum(stats.t.logpdf(returns, df=params[0], loc=params[1], scale=params[2]))
    return(negLL)
def Fitting_t_MLE(returns):
    constraints=({"type":"ineq", "fun":lambda x: x[0]-1}, {"type":"ineq", "fun":lambda x: x[2]})
    returns_t = minimize(MLE_T, x0=[10, np.mean(returns), np.std(returns)], args=returns, constraints=constraints)
    df, loc, scale = returns_t.x[0], returns_t.x[1], returns_t.x[2]
    return df, loc, scale

In [19]:
#generate a suitcase to test the code

# Function to generate a random dataset
def generate_random_data(num_samples, num_assets):
    # Generate random covariance matrix
    covariance_matrix = np.random.randn(num_assets, num_assets)
    covariance_matrix = np.dot(covariance_matrix, covariance_matrix.T)
    np.fill_diagonal(covariance_matrix, 1.0)  # Make diagonal elements 1

    # Generate random mean returns
    mean_returns = np.random.randn(num_assets)

    # Generate random returns using a multivariate normal distribution
    returns = np.random.multivariate_normal(mean_returns, covariance_matrix, num_samples).T

    return returns

# Set random seed for reproducibility
np.random.seed(0)

# Generate random data with 1000 samples and 5 assets
num_samples = 1000
num_assets = 5
returns_data = generate_random_data(num_samples, num_assets)

# Calculate Value at Risk (VaR) and Expected Shortfall (ES)
alpha = 0.05
var_value = VaR(returns_data[0], alpha)
es_value = ES(returns_data[0], alpha)

print("Value at Risk (VaR):", var_value)
print("Expected Shortfall (ES):", es_value)

# Perform Maximum Likelihood Estimation for t-distribution parameters
df, loc, scale = Fitting_t_MLE(returns_data[0])
print("MLE parameters for t-distribution (df, loc, scale):", df, loc, scale)


Value at Risk (VaR): 4.108218379832072
Expected Shortfall (ES): 4.781323266046413
MLE parameters for t-distribution (df, loc, scale): 294.4151369120474 -1.3955999112137918 1.7116827468031655


  returns = np.random.multivariate_normal(mean_returns, covariance_matrix, num_samples).T


In [7]:
# problem 3
def return_calculate(prices, method="DISCRETE", dateColumn="Date"):
    vars_ = prices.columns
    nVars = len(vars_)
    vars_ = [var for var in vars_ if var != dateColumn]
    if nVars == len(vars_):
        raise ValueError(f"dateColumn: {dateColumn} not in DataFrame: {vars_}")
    nVars = nVars - 1
    p = prices[vars_].to_numpy()
    n, m = p.shape
    p2 = np.empty((n-1, m))
    if method.upper() == "DISCRETE" or method.upper() == "LOG":
    # Calculate returns
        for i in range(n - 1):
            for j in range(m):
                p2[i, j] = p[i + 1, j] / p[i, j]
        
        if method.upper() == "DISCRETE":
            p2 = p2 - 1.0
        else:
            p2 = np.log(p2)
    elif method.upper() == "CLASSIC":
        for i in range(n - 1):
            for j in range(m):
                p2[i, j] = p[i + 1, j] - p[i, j]
    else:
        raise ValueError(f"method: {method} must be in ('LOG', 'DISCRETE', 'CLASSIC')")
    dates = prices[dateColumn].iloc[1:n].to_numpy()
    out = pd.DataFrame({dateColumn: dates})
    for i in range(nVars):
        out[vars_[i]] = p2[:, i]
    return out

In [8]:
def get_portfolio_price(portfolio, prices, portfolio_code, Delta=False):
    if portfolio_code == "All":
        assets = portfolio.drop('Portfolio',axis=1)
        assets = assets.groupby(["Stock"], as_index=False)["Holding"].sum()
    else:
        assets = portfolio[portfolio["Portfolio"] == portfolio_code]        
    stock_codes = list(assets["Stock"])
    assets_prices = pd.concat([prices["Date"], prices[stock_codes]], axis=1)  
    current_price = np.dot(prices[assets["Stock"]].tail(1), assets["Holding"])
    holdings = assets["Holding"]    
    if Delta == True:
        asset_values = assets["Holding"].values.reshape(-1, 1) * prices[assets["Stock"]].tail(1).T.values
        delta = asset_values / current_price    
        return current_price, assets_prices, delta   
    return current_price, assets_prices, holdings

In [14]:
asset = 'A'
current_price, assets_prices, holdings = get_portfolio_price(portfolio, prices, asset)
assets_returns = return_calculate(assets_prices)
assets_returns.drop('Date', axis=1, inplace=True)
norm_assets_returns = assets_returns - assets_returns.mean()
parameters = []
assets_returns_cdf = pd.DataFrame()

for stock in norm_assets_returns.columns.tolist():
    params = Fitting_t_MLE(norm_assets_returns[stock])
    parameters.append(params)
    assets_returns_cdf[stock] = stats.t.cdf(norm_assets_returns[stock], df=params[0], loc=params[1], scale=params[2])
spearman_corr_matrix = assets_returns_cdf.corr(method='spearman')
sim_sample = multivariate_normal_simulation(spearman_corr_matrix, 1000, method='pca')
sim_sample = pd.DataFrame(sim_sample, columns=assets_returns.columns)
sim_sample_cdf = pd.DataFrame()
for stock in sim_sample.columns.tolist():
    sim_sample_cdf[stock] = stats.norm.cdf(sim_sample[stock], loc=0, scale=1)

sim_returns = pd.DataFrame()
for i, stock in enumerate(sim_sample.columns.tolist()):
    sim_returns[stock] = stats.t.ppf(sim_sample_cdf[stock], df=parameters[i][0], loc=parameters[i][1], scale=parameters[i][2])

assets_prices.drop('Date', axis=1, inplace=True)
sim_prices = np.dot(sim_returns * assets_prices.tail(1).values.reshape(assets_prices.shape[1],), holdings)

var_T = VaR(sim_prices)
es_T = ES(sim_prices)

print("VaR： {}".format(var_T))
print("ES： {}".format(es_T))


VaR： 21060.107433205387
ES： 28279.714487442863


In [16]:
asset = 'B'
current_price, assets_prices, holdings = get_portfolio_price(portfolio, prices, asset)
assets_returns = return_calculate(assets_prices)
assets_returns.drop('Date', axis=1, inplace=True)
norm_assets_returns = assets_returns - assets_returns.mean()
parameters = []
assets_returns_cdf = pd.DataFrame()

for stock in norm_assets_returns.columns.tolist():
    params = Fitting_t_MLE(norm_assets_returns[stock])
    parameters.append(params)
    assets_returns_cdf[stock] = stats.t.cdf(norm_assets_returns[stock], df=params[0], loc=params[1], scale=params[2])
spearman_corr_matrix = assets_returns_cdf.corr(method='spearman')
sim_sample = multivariate_normal_simulation(spearman_corr_matrix, 1000, method='pca')
sim_sample = pd.DataFrame(sim_sample, columns=assets_returns.columns)
sim_sample_cdf = pd.DataFrame()
for stock in sim_sample.columns.tolist():
    sim_sample_cdf[stock] = stats.norm.cdf(sim_sample[stock], loc=0, scale=1)

sim_returns = pd.DataFrame()
for i, stock in enumerate(sim_sample.columns.tolist()):
    sim_returns[stock] = stats.t.ppf(sim_sample_cdf[stock], df=parameters[i][0], loc=parameters[i][1], scale=parameters[i][2])

assets_prices.drop('Date', axis=1, inplace=True)
sim_prices = np.dot(sim_returns * assets_prices.tail(1).values.reshape(assets_prices.shape[1],), holdings)

var_T = VaR(sim_prices)
es_T = ES(sim_prices)

print("VaR： {}".format(var_T))
print("ES： {}".format(es_T))


VaR： 12042.618933706053
ES： 16132.638858042772


In [17]:
asset = 'C'
current_price, assets_prices, holdings = get_portfolio_price(portfolio, prices, asset)
assets_returns = return_calculate(assets_prices)
assets_returns.drop('Date', axis=1, inplace=True)
norm_assets_returns = assets_returns - assets_returns.mean()
parameters = []
assets_returns_cdf = pd.DataFrame()

for stock in norm_assets_returns.columns.tolist():
    params = Fitting_t_MLE(norm_assets_returns[stock])
    parameters.append(params)
    assets_returns_cdf[stock] = stats.t.cdf(norm_assets_returns[stock], df=params[0], loc=params[1], scale=params[2])
spearman_corr_matrix = assets_returns_cdf.corr(method='spearman')
sim_sample = multivariate_normal_simulation(spearman_corr_matrix, 1000, method='pca')
sim_sample = pd.DataFrame(sim_sample, columns=assets_returns.columns)
sim_sample_cdf = pd.DataFrame()
for stock in sim_sample.columns.tolist():
    sim_sample_cdf[stock] = stats.norm.cdf(sim_sample[stock], loc=0, scale=1)

sim_returns = pd.DataFrame()
for i, stock in enumerate(sim_sample.columns.tolist()):
    sim_returns[stock] = stats.t.ppf(sim_sample_cdf[stock], df=parameters[i][0], loc=parameters[i][1], scale=parameters[i][2])

assets_prices.drop('Date', axis=1, inplace=True)
sim_prices = np.dot(sim_returns * assets_prices.tail(1).values.reshape(assets_prices.shape[1],), holdings)

var_T = VaR(sim_prices)
es_T = ES(sim_prices)

print("VaR： {}".format(var_T))
print("ES： {}".format(es_T))


VaR： 26476.66385351571
ES： 35311.08349690529


In [18]:
asset = 'All'
current_price, assets_prices, holdings = get_portfolio_price(portfolio, prices, asset)
assets_returns = return_calculate(assets_prices)
assets_returns.drop('Date', axis=1, inplace=True)
norm_assets_returns = assets_returns - assets_returns.mean()
parameters = []
assets_returns_cdf = pd.DataFrame()

for stock in norm_assets_returns.columns.tolist():
    params = Fitting_t_MLE(norm_assets_returns[stock])
    parameters.append(params)
    assets_returns_cdf[stock] = stats.t.cdf(norm_assets_returns[stock], df=params[0], loc=params[1], scale=params[2])
spearman_corr_matrix = assets_returns_cdf.corr(method='spearman')
sim_sample = multivariate_normal_simulation(spearman_corr_matrix, 1000, method='pca')
sim_sample = pd.DataFrame(sim_sample, columns=assets_returns.columns)
sim_sample_cdf = pd.DataFrame()
for stock in sim_sample.columns.tolist():
    sim_sample_cdf[stock] = stats.norm.cdf(sim_sample[stock], loc=0, scale=1)

sim_returns = pd.DataFrame()
for i, stock in enumerate(sim_sample.columns.tolist()):
    sim_returns[stock] = stats.t.ppf(sim_sample_cdf[stock], df=parameters[i][0], loc=parameters[i][1], scale=parameters[i][2])

assets_prices.drop('Date', axis=1, inplace=True)
sim_prices = np.dot(sim_returns * assets_prices.tail(1).values.reshape(assets_prices.shape[1],), holdings)

var_T = VaR(sim_prices)
es_T = ES(sim_prices)

print("VaR： {}".format(var_T))
print("ES： {}".format(es_T))


  out[vars_[i]] = p2[:, i]


VaR： 59311.71437211522
ES： 77699.57729189718
