## Problem 2

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.integrate import quad

'''
Function that calculates the EW covariance and correlation.
Func take the parameter of 'cov' and 'corr'
'''
def ew_cov_corr(df, lmbd, func='cov'):
    if not isinstance(df, pd.DataFrame):
        df = pd.DataFrame(df)
        
    if func not in ['cov', 'corr']:
        raise ValueError(f'The func parameter must be "cov" or "corr", got {func} instead.')
    
    # Center the data - to calculate the covariance matrix.
    df -= df.mean(axis=0)
        
    m, n = df.shape
    wts = np.empty(m)
    
    # Setting weights for prior observation
    for i in range(m):
        wts[i] = (1 - lmbd) * lmbd ** (m - i - 1)
        
    # Normalizing the weights
    wts /= np.sum(wts)
    wts = wts.reshape(-1, 1)
    if func == 'cov':   
        res = (wts * df).T @ df
        
    elif func == 'corr':
        res = (wts * df).T @ df
        # Calculate the standard deviations (square root of variances along the diagonal)
        std_devs = np.sqrt(np.diag(res))

        # Convert the covariance matrix to a correlation matrix
        res /= np.outer(std_devs, std_devs)
        
    return res

'''
VaR for Normal distribution with an EW variance
'''
def var_ew(data, lmbd, alpha=0.05):
    # Calculate the mean.
    mu = data.mean()
    
    # Calculate the variance with an EW variance.
    ew_sigma2 = ew_cov_corr(data, lmbd, 'cov')
    
    # Calculate the VaR
    VaR = -norm.ppf(alpha, mu, np.sqrt(ew_sigma2))[0, 0]
    
    # Calculate the relative difference from the mean expected.
    VaR_diff = VaR + mu
    VaR_diff = VaR_diff[0]

    return pd.DataFrame({"VaR Absolute": [VaR], 
                         "VaR Diff from Mean": [VaR_diff]})

'''
ES for Normal Distribution with an EW variance
'''
def es_ew(data, lmbd, alpha=0.05):
    # Calculate the mean.
    mu = data.mean()
    
    # Calculate the variance with an EW variance.
    std = np.sqrt(ew_cov_corr(data, lmbd, 'cov')).iloc[0, 0]
    data += mu
    
    # Calculate the VaR
    res = var_ew(data, lmbd, alpha)
    VaR = res.iloc[0, 0]
    
    # Define the integrand function: x times the PDF of the distribution
    def integrand(x, mu, std):
        return x * norm.pdf(x, loc=mu, scale=std)
    
    ES, _ = quad(lambda x: integrand(x, mu, std), -np.inf, -VaR)
    ES /= -alpha
    
    # Calculate the relative difference from the mean expected.
    ES_diff = ES + mu
    ES_diff = ES_diff[0]
    
    return pd.DataFrame({"ES Absolute": [ES], 
                         "ES Diff from Mean": [ES_diff]})

In [2]:
df = pd.read_csv('problem1.csv')
var = var_ew(df, 0.97, 0.05)
var

Unnamed: 0,VaR Absolute,VaR Diff from Mean
0,0.091169,0.09029


In [3]:
df = pd.read_csv('problem1.csv')
es = es_ew(df, 0.97, 0.05)
es

Unnamed: 0,ES Absolute,ES Diff from Mean
0,0.114107,0.113227


In [4]:
from scipy.stats import t
from scipy.integrate import quad

'''
Fit the Data with t Distribution
'''
def fit_general_t(data):
    # Fit the t distribution to the data
    nu, mu, sigma = t.fit(data)
    return mu, sigma, nu

'''
VaR for t Distribution
''' 
def var_t(data, alpha=0.05):
    # Fit the data with t distribution.
    mu, sigma, nu = fit_general_t(data)
    
    # Calculate the VaR
    VaR = -t.ppf(alpha, nu, mu, sigma)

    # Calculate the relative difference from the mean expected.
    VaR_diff = VaR + mu
    
    return pd.DataFrame({"VaR Absolute": [VaR], 
                         "VaR Diff from Mean": [VaR_diff]})

'''
ES for t Distribution
'''

def es_t(data, alpha=0.05):
    # Fit the data with normal distribution.
    mu, sigma, nu = fit_general_t(data)
    
    # Calculate the VaR
    res = var_t(data, alpha)
    VaR = res.iloc[0, 0]
    
    # Define the integrand function: x times the PDF of the distribution
    def integrand(x, mu, sigma, nu):
        return x * t.pdf(x, df=nu, loc=mu, scale=sigma)
    
    # Calculate the ES
    ES, _ = quad(lambda x: integrand(x, mu, sigma, nu), -np.inf, -VaR)
    ES /= -alpha
    
    # Calculate the relative difference from the mean expected.
    ES_diff = ES + mu
    
    return pd.DataFrame({"ES Absolute": [ES], 
                         "ES Diff from Mean": [ES_diff]})

In [5]:
df = pd.read_csv('problem1.csv')
var = var_t(df, 0.05)
var

Unnamed: 0,VaR Absolute,VaR Diff from Mean
0,0.076476,0.076382


In [6]:
df = pd.read_csv('problem1.csv')
es = es_t(df, 0.05)
es

Unnamed: 0,ES Absolute,ES Diff from Mean
0,0.113218,0.113124


In [7]:
'''
VaR and ES for historic simulation
''' 
def historic(data, N=100000, alpha=0.05):
    # Use numpy's random.choice to draw N samples with replacement
    np.random.seed(50)
    simulated_draws = np.random.choice(data.iloc[:,0], size=N, replace=True)
    
    # Calculate the mean for the data.
    mu = simulated_draws.mean()
    
    # Sorted the value in order to get the alpha% of the distribution
    sorted_data = np.sort(simulated_draws)
    
    # Calculate the percentage and dollar basis VaR.
    VaR_index = int(len(sorted_data) * 0.05)
    VaR = -sorted_data[VaR_index]

    # Calculate the relative difference from the mean expected.
    VaR_diff = VaR + mu
    
    # Calculate the ES
    ES = -sorted_data[sorted_data <= -VaR].mean()
    
    # Calculate the relative difference from the mean expected.
    ES_diff = ES + mu
    
    
    return pd.DataFrame({"VaR Absolute": [VaR], 
                         "VaR Diff from Mean": [VaR_diff],
                         "ES Absolute": [ES], 
                         "ES Diff from Mean": [ES_diff]})

In [8]:
df = pd.read_csv('problem1.csv')
history = historic(df)
history

Unnamed: 0,VaR Absolute,VaR Diff from Mean,ES Absolute,ES Diff from Mean
0,0.075862,0.074978,0.115348,0.114465


## Problem 3

In [9]:
from scipy.stats import norm, t
'''
Calculate Return
'''

# Implement the function to calculate the return
def return_calculate(prices, method='ARS', dateColumn='Date'):
    # Exclude the date column from the calculations
    tickers = [col for col in prices.columns if col != dateColumn]
    df = prices[tickers] # The dataframe is now with no date column.
    
    # Calculate the return using Classical Brownian Motion.
    if method == 'CBM':
        df = df.diff().dropna()
    
    # Calculate the return using Arithmetic Return System.
    elif method == 'ARS':
        df = (df - df.shift(1)) / df.shift(1)
        df = df.dropna()
        
    # Calculate the return using Geometric Brownian Motion.
    elif method == 'GBM':
        df = np.log(df).diff().dropna()
        
    else:
        raise ValueError(f"method: {method} must be in (\"CBM\",\"ARS\",\"GBM\")")
    
    return df

'''
Multivariate PCA Simulation
'''
def simulatePCA(N, df, mean=None, seed=1234, pctExp=1):
    # Error Checking
    m, n = df.shape
    if n != m:
        raise ValueError(f"Covariance Matrix is not square ({n},{m})")
    
    # Initialize the output
    out = np.zeros((N, n))
    
    # Set mean
    if mean is None:
        mean = np.zeros(n)
    else:
        if len(mean) != n:
            raise ValueError(f"Mean ({len(mean)}) is not the size of cov ({n},{n})")
    
    eigenvalues, eigenvectors = np.linalg.eig(df)
    
    # Get the indices that would sort eigenvalues in descending order
    indices = np.argsort(eigenvalues)[::-1]
    # Sort eigenvalues
    eigenvalues = eigenvalues[indices]
    # Sort eigenvectors according to the same order
    eigenvectors = eigenvectors[:, indices]
    
    tv = np.sum(eigenvalues)
    posv = np.where(eigenvalues >= 1e-8)[0]
    if pctExp <= 1:
        nval = 0
        pct = 0.0
        # How many factors needed
        for i in posv:
            pct += eigenvalues[i] / tv
            nval += 1
            if pct >= pctExp:
                break
    
     # If nval is less than the number of positive eigenvalues, truncate posv
    if nval < len(posv):
        posv = posv[:nval]
        
    # Filter eigenvalues based on posv
    eigenvalues = eigenvalues[posv]
    eigenvectors = eigenvectors[:, posv]
    
    B = eigenvectors @ np.diag(np.sqrt(eigenvalues))
    
    np.random.seed(seed)
    rand_normals = np.random.normal(0.0, 1.0, size=(N, len(posv)))
    out = np.dot(rand_normals, B.T) + mean
    
    return out.T

'''
VaR/ES on 2 levels from simulated values - Copula
'''

def simulateCopula(portfolio, returns):
    portfolio['CurrentValue'] = portfolio['Holding'] * portfolio['Starting Price']
    models = {}
    uniform = pd.DataFrame()
    standard_normal = pd.DataFrame()
    
    for stock in portfolio["Stock"]:
        # If the distribution for the model is normal, fit the data with normal distribution.
        if portfolio.loc[portfolio['Stock'] == stock, 'Distribution'].iloc[0] == 'Normal':
            models[stock] = norm.fit(returns[stock])
            mu, sigma = norm.fit(returns[stock])
            
            # Transform the observation vector into a uniform vector using CDF.
            uniform[stock] = norm.cdf(returns[stock], loc=mu, scale=sigma)
            
            # Transform the uniform vector into a Standard Normal vector usig the normal quantile function.
            standard_normal[stock] = norm.ppf(uniform[stock])
            
        # If the distribution for the model is t, fit the data with normal t.
        elif portfolio.loc[portfolio['Stock'] == stock, 'Distribution'].iloc[0] == 'T':
            models[stock] = t.fit(returns[stock])
            nu, mu, sigma = t.fit(returns[stock])
            
            # Transform the observation vector into a uniform vector using CDF.
            uniform[stock] = t.cdf(returns[stock], df=nu, loc=mu, scale=sigma)
            
            # Transform the uniform vector into a Standard Normal vector usig the normal quantile function.
            standard_normal[stock] = norm.ppf(uniform[stock])
        
    # Calculate Spearman's correlation matrix
    spearman_corr_matrix = standard_normal.corr(method='spearman')
    
    nSim = 10000
    
    # Use the PCA to simulate the multivariate normal.
    simulations = simulatePCA(nSim, spearman_corr_matrix)
    simulations = pd.DataFrame(simulations.T, columns=[stock for stock in portfolio["Stock"]])
    
    # Transform the simulations into uniform variables using standard normal CDF.
    uni = norm.cdf(simulations)
    uni = pd.DataFrame(uni, columns=[stock for stock in portfolio["Stock"]])
    
    simulatedReturns = pd.DataFrame()
    # Transform the uniform variables into the desired data using quantile.
    for stock in portfolio["Stock"]:
        # If the distribution for the model is normal, use the quantile of the normal distribution.
        if portfolio.loc[portfolio['Stock'] == stock, 'Distribution'].iloc[0] == 'Normal':
            mu, sigma = models[stock]
            simulatedReturns[stock] = norm.ppf(uni[stock], loc=mu, scale=sigma)
            
        # If the distribution for the model is t, use the quantile of the t distribution.
        elif portfolio.loc[portfolio['Stock'] == stock, 'Distribution'].iloc[0] == 'T':
            nu, mu, sigma = models[stock]
            simulatedReturns[stock] = t.ppf(uni[stock], df=nu, loc=mu, scale=sigma)
    
    simulatedValue = pd.DataFrame()
    pnl = pd.DataFrame()
    # Calculate the daily prices for each stock
    for stock in portfolio["Stock"]:
        currentValue = portfolio.loc[portfolio['Stock'] == stock, 'CurrentValue'].iloc[0]
        simulatedValue[stock] = currentValue * (1 + simulatedReturns[stock])
        pnl[stock] = simulatedValue[stock] - currentValue
        
    risk = pd.DataFrame(columns = ["Stock", "VaR95", "ES95", "VaR95_Pct", "ES95_Pct"])
    w = pd.DataFrame()

    for stock in pnl.columns:
        i = risk.shape[0]
        risk.loc[i, "Stock"] = stock
        risk.loc[i, "VaR95"] = -np.percentile(pnl[stock], 5)
        risk.loc[i, "VaR95_Pct"] = risk.loc[i, "VaR95"] / portfolio.loc[portfolio['Stock'] == stock, 'CurrentValue'].iloc[0]
        risk.loc[i, "ES95"] = -pnl[stock][pnl[stock] <= -risk.loc[i, "VaR95"]].mean()
        risk.loc[i, "ES95_Pct"] = risk.loc[i, "ES95"] / portfolio.loc[portfolio['Stock'] == stock, 'CurrentValue'].iloc[0]
        
        # Determine the weights for the two stock
        w.at['Weight', stock] = portfolio.loc[portfolio['Stock'] == stock, 'CurrentValue'].iloc[0] / portfolio['CurrentValue'].sum()
        
    # Calculate the total pnl.
    pnl['Total'] = 0
    for stock in portfolio["Stock"]:
        pnl['Total'] += pnl[stock]
    
    i = risk.shape[0]
    risk.loc[i, "Stock"] = 'Total'
    risk.loc[i, "VaR95"] = -np.percentile(pnl['Total'], 5)
    risk.loc[i, "VaR95_Pct"] = risk.loc[i, "VaR95"] / portfolio['CurrentValue'].sum()
    risk.loc[i, "ES95"] = -pnl['Total'][pnl['Total'] <= -risk.loc[i, "VaR95"]].mean()
    risk.loc[i, "ES95_Pct"] = risk.loc[i, "ES95"] / portfolio['CurrentValue'].sum()

    return risk

In [10]:
# Read the price data from csv files.
prices = pd.read_csv('DailyPrices.csv')

# Calculate the arithmetic returns from prices.
returns = return_calculate(prices)

# Center the data.
returns -= returns.mean()

# Read the portfolio data from csv files.
portfolio = pd.read_csv('portfolio.csv')

# Assign 'T' to portfolio A and B, and 'Normal' to portfolio C
portfolio.loc[portfolio['Portfolio'].isin(['A', 'B']), 'Distribution'] = 'T'
portfolio.loc[portfolio['Portfolio'] == 'C', 'Distribution'] = 'Normal'

for stock in portfolio["Stock"]:
    portfolio.loc[portfolio['Stock'] == stock, 'Starting Price'] = prices.iloc[-1][stock]

# Fit Generalized T models to stocks in portfolios A and B, and fit a normal distributions to stocks 
# in portfolio C. Calculate the VaR and ES of each portfolio as well as your total VaR and ES. You 
# will need to use a copula.  Compare the results from this to your VaR form Problem 3 from 
# Week 4.
total = simulateCopula(portfolio, returns)
total

Unnamed: 0,Stock,VaR95,ES95,VaR95_Pct,ES95_Pct
0,AAPL,317.550523,412.721786,0.036345,0.047238
1,TSLA,142.463585,178.719497,0.068717,0.086205
2,JPM,269.518704,365.179696,0.02999,0.040635
3,HD,258.328428,350.019725,0.030321,0.041083
4,BAC,247.200327,346.955921,0.033112,0.046475
...,...,...,...,...,...
95,LRCX,395.788072,496.119474,0.054839,0.068741
96,MO,236.595797,299.279187,0.025923,0.03279
97,LMT,343.581294,426.456344,0.02707,0.033599
98,TFC,236.923781,298.768966,0.033175,0.041835


In [11]:
portfolio_a = portfolio.loc[portfolio["Portfolio"] == "A"]
risk_a = simulateCopula(portfolio_a, returns)
print(risk_a)
portfolio_b = portfolio.loc[portfolio["Portfolio"] == "B"]
risk_b = simulateCopula(portfolio_b, returns)
print(risk_b)
portfolio_c = portfolio.loc[portfolio["Portfolio"] == "C"]
risk_c = simulateCopula(portfolio_c, returns)
print(risk_c)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  portfolio['CurrentValue'] = portfolio['Holding'] * portfolio['Starting Price']


    Stock        VaR95          ES95 VaR95_Pct  ES95_Pct
0    AAPL   322.001413    415.704808  0.036854  0.047579
1    TSLA   143.584723    181.984675  0.069258   0.08778
2     JPM   269.616599    354.050777  0.030001  0.039396
3      HD    259.36929     367.07279  0.030443  0.043084
4     BAC   249.782594    344.143097  0.033458  0.046098
5     XOM   552.069654    738.839836  0.034485  0.046152
6    AVGO   387.430986    497.069107  0.038021   0.04878
7     PEP   190.981458    285.274583  0.019392  0.028966
8     TMO    326.12977    435.955763   0.03372  0.045075
9   CMCSA   219.741484    316.504757  0.029886  0.043047
10   META   339.504604    503.934332  0.057824  0.085829
11    ACN   274.329252    363.987886  0.033173  0.044015
12   INTC   196.700116    271.097887  0.039408  0.054313
13   PYPL   251.529744    332.039145  0.056271  0.074283
14    MRK   267.553114    380.443365  0.020383  0.028983
15      T   177.698943    261.042101  0.025455  0.037393
16    LOW   291.567303    389.4

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  portfolio['CurrentValue'] = portfolio['Holding'] * portfolio['Starting Price']


    Stock        VaR95         ES95 VaR95_Pct  ES95_Pct
0    MSFT   331.844614   436.502065  0.038145  0.050176
1   GOOGL    16.107943     21.75694  0.042385  0.057249
2    NVDA   569.260492   709.017418  0.067066  0.083531
3     JNJ   176.708296   242.713666  0.018247  0.025063
4      PG   192.149578   274.000421  0.022255  0.031734
5      MA   326.811789   453.010045  0.032677  0.045295
6     DIS   276.050869   368.853113    0.0379   0.05064
7    ADBE   326.070012   465.202256  0.043382  0.061893
8      KO   189.279531   275.483517  0.019477  0.028348
9    NFLX   441.777265   630.240201  0.060935   0.08693
10   COST   294.884741   477.077905  0.029451  0.047648
11    WFC   283.137419   401.226102  0.034405  0.048754
12    WMT   235.900642   368.539734  0.024158  0.037741
13    LLY   394.251653   530.660042  0.028134  0.037868
14    NKE   355.153949   502.974558  0.042747  0.060539
15    LIN   320.788752   423.704645  0.031115  0.041098
16    UNP   230.567507   304.227216  0.027728  0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  portfolio['CurrentValue'] = portfolio['Holding'] * portfolio['Starting Price']


    Stock        VaR95         ES95 VaR95_Pct  ES95_Pct
0    AMZN    20.081569    25.463203  0.051103  0.064798
1    GOOG    16.115539    20.247897  0.042205  0.053027
2   BRK-B   225.778392   282.968842  0.023707  0.029713
3     UNH   276.146287   348.466014  0.025842  0.032609
4       V   330.856557   417.133523  0.030693  0.038697
5     PFE   221.816446   273.954429  0.028121  0.034731
6    CSCO   236.258965   291.960461  0.031017   0.03833
7     CVX   446.574788   570.998461   0.03399  0.043461
8    ABBV   265.386323   331.385931  0.024118  0.030116
9     ABT   234.112472   292.872336  0.027422  0.034304
10    CRM   371.066495   466.579561  0.048562  0.061062
11     VZ   179.744185   221.181956  0.024016  0.029553
12   QCOM    337.87571   422.825724  0.048839  0.061119
13    MCD   219.220141   275.093886  0.021565  0.027061
14    DHR     313.3189   390.455491  0.035091   0.04373
15    TXN   310.993644   387.611805  0.032911  0.041019
16     PM   258.268329   328.134231  0.026098  0