In [2]:
import pandas as pd
import numpy as np
import os # To check if the file exists
from scipy.stats import norm # For Parametric VaR and CVaR

# --- Configuration ---
# Excel workbook holding the historical closing prices. A relative path is
# resolved against the current working directory; otherwise give a full path.
excel_file_path = 'Historical_Portfolio.xlsx'
sheet_name = 'portfolio'  # Worksheet with a 'Date' column plus the price columns

# Confidence level for the VaR and ES calculations.
# At 99% the VaR is the 1st percentile of the return distribution and the ES
# is the average of the returns at or below that percentile.
confidence_level = 0.99

# For Monte Carlo Simulation
num_simulations = 10000  # Number of simulated portfolio returns


# --- Helpers ---

def compute_daily_returns(prices):
    """Return row-over-row simple returns with the index in ascending order.

    The price table is sorted by its index before differencing so that every
    return is measured from an earlier observation to a later one, whatever
    order the rows were stored in (the workbook may list the newest date
    first). Rows containing NaN, including the first row produced by
    ``pct_change()``, are dropped.

    Args:
        prices: DataFrame (or Series) of prices indexed by date.

    Returns:
        The returns, indexed in ascending order.

    Raises:
        ValueError: If no complete row of returns can be computed.
    """
    returns = prices.sort_index().pct_change().dropna()
    if returns.empty:
        raise ValueError(
            "At least two complete price observations are required to compute returns."
        )
    return returns


def normal_expected_shortfall(mean_return, std_dev, tail_probability):
    """Return the expected shortfall (CVaR) of normally distributed returns.

    For returns ~ N(mean, std^2) and tail probability ``a`` (0.01 for a 99%
    confidence level):

        ES = mean - std * pdf(z_a) / a,   with z_a = ppf(a)

    The divisor is the tail probability, not the confidence level: the
    partial expectation over the tail has to be rescaled by the probability
    mass of that tail.

    Args:
        mean_return: Mean of the return distribution.
        std_dev: Standard deviation of the return distribution.
        tail_probability: One minus the confidence level; must lie strictly
            between 0 and 1.

    Returns:
        The expected return conditional on being at or below the VaR.

    Raises:
        ValueError: If ``tail_probability`` is not strictly between 0 and 1.
    """
    if not 0 < tail_probability < 1:
        raise ValueError(
            f"tail_probability must be strictly between 0 and 1, got {tail_probability}"
        )
    z_alpha = norm.ppf(tail_probability)
    return mean_return - std_dev * norm.pdf(z_alpha) / tail_probability


# --- Data Loading and Preparation ---

# Check if the Excel file exists before attempting to load
if not os.path.exists(excel_file_path):
    print(f"Error: The file '{excel_file_path}' was not found.")
    print("Please ensure the Excel file is in the correct directory.")
else:
    try:
        # Load the historical stock price data from the specified sheet,
        # using the 'Date' column as the index for time-series analysis.
        df_prices = pd.read_excel(excel_file_path, sheet_name=sheet_name, index_col='Date')

        print("--- Original Data Head ---")
        print(df_prices.head())
        print("\n--- Data Info ---")
        df_prices.info()
        print("\n--- Data Description ---")
        print(df_prices.describe())

        # Daily returns for each stock. The helper sorts the dates into
        # chronological order first; df_prices itself is left as loaded.
        df_returns = compute_daily_returns(df_prices)

        print("\n--- Daily Returns Head ---")
        print(df_returns.head())
        print("\n--- Daily Returns Info ---")
        df_returns.info()

        # --- Portfolio Construction (Assuming Equal Weighting for now) ---
        num_assets = df_returns.shape[1]
        portfolio_weights = np.full(num_assets, 1 / num_assets)
        print(f"\nPortfolio Weights (Equal): {portfolio_weights}")

        # Daily portfolio return = weighted sum of the asset returns
        portfolio_returns = df_returns.dot(portfolio_weights)

        print("\n--- Portfolio Returns Head ---")
        print(portfolio_returns.head())

        # Mean and standard deviation feed both the Parametric and the
        # Monte Carlo estimates below.
        portfolio_mean_return = portfolio_returns.mean()
        portfolio_std_dev = portfolio_returns.std()

        print(f"Mean Portfolio Return: {portfolio_mean_return:.4f}")
        print(f"Standard Deviation of Portfolio Returns: {portfolio_std_dev:.4f}")

        # --- Historical Value-at-Risk (VaR) Calculation ---
        # Sort the portfolio returns in ascending order (from worst loss to best gain)
        sorted_returns = portfolio_returns.sort_values(ascending=True)

        # Tail probability for VaR (e.g., 0.01 for 99% confidence)
        var_percentile = 1 - confidence_level

        # Historical VaR is the empirical quantile at the tail probability
        historical_var = sorted_returns.quantile(var_percentile)

        print("\n--- Market Risk Metrics ---")
        print(f"Historical VaR ({confidence_level*100:.0f}% Confidence Level): {historical_var:.4f} (or {historical_var*100:.2f}%)")

        # --- Historical Conditional Value-at-Risk (CVaR) / Expected Shortfall (ES) Calculation ---
        # CVaR is the average of the returns that are at or below the VaR.
        cvar_returns_historical = sorted_returns[sorted_returns <= historical_var]
        historical_cvar = cvar_returns_historical.mean()

        print(f"Historical CVaR / Expected Shortfall ({confidence_level*100:.0f}% Confidence Level): {historical_cvar:.4f} (or {historical_cvar*100:.2f}%)")

        # --- Parametric (Delta-Normal) VaR Calculation ---
        # Assumes portfolio returns are normally distributed.
        # VaR = Mean Return + (Z-score * Standard Deviation)
        # norm.ppf(tail probability) gives the (negative) Z-score of the tail.
        z_score = norm.ppf(var_percentile)
        parametric_var = portfolio_mean_return + (z_score * portfolio_std_dev)

        print(f"\nParametric (Delta-Normal) VaR ({confidence_level*100:.0f}% Confidence Level): {parametric_var:.4f} (or {parametric_var*100:.2f}%)")

        # --- Parametric CVaR / Expected Shortfall Calculation ---
        # Closed-form normal ES: Mean - StdDev * PDF(Z_alpha) / alpha,
        # where alpha is the tail probability (e.g., 0.01 for 99% confidence).
        parametric_cvar = normal_expected_shortfall(
            portfolio_mean_return, portfolio_std_dev, var_percentile
        )

        print(f"Parametric CVaR / Expected Shortfall ({confidence_level*100:.0f}% Confidence Level): {parametric_cvar:.4f} (or {parametric_cvar*100:.2f}%)")


        # --- Monte Carlo VaR Calculation ---
        # Draw simulated portfolio returns from a normal distribution fitted
        # to the historical mean and standard deviation, then read the VaR
        # off the simulated sample. No seed is set, so results vary per run.
        simulated_returns = np.random.normal(loc=portfolio_mean_return,
                                             scale=portfolio_std_dev,
                                             size=num_simulations)

        # Sort simulated returns to find the VaR percentile
        sorted_simulated_returns = np.sort(simulated_returns)

        # np.percentile expects the percentile on a 0-100 scale
        monte_carlo_var = np.percentile(sorted_simulated_returns, var_percentile * 100)

        print(f"\nMonte Carlo VaR ({confidence_level*100:.0f}% Confidence Level, {num_simulations} simulations): {monte_carlo_var:.4f} (or {monte_carlo_var*100:.2f}%)")

        # --- Monte Carlo CVaR / Expected Shortfall Calculation ---
        # Average of the simulated returns that are at or below the simulated VaR.
        cvar_returns_mc = sorted_simulated_returns[sorted_simulated_returns <= monte_carlo_var]
        monte_carlo_cvar = cvar_returns_mc.mean()

        print(f"Monte Carlo CVaR / Expected Shortfall ({confidence_level*100:.0f}% Confidence Level, {num_simulations} simulations): {monte_carlo_cvar:.4f} (or {monte_carlo_cvar*100:.2f}%)")


        # Interpretation:
        # VaR (Value-at-Risk): The maximum expected loss at a given confidence level over a specific time horizon.
        # CVaR (Conditional Value-at-Risk) / Expected Shortfall (ES): The expected loss given that the loss exceeds the VaR.
        #
        # Historical VaR/CVaR: Based directly on past observed returns.
        # Parametric VaR/CVaR: Assumes a specific distribution (e.g., normal) for returns.
        # Monte Carlo VaR/CVaR: Simulates many possible future outcomes based on statistical properties.
        #
        # The choice of VaR/CVaR method depends on data characteristics, portfolio complexity,
        # and regulatory requirements (e.g., FRTB often requires ES).

    except Exception as e:
        # Top-level boundary of the script: report the failure instead of
        # surfacing a traceback.
        print(f"An error occurred during data processing: {e}")

--- Original Data Head ---
            Cipla_close   TATACON_close   BEL_close 
Date                                                
2025-06-02        1470.2          1120.4      387.50
2025-05-30        1465.7          1106.3      384.60
2025-05-29        1476.9          1109.8      386.80
2025-05-28        1468.5          1121.4      390.45
2025-05-27        1480.5          1138.4      385.40

--- Data Info ---
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 248 entries, 2025-06-02 to 2024-06-03
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Cipla_close     248 non-null    float64
 1   TATACON_close   248 non-null    float64
 2   BEL_close       248 non-null    float64
dtypes: float64(3)
memory usage: 7.8 KB

--- Data Description ---
       Cipla_close   TATACON_close   BEL_close 
count    248.000000      248.000000  248.000000
mean    1515.797581     1069.459677  296.545927
std       65.277478      