# <a id="table-of-contents"></a>Table of Contents

1. [Portfolio_marginal_attributes](#section1)
2. [Portfolio_attributes](#section2)

In [1]:
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

from plotly.subplots import make_subplots
from scipy.stats import trim_mean

In [2]:
# List portfolio securities here:
portfolio_with_candidate = ["SPY_history.csv", "XLU_history.csv", "XLF_history.csv"]
portfolio_without_candidate= ["XLF_history.csv","SPY_history.csv"]
candidate = ["XLU_history.csv"]

In [3]:
def process_portfolio_data(securities_list, trading_folder="~/Desktop/Trading"):
    
    # Define the Data folder path
    data_folder = os.path.join(os.path.expanduser(trading_folder), "Data")
    
    # Initialize an empty list to store the DataFrames
    data_frames = []
    
    # Iterate over the list of security files
    for file_name in securities_list:
        # Construct the full file path
        file_path = os.path.join(data_folder, file_name)
        
        # Read the CSV file into a DataFrame
        data = pd.read_csv(file_path)
        
        # Convert 'Date' column to datetime format
        data['Date'] = pd.to_datetime(data['Date'])
        
        # Extract security name from file name (e.g., "SPY" from "SPY_history.csv")
        security_name = file_name.split('_')[0]
        
        # Rename columns to include security name
        data = data.add_suffix(f'_{security_name}')
        
        # Calculate day-over-day price change based on 'Close' column
        data[f'Return_{security_name}'] = data[f'Close/Last_{security_name}'].pct_change()
        
        # Append the DataFrame to the list
        data_frames.append(data)
    
    # Combine all CSV side by side
    combined_data = pd.concat(data_frames, axis=1)
    
    # Calculate the portfolio's daily return (average of all securities' daily returns)
    return_columns = [col for col in combined_data.columns if col.startswith("Return_")]
    combined_data["portfolio_daily_return"] = combined_data[return_columns].mean(axis=1)
    
    # Add Original_Index column to capture current index
    combined_data['Original_Index'] = combined_data.index
    
    return combined_data

# Add function to sort DataFrame by Original_Index
def sort_by_original_index(data):
    """
    Sort the DataFrame by the 'Original_Index' column in descending order.
    
    Args:
        data (pd.DataFrame): The DataFrame to be sorted.
        
    Returns:
        pd.DataFrame: The sorted DataFrame.
    """
    return data.sort_values(by='Original_Index', ascending=False)


In [4]:
# Process the data for different portfolios
combined_data_with_candidate = process_portfolio_data(portfolio_with_candidate)
combined_data_without_candidate = process_portfolio_data(portfolio_without_candidate)
combined_data_candidate = process_portfolio_data(candidate)

# Sort the DataFrames by Original_Index
portfolio_with_candidate = sort_by_original_index(combined_data_with_candidate)
portfolio_without_candidate = sort_by_original_index(combined_data_without_candidate)
candidate = sort_by_original_index(combined_data_candidate)

# Display first few rows to verify
print("Portfolio with Candidate:\n", portfolio_with_candidate.head())
print("\nPortfolio without Candidate:\n", portfolio_without_candidate.head())
print("\nCandidate Portfolio:\n", candidate.head())

Portfolio with Candidate:
        Date_SPY  Close/Last_SPY  Volume_SPY  Open_SPY  High_SPY  Low_SPY  \
2515 2015-01-26        205.4500    91684840    204.71    205.56   203.85   
2514 2015-01-27        202.7400   133590500    202.97    204.12   201.74   
2513 2015-01-28        200.1400   167681700    204.17    204.29   199.91   
2512 2015-01-29        201.9902   173293000    200.38    202.30   198.68   
2511 2015-01-30        199.4500   193789000    200.57    202.17   199.13   

      Return_SPY   Date_XLU  Close/Last_XLU  Volume_XLU  ...  Return_XLU  \
2515    0.013367 2015-01-26           49.11     8129107  ...   -0.001423   
2514    0.012991 2015-01-27           49.18     8746036  ...    0.008614   
2513   -0.009160 2015-01-28           48.76    13694760  ...   -0.013155   
2512    0.012736 2015-01-29           49.41    12033220  ...    0.022558   
2511   -0.012233 2015-01-30           48.32    14901630  ...   -0.004327   

       Date_XLF  Close/Last_XLF  Volume_XLF Open_XLF  High_

In [5]:
#  Alternative approach (modifying in place if desired, be cautious with this!):
def drop_indices_in_place(df):
  if len(df) > 1500:
      df.drop(index=df.index[:1500], axis=0, inplace=True)  # Efficient for large DataFrames
      df.reset_index(drop=True, inplace=True) # Reset index after dropping
  # No return needed as it modifies in place


# Example of in-place usage:
drop_indices_in_place(portfolio_with_candidate)
drop_indices_in_place(portfolio_without_candidate)
drop_indices_in_place(candidate)

## <a id="section1"></a> Portfolio_marginal_attributes

In [6]:
def trimmed_std_dev(data, trim_percent=0.02):
    """
    Calculate the trimmed standard deviation for a portfolio's average daily return.

    Parameters:
    - data (pd.DataFrame): DataFrame containing daily returns of all securities
    - trim_percent (float): Percentage of observations to trim from each end (default 10%)

    Returns:
    - float: Trimmed standard deviation of the portfolio
    """
    # Select only return columns (avoid including other numerical data)
    return_columns = [col for col in data.columns if col.startswith("Return_")]
    if not return_columns:
        raise ValueError("No return columns found in the dataset!")

    # Compute the portfolio's daily return (average of all securities' daily returns)
    data["portfolio_daily_return"] = data[return_columns].mean(axis=1)

    # Extract the portfolio daily returns as a series
    portfolio_returns = data["portfolio_daily_return"].dropna().values

    # Trim the extreme observations
    trim_count = int(len(portfolio_returns) * trim_percent)
    sorted_returns = np.sort(portfolio_returns)
    trimmed_returns = sorted_returns[trim_count:-trim_count]  # Trim bottom & top values

    # Compute and return standard deviation of the trimmed dataset
    return np.std(trimmed_returns, ddof=1)


In [7]:
# Compute trimmed standard deviations
trimmed_std_portfolio_without_candidate = trimmed_std_dev(portfolio_without_candidate, trim_percent=0.02)
trimmed_std_portfolio_with_candidate = trimmed_std_dev(portfolio_with_candidate, trim_percent=0.02)
trimmed_std_candidate = trimmed_std_dev(candidate, trim_percent=0.02)

# Print results
print("Trimmed Std Dev (Portfolio without Candidate):", trimmed_std_portfolio_without_candidate)
print("Trimmed Std Dev (Candidate Security):", trimmed_std_candidate)
print("Trimmed Std Dev (Portfolio with Candidate):", trimmed_std_portfolio_with_candidate)

Trimmed Std Dev (Portfolio without Candidate): 0.008740266808113857
Trimmed Std Dev (Candidate Security): 0.009446978772188083
Trimmed Std Dev (Portfolio with Candidate): 0.007784708045209008


In [8]:
def gini_mean_difference(data, column):
    """
    Calculate the Gini mean difference for a specified column in a DataFrame.
    
    Args:
        data (pd.DataFrame): The DataFrame containing the data.
        column (str): The column name for which to calculate the Gini mean difference.
        
    Returns:
        float: The Gini mean difference.
    """
    # Extract the specified column values
    values = data[column].dropna().values
    
    # Calculate the absolute differences between all pairs of elements
    diff_matrix = np.abs(np.subtract.outer(values, values))
    
    # Calculate the mean of the absolute differences
    gini_mean_diff = np.mean(diff_matrix)
    
    return gini_mean_diff




In [4]:
portfolio_with_candidate.columns
portfolio_without_candidate.columns
candidate.columns

NameError: name 'portfolio_with_candidate' is not defined

In [9]:
# Example usage:
gini_diff_SPY = gini_mean_difference(portfolio_without_candidate, 'portfolio_daily_return')
gini_diff_SPY = gini_mean_difference(portfolio_with_candidate, 'portfolio_daily_return')
gini_diff_SPY = gini_mean_difference(candidate, 'portfolio_daily_return')


print(f'Gini Mean Difference for SPY: {portfolio_daily_return}')
print(f'Gini Mean Difference for SPY: {portfolio_daily_return}')
print(f'Gini Mean Difference for SPY: {portfolio_daily_return}')

NameError: name 'portfolio_daily_return' is not defined

## <a id="section2"></a> Portfolio_attributes

In [10]:
# Select only the "Close/Last_" columns
close_columns = [col for col in combined_data.columns if col.startswith('Close/Last_')]

# Calculate daily percentage returns
returns = combined_data[close_columns].pct_change()

# Drop NaN values (from the first row caused by pct_change)
returns = returns.dropna()

# 1. Correlation Matrix
correlation_matrix = returns.corr()

# 2. Covariance Matrix
covariance_matrix = returns.cov()

# 3. Beta Matrix - Adjust to have the same shape as correlation and covariance matrices
benchmark = close_columns[0]
betas = {}

# Initialize beta_matrix with NaN values to match the size of the correlation and covariance matrices
beta_matrix = pd.DataFrame(np.nan, index=close_columns, columns=close_columns)

for col in close_columns:
    for row in close_columns:
        if col == row:
            beta_matrix.loc[row, col] = 1
        else:
            beta = covariance_matrix.loc[col, benchmark] / covariance_matrix.loc[benchmark, benchmark] 
            beta_matrix.loc[col, benchmark] = beta

NameError: name 'combined_data' is not defined

In [11]:
# Define a conversion factor for covariance (e.g., multiplying by 1e6 for 'mu' units)
covariance_conversion_factor = 1e6  # Adjust this based on what "mu" represents

# Apply conversion factor to covariance matrix
covariance_matrix_mu = covariance_matrix * covariance_conversion_factor

# Store matrices in a dictionary for dynamic plotting
matrices = {
    "Correlation Matrix": correlation_matrix,
    "Covariance Matrix (mu)": covariance_matrix_mu,  # Use converted covariance matrix
    "Beta Matrix": beta_matrix
}

num_matrices = len(matrices)  # Adjust based on how many matrices you have

# Create subplots dynamically
fig = make_subplots(
    rows=1, cols=num_matrices,  
    subplot_titles=list(matrices.keys()),  # Dynamically set titles
    column_widths=[1/num_matrices] * num_matrices,  
    shared_yaxes=True,
    shared_xaxes=True
)

# Adjust x positions for colorbars below each heatmap, adding some spacing
x_positions = np.linspace(0.15, 0.85, num_matrices)  # Spread them evenly from left to right with spacing

# Add each matrix as a heatmap dynamically
for i, (title, matrix) in enumerate(matrices.items(), start=1):
    fig.add_trace(
        go.Heatmap(
            z=matrix.values,
            x=matrix.columns,
            y=matrix.columns,
            colorscale="RdBu",
            colorbar=dict(
                title=title.split()[0],  # Use first word of title (Correlation, Covariance, Beta)
                tickvals=[matrix.values.min(), 0, matrix.values.max()],
                yanchor="top",
                y=-0.25,  # Move colorbar slightly below the heatmap
                x=x_positions[i - 1],  # Align it under each respective heatmap with spacing
                xanchor="center",
                orientation="v"  # Make the colorbar vertical
            ),
            text=matrix.values.round(2),
            texttemplate="%{text}",
            showscale=True,
            hoverinfo="skip"
        ),
        row=1, col=i
    )

# Update layout
fig.update_layout(
    title="Correlation, Covariance (mu), and Beta Matrices",
    height=750,  # Increased height to accommodate vertical legends
    showlegend=False,
    title_x=0.5
)

# Show the interactive heatmap
fig.show()


NameError: name 'covariance_matrix' is not defined