In [1]:
# !pip install econml

In [2]:
import yfinance as yf
import numpy as np
import pandas as pd

# Data Processing and Analysis

Stationarity and preprocessing

In [None]:
from statsmodels.tsa.stattools import adfuller, kpss

In [None]:
def run_stationarity_tests(series, name="Series"):
    """
    Performs Augmented Dickey-Fuller (ADF) and Kwiatkowski-Phillips-Schmidt-Shin (KPSS) tests.
    Prints the results and suggests stationarity based on p-values.
    """
    print(f"\n--- Stationarity Tests for {name} ---")

    # ADF Test
    print("Augmented Dickey-Fuller Test:")
    adf_result = adfuller(series)
    print(f"ADF Statistic: {adf_result[0]:.2f}")
    print(f"P-value: {adf_result[1]:.3f}")
    print("Critical Values:")
    for key, value in adf_result[4].items():
        print(f"   {key}: {value:.2f}")
    if adf_result[1] <= 0.05:
        print(f"{name} is likely stationary (reject H0 of unit root).")
    else:
        print(f"{name} is likely non-stationary (fail to reject H0 of unit root).")

    # KPSS Test
    print("\nKwiatkowski-Phillips-Schmidt-Shin Test:")
    kpss_result = kpss(series, regression='c', nlags='auto')
    print(f"KPSS Statistic: {kpss_result[0]:.2f}")
    print(f"P-value: {kpss_result[1]:.3f}")
    print("Critical Values:")
    for key, value in kpss_result[3].items():
        print(f"   {key}: {value:.2f}")
    if kpss_result[1] > 0.05:
        print(f"{name} is likely stationary (fail to reject H0 of stationarity).")
    else:
        print(f"{name} is likely non-stationary (reject H0 of stationarity).")
    print("-" * 40)


# --- Modifications in your main script where real data is processed ---

# Example usage with the modified functions:

# First, ensure you have the 'data' directory and the HFRX CSV files.
# If you don't have HFRX_historical_HFRXM.csv, HFRX_historical_HFRXSDV.csv, etc.,
# you'll need to create dummy files or provide instructions on obtaining them.
# For example, a dummy file 'data/HFRX_historical_HFRXM.csv' could look like:
# Date,Daily ROR
# 2003-01-01,0.5%
# 2003-01-02,0.6%
# ...

# Inside your main script for real data processing:

# Before calling conditional_correlation_grf, you would call:

# --- Example for HFRXM ---
name_HFRXM = 'HFRXM'
full_df_HFRXM = prepare_hedgfund_and_sp500(name_HFRXM)

if full_df_HFRXM is not None:
    print(f"\n--- Data for {name_HFRXM} ---")
    print(full_df_HFRXM.head())
    print(f"Total rows: {len(full_df_HFRXM)}")

    # Run stationarity tests on the return series
    run_stationarity_tests(full_df_HFRXM[f'{name_HFRXM}_returns'], name=f'{name_HFRXM} Returns')
    run_stationarity_tests(full_df_HFRXM['S&P500_returns'], name='S&P 500 Returns')
    # Note: VIX levels are usually non-stationary, but your text mentions returns were tested.
    # If VIX levels are used directly, they might also be tested if their stationarity is relevant for 'X'.
    # Here, we test the returns as stated in the paper.

    # Modify show_result_bootstrap to accept the full_df directly
    # and adapt the extraction of X, y, w accordingly.
    # The current show_result_bootstrap reads data and prepares it internally,
    # so we'll need to modify it slightly or extract data outside it.

    # Original call:
    # t = show_result_bootstrap(name_HFRXM, returns)
    # This part needs to be updated to use `full_df_HFRXM` directly.

    # Updated show_result_bootstrap (assuming `full_df` passed will have VIX, HF returns, SP500 returns)
    def show_result_bootstrap_updated(name_hf, full_data_df):
        # Ensure column names are consistent with prepare_hedgfund_and_sp500 output
        vix_series = full_data_df['VIX']
        hf_returns_series = full_data_df[f'{name_hf}_returns']
        sp500_returns_series = full_data_df['S&P500_returns']

        prediction_data = pd.DataFrame()
        # The logic below aligns Y (HF returns) and W (SP500 returns) at time 'i'
        # with X (VIX) at time 'i-1'.
        # Ensure the indexing is correct for VIX to be the 'X_t' in your paper (conditioning variable).
        # Your paper uses (X_t, Y_t, W_t) and conditions (Y,W) on X.
        # Your code's show_result_bootstrap had: 'x': data['VIX'].iloc[i-1], 'w' : data['S&P 500'].iloc[i], 'y': data[name].iloc[i]
        # This means VIX(t-1) is X, S&P500(t) is W, HF_returns(t) is Y.

        # Let's adjust to be consistent with the paper's notation where:
        # X is VIX (conditioning variable)
        # Y is HF returns
        # W is S&P 500 returns

        # So, the original code had `w` as S&P500 and `y` as HF returns. Let's maintain that.
        # And `x` as VIX.
        for i in range(1, len(full_data_df)): # Start from 1 to allow for VIX.iloc[i-1]
            row = {
                'X_var': vix_series.iloc[i-1], # VIX from previous day as conditioning variable
                'Y_var': hf_returns_series.iloc[i], # HF returns for current day
                'W_var': sp500_returns_series.iloc[i] # S&P 500 returns for current day
            }
            prediction_data = pd.concat([prediction_data, pd.DataFrame([row])], ignore_index=True)

        X_vals = prediction_data[['X_var']].values
        Y_vals = prediction_data[['Y_var']].values
        W_vals = prediction_data[['W_var']].values

        x_test = np.array([[8.7 + i * 0.1] for i in range(500)]) # Keep the same test range
        corr_bootstrap = conditional_correlation_grf(X_vals, Y_vals, W_vals, x_test)
        return corr_bootstrap

    # Then call the updated function:
    t_HFRXM = show_result_bootstrap_updated(name_HFRXM, full_df_HFRXM)

    # The rest of your plotting code for HFRXM remains similar, just use t_HFRXM
    x_test_plot = np.array([8.7 + i * 0.1 for i in range(500)])
    data_plot_HFRXM = {'VIX' : x_test_plot , 'estimated correlation' : np.mean(t_HFRXM, axis = 0) ,
                      'upper bound':  np.sort(t_HFRXM, axis=0)[2,:],
                      'lower bound' :np.sort(t_HFRXM, axis=0)[-3,:] }
    df_plot_HFRXM = pd.DataFrame(data_plot_HFRXM)

    fig_HFRXM = px.line(df_plot_HFRXM, x="VIX", y = df_plot_HFRXM.columns)
    fig_HFRXM.update_layout(yaxis_range=[-0.8,1])
    # ... (rest of your plot layout code)
    fig_HFRXM.show()

# Repeat similar blocks for other hedge fund types (HFRXSDV, HFRXEMN, HFRXMA, HFRXMD)

# Simulation

## Conditional Correlation for Guassian with covariance 0.5

In [3]:
import econml
from econml.grf import CausalForest
import matplotlib.pyplot as plt
import numpy as np

import plotly.graph_objects as go
import plotly.express as px

In [4]:
n = 100000

# Mean vector (all means are 0)
mu = np.array([0, 0, 0])

# Covariance matrix (variance = 1, covariance = 0.5)
sigma = np.array([[1, 0.5, 0.5],
                  [0.5, 1, 0.5],
                  [0.5, 0.5, 1]])

# Generate the sample
sample = np.random.multivariate_normal(mu, sigma, n)
x = sample[:, 0:1]
w = sample[:, 1]
y = sample[:, 2]

forest = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=200)
forest.fit(x,y,w)

x_test = np.arange(-2, 2, 0.01).reshape(-1,1)
pred,lb,ub = forest.predict(x_test, interval  = True)

x_test = np.squeeze(x_test)
pred = np.squeeze(pred)
lb = np.squeeze(lb)
ub = np.squeeze(ub)

data = {'X' : x_test , 'estimated correlation' : pred , 'lower bound':lb , 'upper bound':ub , 'exact correlation' : np.ones(400)*0.3333}
df = pd.DataFrame(data)

In [5]:
fig = px.line(df, x="X", y = df.columns)
fig.update_layout(yaxis_range=[0.15,0.75])
fig.update_layout(xaxis=dict(
        title='X',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)

fig.show()


## Conditional Correlation for Guassian with covariance 0

In [6]:
import numpy as np
n = 100000

# Mean vector (all means are 0)
mu = np.array([0, 0, 0])

# Covariance matrix (variance = 1, covariance = 0.5)
sigma = np.array([[1, 0.0, 0.0],
                  [0.0, 1, 0.0],
                  [0.0, 0.0, 1]])

# Generate the sample
sample = np.random.multivariate_normal(mu, sigma, n)
x = sample[:, 0:1]
w = sample[:, 1]
y = sample[:, 2]

forest = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=200)
forest.fit(x,y,w)

x_test = np.arange(-2, 2, 0.01).reshape(-1,1)
pred,lb,ub = forest.predict(x_test, interval  = True)

x_test = np.squeeze(x_test)
pred = np.squeeze(pred)
lb = np.squeeze(lb)
ub = np.squeeze(ub)

data = {'X' : x_test , 'estimated correlation' : pred , 'lower bound':lb , 'upper bound':ub , 'exact correlation' : np.ones(400)*0.0}
df = pd.DataFrame(data)



In [7]:

fig = px.line(df, x="X", y = df.columns)
fig.update_layout(yaxis_range=[-0.15,0.5])
fig.update_layout(xaxis=dict(
        title='X',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()


### exponential conditional correlation

In [8]:
import numpy as np

n = 100000

# Generate standard normal random variables for Y_t, X_t, and epsilon_t
Y_t = np.random.normal(size=n)
X_t = np.random.uniform(low=0, high=10, size=n)
epsilon_t = np.random.normal(size=n)

# Calculate Z_t using the specified formula
Z_t = Y_t * np.exp(-X_t) + np.sqrt(1 - np.exp(-2 * X_t)) * epsilon_t

# Z_t now contains your generated sample
x = X_t.reshape(-1,1)
w = np.array(Y_t)
y = np.array(Z_t)

forest = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=200)
forest.fit(x,y,w)



x_test = np.arange(0.15, 10, 0.01).reshape(-1,1)
pred,lb,ub = forest.predict(x_test, interval  = True)

x_test = np.squeeze(x_test)
pred = np.squeeze(pred)
lb = np.squeeze(lb)
ub = np.squeeze(ub)

data = {'X' : x_test , 'estimated correlation' : pred , 'lower bound':lb , 'upper bound':ub , 'exact correlation' : np.exp(-x_test)}
df = pd.DataFrame(data)

fig = px.line(df, x="X", y = df.columns)
fig.update_layout(yaxis_range=[-0.2,1])
fig.update_layout(xaxis=dict(
        title='X',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()


In [9]:
import numpy as np

n = 100000

# Generate standard normal random variables for Y_t, X_t, and epsilon_t
W_t = np.random.normal(size=n)
X_t = np.random.uniform(low=0, high=10, size=n)
epsilon_t = np.random.normal(size=n)

# Calculate Z_t using the specified formula
Y_t = W_t * np.exp(-X_t) + epsilon_t

# Z_t now contains your generated sample

x = X_t.reshape(-1,1)
w = np.array(W_t)
y = np.array(Y_t)

forest = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=100)
forest.fit(x,y,w)


forest1 = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=100)
forest1.fit(x,w,y)




In [10]:

x_test = np.arange(0.15, 2, 0.01).reshape(-1,1)
pred,lb,ub = forest.predict(x_test, interval  = True)

pred1,lb1,ub1 = forest1.predict(x_test, interval  = True)

x_test = np.squeeze(x_test)
pred = np.squeeze(pred)

pred1 = np.squeeze(pred1)


In [None]:
data = {'X' : x_test , 'estimated correlation' : np.sign(pred) * np.sqrt(np.abs(pred * pred1)) , 'exact correlation' : np.exp(-x_test) / np.sqrt(np.exp(-2*x_test) + 1) , r'$\hat{\beta}_{WY}$' : pred ,  r'$\hat{\beta}_{YW}$' : pred1  ,

        r'$\beta_{WY}$' : np.exp(-x_test) / (np.exp(-2*x_test) + 1) ,
        r'$\beta_{YW}$' : np.exp(-x_test)
        }
df = pd.DataFrame(data)
fig = px.line(df, x="X", y = df.columns)
fig.update_layout(yaxis_range=[-0.2,1])
fig.update_layout(xaxis=dict(
        title='X',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)


fig.write_image("plot_with_latex.png")




In [None]:
import numpy as np

final = np.array([])
for i in range(100):
  n = 10000

  # Generate standard normal random variables for Y_t, X_t, and epsilon_t
  W_t = np.random.normal(size=n)
  X_t = np.random.uniform(low=0, high=10, size=n)
  epsilon_t = np.random.normal(size=n)

  # Calculate Z_t using the specified formula
  Y_t = W_t * np.exp(-X_t) + epsilon_t

  # Z_t now contains your generated sample

  x = X_t.reshape(-1,1)
  w = np.array(W_t)
  y = np.array(Y_t)

  forest = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=30)
  forest.fit(x,y,w)


  forest1 = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=30)
  forest1.fit(x,w,y)



  x_test = np.arange(0.15, 10, 0.01).reshape(-1,1)
  pred,lb,ub = forest.predict(x_test, interval  = True)

  pred1,lb1,ub1 = forest1.predict(x_test, interval  = True)

  x_test = np.squeeze(x_test)
  pred = np.squeeze(pred)

  pred1 = np.squeeze(pred1)

  final = np.append(final,  np.sign(pred) * np.sqrt(np.abs(pred*pred1)))

In [None]:

data = {'X' : x_test , 'estimated correlation' : np.mean(np.reshape(final , (100,985)), axis = 0) , 'exact correlation' : np.exp(-x_test) / np.sqrt(np.exp(-2*x_test) + 1) ,
        'upper bound':  np.sort(np.reshape(final , (100,985)), axis=0)[2,:]
,

        'lower bound' :np.sort(np.reshape(final , (100,985)), axis=0)[-3,:] }
df = pd.DataFrame(data)

fig = px.line(df, x="X", y = df.columns)
fig.update_layout(yaxis_range=[-0.2,1])
fig.update_layout(xaxis=dict(
        title='X',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()

In [None]:
n = 10000

# Generate standard normal random variables for Y_t, X_t, and epsilon_t
W_t = np.random.normal(size=n)
X_t = np.random.uniform(low=0, high=10, size=n)
epsilon_t = np.random.normal(size=n)

# Calculate Z_t using the specified formula
Y_t = W_t * np.exp(-X_t) + epsilon_t

# Z_t now contains your generated sample

final_bootstrap = np.array([])
for i in range(200):

  sample_size = 5000
  indices = np.random.choice(n, sample_size, replace=True)
  x = X_t[indices].reshape(-1,1)
  w = np.array(W_t[indices])
  y = np.array(Y_t[indices])

  forest = CausalForest(n_estimators = 500 ,criterion = "mse" ,min_samples_leaf=30)
  forest.fit(x,y,w)


  forest1 = CausalForest(n_estimators = 500 ,criterion = "mse" ,min_samples_leaf=30)
  forest1.fit(x,w,y)



  x_test = np.arange(0.15, 10, 0.01).reshape(-1,1)
  pred,lb,ub = forest.predict(x_test, interval  = True)

  pred1,lb1,ub1 = forest1.predict(x_test, interval  = True)

  x_test = np.squeeze(x_test)
  pred = np.squeeze(pred)

  pred1 = np.squeeze(pred1)

  final_bootstrap = np.append(final_bootstrap,  np.sign(pred) * np.sqrt(np.abs(pred*pred1)))

In [None]:

data = {'X' : x_test , 'estimated correlation' : np.mean(np.reshape(final_bootstrap , (200,985)), axis = 0) , 'exact correlation' : np.exp(-x_test) / np.sqrt(np.exp(-2*x_test) + 1) ,
        'upper bound':  np.sort(np.reshape(final_bootstrap , (200,985)), axis=0)[4,:]
,

        'lower bound' :np.sort(np.reshape(final_bootstrap , (200,985)), axis=0)[-5,:]  }
df = pd.DataFrame(data)

fig = px.line(df, x="X", y = df.columns)
fig.update_layout(yaxis_range=[-0.2,1])
fig.update_layout(xaxis=dict(
        title='X',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()

# Real Data

In [None]:
def get_daily_returns(data):
    return data.pct_change().dropna()

def get_data(ticker_symbol):
    ticker = yf.Ticker(ticker_symbol)
    data = ticker.history(period='20y')
    data.index = data.index.date
    return data

In [None]:
# def prepare_hedgfund_and_sp500(name,start_idx):
#   hfrx= pd.read_csv('data\\' + 'HFRX_historical_'+name+'.csv' ,  header=3)
#   hfrx = hfrx.iloc[-7::-1 , 0:2]
#   hfrx.index = pd.to_datetime(hfrx['Date'])
#   hfrx = hfrx.drop(columns=["Date"])
#   hfrx['Daily ROR'] = hfrx['Daily ROR'].str.rstrip("%").astype(float)/100



#   sp500_data = get_data('^GSPC')
#   sp500_returns = get_daily_returns(sp500_data['Close'])


#   returns_df = pd.concat([hfrx, sp500_returns], axis=1).dropna()
#   returns_df.columns = [name, 'S&P 500']
#   return returns_df

def prepare_hedgfund_and_sp500(name):
    # Load HFRX data
    # Assuming 'data/' + 'HFRX_historical_' + name + '.csv' exists as specified in your original code
    try:
        hfrx_df = pd.read_csv('data/' + 'HFRX_historical_' + name + '.csv', header=3)
    except FileNotFoundError:
        print(f"Error: HFRX data file not found for {name}. Please ensure 'data/{'HFRX_historical_' + name + '.csv'}' exists.")
        return None

    # Original parsing for HFRX data
    hfrx_df = hfrx_df.iloc[-7::-1 , 0:2] # This seems to be custom for your CSV structure
    hfrx_df.columns = ['Date', 'Daily ROR'] # Ensure correct column names
    hfrx_df['Date'] = pd.to_datetime(hfrx_df['Date'])
    hfrx_df = hfrx_df.set_index('Date')
    hfrx_df['Daily ROR'] = hfrx_df['Daily ROR'].str.rstrip("%").astype(float) / 100
    hfrx_df.rename(columns={'Daily ROR': f'{name}_returns'}, inplace=True) # Rename for clarity

    # Get S&P 500 data
    sp500_data = yf.Ticker('^GSPC').history(period='20y')
    sp500_returns = sp500_data['Close'].pct_change().dropna()
    sp500_returns.index = sp500_returns.index.date
    sp500_returns = sp500_returns.rename('S&P500_returns') # Rename for clarity
    sp500_returns.index = pd.to_datetime(sp500_returns.index) # Convert index to datetime for merging

    # Get VIX data
    vix_data = yf.Ticker('^VIX').history(period='20y')
    vix_levels = vix_data['Close'].dropna()
    vix_levels.index = vix_levels.index.date
    vix_levels = vix_levels.rename('VIX') # Rename for clarity
    vix_levels.index = pd.to_datetime(vix_levels.index) # Convert index to datetime for merging

    # Perform inner joins sequentially to ensure all three series align on trading dates
    # Start with HFRX and S&P 500 returns
    merged_data = pd.merge(hfrx_df, sp500_returns, left_index=True, right_index=True, how='inner')
    # Then merge with VIX levels
    final_dataset = pd.merge(merged_data, vix_levels, left_index=True, right_index=True, how='inner')

    # Ensure the returns are aligned with the VIX level from the *previous* day if needed,
    # or ensure that the VIX 'X' is for the conditioning variable on the same day as returns 'Y' and 'W'.
    # Your current 'show_result_bootstrap' uses VIX.iloc[i-1] for X, so we need to ensure this alignment.
    # For daily data, the VIX on day t-1 is conditioning on returns on day t.
    # We will pass 'VIX' as a column in the DataFrame, and the `show_result_bootstrap` function will handle
    # the lag if its current logic expects it.

    return final_dataset

In [None]:
def conditional_correlation_grf(X,y,w , x_test = np.array([ [8.7 + i * 0.1] for i in range(500)])):

  n = X.shape[0]
  n_bootstrap = 100
  n_predict = x_test.shape[0]

  corr_bootstrap = np.array([])
  for i in range(n_bootstrap) :
    print(i)
    indices = np.random.choice(n, int(0.7 * n ), replace=True)

    x_selected = X[indices]
    w_selected = w[indices]
    y_selected = y[indices]

    forest_y_w = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=30)
    forest_y_w.fit(x_selected,y_selected,w_selected)

    forest_w_y = CausalForest(n_estimators = 1000 ,criterion = "mse" ,min_samples_leaf=30)
    forest_w_y.fit(x_selected,w_selected,y_selected)

    pred_y_w = forest_y_w.predict(x_test, interval  = False)
    pred_w_y = forest_w_y.predict(x_test, interval  = False)
    pred_y_w = np.squeeze(pred_y_w)
    pred_w_y  = np.squeeze(pred_w_y)
    corr_est = np.sign(pred_y_w)* np.sqrt(np.abs(pred_y_w * pred_w_y))
    corr_bootstrap = np.append(corr_bootstrap ,corr_est)

  return np.reshape(corr_bootstrap,(n_bootstrap,n_predict))


In [None]:
def show_result_bootstrap(name,returns):
  vix = get_data('^VIX')['Close']
  vix.rename('VIX', inplace=True)
  #print(returns)
  df = pd.merge(returns,vix,left_index=True , right_index=True , how = 'inner')
  vix = df['VIX']
  returns = df[returns.columns]
  data =  pd.concat([vix, returns], axis=1).dropna()
  prediction_data = pd.DataFrame()
  #print(data)
  for i in range(10, len(data)):
      row = {
          'x': data['VIX'].iloc[i-1],
          'w' : data['S&P 500'].iloc[i],
          'y': data[name].iloc[i]
      }

      prediction_data = pd.concat([prediction_data, pd.DataFrame([row])], ignore_index=True)

  X = prediction_data.iloc[:,0:1].values
  y = prediction_data.iloc[:,1:2].values
  w = prediction_data.iloc[:,2:3].values

  x_test = np.array([ [8.7 + i * 0.1] for i in range(500)])
  corr_bootstrap  = conditional_correlation_grf(X,y,w , x_test)
  return corr_bootstrap






## Macro hedge fund

In [None]:
name = 'HFRXM'
returns = prepare_hedgfund_and_sp500(name, 0 )
t = show_result_bootstrap( name , returns)

  indexer = self._engine.get_indexer(tgt_values)
  return self._engine.is_unique
  return self._engine.is_monotonic_increasing
  indexer = self._engine.get_indexer(tgt_values)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [None]:
x_test = np.array([ 8.7 + i * 0.1 for i in range(500)])
data = {'VIX' : x_test , 'estimated correlation' : np.mean(t, axis = 0) ,
      'upper bound':  np.sort(t, axis=0)[2,:]
,

        'lower bound' :np.sort(t, axis=0)[-3,:] }
df = pd.DataFrame(data)

fig = px.line(df, x="VIX", y = df.columns)
fig.update_layout(yaxis_range=[-0.8,1])
fig.update_layout(xaxis=dict(
        title='VIX',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()

## systematic diversified hedfunds

In [None]:
name = 'HFRXSDV'
returns = prepare_hedgfund_and_sp500(name, 0 )

t = show_result_bootstrap( name , returns)


x_test = np.array([ 8.7 + i * 0.1 for i in range(500)])
data = {'VIX' : x_test , 'estimated correlation' : np.mean(t, axis = 0) ,
      'upper bound':  np.sort(t, axis=0)[2,:]
,

        'lower bound' :np.sort(t, axis=0)[-3,:] }
df = pd.DataFrame(data)

fig = px.line(df, x="VIX", y = df.columns)
fig.update_layout(yaxis_range=[-0.8,1])
fig.update_layout(xaxis=dict(
        title='VIX',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


## market nuetral hedge funds

In [None]:
name = 'HFRXEMN'
returns = prepare_hedgfund_and_sp500(name, 0 )

t = show_result_bootstrap( name , returns)


x_test = np.array([ 8.7 + i * 0.1 for i in range(500)])
data = {'VIX' : x_test , 'estimated correlation' : np.mean(t, axis = 0) ,
      'upper bound':  np.sort(t, axis=0)[2,:]
,

        'lower bound' :np.sort(t, axis=0)[-3,:] }
df = pd.DataFrame(data)

fig = px.line(df, x="VIX", y = df.columns)
fig.update_layout(yaxis_range=[-0.8,1])
fig.update_layout(xaxis=dict(
        title='VIX',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=1,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75


## merger arbitrage

In [None]:
name = 'HFRXMA'
returns = prepare_hedgfund_and_sp500(name, 0 )

t = show_result_bootstrap( name , returns)


x_test = np.array([ 8.7 + i * 0.1 for i in range(500)])
data = {'VIX' : x_test , 'estimated correlation' : np.mean(t, axis = 0) ,
      'upper bound':  np.sort(t, axis=0)[2,:]
,

        'lower bound' :np.sort(t, axis=0)[-3,:] }
df = pd.DataFrame(data)

fig = px.line(df, x="VIX", y = df.columns)
fig.update_layout(yaxis_range=[-0.8,1])
fig.update_layout(xaxis=dict(
        title='VIX',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=0,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


## market directional

In [None]:
name = 'HFRXMD'
returns = prepare_hedgfund_and_sp500(name, 0 )

t = show_result_bootstrap( name , returns)


x_test = np.array([ 8.7 + i * 0.1 for i in range(500)])
data = {'VIX' : x_test , 'estimated correlation' : np.mean(t, axis = 0) ,
      'upper bound':  np.sort(t, axis=0)[2,:]
,

        'lower bound' :np.sort(t, axis=0)[-3,:] }
df = pd.DataFrame(data)

fig = px.line(df, x="VIX", y = df.columns)
fig.update_layout(yaxis_range=[-0.8,1])
fig.update_layout(xaxis=dict(
        title='VIX',
        titlefont=dict(size=24 , family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),# Set x-axis title font size here
    ),
    yaxis=dict(
        title=r'Conditional Correaltion',
        titlefont=dict(size=30, family="Times New Roman"),
        tickfont=dict(size=24, family="Times New Roman"),
    ),
    legend=dict(
          title='',
          title_font=dict(size=24, family="Times New Roman"),
          font=dict(size=28, family="Times New Roman"),
          bgcolor='rgba(255,255,255,0.5)',
          bordercolor="Black",
          borderwidth=2,
        x=1,  # Positioning on the x-axis (1 is at the far right of the graph)
        xanchor='auto',  # Anchoring the legend
        y=0,  # Positioning on the y-axis (1 is at the top of the graph)
        yanchor='auto'  # Anchoring the legend

    )
)
fig.update_layout(
    width=233 * 6,  # Width of the plot in pixels
    height=92 * 6  # Height of the plot in pixels
)
fig.show()

FileNotFoundError: [Errno 2] No such file or directory: 'HFRX_historical_HFRXMD.csv'