---
# **SWEFI: Applications to Macroeconomics**

---

In [2]:
from stability_weighted_ensemble_feature_importance import *
from synthetic_dataset_generation import *

In [24]:
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import datetime

# ------------------------------------------------------------------------------
# Assumption: SWEFI and UAMeasure are imported from your library.
# For example:
# from my_swefi_module import SWEFI, UAMeasure
# ------------------------------------------------------------------------------

def load_and_process_hf_file(file_path, skip_rows=4):
    """
    Loads a hedge fund CSV file by skipping extra header rows.
    Then:
      - Keeps only the 'Date' and 'Daily ROR' columns,
      - Parses the 'Daily ROR' column (removing '%' and converting to float),
      - Converts the 'Date' column to datetime (invalid dates become NaT),
      - Drops rows missing Date or Daily ROR,
      - Normalizes the Date index (time set to midnight) and sorts.
    """
    df = pd.read_csv(file_path, skiprows=skip_rows)
    df = df[['Date', 'Daily ROR']]
    
    def parse_ror(x):
        try:
            if isinstance(x, str):
                return float(x.strip().strip('%')) / 100.0
            return float(x)
        except Exception:
            return np.nan

    df['Daily ROR'] = df['Daily ROR'].apply(parse_ror)
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df = df.dropna(subset=['Date', 'Daily ROR'])
    df.set_index('Date', inplace=True)
    df.index = df.index.normalize()
    df.sort_index(inplace=True)
    return df

# ------------------ Load Hedge Fund Data ------------------
file_paths = {
    'HFRXEMN': 'hedgefund_data/HFRX_historical_HFRXEMN.csv',
    'HFRXM':   'hedgefund_data/HFRX_historical_HFRXM.csv',
    'HFRXMA':  'hedgefund_data/HFRX_historical_HFRXMA.csv',
    'HFRXMD':  'hedgefund_data/HFRX_historical_HFRXMD.csv',
    'HFRXSDV': 'hedgefund_data/HFRX_historical_HFRXSDV.csv'
}

hfrx_emn = load_and_process_hf_file(file_paths['HFRXEMN'], skip_rows=4)
hfrx_macro_cta = load_and_process_hf_file(file_paths['HFRXM'], skip_rows=4)
hfrx_ed = load_and_process_hf_file(file_paths['HFRXMA'], skip_rows=4)
hfrx_md = load_and_process_hf_file(file_paths['HFRXMD'], skip_rows=4)
hfrx_sdv = load_and_process_hf_file(file_paths['HFRXSDV'], skip_rows=4)

# Rename columns for clarity.
hfrx_emn.rename(columns={'Daily ROR': 'HFRXEMN_ROR'}, inplace=True)
hfrx_macro_cta.rename(columns={'Daily ROR': 'HFRXM_ROR'}, inplace=True)
hfrx_ed.rename(columns={'Daily ROR': 'HFRXMA_ROR'}, inplace=True)
hfrx_md.rename(columns={'Daily ROR': 'HFRXMD_ROR'}, inplace=True)
hfrx_sdv.rename(columns={'Daily ROR': 'HFRXSDV_ROR'}, inplace=True)

# ------------------ Download Public Data (S&P500 and VIX) ------------------
# Use hedge fund data's date range as reference.
hf_start = hfrx_emn.index.min()
hf_end   = hfrx_emn.index.max()
start_date = hf_start.strftime('%Y-%m-%d')
end_date   = hf_end.strftime('%Y-%m-%d')

print("Hedge fund date range:", hf_start, "to", hf_end)

# Download SP500 data.
sp500_data = yf.download("^GSPC", start=start_date, end=end_date, interval="1d")
print("Raw SP500 data date range:", sp500_data.index.min(), "to", sp500_data.index.max())
# If columns are MultiIndex, flatten them.
if isinstance(sp500_data.columns, pd.MultiIndex):
    sp500_data.columns = sp500_data.columns.get_level_values(1)
    
if 'Adj Close' in sp500_data.columns:
    sp500 = sp500_data['Adj Close']
elif 'Close' in sp500_data.columns:
    sp500 = sp500_data['Close']
else:
    print("SP500 data columns:", sp500_data.columns.tolist())
    sp500 = sp500_data.iloc[:, 0]  # default to the first column

sp500 = sp500.sort_index().astype('float64')
print("First 5 rows of SP500:\n", sp500.head())
sp500_return = sp500.pct_change().fillna(method='ffill').fillna(method='bfill')
print("First 5 rows of SP500_Return:\n", sp500_return.head())
sp500_return.name = "SP500_Return"
sp500_return_df = pd.DataFrame(sp500_return)
sp500_return_df.index = sp500_return_df.index.normalize()

# Download VIX data.
vix_data = yf.download("^VIX", start=start_date, end=end_date, interval="1d")
print("Raw VIX data date range:", vix_data.index.min(), "to", vix_data.index.max())
if isinstance(vix_data.columns, pd.MultiIndex):
    vix_data.columns = vix_data.columns.get_level_values(1)
    
if 'Adj Close' in vix_data.columns:
    vix = vix_data['Adj Close']
elif 'Close' in vix_data.columns:
    vix = vix_data['Close']
else:
    print("VIX data columns:", vix_data.columns.tolist())
    vix = vix_data.iloc[:, 0]
vix = vix.sort_index().astype('float64')
vix_return = vix.pct_change().fillna(method='ffill').fillna(method='bfill')
vix_return.name = "VIX_ROR"
vix_return_df = pd.DataFrame(vix_return)
vix_return_df.index = vix_return_df.index.normalize()

# ------------------ Merge Datasets ------------------
hedge_fund_df = pd.concat([hfrx_emn, hfrx_macro_cta, hfrx_ed, hfrx_md, hfrx_sdv], axis=1)
# df = hedge_fund_df.join(sp500_return_df, how='inner').join(vix_return_df, how='inner')
df = hedge_fund_df.join(sp500_return_df, how='inner')
print("Merged DataFrame columns:", df.columns.tolist())
print("Merged DataFrame date range:", df.index.min(), "to", df.index.max())

# critical_columns = ['HFRXEMN_ROR', 'HFRXM_ROR', 'HFRXMA_ROR', 'HFRXMD_ROR', 'HFRXSDV_ROR', 'SP500_Return', 'VIX_ROR']
critical_columns = ['HFRXEMN_ROR', 'HFRXM_ROR', 'HFRXMA_ROR', 'HFRXMD_ROR', 'HFRXSDV_ROR', 'SP500_Return']
df.dropna(subset=critical_columns, inplace=True)

# ------------------ Define Features and Response ------------------
if "SP500_Return" not in df.columns:
    raise KeyError("SP500_Return column not found in merged DataFrame. Check date ranges and column names.")

df['Direction'] = np.where(df['SP500_Return'] > 0, 1, 0)
print("Target value counts:\n", df['Direction'].value_counts())
print("SP500_Return stats:\n", df['SP500_Return'].describe())

if df['Direction'].empty:
    raise ValueError("The target variable is empty after processing. Check the SP500_Return values.")

start_analysis_date = pd.to_datetime("2020-01-01")
end_analysis_date = pd.to_datetime("2021-01-01")
df = df.loc[start_analysis_date:end_analysis_date]

X = df.drop(columns=['SP500_Return', 'Direction'])
y = df['Direction']

# ------------------ Apply the SWEFI Algorithm ------------------
select_n_model = 5
bootstrap_method = SWEFI.stationary_bootstrap  # Adjust per your implementation.
hpo_n_fold = 4
hpo_n_iter = 25
hpo_metric = 'AUC'
hpo_search_library = 'scikit-optimize'
hpo_search_algorithm = 'bayesian'
n_iteration = 5
percentage = 0.6

swefi_hf = SWEFI(X, y, n_fold=10)
# Override the internal PyCaret setup.
swefi_hf.clfx.setup(data=X, target=y, fold=10, train_size=0.8, data_split_stratify=False,
                    session_id=123, n_jobs=-1, normalize=True, normalize_method='zscore')

swefi_hf.select_models(select_n_model=select_n_model)
swefi_hf.fine_tune_selected_models(
    hpo_n_fold=hpo_n_fold,
    hpo_n_iter=hpo_n_iter,
    hpo_metric=hpo_metric,
    hpo_search_algorithm=hpo_search_algorithm,
    hpo_search_library=hpo_search_library
)
swefi_hf.select_univariate_analysis_measurements(measurements=[
    UAMeasure.MUTUAL_INFORMATION.value,
    UAMeasure.ANOVA_F.value,
])
swefi_hf.compute_feature_importance_data(
    bootstrap_method=bootstrap_method,
    n_iteration=n_iteration,
    n_repeats=10
)
swefi_hf.compute_swefi_scores(percentage=percentage)
swefi_scores_hf = swefi_hf.get_swefi_scores()

swefi_scores_hf.to_csv('swefi_hedge_fund_results.csv')
print("SWEFI analysis on hedge fund data completed successfully!")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Hedge fund date range: 2003-03-31 00:00:00 to 2023-08-23 00:00:00
Raw SP500 data date range: 2003-03-31 00:00:00 to 2023-08-22 00:00:00
SP500 data columns: ['^GSPC', '^GSPC', '^GSPC', '^GSPC', '^GSPC']
First 5 rows of SP500:
 Date
2003-03-31    848.179993
2003-04-01    858.479980
2003-04-02    880.900024
2003-04-03    876.450012
2003-04-04    878.849976
Name: ^GSPC, dtype: float64
First 5 rows of SP500_Return:
 Date
2003-03-31    0.012144
2003-04-01    0.012144
2003-04-02    0.026116
2003-04-03   -0.005052
2003-04-04    0.002738
Name: ^GSPC, dtype: float64
Raw VIX data date range: 2003-03-31 00:00:00 to 2023-08-22 00:00:00
VIX data columns: ['^VIX', '^VIX', '^VIX', '^VIX', '^VIX']
Merged DataFrame columns: ['HFRXEMN_ROR', 'HFRXM_ROR', 'HFRXMA_ROR', 'HFRXMD_ROR', 'HFRXSDV_ROR', 'SP500_Return']
Merged DataFrame date range: 2003-03-31 00:00:00 to 2023-08-22 00:00:00
Target value counts:
 Direction
1    2019
0    1706
Name: count, dtype: int64
SP500_Return stats:
 count    3725.000000
mean




Unnamed: 0,Description,Value
0,Session id,123
1,Target,Direction
2,Target type,Binary
3,Original data shape,"(253, 6)"
4,Transformed data shape,"(253, 6)"
5,Transformed train set shape,"(250, 6)"
6,Transformed test set shape,"(3, 6)"
7,Numeric features,5
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Description,Value
0,Session id,123
1,Target,Direction
2,Target type,Binary
3,Original data shape,"(253, 6)"
4,Transformed data shape,"(253, 6)"
5,Transformed train set shape,"(202, 6)"
6,Transformed test set shape,"(51, 6)"
7,Numeric features,5
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
0,Extra Trees Classifier,0.7026,0.7272,0.7439,0.732,0.7348,0.3959,0.4006,0.829
1,MLP Classifier,0.6983,0.7244,0.7439,0.7265,0.7343,0.3851,0.3864,0.158
9,SVM - Radial Kernel,0.6981,0.7411,0.8318,0.6947,0.7537,0.3703,0.3872,0.004
6,Random Forest Classifier,0.6979,0.7305,0.7091,0.7508,0.7248,0.3896,0.3962,0.021
8,Gradient Boosting Classifier,0.6971,0.7348,0.7598,0.7266,0.7373,0.3783,0.3875,0.011
5,Logistic Regression,0.6929,0.7441,0.8045,0.6967,0.7448,0.362,0.3717,0.003
4,Linear Discriminant Analysis,0.6679,0.7262,0.8568,0.6548,0.7411,0.2976,0.3265,0.005
7,Ridge Classifier,0.6679,0.731,0.8568,0.6548,0.7411,0.2976,0.3265,0.006
3,Decision Tree Classifier,0.6486,0.6489,0.6477,0.7139,0.6738,0.2931,0.301,0.116
2,Quadratic Discriminant Analysis,0.6143,0.6217,0.8242,0.6176,0.7047,0.1787,0.202,0.154


--------------------------------------------------------------------------------
ExtraTreesClassifier


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6863,0.7555,0.6552,0.76,0.7037,0.3742,0.3789
1,0.8235,0.8401,0.8571,0.8276,0.8421,0.6422,0.6428
2,0.68,0.8003,0.75,0.7,0.7241,0.3443,0.3454
3,0.72,0.7273,0.8214,0.7188,0.7667,0.4205,0.4264
Mean,0.7275,0.7808,0.7709,0.7516,0.7592,0.4453,0.4484
Std,0.0575,0.043,0.0772,0.049,0.053,0.1169,0.1159


Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fi

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6863,0.779,0.6207,0.7826,0.6923,0.3809,0.3916
1,0.8627,0.8882,0.8929,0.8621,0.8772,0.7217,0.7223
2,0.58,0.6558,0.7857,0.5946,0.6769,0.1087,0.1176
3,0.76,0.7597,0.7857,0.7857,0.7857,0.513,0.513
Mean,0.7223,0.7707,0.7712,0.7562,0.758,0.4311,0.4361
Std,0.1033,0.0824,0.0973,0.0986,0.0804,0.2223,0.2187


Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fi

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7059,0.768,0.8276,0.7059,0.7619,0.3836,0.3919
1,0.7647,0.8028,0.8571,0.75,0.8,0.5174,0.5242
2,0.68,0.7338,0.8929,0.6579,0.7576,0.3174,0.3509
3,0.7,0.7013,0.7857,0.7097,0.7458,0.3822,0.3852
Mean,0.7126,0.7515,0.8408,0.7059,0.7663,0.4001,0.4131
Std,0.0316,0.0379,0.0393,0.0326,0.0203,0.0728,0.066


Fitting 4 folds for each of 1 candidates, totalling 4 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).
--------------------------------------------------------------------------------
RandomForestClassifier


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.7255,0.7524,0.7931,0.7419,0.7667,0.4342,0.4357
1,0.8039,0.8447,0.8571,0.8,0.8276,0.6009,0.6029
2,0.66,0.7865,0.6786,0.7037,0.6909,0.3134,0.3137
3,0.72,0.7654,0.75,0.75,0.75,0.4318,0.4318
Mean,0.7274,0.7873,0.7697,0.7489,0.7588,0.4451,0.446
Std,0.0511,0.0353,0.065,0.0343,0.0487,0.1024,0.103


Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fits
Fitting 4 folds for each of 1 candidates, totalling 4 fi

  0%|          | 0/5 [00:00<?, ?it/s]

SWEFI analysis on hedge fund data completed successfully!


In [4]:
import pandas as pd
import plotly.graph_objects as go
import yfinance as yf

# ------------------ Helper Function to Load Hedge Fund Price Data ------------------
def load_hf_price_file(file_path, skip_rows=4):
    """
    Loads a hedge fund CSV file (e.g. HFRXEMN) by skipping extra header rows.
    Returns a DataFrame with the 'Index Value' column (price level) and Date as index.
    """
    df = pd.read_csv(file_path, skiprows=skip_rows)
    # Keep only the 'Date' and 'Index Value' columns.
    df = df[['Date', 'Index Value']]
    # Convert Date to datetime.
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    # Drop rows with missing Date or Index Value.
    df = df.dropna(subset=['Date', 'Index Value'])
    # Remove any commas and convert the Index Value to float.
    df['Index Value'] = df['Index Value'].astype(str).str.replace(',', '').astype(float)
    df.set_index('Date', inplace=True)
    # Normalize index to date only (time=00:00:00).
    df.index = df.index.normalize()
    df.sort_index(inplace=True)
    return df

# ------------------ Load HFRXEMN Price Data ------------------
hf_price = load_hf_price_file('hedgefund_data/HFRX_historical_HFRXEMN.csv', skip_rows=4)
hf_price.rename(columns={'Index Value': 'HFRXEMN_Price'}, inplace=True)

# ------------------ Download S&P500 Price Data ------------------
# Use the same date range as the hedge fund data.
start_date = hf_price.index.min().strftime('%Y-%m-%d')
end_date   = hf_price.index.max().strftime('%Y-%m-%d')
sp500_data = yf.download("^GSPC", start=start_date, end=end_date, interval="1d")

# If the downloaded DataFrame has a MultiIndex in columns, flatten it.
if isinstance(sp500_data.columns, pd.MultiIndex):
    sp500_data.columns = sp500_data.columns.get_level_values(1)

# Use 'Adj Close' if available, otherwise 'Close'.  
if 'Adj Close' in sp500_data.columns:
    sp500_price = sp500_data['Adj Close']
elif 'Close' in sp500_data.columns:
    sp500_price = sp500_data['Close']
else:
    print("Available SP500 columns:", sp500_data.columns.tolist())
    sp500_price = sp500_data.iloc[:, 0]

sp500_price = sp500_price.sort_index().astype('float64')
sp500_price.index = sp500_price.index.normalize()

# ------------------ Download VIX Price Data ------------------
vix_data = yf.download("^VIX", start=start_date, end=end_date, interval="1d")

if isinstance(vix_data.columns, pd.MultiIndex):
    vix_data.columns = vix_data.columns.get_level_values(1)

if 'Adj Close' in vix_data.columns:
    vix_price = vix_data['Adj Close']
elif 'Close' in vix_data.columns:
    vix_price = vix_data['Close']
else:
    print("Available VIX columns:", vix_data.columns.tolist())
    vix_price = vix_data.iloc[:, 0]

vix_price = vix_price.sort_index().astype('float64')
vix_price.index = vix_price.index.normalize()

# ------------------ Print Date Ranges for Verification ------------------
print("HFRXEMN Price date range:", hf_price.index.min(), "to", hf_price.index.max())
print("SP500 Price date range:", sp500_price.index.min(), "to", sp500_price.index.max())
print("VIX Price date range:", vix_price.index.min(), "to", vix_price.index.max())

# ------------------ Define Periods Based on Common Dates ------------------
# Compute the common index between HF and SP500.
common_index = hf_price.index.intersection(sp500_price.index)

# Define two periods:
period1_mask = (common_index >= '2003-03-31') #& (common_index <= '2017-12-31')
# period2_mask = (common_index >= '2018-01-01') & (common_index <= end_date)

common_index_period1 = common_index[period1_mask]
# common_index_period2 = common_index[period2_mask]

hf_price_period1 = hf_price.loc[common_index_period1]
# hf_price_period2 = hf_price.loc[common_index_period2]
sp500_price_period1 = sp500_price.loc[common_index_period1]
# sp500_price_period2 = sp500_price.loc[common_index_period2]

# ------------------ Figure 1: S&P500 Price and VIX Price (Dual Axis) ------------------
fig_sp_vix = go.Figure()

fig_sp_vix.add_trace(go.Scatter(
    x=sp500_price.index,
    y=sp500_price,
    mode='lines',
    name='S&P500 Price',
    line=dict(color='blue')
))

fig_sp_vix.add_trace(go.Scatter(
    x=vix_price.index,
    y=vix_price,
    mode='lines',
    name='VIX Price',
    line=dict(color='red'),
    yaxis="y2"
))

fig_sp_vix.update_layout(
    title="S&P500 Price and VIX Price Over Time",
    xaxis_title="Date",
    yaxis=dict(
        title="S&P500 Price",
        titlefont=dict(color="blue"),
        tickfont=dict(color="blue")
    ),
    yaxis2=dict(
        title="VIX Price",
        titlefont=dict(color="red"),
        tickfont=dict(color="red"),
        overlaying="y",
        side="right"
    ),
    legend=dict(x=0.01, y=0.99)
)
fig_sp_vix.show()

# ------------------ Figure 2: S&P500 Price and HFRXEMN Price ------------------
fig_period1 = go.Figure()

fig_period1.add_trace(go.Scatter(
    x=sp500_price_period1.index,
    y=sp500_price_period1,
    mode='lines',
    name='S&P500 Price (2003-2017)',
    line=dict(color='blue')
))

fig_period1.add_trace(go.Scatter(
    x=hf_price_period1.index,
    y=hf_price_period1['HFRXEMN_Price'],
    mode='lines',
    name='HFRXEMN Price (2003-2017)',
    line=dict(color='red'),
    yaxis="y2"
))

fig_period1.update_layout(
    title="S&P500 Price and HFRXEMN Price (2003-2017)",
    xaxis_title="Date",
    yaxis=dict(
        title="S&P500 Price",
        titlefont=dict(color="blue"),
        tickfont=dict(color="blue")
    ),
    yaxis2=dict(
        title="HFRXEMN Price",
        titlefont=dict(color="red"),
        tickfont=dict(color="red"),
        overlaying="y",
        side="right"
    ),
    legend=dict(x=0.01, y=0.99)
)
fig_period1.show()



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Available SP500 columns: ['^GSPC', '^GSPC', '^GSPC', '^GSPC', '^GSPC']
Available VIX columns: ['^VIX', '^VIX', '^VIX', '^VIX', '^VIX']
HFRXEMN Price date range: 2003-03-31 00:00:00 to 2023-08-23 00:00:00
SP500 Price date range: 2003-03-31 00:00:00 to 2023-08-22 00:00:00
VIX Price date range: 2003-03-31 00:00:00 to 2023-08-22 00:00:00





In [26]:
# ------------------ Figure 3: Feature Importance Scores from SWEFI ------------------
# Here, we plot the mean feature importance scores with error bars showing standard deviation.
# Adjust the column names if your SWEFI result DataFrame differs.

# swefi_hf.compute_swefi_scores(percentage=percentage)
swefi_scores_hf = swefi_hf.get_swefi_scores()

# swefi_scores_hf.to_csv('swefi_hedge_fund_results.csv')
# print("SWEFI analysis on hedge fund data completed successfully!")

fig3 = go.Figure(data=[
    go.Bar(
        name="SWEFI Score",
        x=swefi_scores_hf.index,
        y=swefi_scores_hf['mean(SWEFI)'],
        error_y=dict(
            type='data',
            array=swefi_scores_hf['std(SWEFI)']
        )
    )
])

fig3.update_layout(
    title="Feature Importance Scores from SWEFI",
    xaxis_title="Feature",
    yaxis_title="Mean SWEFI Score",
    barmode='group',
    xaxis=dict(
        tickangle=45,
        tickfont=dict(
            family='Arial',
            size=12,
            color='black'
        )
    ),
    yaxis=dict(
        titlefont=dict(
            family='Arial',
            size=14,
            color='black'
        )
    ),
    margin=dict(l=50, r=50, b=100, t=100),
    paper_bgcolor='white',
    plot_bgcolor='lightgrey'
)
fig3.show()

In [None]:
# import plotly.graph_objects as go

features = ["HFRXEMN_ROR", "HFRXM_ROR", "HFRXSDV_ROR", "HFRXMA_ROR", "HFRXMD_ROR"]

# The followign are hard coded values from the SWEFI analysis.
# There are two periods: from 2019 to 2020 and then from 2020 to 2021.  
# The first period is considered a normal period and the second a crisis period.

normal_scores = [0.01, 0.12, 0.20, 0.23, 0.44]
crisis_scores = [0.03, 0.04, 0.15, 0.20, 0.55]

normal_errors = [0.005, 0.01, 0.015, 0.015, 0.02]
crisis_errors = [0.005, 0.005, 0.02, 0.02, 0.02]

fig = go.Figure()

fig.add_trace(go.Bar(
    name='Normal Period',
    x=features,
    y=normal_scores,
    error_y=dict(type='data', array=normal_errors, visible=True),
    marker_color='skyblue'
))

fig.add_trace(go.Bar(
    name='Crisis Period',
    x=features,
    y=crisis_scores,
    error_y=dict(type='data', array=crisis_errors, visible=True),
    marker_color='dodgerblue'
))

fig.update_layout(
    barmode='group',
    title='Hedge Fund Feature Importance Scores from SWEFI (Normal vs Crisis Period)',
    xaxis_title='Feature',
    yaxis_title='Mean SWEFI Score',
    xaxis_tickangle=-45,
    legend_title_text='Period',
    template='plotly_white',
    height=600,
    width=900
)

fig.show()
