In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Function to calculate Newey-West standard error
def newey_west_variance(differences, lag=1):
    """
    Computes the Newey-West variance for the series of differences.
    
    Parameters:
    - differences: Array-like of differences (e.g., error differences between two models)
    - lag: Maximum lag to use for the Newey-West estimator (default is 1)
    
    Returns:
    - Newey-West variance of the differences
    """
    # Create a simple OLS model for the differences
    X = np.ones(len(differences))  # Constant term
    ols_model = sm.OLS(differences, X).fit(cov_type='HAC', cov_kwds={'maxlags': lag})
    
    # Return the estimated variance (squared standard error of the constant term)
    nw_variance = ols_model.bse[0]**2
    return nw_variance

# Function to compute the DM test statistic
def dm_test_statistic(errors1, errors2, lag=1):
    """
    Computes the Diebold-Mariano test statistic between two sets of errors.
    
    Parameters:
    - errors1: First set of errors (e.g., from the first model)
    - errors2: Second set of errors (e.g., from the second model)
    - lag: Maximum lag to use for the Newey-West estimator (default is 1)
    
    Returns:
    - DM test statistic
    """
    # Calculate the error differences
    differences = errors1 - errors2
    
    # Calculate the average difference (mean of d_t)
    mean_diff = np.mean(differences)
    
    # Calculate Newey-West variance
    nw_variance = newey_west_variance(differences, lag)
    
    # Compute the DM test statistic
    dm_statistic = mean_diff / np.sqrt(nw_variance / len(differences))
    
    return dm_statistic

# Load the CSV files
dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_ols.csv')
adhbs_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_adhbs.csv')

# Ensure both dataframes have the same length (if not, pad with zeros)
max_length = max(len(dm_test_data), len(adhbs_test_data))

dm_test_data = dm_test_data.reindex(range(max_length), fill_value=0)
adhbs_test_data = adhbs_test_data.reindex(range(max_length), fill_value=0)

# Calculate DM test statistic for Call errors
dm_call_errors = dm_test_data['Call Errors']
adhbs_call_errors = adhbs_test_data['Call Errors']

dm_statistic_call = dm_test_statistic(dm_call_errors, adhbs_call_errors, lag=1)

# Calculate DM test statistic for Put errors
dm_put_errors = dm_test_data['Put Errors']
adhbs_put_errors = adhbs_test_data['Put Errors']

dm_statistic_put = dm_test_statistic(dm_put_errors, adhbs_put_errors, lag=1)

# Calculate the average of the two DM test statistics
dm_statistic_avg = (dm_statistic_call + dm_statistic_put) / 2

# Output the DM test statistics
print(f"DM Test Statistic for Calls: {dm_statistic_call}")
print(f"DM Test Statistic for Puts: {dm_statistic_put}")
print(f"Average DM Test Statistic: {dm_statistic_avg}")


DM Test Statistic for Calls: 7175.329374924573
DM Test Statistic for Puts: 12101.872043841688
Average DM Test Statistic: 9638.60070938313


  nw_variance = ols_model.bse[0]**2


In [33]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Function to calculate Newey-West standard error
def newey_west_variance(differences, lag=1):
    """
    Computes the Newey-West variance for the series of differences.
    
    Parameters:
    - differences: Array-like of differences (e.g., error differences between two models)
    - lag: Maximum lag to use for the Newey-West estimator (default is 1)
    
    Returns:
    - Newey-West variance of the differences
    """
    # Create a simple OLS model for the differences
    X = np.ones(len(differences))  # Constant term
    ols_model = sm.OLS(differences, X).fit(cov_type='HAC', cov_kwds={'maxlags': lag})
    
    # Return the estimated variance (squared standard error of the constant term)
    nw_variance = ols_model.bse[0]**2
    return nw_variance

# Function to compute the DM test statistic
def dm_test_statistic(errors1, errors2, lag=1):
    """
    Computes the Diebold-Mariano test statistic between two sets of errors.
    
    Parameters:
    - errors1: First set of errors (e.g., from the first model)
    - errors2: Second set of errors (e.g., from the second model)
    - lag: Maximum lag to use for the Newey-West estimator (default is 1)
    
    Returns:
    - DM test statistic
    """
    # Calculate the error differences
    # Step 1: Square both sets of errors
    squared_errors1 = np.square(errors1)
    squared_errors2 = np.square(errors2)

    # Step 2: Calculate the difference between the squared errors
    squared_diff = squared_errors1 - squared_errors2

    # Step 3: Calculate the average difference (mean of d_t)
    mean_diff = np.mean(squared_diff)
    print(mean_diff)
    # Calculate Newey-West variance
    nw_variance = newey_west_variance(squared_diff, lag)
    print(nw_variance)

    # Compute the DM test statistic
    dm_statistic = mean_diff / np.sqrt(nw_variance)
    
    return dm_statistic

# Load the CSV files
adhbs_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_adhbs.csv')
ols_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_ols.csv')
enet_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_enet.csv')
rf_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_rf.csv')
xgb_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_xgb.csv')
nn_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_nn.csv')

# Ensure both dataframes have the same length (if not, pad with zeros)
max_length = max(len(dm_test_data), len(adhbs_test_data))

dm_test_data = dm_test_data.reindex(range(max_length), fill_value=0)
adhbs_test_data = adhbs_test_data.reindex(range(max_length), fill_value=0)

# Calculate DM test statistic for Call errors
dm_call_errors = dm_test_data['Call Errors']
adhbs_call_errors = adhbs_test_data['Call Errors']

dm_statistic_call = dm_test_statistic(dm_call_errors, adhbs_call_errors, lag=1)

# Calculate DM test statistic for Put errors
dm_put_errors = dm_test_data['Put Errors']
adhbs_put_errors = adhbs_test_data['Put Errors']

dm_statistic_put = dm_test_statistic(dm_put_errors, adhbs_put_errors, lag=1)

# Calculate the average of the two DM test statistics
dm_statistic_avg = (dm_statistic_call + dm_statistic_put) / 2

# Output the DM test statistics
print(f"DM Test Statistic for Calls: {dm_statistic_call}")
print(f"DM Test Statistic for Puts: {dm_statistic_put}")
print(f"Average DM Test Statistic: {dm_statistic_avg}")


0.02048935609173135
7.202397430647932e-06
0.02788108710405005
8.371082981263438e-06
DM Test Statistic for Calls: 7.634661197209441
DM Test Statistic for Puts: 9.636490102765851
Average DM Test Statistic: 8.635575649987647


  nw_variance = ols_model.bse[0]**2


In [37]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Function to calculate Newey-West standard error
def newey_west_variance(differences, lag=1):
    """
    Computes the Newey-West variance for the series of differences.
    
    Parameters:
    - differences: Array-like of differences (e.g., error differences between two models)
    - lag: Maximum lag to use for the Newey-West estimator (default is 1)
    
    Returns:
    - Newey-West variance of the differences
    """
    # Create a simple OLS model for the differences
    X = np.ones(len(differences))  # Constant term
    ols_model = sm.OLS(differences, X).fit(cov_type='HAC', cov_kwds={'maxlags': lag})
    
    # Return the estimated variance (squared standard error of the constant term)
    nw_variance = ols_model.bse[0]**2
    return nw_variance

# Function to compute the DM test statistic
def dm_test_statistic(errors1, errors2, lag=1):
    """
    Computes the Diebold-Mariano test statistic between two sets of errors.
    
    Parameters:
    - errors1: First set of errors (e.g., from the first model)
    - errors2: Second set of errors (e.g., from the second model)
    - lag: Maximum lag to use for the Newey-West estimator (default is 1)
    
    Returns:
    - DM test statistic
    """
    # Step 1: Square both sets of errors
    squared_errors1 = np.square(errors1)
    squared_errors2 = np.square(errors2)

    # Step 2: Calculate the difference between the squared errors
    squared_diff = squared_errors1 - squared_errors2

    # Step 3: Calculate the average difference (mean of d_t)
    mean_diff = np.mean(squared_diff)

    # Step 4: Calculate Newey-West variance
    nw_variance = newey_west_variance(squared_diff, lag)

    # Step 5: Compute the DM test statistic
    dm_statistic = mean_diff / np.sqrt(nw_variance)
    
    return dm_statistic

# Load the CSV files
adhbs_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_adhbs.csv')
ols_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_ols.csv')
enet_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_enet.csv')
rf_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_rf.csv')
xgb_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_xgb.csv')
nn_dm_test_data = pd.read_csv('/Users/sbjpipers/Desktop/FinalThesisQF/FinalThesisQF/Notebooks/performence_evaluation/dm_test_errors_nn.csv')

# Store the data in a dictionary for easier access
models = {
    "ADHBS": adhbs_dm_test_data,
    "OLS": ols_dm_test_data,
    "ENet": enet_dm_test_data,
    "NN": nn_dm_test_data,
    "RF": rf_dm_test_data,
    "XGBoost": xgb_dm_test_data,
    # "LSTM": lstm_dm_test_data
}

# Prepare an empty DataFrame to store the results
dm_table = pd.DataFrame(index=models.keys(), columns=models.keys())

# Perform pairwise DM tests for the average of Calls and Puts
for model1_name, model1_data in models.items():
    for model2_name, model2_data in models.items():
        if model1_name != model2_name:
            # Ensure both dataframes have the same length (pad with zeros if needed)
            max_length = max(len(model1_data), len(model2_data))
            model1_data_padded = model1_data.reindex(range(max_length), fill_value=0)
            model2_data_padded = model2_data.reindex(range(max_length), fill_value=0)
            
            # Calculate DM test for Call errors
            call_errors1 = model1_data_padded['Call Errors']
            call_errors2 = model2_data_padded['Call Errors']
            dm_statistic_call = dm_test_statistic(call_errors1, call_errors2)
            
            # Calculate DM test for Put errors
            put_errors1 = model1_data_padded['Put Errors']
            put_errors2 = model2_data_padded['Put Errors']
            dm_statistic_put = dm_test_statistic(put_errors1, put_errors2)
            
            # Calculate the average of the two DM test statistics
            dm_statistic_avg = (dm_statistic_call + dm_statistic_put) / 2
            
            # Only store upper triangular results to avoid repetition
            if model1_name in dm_table.columns and model2_name in dm_table.index:
                dm_table.loc[model2_name, model1_name] = round(dm_statistic_avg, 2)

# Display the resulting DM test table
print(dm_table)

# Optionally, save the DM test table to a CSV file for later use
# dm_table.to_csv('/path/to/dm_test_results.csv')


         ADHBS    OLS   ENet     NN     RF XGBoost
ADHBS      NaN -63.11 -68.12 -90.09 -72.43  -71.27
OLS      63.11    NaN -20.19 -13.09 -19.31  -21.33
ENet     68.12  20.19    NaN  -7.07 -11.46  -14.65
NN       90.09  13.09   7.07    NaN  -1.61   -5.41
RF       72.43  19.31  11.46   1.61    NaN   -6.58
XGBoost  71.27  21.33  14.65   5.41   6.58     NaN


  nw_variance = ols_model.bse[0]**2
