### Import the Data

In [1]:
import pandas as pd
import numpy as np
from IvsS_Evaluation_Utils import load_metadata, load_generated_data
from IvsS_Evaluation_Utils import load_cost_structure, nvps_profit, calculate_cochran, get_constants


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Define Path to the Data Repository
data_path = 'C:/Users/lanza/Master_Thesis_EL/Integrated-vs-Seperated-Master-Thesis/04 Data/'

# Define Path to the meta data
meta_path = 'C:/Users/lanza/Master_Thesis_EL/Integrated-vs-Seperated-Master-Thesis/05 Models/'


In [3]:
# Load metadata from all models
metadata_list = load_metadata(meta_path)

# Convert to DataFrame for better visualization
df_metadata = pd.DataFrame(metadata_list)

### Calculate distribution characteristics

The following Block calculates different metric values for the different Datasets

In [4]:
# Get cost strucutre
underage_data, overage_data, alpha_data, underage_data_single, overage_data_single = get_constants(1)

# Max Profit for multi-item newsvendor problem
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Size/Low_variance/set_130650_data.h5', multi=True)
load_cost_structure(alpha_input=alpha_data, underage_input=underage_data, overage_input=overage_data)
max_profit_multi_normal = nvps_profit(y_test,y_test)
Q_0_m, I2_0_m, p_0_m = calculate_cochran(X_test, y_test)
variance_50_m = np.var(y_test)

# Max Profit for variance increase - multi-item newsvendor problem
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306100_data.h5', multi=True)
max_profit_multi_var_100 = nvps_profit(y_test,y_test)
variance_100_m = np.mean(np.var(y_test, axis=0))
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306150_data.h5', multi=True)
max_profit_multi_var_150 = nvps_profit(y_test,y_test)
variance_150_m = np.mean(np.var(y_test, axis=0))
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306200_data.h5', multi=True)
max_profit_multi_var_200 = nvps_profit(y_test,y_test)
variance_200_m = np.mean(np.var(y_test, axis=0))
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306250_data.h5', multi=True)
max_profit_multi_var_250 = nvps_profit(y_test,y_test)
variance_250_m = np.mean(np.var(y_test, axis=0))
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306300_data.h5', multi=True)
max_profit_multi_var_300 = nvps_profit(y_test,y_test)
variance_300_m = np.mean(np.var(y_test, axis=0))

# Max Profit for heterogenity - multi-item newsvendor problem
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306510_data.h5', multi=True)
max_profit_multi_het_10 = nvps_profit(y_test,y_test)
Q_10_m, I2_10_m, p_10_m = calculate_cochran(X_test, y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306520_data.h5', multi=True)
max_profit_multi_het_20 = nvps_profit(y_test,y_test)
Q_20_m, I2_20_m, p_20_m = calculate_cochran(X_test, y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306530_data.h5', multi=True)
max_profit_multi_het_30 = nvps_profit(y_test,y_test)
Q_30_m, I2_30_m, p_30_m = calculate_cochran(X_test, y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306540_data.h5', multi=True)
max_profit_multi_het_40 = nvps_profit(y_test,y_test)
Q_40_m, I2_40_m, p_40_m = calculate_cochran(X_test, y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306550_data.h5', multi=True)
max_profit_multi_het_50 = nvps_profit(y_test,y_test)
Q_50_m, I2_50_m, p_50_m = calculate_cochran(X_test, y_test)

# Max Profit for single-item newsvendor problem
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Size/Low_variance/set_130650_data.h5', multi=False)
load_cost_structure(alpha_input=alpha_data, underage_input=underage_data_single, overage_input=overage_data_single)
max_profit_single_normal = nvps_profit(y_test,y_test)
Q_0_s, I2_0_s, p_0_s = calculate_cochran(X_test, y_test)
variance_50_s = np.var(y_test)

# Max Profit for variance increase - single-item newsvendor problem
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306100_data.h5', multi=False)
max_profit_single_var_100 = nvps_profit(y_test,y_test)
variance_100_s = np.var(y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306150_data.h5', multi=False)
max_profit_single_var_150 = nvps_profit(y_test,y_test)
variance_150_s = np.var(y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306200_data.h5', multi=False)
max_profit_single_var_200 = nvps_profit(y_test,y_test)
variance_200_s = np.var(y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306250_data.h5', multi=False)
max_profit_single_var_250 = nvps_profit(y_test,y_test)
variance_250_s = np.var(y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Volatility/High_size/set_4306300_data.h5', multi=False)
max_profit_single_var_300 = nvps_profit(y_test,y_test) 
variance_300_s = np.var(y_test)

# Max Profit for heterogenity - single-item newsvendor problem
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306510_data.h5', multi=False)
max_profit_single_het_10 = nvps_profit(y_test,y_test)
Q_10_s, I2_10_s, p_10_s = calculate_cochran(X_test, y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306520_data.h5', multi=False)
max_profit_single_het_20 = nvps_profit(y_test,y_test)
Q_20_s, I2_20_s, p_20_s = calculate_cochran(X_test, y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306530_data.h5', multi=False)
max_profit_single_het_30 = nvps_profit(y_test,y_test)
Q_30_s, I2_30_s, p_30_s = calculate_cochran(X_test, y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306540_data.h5', multi=False)
max_profit_single_het_40 = nvps_profit(y_test,y_test)
Q_40_s, I2_40_s, p_40_s = calculate_cochran(X_test, y_test)
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Heterogenity/High_size/set_4306550_data.h5', multi=False)
max_profit_single_het_50 = nvps_profit(y_test,y_test)
Q_50_s, I2_50_s, p_50_s = calculate_cochran(X_test, y_test)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


### Calculate the relative profit regarding the optimal profit

In [5]:
df_simple = df_metadata[df_metadata['file_name'].str.contains('simple')]
df_complex = df_metadata[df_metadata['file_name'].str.contains('complex')]

df_simple['relative_profit'] = df_metadata['profit'] / max_profit_single_normal
df_complex['relative_profit'] = df_metadata['profit'] / max_profit_multi_normal

df_simple = df_simple.sort_values(by='relative_profit', ascending=True)
df_complex = df_complex.sort_values(by='relative_profit', ascending=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_simple['relative_profit'] = df_metadata['profit'] / max_profit_single_normal
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_complex['relative_profit'] = df_metadata['profit'] / max_profit_multi_normal


In [6]:
# Variance - 4 size
df_simple.loc[df_simple['file_name'].str.contains('4306100'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_100
df_simple.loc[df_simple['file_name'].str.contains('4306150'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_150
df_simple.loc[df_simple['file_name'].str.contains('4306200'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_200
df_simple.loc[df_simple['file_name'].str.contains('4306250'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_250
df_simple.loc[df_simple['file_name'].str.contains('4306300'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_300

df_complex.loc[df_complex['file_name'].str.contains('4306100'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_100
df_complex.loc[df_complex['file_name'].str.contains('4306150'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_150
df_complex.loc[df_complex['file_name'].str.contains('4306200'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_200
df_complex.loc[df_complex['file_name'].str.contains('4306250'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_250
df_complex.loc[df_complex['file_name'].str.contains('4306300'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_300

# Variance - 2 size
df_simple.loc[df_simple['file_name'].str.contains('2306100'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_100
df_simple.loc[df_simple['file_name'].str.contains('2306150'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_150
df_simple.loc[df_simple['file_name'].str.contains('2306200'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_200
df_simple.loc[df_simple['file_name'].str.contains('2306250'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_250
df_simple.loc[df_simple['file_name'].str.contains('2306300'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_300

df_complex.loc[df_complex['file_name'].str.contains('2306100'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_100
df_complex.loc[df_complex['file_name'].str.contains('2306150'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_150
df_complex.loc[df_complex['file_name'].str.contains('2306200'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_200
df_complex.loc[df_complex['file_name'].str.contains('2306250'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_250
df_complex.loc[df_complex['file_name'].str.contains('2306300'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_300

# Heterogenity - 4 size
df_simple.loc[df_simple['file_name'].str.contains('4306510'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_10
df_simple.loc[df_simple['file_name'].str.contains('4306520'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_20
df_simple.loc[df_simple['file_name'].str.contains('4306530'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_30
df_simple.loc[df_simple['file_name'].str.contains('4306540'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_40
df_simple.loc[df_simple['file_name'].str.contains('4306550'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_50

df_complex.loc[df_complex['file_name'].str.contains('4306510'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_10
df_complex.loc[df_complex['file_name'].str.contains('4306520'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_20
df_complex.loc[df_complex['file_name'].str.contains('4306530'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_30
df_complex.loc[df_complex['file_name'].str.contains('4306540'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_40
df_complex.loc[df_complex['file_name'].str.contains('4306550'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_50

# Heterogenity - 2 size 
df_simple.loc[df_simple['file_name'].str.contains('2306510'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_10
df_simple.loc[df_simple['file_name'].str.contains('2306520'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_20
df_simple.loc[df_simple['file_name'].str.contains('2306530'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_30
df_simple.loc[df_simple['file_name'].str.contains('2306540'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_40
df_simple.loc[df_simple['file_name'].str.contains('2306550'), 'relative_profit'] = df_simple['profit'] / max_profit_single_het_50

df_complex.loc[df_complex['file_name'].str.contains('2306510'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_10
df_complex.loc[df_complex['file_name'].str.contains('2306520'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_20
df_complex.loc[df_complex['file_name'].str.contains('2306530'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_30
df_complex.loc[df_complex['file_name'].str.contains('2306540'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_40
df_complex.loc[df_complex['file_name'].str.contains('2306550'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_het_50

# Size - high variance
df_simple.loc[df_simple['file_name'].str.contains('1306200'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_200
df_simple.loc[df_simple['file_name'].str.contains('2306200'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_200
df_simple.loc[df_simple['file_name'].str.contains('3306200'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_200
df_simple.loc[df_simple['file_name'].str.contains('4306200'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_200
df_simple.loc[df_simple['file_name'].str.contains('5306200'), 'relative_profit'] = df_simple['profit'] / max_profit_single_var_200

df_complex.loc[df_complex['file_name'].str.contains('1306200'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_200
df_complex.loc[df_complex['file_name'].str.contains('2306200'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_200
df_complex.loc[df_complex['file_name'].str.contains('3306200'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_200
df_complex.loc[df_complex['file_name'].str.contains('4306200'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_200
df_complex.loc[df_complex['file_name'].str.contains('5306200'), 'relative_profit'] = df_complex['profit'] / max_profit_multi_var_200


# Size

### Simple - basis var

In [7]:
# Initialize DataFrame with the correct dtype for relative_profit values
df_simple_size = pd.DataFrame(0.0, index=range(5), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

# Use .loc for assignments and ensure values are cast to float
for i in range(5):
    df_simple_size.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_IOA')]['relative_profit'].astype(float).values
    df_simple_size.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_SOAp')]['relative_profit'].astype(float).values
    df_simple_size.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_SOAnp')]['relative_profit'].astype(float).values
    df_simple_size.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_IOA')]['relative_profit'].astype(float).values
    df_simple_size.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_SOAp')]['relative_profit'].astype(float).values
    df_simple_size.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_SOAnp')]['relative_profit'].astype(float).values

print(df_simple_size)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp
0  0.862598  0.880472   0.852277  0.835252  0.832092   0.832092
1  0.896691  0.898209   0.897659  0.839697  0.742786   0.742457
2  0.897652  0.897313   0.899207  0.891991  0.765452   0.765452
3  0.913769  0.903814   0.906451  0.910251  0.887815   0.888861
4  0.914846  0.900479   0.894702  0.911962  0.898662   0.897200


### Simple - high var 

In [8]:
df_simple_size_var200 = pd.DataFrame(0.0, index=range(4), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])


for i in range(5):
    df_simple_size_var200.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_IOA')]['relative_profit'].astype(float).values
    df_simple_size_var200.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_SOAp')]['relative_profit'].astype(float).values
    df_simple_size_var200.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_SOAnp')]['relative_profit'].astype(float).values
    df_simple_size_var200.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_IOA')]['relative_profit'].astype(float).values
    df_simple_size_var200.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_SOAp')]['relative_profit'].astype(float).values
    df_simple_size_var200.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_SOAnp')]['relative_profit'].astype(float).values

print(df_simple_size_var200)


    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp
0  0.767883  0.715927   0.703315  0.675832  0.660632   0.660105
1  0.806726  0.766096   0.797469  0.785944  0.548372   0.548372
2  0.829977  0.803446   0.804549  0.794718  0.712431   0.715161
3  0.831767  0.811847   0.812483  0.833719  0.767265   0.770275
4  0.839978  0.837126   0.841770  0.833800  0.797267   0.804939


### Complex - basis var

In [9]:
df_complex_size = pd.DataFrame(0.0, index=range(5), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_complex_size.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_size.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_size.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

print(df_complex_size)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp
0  0.955186  0.906511   0.938987  0.951921  0.929045   0.929045
1  0.966058  0.940309   0.940512  0.965459  0.952436   0.952436
2  0.978203  0.939256   0.937129  0.967975  0.957892   0.957912
3  0.992953  0.966285   0.966284  0.969654  0.942793   0.941935
4  0.992893  0.966967   0.966963  0.967803  0.954004   0.953547


### Complex - high var 

In [10]:
df_complex_size_var200 = pd.DataFrame(0.0, index=range(5), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_complex_size_var200.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_size_var200.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_size_var200.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

print(df_complex_size_var200)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp
0  0.892870  0.853470   0.921876  0.904760  0.898036   0.898036
1  0.932964  0.908366   0.918610  0.930178  0.861338   0.861337
2  0.947924  0.921907   0.914170  0.934654  0.876981   0.876975
3  0.954835  0.910113   0.909266  0.940459  0.882392   0.882245
4  0.938168  0.910746   0.910985  0.942986  0.910456   0.910067


# Variance

### Simple - basis size

In [11]:
df_simple_variance = pd.DataFrame(0.0, index=range(6), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

# Iterate over each row index
for i in range(6):
    # For each column, filter the dataframe, convert to float, and assign the value
    df_simple_variance.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'4306{i*50+50}_ANN_simple_IOA')]['relative_profit'].astype(float).values
    df_simple_variance.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'4306{i*50+50}_ANN_simple_SOAp')]['relative_profit'].astype(float).values
    df_simple_variance.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'4306{i*50+50}_ANN_simple_SOAnp')]['relative_profit'].astype(float).values
    df_simple_variance.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'4306{i*50+50}_XGB_simple_IOA')]['relative_profit'].astype(float).values
    df_simple_variance.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'4306{i*50+50}_XGB_simple_SOAp')]['relative_profit'].astype(float).values
    df_simple_variance.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'4306{i*50+50}_XGB_simple_SOAnp')]['relative_profit'].astype(float).values

df_simple_variance['variance'] = [variance_50_s, variance_100_s, variance_150_s, variance_200_s, variance_250_s, variance_300_s]

df_simple_variance = df_simple_variance.drop(4)
df_simple_variance = df_simple_variance.drop(2)

print(df_simple_variance)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp    variance
0  0.913769  0.903814   0.906451  0.910251  0.887815   0.888861   3891.7984
1  0.878958  0.859357   0.856314  0.874261  0.807859   0.806101   6640.4091
3  0.831767  0.811847   0.812483  0.833719  0.767265   0.770275  11886.2200
5  0.794756  0.759551   0.751876  0.783079  0.730972   0.736693  17092.7636


### basis - small size

In [12]:
df_simple_variance_small = pd.DataFrame(0.0, index=range(6), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

# Iterate over each row index
for i in range(6):
    # For each column, filter the dataframe, convert to float, and assign the value
    df_simple_variance_small.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'2306{i*50+50}_ANN_simple_IOA')]['relative_profit'].astype(float).values.mean()
    df_simple_variance_small.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'2306{i*50+50}_ANN_simple_SOAp')]['relative_profit'].astype(float).values.mean()
    df_simple_variance_small.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'2306{i*50+50}_ANN_simple_SOAnp')]['relative_profit'].astype(float).values.mean()
    df_simple_variance_small.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'2306{i*50+50}_XGB_simple_IOA')]['relative_profit'].astype(float).values.mean()
    df_simple_variance_small.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'2306{i*50+50}_XGB_simple_SOAp')]['relative_profit'].astype(float).values.mean()
    df_simple_variance_small.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'2306{i*50+50}_XGB_simple_SOAnp')]['relative_profit'].astype(float).values.mean()

df_simple_variance_small['variance'] = [variance_50_s, variance_100_s, variance_150_s, variance_200_s, variance_250_s, variance_300_s]

df_simple_variance_small = df_simple_variance_small.drop(4)
df_simple_variance_small = df_simple_variance_small.drop(2)

print(df_simple_variance_small)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp    variance
0  0.896691  0.898209   0.897659  0.839697  0.742786   0.742457   3891.7984
1  0.864000  0.861907   0.855582  0.813987  0.687233   0.687233   6640.4091
3  0.806726  0.766096   0.797469  0.785944  0.548372   0.548372  11886.2200
5  0.793451  0.755742   0.734703  0.653798  0.371334   0.371334  17092.7636


### complex - basis size

In [13]:
df_complex_variance = pd.DataFrame(0.0, index=range(6), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(6):
    df_complex_variance.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'4306{i*50+50}_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_variance.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'4306{i*50+50}_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_variance.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'4306{i*50+50}_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_complex_variance.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'4306{i*50+50}_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_variance.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'4306{i*50+50}_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_variance.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'4306{i*50+50}_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

df_complex_variance['variance'] = [variance_50_m, variance_100_m, variance_150_m, variance_200_m, variance_250_m, variance_300_m]

df_complex_variance = df_complex_variance.drop(4)
df_complex_variance = df_complex_variance.drop(2)

print(df_complex_variance)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp      variance
0  0.992953  0.966285   0.966284  0.969654  0.942793   0.941935   2914.918489
1  0.978284  0.940645   0.942652  0.956810  0.894411   0.894469   5477.888283
3  0.954835  0.910113   0.909266  0.940459  0.882392   0.882245  10104.723083
5  0.939608  0.887309   0.886317  0.929575  0.857581   0.857383  14428.968017


### complex - small size

In [14]:
df_complex_variance_small = pd.DataFrame(0.0, index=range(6), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(6):
    df_complex_variance_small.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'2306{i*50+50}_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_variance_small.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'2306{i*50+50}_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_variance_small.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'2306{i*50+50}_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_complex_variance_small.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'2306{i*50+50}_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_variance_small.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'2306{i*50+50}_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_variance_small.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'2306{i*50+50}_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

df_complex_variance_small['variance'] = [variance_50_m, variance_100_m, variance_150_m, variance_200_m, variance_250_m, variance_300_m]

df_complex_variance_small = df_complex_variance_small.drop(4)
df_complex_variance_small = df_complex_variance_small.drop(2)

print(df_complex_variance_small)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp      variance
0  0.966058  0.940309   0.940512  0.965459  0.952436   0.952436   2914.918489
1  0.952029  0.941758   0.922276  0.950642  0.906816   0.906885   5477.888283
3  0.932964  0.908366   0.918610  0.930178  0.861338   0.861337  10104.723083
5  0.936156  0.930649   0.898121  0.913861  0.823876   0.823891  14428.968017


# Noise Features

### simple

In [15]:
df_simple_feature_noise = pd.DataFrame(0.0, index=range(7), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(7):
    x = 3* (2**i)
    df_simple_feature_noise.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'4{x}0650_ANN_simple_IOA')]['relative_profit'].astype(float).mean()
    df_simple_feature_noise.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'4{x}0650_ANN_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_simple_feature_noise.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'4{x}0650_ANN_simple_SOAnp')]['relative_profit'].astype(float).mean()
    df_simple_feature_noise.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'4{x}0650_XGB_simple_IOA')]['relative_profit'].astype(float).mean()
    df_simple_feature_noise.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'4{x}0650_XGB_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_simple_feature_noise.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'4{x}0650_XGB_simple_SOAnp')]['relative_profit'].astype(float).mean()

print(df_simple_feature_noise)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp
0  0.913769  0.903814   0.906451  0.910251  0.887815   0.888861
1  0.909353  0.900166   0.899653  0.907183  0.888036   0.891534
2  0.909230  0.896601   0.897729  0.897371  0.863515   0.866718
3  0.911992  0.892588   0.888369  0.896236  0.873771   0.874498
4  0.903797  0.900159   0.899524  0.899794  0.860145   0.858850
5  0.901535  0.898837   0.898583  0.896505  0.838015   0.838139
6  0.892487  0.900243   0.899524  0.888062  0.862163   0.865670


### complex

In [16]:
df_complex_feature_noise = pd.DataFrame(0.0, index=range(7), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(7):
    x = 3* (2**i)
    df_complex_feature_noise.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'4{x}0650_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_feature_noise.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'4{x}0650_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_feature_noise.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'4{x}0650_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_complex_feature_noise.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'4{x}0650_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_feature_noise.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'4{x}0650_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_feature_noise.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'4{x}0650_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

print(df_complex_feature_noise)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp
0  0.992953  0.966285   0.966284  0.969654  0.942793   0.941935
1  0.990878  0.968843   0.966284  0.969216  0.921939   0.921827
2  0.992939  0.963267   0.965787  0.968864  0.922533   0.922123
3  0.980883  0.966427   0.966242  0.968263  0.914210   0.914218
4  0.992108  0.966296   0.966284  0.967957  0.933323   0.932874
5  0.991700  0.965992   0.966284  0.967922  0.947228   0.947215
6  0.986184  0.967062   0.966990  0.968630  0.937310   0.937280


# Heterogenity

### simple - basis size

In [17]:
df_simple_heterogenity = pd.DataFrame(0.0, index=range(6), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])


df_simple_heterogenity.loc[0, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'430650_ANN_simple_IOA')]['relative_profit'].astype(float).mean()
df_simple_heterogenity.loc[0, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'430650_ANN_simple_SOAp')]['relative_profit'].astype(float).mean()
df_simple_heterogenity.loc[0, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'430650_ANN_simple_SOAnp')]['relative_profit'].astype(float).mean()
df_simple_heterogenity.loc[0, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'430650_XGB_simple_IOA')]['relative_profit'].astype(float).mean()
df_simple_heterogenity.loc[0, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'430650_XGB_simple_SOAp')]['relative_profit'].astype(float).mean()
df_simple_heterogenity.loc[0, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'430650_XGB_simple_SOAnp')]['relative_profit'].astype(float).mean()


for i in range(1,6):
    df_simple_heterogenity.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'43065{10*i}no_ANN_simple_IOA')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'43065{10*i}no_ANN_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'43065{10*i}no_ANN_simple_SOAnp')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'43065{10*i}no_XGB_simple_IOA')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'43065{10*i}no_XGB_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'43065{10*i}no_XGB_simple_SOAnp')]['relative_profit'].astype(float).mean()

df_simple_heterogenity['Cochran´s Q'] = [Q_0_s, Q_10_s, Q_20_s, Q_30_s, Q_40_s, Q_50_s]
df_simple_heterogenity['I2'] = [I2_0_s, I2_10_s, I2_20_s, I2_30_s, I2_40_s, I2_50_s]
df_simple_heterogenity['p-value'] = [p_0_s, p_10_s, p_20_s, p_30_s, p_40_s, p_50_s]

df_simple_heterogenity = df_simple_heterogenity.drop(2)

print(df_simple_heterogenity)



    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  Cochran´s Q  \
0  0.913769  0.903814   0.906451  0.910251  0.887815   0.888861          NaN   
1  0.906868  0.899437   0.906316  0.908053  0.897241   0.889052     1.377094   
3  0.862310  0.851784   0.848240  0.859207  0.824696   0.834410     1.766058   
4  0.859490  0.847999   0.832919  0.850765  0.672087   0.670033     2.296314   
5  0.863946  0.834140   0.839333  0.845708  0.484482   0.488103     3.624476   

          I2   p-value  
0   0.000000       NaN  
1  27.383290  0.240597  
3  43.376710  0.183871  
4  56.451943  0.129681  
5  72.409805  0.056935  


### complex - basis size

In [18]:
df_simple_heterogenity_small = pd.DataFrame(0.0, index=range(6), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

df_simple_heterogenity_small.loc[0, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'230650_ANN_simple_IOA')]['relative_profit'].astype(float).mean()
df_simple_heterogenity_small.loc[0, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'230650_ANN_simple_SOAp')]['relative_profit'].astype(float).mean()
df_simple_heterogenity_small.loc[0, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'230650_ANN_simple_SOAnp')]['relative_profit'].astype(float).mean()
df_simple_heterogenity_small.loc[0, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'230650_XGB_simple_IOA')]['relative_profit'].astype(float).mean()
df_simple_heterogenity_small.loc[0, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'230650_XGB_simple_SOAp')]['relative_profit'].astype(float).mean()
df_simple_heterogenity_small.loc[0, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'230650_XGB_simple_SOAnp')]['relative_profit'].astype(float).mean()


for i in range(1,6):
    df_simple_heterogenity_small.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'23065{10*i}no_ANN_simple_IOA')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity_small.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'23065{10*i}no_ANN_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity_small.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'23065{10*i}no_ANN_simple_SOAnp')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity_small.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'23065{10*i}no_XGB_simple_IOA')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity_small.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'23065{10*i}no_XGB_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_simple_heterogenity_small.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'23065{10*i}no_XGB_simple_SOAnp')]['relative_profit'].astype(float).mean()

df_simple_heterogenity_small['Cochran´s Q'] = [Q_0_s, Q_10_s, Q_20_s, Q_30_s, Q_40_s, Q_50_s]
df_simple_heterogenity_small['I2'] = [I2_0_s, I2_10_s, I2_20_s, I2_30_s, I2_40_s, I2_50_s]
df_simple_heterogenity_small['p-value'] = [p_0_s, p_10_s, p_20_s, p_30_s, p_40_s, p_50_s]

df_simple_heterogenity_small = df_simple_heterogenity_small.drop(2)

print(df_simple_heterogenity_small)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  Cochran´s Q  \
0  0.896691  0.898209   0.897659  0.839697  0.742786   0.742457          NaN   
1  0.893845  0.889386   0.892633  0.766696  0.648191   0.648191     1.377094   
3  0.837713  0.824538   0.823767  0.728438  0.654580   0.654491     1.766058   
4  0.834823  0.818936   0.826813  0.723863  0.611584   0.611584     2.296314   
5  0.831567  0.822617   0.813910  0.703560  0.579605   0.579605     3.624476   

          I2   p-value  
0   0.000000       NaN  
1  27.383290  0.240597  
3  43.376710  0.183871  
4  56.451943  0.129681  
5  72.409805  0.056935  


### complex - basis size

In [19]:
df_complex_heterogenity_no = pd.DataFrame(0.0, index=range(6), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

df_complex_heterogenity_no.loc[0, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'430650_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_no.loc[0, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'430650_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_no.loc[0, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'430650_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_no.loc[0, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'430650_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_no.loc[0, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'430650_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_no.loc[0, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'430650_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

for i in range(1,6):
    df_complex_heterogenity_no.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'43065{10*i}no_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_no.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'43065{10*i}no_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_no.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'43065{10*i}no_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_no.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'43065{10*i}no_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_no.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'43065{10*i}no_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_no.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'43065{10*i}no_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

df_complex_heterogenity_no['Cochran´s Q'] = [Q_0_m, Q_10_m, Q_20_m, Q_30_m, Q_40_m, Q_50_m]
df_complex_heterogenity_no['I2'] = [I2_0_m, I2_10_m, I2_20_m, I2_30_m, I2_40_m, I2_50_m]
df_complex_heterogenity_no['p-value'] = [p_0_m, p_10_m, p_20_m, p_30_m, p_40_m, p_50_m]

print(df_complex_heterogenity_no)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  Cochran´s Q  \
0  0.992953  0.966285   0.966284  0.969654  0.942793   0.941935          NaN   
1  0.985833  0.968934   0.968900  0.958853  0.930381   0.930641     1.350184   
2  0.970811  0.904032   0.903697  0.935595  0.901630   0.902850     2.367605   
3  0.969874  0.887784   0.887862  0.936393  0.879764   0.880234     3.110097   
4  0.966648  0.866892   0.866794  0.934185  0.814337   0.814569     4.284557   
5  0.966782  0.829850   0.829836  0.935231  0.802731   0.802761     6.182867   

          I2   p-value  
0   0.000000       NaN  
1  25.936011  0.245246  
2  57.763229  0.123877  
3  67.846665  0.077808  
4  76.660362  0.038460  
5  83.826274  0.012899  


### complex - small size

In [20]:
df_complex_heterogenity_small = pd.DataFrame(0.0, index=range(6), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

df_complex_heterogenity_small.loc[0, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'230650_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_small.loc[0, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'230650_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_small.loc[0, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'230650_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_small.loc[0, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'230650_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_small.loc[0, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'230650_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
df_complex_heterogenity_small.loc[0, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'230650_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()


for i in range(1,6):
    df_complex_heterogenity_small.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'23065{10*i}no_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_small.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'23065{10*i}no_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_small.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'23065{10*i}no_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_small.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'23065{10*i}no_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_small.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'23065{10*i}no_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_complex_heterogenity_small.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'23065{10*i}no_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

df_complex_heterogenity_small['Cochran´s Q'] = [Q_0_m, Q_10_m, Q_20_m, Q_30_m, Q_40_m, Q_50_m]

print(df_complex_heterogenity_small)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  Cochran´s Q
0  0.966058  0.940309   0.940512  0.965459  0.952436   0.952436          NaN
1  0.958255  0.974294   0.973375  0.961705  0.956312   0.956312     1.350184
2  0.955797  0.915235   0.908281  0.918226  0.890185   0.890185     2.367605
3  0.225273  0.905311   0.902235  0.909248  0.876997   0.876992     3.110097
4  0.945511  0.891023   0.885111  0.906350  0.849323   0.849323     4.284557
5  0.941456  0.854319   0.841801  0.887747  0.821586   0.821578     6.182867


# Elapsed Time

### simple - basis var

In [21]:
# Initialize DataFrame with the correct dtype for relative_profit values
df_simple_size = pd.DataFrame(0.0, index=range(5), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

# Use .loc for assignments and ensure values are cast to float
for i in range(5):
    df_simple_size.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_IOA')]['elapsed_time'].astype(float).values
    df_simple_size.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_SOAp')]['elapsed_time'].astype(float).values
    df_simple_size.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_SOAnp')]['elapsed_time'].astype(float).values    
    df_simple_size.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_IOA')]['elapsed_time'].astype(float).values
    df_simple_size.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_SOAp')]['elapsed_time'].astype(float).values
    df_simple_size.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_SOAnp')]['elapsed_time'].astype(float).values
    

print(df_simple_size)

       ann_ioa     ann_soap    ann_soanp     xgb_ioa    xgb_soap   xgb_soanp
0   423.664309   608.046489   624.473018  310.513162  312.485574  312.485169
1   435.976349   365.817610   365.797994  352.715270  327.048781  327.046369
2   438.748961   539.041733   539.040427  372.529320  283.779637  283.779284
3  1093.965960  1348.775359  1348.774147  277.047058  304.279848  304.279789
4  4612.916231  5841.503571  5841.485346  355.386725  367.057751  367.057562


### simple - high var

In [22]:
df_simple_size_var200 = pd.DataFrame(0.0, index=range(4), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_simple_size_var200.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_IOA')]['elapsed_time'].astype(float).values
    df_simple_size_var200.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_SOAp')]['elapsed_time'].astype(float).values
    df_simple_size_var200.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_SOAnp')]['elapsed_time'].astype(float).values
    df_simple_size_var200.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_IOA')]['elapsed_time'].astype(float).values
    df_simple_size_var200.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_SOAp')]['elapsed_time'].astype(float).values
    df_simple_size_var200.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_SOAnp')]['elapsed_time'].astype(float).values

print(df_simple_size_var200)

       ann_ioa     ann_soap    ann_soanp     xgb_ioa    xgb_soap   xgb_soanp
0   365.438847   688.687054   688.669945  321.491835  350.344737  350.344541
1   364.742442   586.807392   586.805333  334.693431  285.467420  285.461613
2   556.237807   640.833177   640.815724  348.469695  330.278955  330.280093
3   926.330657  1046.621161  1046.620563  277.296389  270.670218  270.669974
4  6052.384864  3978.643239  3978.643490  471.934483  307.116521  307.116829


### complex - basis var

In [23]:
df_complex_size = pd.DataFrame(0.0, index=range(5), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_complex_size.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_IOA')]['elapsed_time'].astype(float).mean()
    df_complex_size.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_SOAp')]['elapsed_time'].astype(float).mean()
    df_complex_size.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_SOAnp')]['elapsed_time'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_IOA')]['elapsed_time'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_SOAp')]['elapsed_time'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_SOAnp')]['elapsed_time'].astype(float).mean()

print(df_complex_size)

       ann_ioa     ann_soap    ann_soanp      xgb_ioa     xgb_soap  \
0   385.756685   689.942904   680.731759   151.423028   227.574907   
1   395.776927   632.139911   628.357375   170.102392  1046.437113   
2   598.089324   713.090926   705.471764   203.463330  1024.228453   
3   968.945080  1548.125519  1513.206116  1659.576647  1598.127457   
4  8114.167667  5525.452829  5514.386474  1062.531468  1148.578070   

     xgb_soanp  
0   223.204138  
1  1066.232464  
2  1043.964876  
3  1600.545804  
4  1166.693915  


### complex - high var

In [24]:
df_complex_size_var200 = pd.DataFrame(0.0, index=range(4), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_complex_size_var200.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_IOA')]['elapsed_time'].astype(float).mean()
    df_complex_size_var200.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_SOAp')]['elapsed_time'].astype(float).mean()
    df_complex_size_var200.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_SOAnp')]['elapsed_time'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_IOA')]['elapsed_time'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_SOAp')]['elapsed_time'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_SOAnp')]['elapsed_time'].astype(float).mean()

print(df_complex_size_var200)

       ann_ioa     ann_soap    ann_soanp      xgb_ioa     xgb_soap  \
0   377.080958   501.560345   476.508161   316.718548   441.143211   
1   382.171129   501.128040   486.626281   458.304004   427.258550   
2   435.989852   642.240051   629.571339   262.375064  1059.056955   
3   921.126149  1122.060366  1109.749612   211.237675  1155.647667   
4  7272.012689  4842.427354  4825.692755  2120.766047  1166.262467   

     xgb_soanp  
0   428.180393  
1   420.951300  
2  1078.600045  
3  1174.230778  
4  1186.558450  


# Average Memory Consumption

### simple - basis var

In [25]:
df_simple_size = pd.DataFrame(0.0, index=range(5), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_simple_size.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_IOA')]['avg_memory'].astype(float).mean()
    df_simple_size.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_SOAp')]['avg_memory'].astype(float).mean()
    df_simple_size.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_ANN_simple_SOAnp')]['avg_memory'].astype(float).mean()
    df_simple_size.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_IOA')]['avg_memory'].astype(float).mean()
    df_simple_size.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_SOAp')]['avg_memory'].astype(float).mean()
    df_simple_size.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}30650_XGB_simple_SOAnp')]['avg_memory'].astype(float).mean()

print(df_simple_size)

       ann_ioa     ann_soap    ann_soanp      xgb_ioa      xgb_soap  \
0  1942.411554  4096.453125  4301.968750  4161.143630   4526.777344   
1  2032.693157  2168.148438  2168.148438  4699.202164   4991.667969   
2  1833.999270  2603.234375  2603.250000  5340.901266   4658.074219   
3  2741.428422  4491.985943  4491.985943  4494.936523   5960.789062   
4  2871.348979  5498.613693  5498.613693  5351.057575  15649.083984   

      xgb_soanp  
0   4526.753906  
1   4991.667969  
2   4658.074219  
3   5960.789062  
4  15649.083984  


### simple - high var

In [26]:
df_simple_size_var200 = pd.DataFrame(0.0, index=range(4), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_simple_size_var200.loc[i, 'ann_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_IOA')]['avg_memory'].astype(float).mean()
    df_simple_size_var200.loc[i, 'ann_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_SOAp')]['avg_memory'].astype(float).mean()
    df_simple_size_var200.loc[i, 'ann_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_ANN_simple_SOAnp')]['avg_memory'].astype(float).mean()
    df_simple_size_var200.loc[i, 'xgb_ioa'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_IOA')]['avg_memory'].astype(float).mean()
    df_simple_size_var200.loc[i, 'xgb_soap'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_SOAp')]['avg_memory'].astype(float).mean()
    df_simple_size_var200.loc[i, 'xgb_soanp'] = df_simple[df_simple['file_name'].str.contains(f'{i+1}306200_XGB_simple_SOAnp')]['avg_memory'].astype(float).mean()

print(df_simple_size_var200)


       ann_ioa     ann_soap    ann_soanp      xgb_ioa      xgb_soap  \
0  1866.281820  4678.167969  4678.191406  4340.767004   4880.109375   
1  1927.842453  4602.832031  4602.878906  4645.538187   4226.914062   
2  2808.122784  4861.707031  4861.785156  5111.532665   5356.562500   
3  2655.780388  4439.832031  4439.886719  4583.034923   5849.613281   
4  3428.224405  4565.425611  4565.425611  5739.666374  13209.753348   

      xgb_soanp  
0   4880.109375  
1   4226.914062  
2   5356.562500  
3   5849.613281  
4  13209.753348  


### complex - basis var

In [27]:
df_complex_size = pd.DataFrame(0.0, index=range(5), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_complex_size.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_IOA')]['peak_memory'].astype(float).mean()
    df_complex_size.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_SOAp')]['peak_memory'].astype(float).mean()
    df_complex_size.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_ANN_complex_SOAnp')]['peak_memory'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_IOA')]['peak_memory'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_SOAp')]['peak_memory'].astype(float).mean()
    df_complex_size.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}30650_XGB_complex_SOAnp')]['peak_memory'].astype(float).mean()

print(df_complex_size)


       ann_ioa      ann_soap     ann_soanp       xgb_ioa      xgb_soap  \
0  2559.113281   6679.226562  10214.804688   2200.406250   4682.664062   
1  2861.375000   6578.429688  10208.214844   2660.019531  15052.500000   
2  3841.792969   6555.558594  10202.027344   3063.261719  15001.496094   
3  3291.453125  12159.195312  16245.003906  16691.015625  21449.457031   
4  3752.714844   8117.250000  12186.191406   2284.605469  17567.949219   

      xgb_soanp  
0   8738.257812  
1  19103.050781  
2  19053.703125  
3  25515.007812  
4  21619.593750  


### complex - high var

In [28]:
df_complex_size_var200 = pd.DataFrame(0.0, index=range(4), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(5):
    df_complex_size_var200.loc[i, 'ann_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_IOA')]['peak_memory'].astype(float).mean()
    df_complex_size_var200.loc[i, 'ann_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_SOAp')]['peak_memory'].astype(float).mean()
    df_complex_size_var200.loc[i, 'ann_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_ANN_complex_SOAnp')]['peak_memory'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_ioa'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_IOA')]['peak_memory'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_soap'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_SOAp')]['peak_memory'].astype(float).mean()
    df_complex_size_var200.loc[i, 'xgb_soanp'] = df_complex[df_complex['file_name'].str.contains(f'{i+1}306200_XGB_complex_SOAnp')]['peak_memory'].astype(float).mean()

print(df_complex_size_var200)

       ann_ioa     ann_soap     ann_soanp       xgb_ioa      xgb_soap  \
0  2686.074219  6431.101562  10472.886719   4220.238281   8423.648438   
1  2833.093750  6471.964844  10512.910156   6137.253906   8147.285156   
2  2770.933594  6753.714844  10815.011719   4229.035156  15195.593750   
3  2947.367188  6867.011719  10957.515625   2742.156250  15379.257812   
4  3403.390625  7243.128906  11281.761719  15136.058594  16490.285156   

      xgb_soanp  
0  12542.144531  
1  12222.863281  
2  19271.769531  
3  19412.410156  
4  20557.121094  


# Risk - Cost Structure

### Calculation optimal profit and quantile

In [29]:
metadata_risk = []

# Loop through the risk levels and load metadata with risk level included
for risk_level in [1.5, 2, 2.5, 3, 3.5, 4]:
    # Load the metadata for the current risk level
    if risk_level == 1.5:
        r = 15
    elif risk_level == 2.5:
        r = 25
    elif risk_level == 3.5:
        r = 35
    else:
        r = int(risk_level)
    current_metadata = load_metadata(meta_path+f'Risk-{r}')
    # Append the risk level to each dictionary in the list
    for item in current_metadata:
        item['risk_level'] = risk_level
    # Extend the main list with the modified current list
    metadata_risk.extend(current_metadata)

# Convert the list of dictionaries to a DataFrame
metadata_risk = pd.DataFrame(metadata_risk)

# Filter rows where 'file_name' contains 'simple'
df_risk_simple = metadata_risk[metadata_risk['file_name'].str.contains('simple')]
df_risk_complex = metadata_risk[metadata_risk['file_name'].str.contains('complex')]

# Filter df_simple for rows where 'file_name' contains '430650'
df_simple_430650 = df_simple[df_simple['file_name'].str.contains('430650')]
df_simple_430650['risk_level'] = 1
df_complex_430650 = df_complex[df_complex['file_name'].str.contains('430650')]
df_complex_430650['risk_level'] = 1
df_simple_230650 = df_simple[df_simple['file_name'].str.contains('230650')]
df_simple_230650['risk_level'] = 1
df_complex_230650 = df_complex[df_complex['file_name'].str.contains('230650')]
df_complex_230650['risk_level'] = 1
df_simple_4306200 = df_simple[df_simple['file_name'].str.contains('4306200')]
df_simple_4306200['risk_level'] = 1
df_complex_4306200 = df_complex[df_complex['file_name'].str.contains('4306200')]
df_complex_4306200['risk_level'] = 1

# Concatenate these rows with df_risk_simple
df_risk_simple = pd.concat([df_risk_simple, df_simple_430650], ignore_index=True)
df_risk_complex = pd.concat([df_risk_complex, df_complex_430650], ignore_index=True)
df_risk_simple = pd.concat([df_risk_simple, df_simple_230650], ignore_index=True)
df_risk_complex = pd.concat([df_risk_complex, df_complex_230650], ignore_index=True)
df_risk_simple = pd.concat([df_risk_simple, df_simple_4306200], ignore_index=True)
df_risk_complex = pd.concat([df_risk_complex, df_complex_4306200], ignore_index=True)

# Get cost structures for different risk levels
underage_risk_1, overage_risk_1, alpha_risk_1, underage_single_risk_1, overage_single_risk_1 = get_constants(1)
underage_risk_15, overage_risk_15, alpha_risk_15, underage_single_risk_15, overage_single_risk_15 = get_constants(1.5)
underage_risk_2, overage_risk_2, alpha_risk_2, underage_single_risk_2, overage_single_risk_2 = get_constants(2)
underage_risk_25, overage_risk_25, alpha_risk_25, underage_single_risk_25, overage_single_risk_25 = get_constants(2.5)
underage_risk_3, overage_risk_3, alpha_risk_3, underage_single_risk_3, overage_single_risk_3 = get_constants(3)
underage_risk_35, overage_risk_35, alpha_risk_35, underage_single_risk_35, overage_single_risk_35 = get_constants(3.5)
underage_risk_4, overage_risk_4, alpha_risk_4, underage_single_risk_4, overage_single_risk_4 = get_constants(4)

# Calculate the maximum profit for each risk level
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Size/Low_variance/set_430650_data.h5', multi=False)

load_cost_structure(underage_input=underage_single_risk_1, overage_input=overage_single_risk_1, alpha_input=alpha_risk_1)
max_profit_risk_1 = nvps_profit(y_test, y_test)
quantile_risk_1 = underage_single_risk_1 / (underage_single_risk_1 + overage_single_risk_1)
load_cost_structure(underage_input=underage_single_risk_15, overage_input=overage_single_risk_15, alpha_input=alpha_risk_15)
max_profit_risk_15 = nvps_profit(y_test, y_test)
quantile_risk_15 = underage_single_risk_15 / (underage_single_risk_15 + overage_single_risk_15)
load_cost_structure(underage_input=underage_single_risk_2, overage_input=overage_single_risk_2, alpha_input=alpha_risk_2)
max_profit_risk_2 = nvps_profit(y_test, y_test)
quantile_risk_2 = underage_single_risk_2 / (underage_single_risk_2 + overage_single_risk_2)
load_cost_structure(underage_input=underage_single_risk_25, overage_input=overage_single_risk_25, alpha_input=alpha_risk_25)
max_profit_risk_25 = nvps_profit(y_test, y_test)
quantile_risk_25 = underage_single_risk_25 / (underage_single_risk_25 + overage_single_risk_25)
load_cost_structure(underage_input=underage_single_risk_3, overage_input=overage_single_risk_3, alpha_input=alpha_risk_3)
max_profit_risk_3 = nvps_profit(y_test, y_test)
quantile_risk_3 = underage_single_risk_3 / (underage_single_risk_3 + overage_single_risk_3) 
load_cost_structure(underage_input=underage_single_risk_35, overage_input=overage_single_risk_35, alpha_input=alpha_risk_35)
max_profit_risk_35 = nvps_profit(y_test, y_test)
quantile_risk_35 = underage_single_risk_35 / (underage_single_risk_35 + overage_single_risk_35)
load_cost_structure(underage_input=underage_single_risk_4, overage_input=overage_single_risk_4, alpha_input=alpha_risk_4)
max_profit_risk_4 = nvps_profit(y_test, y_test)
quantile_risk_4 = underage_single_risk_4 / (underage_single_risk_4 + overage_single_risk_4)

# Calculate the maximum profit for each risk level
X_train, y_train, X_val, y_val, X_test, y_test = load_generated_data(path=data_path+'Size/Low_variance/set_430650_data.h5', multi=True)

load_cost_structure(underage_input=underage_risk_1, overage_input=overage_risk_1, alpha_input=alpha_risk_1)
max_profit_multi_risk_1 = nvps_profit(y_test, y_test)
quantile_risk_multi_1 = underage_risk_1 / (underage_risk_1 + overage_risk_1)
load_cost_structure(underage_input=underage_risk_15, overage_input=overage_risk_15, alpha_input=alpha_risk_15)
max_profit_multi_risk_15 = nvps_profit(y_test, y_test)
quantile_risk_multi_15 = underage_risk_15 / (underage_risk_15 + overage_risk_15)
load_cost_structure(underage_input=underage_risk_2, overage_input=overage_risk_2, alpha_input=alpha_risk_2)
max_profit_multi_risk_2 = nvps_profit(y_test, y_test)
quantile_risk_multi_2 = underage_risk_2 / (underage_risk_2 + overage_risk_2)
load_cost_structure(underage_input=underage_risk_25, overage_input=overage_risk_25, alpha_input=alpha_risk_25)
max_profit_multi_risk_25 = nvps_profit(y_test, y_test)
quantile_risk_multi_25 = underage_risk_25 / (underage_risk_25 + overage_risk_25)
load_cost_structure(underage_input=underage_risk_3, overage_input=overage_risk_3, alpha_input=alpha_risk_3)
max_profit_multi_risk_3 = nvps_profit(y_test, y_test)
quantile_risk_multi_3 = underage_risk_3 / (underage_risk_3 + overage_risk_3)
load_cost_structure(underage_input=underage_risk_35, overage_input=overage_risk_35, alpha_input=alpha_risk_35)
max_profit_multi_risk_35 = nvps_profit(y_test, y_test)
quantile_risk_multi_35 = underage_risk_35 / (underage_risk_35 + overage_risk_35)
load_cost_structure(underage_input=underage_risk_4, overage_input=overage_risk_4, alpha_input=alpha_risk_4)
max_profit_multi_risk_4 = nvps_profit(y_test, y_test)
quantile_risk_multi_4 = underage_risk_4 / (underage_risk_4 + overage_risk_4)

# Calculate the relative profit for each risk level
df_risk_simple.loc[df_risk_simple['risk_level']==1, 'relative_profit'] = df_risk_simple['profit'] / max_profit_risk_1
df_risk_simple.loc[df_risk_simple['risk_level']==1.5, 'relative_profit'] = df_risk_simple['profit'] / max_profit_risk_15
df_risk_simple.loc[df_risk_simple['risk_level']==2, 'relative_profit'] = df_risk_simple['profit'] / max_profit_risk_2
df_risk_simple.loc[df_risk_simple['risk_level']==2.5, 'relative_profit'] = df_risk_simple['profit'] / max_profit_risk_25
df_risk_simple.loc[df_risk_simple['risk_level']==3, 'relative_profit'] = df_risk_simple['profit'] / max_profit_risk_3
df_risk_simple.loc[df_risk_simple['risk_level']==3.5, 'relative_profit'] = df_risk_simple['profit'] / max_profit_risk_35
df_risk_simple.loc[df_risk_simple['risk_level']==4, 'relative_profit'] = df_risk_simple['profit'] / max_profit_risk_4

df_risk_complex.loc[df_risk_complex['risk_level']==1, 'relative_profit'] = df_risk_complex['profit'] / max_profit_multi_risk_1
df_risk_complex.loc[df_risk_complex['risk_level']==1.5, 'relative_profit'] = df_risk_complex['profit'] / max_profit_multi_risk_15
df_risk_complex.loc[df_risk_complex['risk_level']==2, 'relative_profit'] = df_risk_complex['profit'] / max_profit_multi_risk_2
df_risk_complex.loc[df_risk_complex['risk_level']==2.5, 'relative_profit'] = df_risk_complex['profit'] / max_profit_multi_risk_25
df_risk_complex.loc[df_risk_complex['risk_level']==3, 'relative_profit'] = df_risk_complex['profit'] / max_profit_multi_risk_3
df_risk_complex.loc[df_risk_complex['risk_level']==3.5, 'relative_profit'] = df_risk_complex['profit'] / max_profit_multi_risk_35
df_risk_complex.loc[df_risk_complex['risk_level']==4, 'relative_profit'] = df_risk_complex['profit'] / max_profit_multi_risk_4

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_simple_430650['risk_level'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_complex_430650['risk_level'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_simple_230650['risk_level'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_i

### simple - basis size

In [30]:
df_risk_simple_show = pd.DataFrame(0.0, index=range(7), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for  i in range(7):
    df_risk_simple_show.loc[i, 'ann_ioa'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('430650_ANN_simple_IOA')]['relative_profit'].astype(float).mean()
    df_risk_simple_show.loc[i, 'ann_soap'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('430650_ANN_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_simple_show.loc[i, 'ann_soanp'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('430650_ANN_simple_SOAnp')]['relative_profit'].astype(float).mean()
    df_risk_simple_show.loc[i, 'xgb_ioa'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('430650_XGB_simple_IOA')]['relative_profit'].astype(float).mean()
    df_risk_simple_show.loc[i, 'xgb_soap'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('430650_XGB_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_simple_show.loc[i, 'xgb_soanp'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('430650_XGB_simple_SOAnp')]['relative_profit'].astype(float).mean()


df_risk_simple_show['quantile'] = [quantile_risk_1, quantile_risk_15, quantile_risk_2, quantile_risk_25, quantile_risk_3, quantile_risk_35, quantile_risk_4]
print(df_risk_simple_show)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  quantile
0  0.913769  0.903814   0.906451  0.910251  0.887815   0.888861  0.827586
1  0.864408  0.856148   0.855187  0.861136  0.802854   0.802078  0.724138
2  0.817568  0.808865   0.797864  0.807688  0.786256   0.788583  0.620690
3  0.768082  0.758185   0.737135  0.759655  0.709491   0.708695  0.517241
4  0.713470  0.675966   0.664136  0.700499  0.676734   0.677159  0.413793
5  0.598319  0.585802   0.573065  0.594909  0.588006   0.590310  0.310345
6  0.532292  0.438961   0.434277  0.510993  0.374787   0.383401  0.206897


### simple - small size

In [31]:
df_risk_simple_small = pd.DataFrame(0.0, index=range(7), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for  i in range(7):
    df_risk_simple_small.loc[i, 'ann_ioa'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('230650_ANN_simple_IOA')]['relative_profit'].astype(float).mean()
    df_risk_simple_small.loc[i, 'ann_soap'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('230650_ANN_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_simple_small.loc[i, 'ann_soanp'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('230650_ANN_simple_SOAnp')]['relative_profit'].astype(float).mean()
    df_risk_simple_small.loc[i, 'xgb_ioa'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('230650_XGB_simple_IOA')]['relative_profit'].astype(float).mean()
    df_risk_simple_small.loc[i, 'xgb_soap'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('230650_XGB_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_simple_small.loc[i, 'xgb_soanp'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('230650_XGB_simple_SOAnp')]['relative_profit'].astype(float).mean()

df_risk_simple_small['quantile'] = [quantile_risk_1, quantile_risk_15, quantile_risk_2, quantile_risk_25, quantile_risk_3, quantile_risk_35, quantile_risk_4]
print(df_risk_simple_small)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  quantile
0  0.896691  0.898209   0.897659  0.839697  0.742786   0.742457  0.827586
1  0.852625  0.829364   0.849290  0.781976  0.726155   0.726155  0.724138
2  0.801126  0.788933   0.801199  0.763631  0.674459   0.674352  0.620690
3  0.769838  0.630656   0.741359  0.713167  0.582960   0.582960  0.517241
4  0.696278  0.678002   0.662326  0.656757  0.592817   0.593195  0.413793
5  0.637042  0.344452   0.558171  0.553123  0.397603   0.398014  0.310345
6  0.505445  0.369987   0.395285  0.461625  0.245563   0.245563  0.206897


### simple - high var

In [32]:
df_risk_simple_var200 = pd.DataFrame(0.0, index=range(7), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for  i in range(7):
    df_risk_simple_var200.loc[i, 'ann_ioa'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('4306200_ANN_simple_IOA')]['relative_profit'].astype(float).mean()
    df_risk_simple_var200.loc[i, 'ann_soap'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('4306200_ANN_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_simple_var200.loc[i, 'ann_soanp'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('4306200_ANN_simple_SOAnp')]['relative_profit'].astype(float).mean()
    df_risk_simple_var200.loc[i, 'xgb_ioa'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('4306200_XGB_simple_IOA')]['relative_profit'].astype(float).mean()
    df_risk_simple_var200.loc[i, 'xgb_soap'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('4306200_XGB_simple_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_simple_var200.loc[i, 'xgb_soanp'] = df_risk_simple[(df_risk_simple['risk_level']==1+i*0.5) & df_risk_simple['file_name'].str.contains('4306200_XGB_simple_SOAnp')]['relative_profit'].astype(float).mean()

df_risk_simple_var200['quantile'] = [quantile_risk_1, quantile_risk_15, quantile_risk_2, quantile_risk_25, quantile_risk_3, quantile_risk_35, quantile_risk_4]
print(df_risk_simple_var200)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  quantile
0  0.827523  0.807705   0.808337  0.829466  0.763350   0.766345  0.827586
1  0.744377  0.718204   0.704227  0.734079  0.690088   0.703270  0.724138
2  0.654060  0.632031   0.609996  0.642951  0.607015   0.619118  0.620690
3  0.560029  0.526657   0.504147  0.550143  0.521297   0.525790  0.517241
4  0.455093  0.398107   0.374976  0.429134  0.407313   0.400839  0.413793
5  0.329438  0.235636   0.215749  0.268570  0.281133   0.291703  0.310345
6  0.157916  0.007893  -0.017727  0.109516 -0.028212  -0.060031  0.206897


### complex - basis size

In [33]:
df_risk_complex_show = pd.DataFrame(0.0, index=range(7), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(7):
    df_risk_complex_show.loc[i, 'ann_ioa'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('430650_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_risk_complex_show.loc[i, 'ann_soap'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('430650_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_complex_show.loc[i, 'ann_soanp'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('430650_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_risk_complex_show.loc[i, 'xgb_ioa'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('430650_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_risk_complex_show.loc[i, 'xgb_soap'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('430650_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_complex_show.loc[i, 'xgb_soanp'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('430650_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

df_risk_complex_show['quantile'] = [np.mean(quantile_risk_multi_1), np.mean(quantile_risk_multi_15), np.mean(quantile_risk_multi_2), np.mean(quantile_risk_multi_25), np.mean(quantile_risk_multi_3), np.mean(quantile_risk_multi_35), np.mean(quantile_risk_multi_4)]

print(df_risk_complex_show)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  quantile
0  0.992953  0.966285   0.966284  0.969654  0.942793   0.941935  0.889113
1  0.974786  0.966258   0.967464  0.957256  0.927660   0.927206  0.822581
2  0.975057  0.967154   0.968788  0.945007  0.920943   0.920907  0.756048
3  0.968460  0.970718   0.970320  0.939168  0.947983   0.947830  0.689516
4  0.874225  0.972425   0.972316  0.930774  0.945642   0.945420  0.622984
5  0.973613  0.974823   0.974501  0.926123  0.954629   0.954777  0.556451
6  0.970816  0.978651   0.977342  0.927565  0.937647   0.936755  0.489919


### complex - small size

In [34]:
df_risk_complex_small = pd.DataFrame(0.0, index=range(7), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(7):
    df_risk_complex_small.loc[i, 'ann_ioa'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('230650_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_risk_complex_small.loc[i, 'ann_soap'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('230650_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_complex_small.loc[i, 'ann_soanp'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('230650_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_risk_complex_small.loc[i, 'xgb_ioa'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('230650_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_risk_complex_small.loc[i, 'xgb_soap'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('230650_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_complex_small.loc[i, 'xgb_soanp'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('230650_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

df_risk_complex_small['quantile'] = [np.mean(quantile_risk_multi_1), np.mean(quantile_risk_multi_15), np.mean(quantile_risk_multi_2), np.mean(quantile_risk_multi_25), np.mean(quantile_risk_multi_3), np.mean(quantile_risk_multi_35), np.mean(quantile_risk_multi_4)]

print(df_risk_complex_small)

    ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  quantile
0  0.966058  0.940309   0.940512  0.965459  0.952436   0.952436  0.889113
1  0.774874  0.970119   0.972885  0.951972  0.942752   0.942737  0.822581
2  0.932999  0.973020   0.971136  0.942586  0.918793   0.918662  0.756048
3  0.974590  0.967556   0.972391  0.929119  0.949937   0.949937  0.689516
4  0.913410  0.971795   0.971836  0.927620  0.923976   0.923896  0.622984
5  0.035603  0.975399   0.973069  0.920233  0.932976   0.932979  0.556451
6  0.833771  0.977039   0.974999  0.923888  0.911621   0.911656  0.489919


### complex - high var

In [35]:
df_risk_complex_var200 = pd.DataFrame(0.0, index=range(7), columns=['ann_ioa', 'ann_soap', 'ann_soanp', 'xgb_ioa', 'xgb_soap', 'xgb_soanp'])

for i in range(7):
    df_risk_complex_var200.loc[i, 'ann_ioa'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('4306200_ANN_complex_IOA')]['relative_profit'].astype(float).mean()
    df_risk_complex_var200.loc[i, 'ann_soap'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('4306200_ANN_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_complex_var200.loc[i, 'ann_soanp'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('4306200_ANN_complex_SOAnp')]['relative_profit'].astype(float).mean()
    df_risk_complex_var200.loc[i, 'xgb_ioa'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('4306200_XGB_complex_IOA')]['relative_profit'].astype(float).mean()
    df_risk_complex_var200.loc[i, 'xgb_soap'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('4306200_XGB_complex_SOAp')]['relative_profit'].astype(float).mean()
    df_risk_complex_var200.loc[i, 'xgb_soanp'] = df_risk_complex[(df_risk_complex['risk_level']==1+i*0.5)& df_risk_complex['file_name'].str.contains('4306200_XGB_complex_SOAnp')]['relative_profit'].astype(float).mean()

df_risk_complex_var200['quantile'] = [np.mean(quantile_risk_multi_1), np.mean(quantile_risk_multi_15), np.mean(quantile_risk_multi_2), np.mean(quantile_risk_multi_25), np.mean(quantile_risk_multi_3), np.mean(quantile_risk_multi_35), np.mean(quantile_risk_multi_4)]
                                      
print(df_risk_complex_var200)

        ann_ioa  ann_soap  ann_soanp   xgb_ioa  xgb_soap  xgb_soanp  quantile
0  9.631389e-01  0.918027   0.917174  0.948638  0.890065   0.889917  0.889113
1 -4.209345e+08       NaN        NaN  0.924714       NaN        NaN  0.822581
2  9.323570e-01       NaN        NaN  0.901086       NaN        NaN  0.756048
3  7.012355e-01       NaN        NaN  0.882121       NaN        NaN  0.689516
4  5.698256e-01       NaN        NaN  0.868886       NaN        NaN  0.622984
5  7.296298e-01       NaN        NaN  0.853520       NaN        NaN  0.556451
6  8.602391e-01       NaN        NaN  0.842554       NaN        NaN  0.489919
