In [1]:
import os.path
import re
from itertools import chain
from pathlib import Path
import pandas as pd
from matplotlib import pyplot as plt
from pandas import DataFrame


In [28]:
"""Load files into memory"""

# Baseline results
TEST_RESULT_FOLDER = Path("C:\\Users\\tizia\\PycharmProjects\\DDQN_Trading_MSC\\model_archive\\TestResults\\DensityTests")
GRAPH_OUTPUT = Path("./graphs")
name = "db_density_check"
title = "Deutsche Bank"
OPTIMIZATION_TESTS = TEST_RESULT_FOLDER.joinpath(name)
METRICS_RELATIVE = "final\\validation\\metrics.csv"



required_columns = ['model_name', 'mode', 'return', 'total_volatility']

result_metrics = []
ref_bh = None
if os.path.exists(OPTIMIZATION_TESTS):
    test_directories = os.listdir(OPTIMIZATION_TESTS)
    print(f"Found {len(test_directories)} test folders:")
    print(test_directories)
    for i, test in enumerate(test_directories):
        test_path = OPTIMIZATION_TESTS.joinpath(test).joinpath(METRICS_RELATIVE)
        sample_df = pd.read_csv(test_path)
        print(sample_df)
        if i == 0:
            ref_bh = sample_df.head(1)
        sample_df = sample_df.tail(2)
        sample_df['mode'] = sample_df['evaluation_mode'].apply(lambda x: "Eval" if x == True else "Train")
        sample_df = sample_df[required_columns]
        result_metrics.append(sample_df)

else:
    print(f"Folder: {OPTIMIZATION_TESTS} was not found. Nothing was loaded.")

chunk_size = 3
chunked_metrics = []
for i in range(0, len(result_metrics), chunk_size):
    chunk = result_metrics[i:i + chunk_size]
    chunked_metrics.append(chunk)

for i, x in enumerate([1, 3, 5, 10, 20, 30]):
    for j in range(chunk_size):
        chunked_metrics[i][j]['density'] = x

metrics = pd.concat(list(chain(*chunked_metrics)))
metrics

Found 18 test folders:
['1695788469', '1695792000', '1695795366', '1695798535', '1695799611', '1695800684', '1695801747', '1695802369', '1695802982', '1695803597', '1695803907', '1695804222', '1695804529', '1695804665', '1695804798', '1695804933', '1695805024', '1695805114']
   Unnamed: 0                     model_name evaluation_mode          stock   
0           0                   (*) Buy&Hold             NaN  Deutsche Bank  \
1           1  best_1473_for-seed_1234567890           False  Deutsche Bank   
2           2  best_1473_for-seed_1234567890            True  Deutsche Bank   

       V_i      V_f   profit    return  amean_return  gmean_return  ...   
0  10000.0  9875.96  -124.04 -0.012404      0.000364     -0.000011  ...  \
1  10000.0  2285.08 -7714.92 -0.771492     -0.001176     -0.001357  ...   
2  10000.0  9105.93  -894.07 -0.089407     -0.000066     -0.000086  ...   

   log_return  log_rate_of_return  daily_volatility  total_volatility   
0   -0.012482           -0.000011

Unnamed: 0,model_name,mode,return,total_volatility,density
1,best_1473_for-seed_1234567890,Train,-0.771492,0.62864,1
2,best_1473_for-seed_1234567890,Eval,-0.089407,0.211796,1
1,best_1337_for-seed_1234567891,Train,-0.720894,0.5594,1
2,best_1337_for-seed_1234567891,Eval,-0.158202,0.413216,1
1,best_1439_for-seed_1234567892,Train,-0.725462,0.535556,1
2,best_1439_for-seed_1234567892,Eval,-0.056857,0.261859,1
1,best_1150_for-seed_1234567890,Train,-0.442858,0.654421,3
2,best_1150_for-seed_1234567890,Eval,-0.103985,0.869956,3
1,best_1259_for-seed_1234567891,Train,-0.399058,0.7145,3
2,best_1259_for-seed_1234567891,Eval,-0.069382,0.850138,3


In [29]:
"""Make box plots and lines"""
#metrics = metrics[metrics['mode'] == 'Train']


fig, ax = plt.subplots(figsize=(12, 5))
#metrics.sort_values(by=['op_space', 'window_size'])
metrics.boxplot(column=['return'], by=['mode', 'density'], ax=ax)
ax.axhline(y=ref_bh['return'][0], color='red', linestyle='dashed', label='Buy&Hold Return', zorder=-1)
plt.legend()
plt.suptitle('')
plt.title(f'Return comparison ({title})', fontsize=18)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xticks(rotation=45)
plt.xlabel('Tuple of (Mode, Density)', fontsize=14)
plt.ylabel('Return factor', fontsize=14)
plt.tight_layout()
#plt.show()
plt.savefig(GRAPH_OUTPUT.joinpath(f"{name}_density_return_comp.png"))
plt.close()

fig, ax = plt.subplots(figsize=(12, 5))
metrics.boxplot(column=['total_volatility'], by=['mode', 'density'], ax=ax)
ax.axhline(y=ref_bh['total_volatility'][0], color='red', linestyle='dashed', label='Buy&Hold Volatility')
plt.legend()
plt.suptitle('')
plt.title(f'Volatility comparison ({title})', fontsize=18)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xticks(rotation=45)
plt.xlabel('Tuple of (Mode, Density)', fontsize=14)
plt.ylabel('Total Volatility', fontsize=14)
plt.tight_layout()
#plt.show()
plt.savefig(GRAPH_OUTPUT.joinpath(f"{name}_density_vol_comp.png"))
plt.close()
