In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots

from portfolio_optimizer import PortfolioOptimizer, calc_sharpe_ratio, calc_sortino_ratio
from portfolio_optimizer.stock_choice import MLChoice
from portfolio_optimizer.weight_allocators import SimpleAllocator
pd.options.mode.chained_assignment = None

## Machine learning model performance comparison

For 50 assets over a 5-year testing period and a 7-year training period

In [2]:
# Load the stored experiment results into a DataFrame.
results_path = "experiments/results/final_results.csv"
results_df = pd.read_csv(results_path)

In [3]:
# Columns that are kept regardless of the strategy filter below.
always_keep = ("timestamp", "benchmark")
# A strategy column must contain every one of these substrings ...
required_tags = ("50", "60", "MLChoice")
# ... and none of these.
excluded_tags = ("expanding", "rolling")

relevant_results = [
    key for key in results_df.columns
    if key in always_keep
    or (
        all(tag in key for tag in required_tags)
        and not any(tag in key for tag in excluded_tags)
    )
]

# Take an explicit copy: the following cell adds columns to this frame, and
# writing into a bare selection of `results_df` only works silently because
# the chained-assignment warning is switched off in the import cell.
rel_results_df = results_df[relevant_results].copy()

In [4]:
# Add a compounded "cumulative_<column>" series for every return column.
# The column names are snapshotted before the loop and already-derived
# "cumulative_" columns are skipped, so re-running this cell overwrites the
# same columns instead of creating "cumulative_cumulative_..." ones.
for key in list(rel_results_df.columns):
    is_derived = key.startswith("cumulative_")
    if key == "timestamp" or "correct" in key or "alloc" in key or is_derived:
        continue
    # Compound the per-period returns: prod(1 + r) - 1.
    rel_results_df.loc[:, f'cumulative_{key}'] = (1 + rel_results_df.loc[:, key]).cumprod() - 1

In [5]:
# Independent working copy for the cumulative-return chart (copy() is deep by default).
df = rel_results_df.copy()

In [6]:
# Legend labels for the cumulative-return columns, in plotting order.
legend_labels = {
    'cumulative_benchmark': 'S&P500',
    'cumulative_returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': 'XGB-1/N',
    'cumulative_returns_assets50_MLChoice-random_forest-60-fixed_SimpleAllocator': 'RF-1/N',
    'cumulative_returns_assets50_MLChoice-xgboost-60-fixed_HRP-36': 'XGB-HRP-36',
    'cumulative_returns_assets50_MLChoice-random_forest-60-fixed_HRP-36': 'RF-HRP-36',
}

# Keep only the timestamp and the relabelled cumulative series.
df = df.rename(columns=legend_labels)[["timestamp", *legend_labels.values()]]

# One colour per line, assigned in column order.
color_scheme = [
    '#1f77b4',  # blue
    '#ff7f0e',  # orange
    '#2ca02c',  # green
    '#d62728',  # red
    '#9467bd',  # purple
    '#e5b800',  # yellow
]

# One line per strategy, coloured as the trace is created.
fig = go.Figure()
for i, column in enumerate(df.columns[1:]):
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df[column],
        mode='lines',
        name=column,
        line=dict(color=color_scheme[i]),
    ))

# Axis titles plus a horizontal legend placed above the plot area.
fig.update_layout(
    xaxis_title='Vremenska oznaka',
    yaxis_title='Kumulativni povrat',
    legend=dict(
        title='Strategije:',
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1,
    ),
)

# Show the y axis as whole percentages.
fig.update_yaxes(tickformat='.0%')

fig.show()


In [7]:
# Short display names for the per-period return columns.
return_labels = {
    'benchmark': 'S&P500',
    'returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': 'XGB-1/N',
    'returns_assets50_MLChoice-random_forest-60-fixed_SimpleAllocator': 'RF-1/N',
    'returns_assets50_MLChoice-xgboost-60-fixed_HRP-36': 'XGB-HRP-36',
    'returns_assets50_MLChoice-random_forest-60-fixed_HRP-36': 'RF-HRP-36',
}

# rename() returns a new frame, so `rel_results_df` itself is left untouched.
renamed_results = rel_results_df.rename(columns=return_labels)
returns_df = renamed_results[list(return_labels.values())]

In [8]:
# Row labels of the summary table, in the order the values are collected below.
metric_names = ['Cumulative Returns', 'Average Returns', 'Standard Deviation', 'Sharpe Ratio', 'Sortino Ratio']

# Collect one list of metric values per strategy column, then build the
# table in a single step instead of growing it column by column.
metrics_by_strategy = {}
for column in returns_df.columns:
    returns = returns_df[column]
    # Compounded return over the whole period, expressed in percent.
    cumulative_return_pct = (((1 + returns).cumprod()).iloc[-1] * 100) - 100
    metrics_by_strategy[column] = [
        cumulative_return_pct,
        returns.mean(),
        returns.std(),
        calc_sharpe_ratio(returns),
        calc_sortino_ratio(returns),
    ]

metrics_df = pd.DataFrame(metrics_by_strategy, index=metric_names)
print("Metrics calculated monthly for the 5 year testing period")
# Highlight the best (maximum) value in each metric row.
metrics_df.style.highlight_max(color = 'darkblue', axis = 1)

Metrics calculated monthly for the 5 year testing period


Unnamed: 0,S&P500,XGB-1/N,RF-1/N,XGB-HRP-36,RF-HRP-36
Cumulative Returns,71.350133,155.463193,122.708347,121.573447,87.068681
Average Returns,0.010443,0.019572,0.016783,0.016203,0.012947
Standard Deviation,0.053662,0.087481,0.080184,0.075097,0.068516
Sharpe Ratio,1.266889,1.585388,1.460266,1.499311,1.275237
Sortino Ratio,1.875442,2.56225,2.164907,2.272458,1.807045


In [9]:
# Emit one LaTeX table row per strategy, each followed by a horizontal rule.
for col in metrics_df.columns:
    # Rows of metrics_df, by position: cumulative return, mean, std, Sharpe, Sortino.
    cum_ret, mean_ret, std_ret, sharpe, sortino = metrics_df[col].iloc[:5]
    print(f"{col} & {cum_ret:.2f} & {mean_ret:.4f} & {std_ret:.4f} & {sharpe:.4f} & {sortino:.4f}\\\\")
    # Raw string: "\h" is not a valid escape sequence in a normal string literal.
    print(r"\hline")

S&P500 & 71.35 & 0.0104 & 0.0537 & 1.2669 & 1.8754\\
\hline
XGB-1/N & 155.46 & 0.0196 & 0.0875 & 1.5854 & 2.5623\\
\hline
RF-1/N & 122.71 & 0.0168 & 0.0802 & 1.4603 & 2.1649\\
\hline
XGB-HRP-36 & 121.57 & 0.0162 & 0.0751 & 1.4993 & 2.2725\\
\hline
RF-HRP-36 & 87.07 & 0.0129 & 0.0685 & 1.2752 & 1.8070\\
\hline


In [10]:
# Short display names for the per-period return columns.
return_labels = {
    'benchmark': 'S&P500',
    'returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': 'XGB-1/N',
    'returns_assets50_MLChoice-random_forest-60-fixed_SimpleAllocator': 'RF-1/N',
    'returns_assets50_MLChoice-xgboost-60-fixed_HRP-36': 'XGB-HRP-36',
    'returns_assets50_MLChoice-random_forest-60-fixed_HRP-36': 'RF-HRP-36',
}

# Same relabelling as before, but this time the timestamp column is kept
# (as the first column) for the per-year breakdown.
renamed_results = rel_results_df.rename(columns=return_labels)
returns_df = renamed_results[["timestamp", *return_labels.values()]]

In [11]:
# Rows are grouped into consecutive blocks of this many rows.
# NOTE(review): assumes one row per month, so a block is one year - confirm.
MONTHS_PER_YEAR = 12

# Every column except the leading 'timestamp' column is a strategy.
strategy_columns = returns_df.columns[1:]

# Positional row slices, one per complete block; trailing rows that do not
# fill a whole block are ignored.
n_full_years = len(returns_df) // MONTHS_PER_YEAR
year_slices = [
    slice(year * MONTHS_PER_YEAR, (year + 1) * MONTHS_PER_YEAR)
    for year in range(n_full_years)
]

# Label each block with its last timestamp. This does not depend on the
# strategy, so it is computed once rather than once per column.
timestamp = [returns_df['timestamp'].iloc[rows].iloc[-1] for rows in year_slices]


def _per_year(metric):
    """Apply `metric` to each block of every strategy column.

    Returns a DataFrame with one column per strategy followed by a final
    'timestamp' column holding the block labels.
    """
    frame = pd.DataFrame({
        column: [metric(returns_df[column].iloc[rows]) for rows in year_slices]
        for column in strategy_columns
    })
    frame['timestamp'] = timestamp
    return frame


sharpe_df = _per_year(calc_sharpe_ratio)
# Holds Sortino ratios despite its name; the name is kept because the
# plotting cell reads `calmar_df`.
calmar_df = _per_year(calc_sortino_ratio)
# Compounded return within each block.
cumret_df = _per_year(lambda block: ((1 + block).cumprod()).iloc[-1] - 1)
std_df = _per_year(lambda block: block.std())

In [12]:
# 2x2 grid of per-year metrics: Sharpe, Sortino, cumulative return, std dev.
# `make_subplots` is imported in the notebook's import cell.

# Line colours, assigned to the strategy columns in order.
color_scheme = [
    '#1f77b4',  # blue
    '#ff7f0e',  # orange
    '#2ca02c',  # green
    '#d62728',  # red
    '#9467bd',  # purple
    '#e5b800',  # yellow
]

# One entry per panel:
# (frame to plot, subplot row, subplot column, y-axis title, extra y-axis options)
panels = [
    (sharpe_df, 1, 1, 'Sharpe-ov omjer', {}),
    (calmar_df, 1, 2, 'Sortino-ov omjer', {}),  # calmar_df holds Sortino ratios
    (cumret_df, 2, 1, 'Kumulativni povrat', {'tickformat': '.0%'}),
    (std_df, 2, 2, 'Standardna devijacija', {}),
]

fig = make_subplots(rows=2, cols=2)

for panel_df, panel_row, panel_col, y_title, y_axis_options in panels:
    # Legend entries come from the first panel only, so each strategy is listed once.
    is_first_panel = (panel_row == 1 and panel_col == 1)
    # The last column of every frame is 'timestamp'; the others are strategies.
    # Column names are taken from the frame that is actually being plotted.
    for i, column in enumerate(panel_df.columns[:-1]):
        fig.add_trace(go.Scatter(
            x=panel_df['timestamp'],
            y=panel_df[column],
            mode='lines',
            name=column,
            line=dict(color=color_scheme[i]),
            showlegend=is_first_panel,
        ), row=panel_row, col=panel_col)
    fig.update_xaxes(title_text='Vremenska oznaka', row=panel_row, col=panel_col)
    fig.update_yaxes(title_text=y_title, row=panel_row, col=panel_col, **y_axis_options)

# Figure size plus a horizontal legend placed above the plot area.
fig.update_layout(
    height=800,
    width=1200,
    legend=dict(
        title='Strategije:',
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1,
    ),
)

fig.show()