In [23]:
from portfolio_optimizer import PortfolioOptimizer, calc_sharpe_ratio, calc_calmar_ratio
import pandas as pd
from portfolio_optimizer.stock_choice import MLChoice
from portfolio_optimizer.weight_allocators import SimpleAllocator
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
# Turn off pandas' chained-assignment check (the SettingWithCopyWarning) for
# the whole notebook session.
# NOTE(review): this is a global switch and also hides genuine
# write-to-a-copy mistakes in every later cell.
pd.options.mode.chained_assignment = None

## Machine learning model performance comparison

For 50 assets over a 5-year testing period and a 7-year training period.

In [24]:
# Load the stored experiment results table; the path is relative to the
# directory the notebook is run from.
results_csv_path = "experiments/results/final_results.csv"
results_df = pd.read_csv(results_csv_path)

In [25]:
# Columns that are kept regardless of which strategy they describe.
always_kept = ("timestamp", "benchmark")
# Name fragments that disqualify a strategy column.
excluded_tags = ("expanding", "rolling")

# Keep the two shared columns plus every column whose name contains both
# "60" and "xgboost" and none of the excluded tags.
# NOTE(review): "60" is matched as a plain substring anywhere in the name;
# presumably it identifies the 60-<unit> model variant -- confirm.
relevant_results = [
    key
    for key in results_df.columns
    if key in always_kept
    or (
        "60" in key
        and "xgboost" in key
        and not any(tag in key for tag in excluded_tags)
    )
]

# Take an explicit copy: later cells add columns to this frame, and writing
# into a bare selection of `results_df` is exactly what pandas'
# SettingWithCopyWarning is meant to flag.
rel_results_df = results_df[relevant_results].copy()

In [26]:
# Display the column labels of the filtered results frame.
rel_results_df.columns

Index(['timestamp', 'benchmark',
       'returns_assets10_MLChoice-xgboost-60-fixed_SimpleAllocator',
       'correct_assets10_MLChoice-xgboost-60-fixed_SimpleAllocator',
       'returns_assets10_MLChoice-xgboost-60-fixed_HRP-24',
       'correct_assets10_MLChoice-xgboost-60-fixed_HRP-24',
       'returns_assets10_MLChoice-xgboost-60-fixed_HRP-36',
       'correct_assets10_MLChoice-xgboost-60-fixed_HRP-36',
       'returns_assets20_MLChoice-xgboost-60-fixed_SimpleAllocator',
       'correct_assets20_MLChoice-xgboost-60-fixed_SimpleAllocator',
       'returns_assets20_MLChoice-xgboost-60-fixed_HRP-24',
       'correct_assets20_MLChoice-xgboost-60-fixed_HRP-24',
       'returns_assets20_MLChoice-xgboost-60-fixed_HRP-36',
       'correct_assets20_MLChoice-xgboost-60-fixed_HRP-36',
       'returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator',
       'correct_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator',
       'returns_assets50_MLChoice-xgboost-60-fixed_HRP-24',
       'corre

In [27]:
# Add a `cumulative_<name>` column for every per-period return column:
# compound the returns with a running product of (1 + r) and subtract 1.
#
# Columns that already start with "cumulative_" are skipped as inputs, so
# re-running this cell overwrites the existing cumulative columns instead of
# stacking `cumulative_cumulative_...` columns on top of them.
return_columns = [
    key
    for key in rel_results_df.columns
    if key != "timestamp"
    and "correct" not in key
    and not key.startswith("cumulative_")
]

# `assign` returns a new frame, so nothing is written into a slice of
# `results_df`; new columns are appended in the order of `return_columns`.
rel_results_df = rel_results_df.assign(**{
    f"cumulative_{key}": (1 + rel_results_df[key]).cumprod() - 1
    for key in return_columns
})

In [28]:
# Work on an independent (deep) copy so the plotting cell can rename and
# subset columns without touching `rel_results_df`.
df = rel_results_df.copy()

In [29]:
# Line chart of cumulative returns: benchmark vs. the SimpleAllocator
# portfolios of each size.

# Long result-column names -> short labels shown in the legend.
legend_labels = {
    'cumulative_benchmark': 'S&P500',
    'cumulative_returns_assets10_MLChoice-xgboost-60-fixed_SimpleAllocator': '10 Dionica',
    'cumulative_returns_assets20_MLChoice-xgboost-60-fixed_SimpleAllocator': '20 Dionica',
    'cumulative_returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': '50 Dionica',
    'cumulative_returns_assets100_MLChoice-xgboost-60-fixed_SimpleAllocator': '100 Dionica'
}
df = df.rename(columns=legend_labels)[["timestamp", *legend_labels.values()]]

# One colour per line, used in column order.
color_scheme = [
    '#1f77b4',  # blue
    '#ff7f0e',  # orange
    '#2ca02c',  # green
    '#d62728',  # red
    '#9467bd',  # purple
    '#e5b800'   # yellow
]

# One trace per series; the first column is the shared x axis.
fig = go.Figure()
for idx, series_name in enumerate(df.columns[1:]):
    fig.add_trace(
        go.Scatter(
            x=df['timestamp'],
            y=df[series_name],
            mode='lines',
            name=series_name,
            line=dict(color=color_scheme[idx]),
        )
    )

# Title, axis labels and a horizontal legend placed above the plot area.
fig.update_layout(
    title='Kumulativni povrat različitih strategija kroz testni period',
    title_x=0.5,
    xaxis_title='Vremenska oznaka',
    yaxis_title='Kumulativni povrat',
    legend=dict(
        title='Veličina portfelja:',
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

# Cumulative returns are fractions; render the ticks as whole percentages.
fig.update_yaxes(tickformat='.0%')

fig.show()


In [30]:
# Per-period returns of the benchmark and the SimpleAllocator portfolios,
# relabelled with short names and reduced to just those columns.
strategy_labels = {
    'benchmark': 'S&P500',
    'returns_assets10_MLChoice-xgboost-60-fixed_SimpleAllocator': '10 Dionica',
    'returns_assets20_MLChoice-xgboost-60-fixed_SimpleAllocator': '20 Dionica',
    'returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': '50 Dionica',
    'returns_assets100_MLChoice-xgboost-60-fixed_SimpleAllocator': '100 Dionica'
}
renamed_returns = rel_results_df.copy(deep=True).rename(columns=strategy_labels)
returns_df = renamed_returns[list(strategy_labels.values())]

In [31]:
# Summary table: one column per strategy, one row per metric, computed over
# the full return series.
metric_names = ['Cumulative Returns', 'Average Returns', 'Standard Deviation', 'Sharpe Ratio', 'Calmar Ratio']

per_strategy = {}
for strategy, series in returns_df.items():
    # Total compounded growth over the whole series, in percent.
    total_growth_pct = (1 + series).cumprod().iloc[-1] * 100 - 100
    per_strategy[strategy] = [
        total_growth_pct,
        series.mean(),
        series.std(),
        calc_sharpe_ratio(series),
        calc_calmar_ratio(series),
    ]

metrics_df = pd.DataFrame(per_strategy)
metrics_df.index = metric_names

print("Metrics calculated monthly for the 5 year testing period")
# Highlight the best strategy in each metric row.
metrics_df.style.highlight_max(color='darkblue', axis=1)

Metrics calculated monthly for the 5 year testing period


Unnamed: 0,S&P500,10 Dionica,20 Dionica,50 Dionica,100 Dionica
Cumulative Returns,71.350133,419.064531,314.711433,155.463193,134.250215
Average Returns,0.010443,0.033863,0.029524,0.019572,0.017121
Standard Deviation,0.053662,0.11338,0.105912,0.087481,0.075884
Sharpe Ratio,-0.178093,0.122271,0.089925,-0.004897,-0.037936
Calmar Ratio,0.043645,0.101172,0.081413,0.056497,0.059503


In [32]:
# Same relabelled return series as before, this time keeping the timestamp
# as the first column so the rows can be grouped into time windows.
strategy_labels = {
    'benchmark': 'S&P500',
    'returns_assets10_MLChoice-xgboost-60-fixed_SimpleAllocator': '10 Dionica',
    'returns_assets20_MLChoice-xgboost-60-fixed_SimpleAllocator': '20 Dionica',
    'returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': '50 Dionica',
    'returns_assets100_MLChoice-xgboost-60-fixed_SimpleAllocator': '100 Dionica'
}
renamed_returns = rel_results_df.copy(deep=True).rename(columns=strategy_labels)
returns_df = renamed_returns[["timestamp", *strategy_labels.values()]]

In [33]:
# Per-window metrics: split the return series into consecutive,
# non-overlapping windows of 12 rows and compute Sharpe ratio, Calmar ratio,
# cumulative return and standard deviation for every strategy in each window.

# Window length in rows; presumably one year of monthly observations --
# TODO confirm against the data frequency.
MONTHS_PER_YEAR = 12

# Only complete windows are evaluated; trailing rows that do not fill a whole
# window are ignored.
n_windows = len(returns_df) // MONTHS_PER_YEAR
windows = [
    slice(start * MONTHS_PER_YEAR, (start + 1) * MONTHS_PER_YEAR)
    for start in range(n_windows)
]

# Each window is labelled with the timestamp of its last row. This depends
# only on the windows, so it is computed once rather than once per strategy,
# and it is defined even when there are no strategy columns.
timestamp = [returns_df['timestamp'].iloc[window].iloc[-1] for window in windows]

sharpe_by_window = {}
calmar_by_window = {}
cumret_by_window = {}
std_by_window = {}

# The first column is the timestamp; every other column is a strategy.
for column in returns_df.columns[1:]:
    # `.iloc` makes the positional slicing explicit.
    window_returns = [returns_df[column].iloc[window] for window in windows]

    sharpe_by_window[column] = [calc_sharpe_ratio(returns) for returns in window_returns]
    calmar_by_window[column] = [calc_calmar_ratio(returns) for returns in window_returns]
    # Compounded return over the window.
    cumret_by_window[column] = [(1 + returns).cumprod().iloc[-1] - 1 for returns in window_returns]
    std_by_window[column] = [returns.std() for returns in window_returns]

sharpe_df = pd.DataFrame(sharpe_by_window)
calmar_df = pd.DataFrame(calmar_by_window)
cumret_df = pd.DataFrame(cumret_by_window)
std_df = pd.DataFrame(std_by_window)

# The timestamp goes in last, after all strategy columns.
for metric_frame in (sharpe_df, calmar_df, cumret_df, std_df):
    metric_frame['timestamp'] = timestamp

In [34]:
from plotly.subplots import make_subplots

# 2x2 grid of line charts, one panel per windowed metric
# (Sharpe ratio, Calmar ratio, cumulative return, standard deviation),
# with one line per strategy in every panel.

# One colour per strategy, reused across all panels so a strategy keeps the
# same colour everywhere.
color_scheme = [
    '#1f77b4',  # blue
    '#ff7f0e',  # orange
    '#2ca02c',  # green
    '#d62728',  # red
    '#9467bd',  # purple
    '#e5b800'   # yellow
]

# One entry per panel, in drawing order: (data, row, col, y-axis options).
panels = [
    (sharpe_df, 1, 1, dict(title_text='Sharpe-ov omjer')),
    (calmar_df, 1, 2, dict(title_text='Calmar-ov omjer')),
    # Cumulative returns are fractions; show them as whole percentages.
    (cumret_df, 2, 1, dict(title_text='Kumulativni povrat', tickformat='.0%')),
    (std_df, 2, 2, dict(title_text='Standardna devijacija')),
]

fig = make_subplots(rows=2, cols=2)

for panel_index, (frame, row, col, yaxis_options) in enumerate(panels):
    # Every column except the timestamp is a strategy. Each panel iterates
    # the columns of the frame it actually plots.
    strategy_columns = frame.columns.drop('timestamp')
    for i, column in enumerate(strategy_columns):
        fig.add_trace(go.Scatter(
            x=frame['timestamp'],
            y=frame[column],
            mode='lines',
            name=column,
            # Wrap around if there are more strategies than colours.
            line=dict(color=color_scheme[i % len(color_scheme)]),
            # The strategies repeat in every panel, so only the first panel
            # contributes legend entries.
            showlegend=(panel_index == 0)
        ), row=row, col=col)

    fig.update_xaxes(title_text='Vremenska oznaka', row=row, col=col)
    fig.update_yaxes(row=row, col=col, **yaxis_options)

# Overall size plus a horizontal legend placed above the plot area.
fig.update_layout(
    height=800,
    width=1200,
    legend=dict(
        title='Strategije:',
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

fig.show()

In [35]:
# The `correct_*` columns of the SimpleAllocator portfolios, relabelled with
# short names and reduced to just those columns.
correct_labels = {
    'correct_assets10_MLChoice-xgboost-60-fixed_SimpleAllocator': '10 Dionica',
    'correct_assets20_MLChoice-xgboost-60-fixed_SimpleAllocator': '20 Dionica',
    'correct_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': '50 Dionica',
    'correct_assets100_MLChoice-xgboost-60-fixed_SimpleAllocator': '100 Dionica'
}
renamed_correct = rel_results_df.copy(deep=True).rename(columns=correct_labels)
correct_df = renamed_correct[list(correct_labels.values())]

In [36]:
# Descriptive statistics of the `correct` values: one column per portfolio
# size, one row per statistic.
correct_metric = pd.DataFrame({
    label: [values.mean(), values.median(), values.min(), values.max()]
    for label, values in correct_df.items()
})
correct_metric.index = ["Average", "Median", "Minimum", "Maximum"]

correct_metric.head()

Unnamed: 0,10 Dionica,20 Dionica,50 Dionica,100 Dionica
Average,1.183333,2.766667,8.516667,24.783333
Median,1.0,3.0,8.0,25.0
Minimum,0.0,0.0,2.0,10.0
Maximum,4.0,8.0,18.0,46.0
