In [1]:
from portfolio_optimizer import PortfolioOptimizer, calc_sharpe_ratio, calc_sortino_ratio
import pandas as pd
from portfolio_optimizer.stock_choice import MLChoice
from portfolio_optimizer.weight_allocators import SimpleAllocator
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
# Turn off pandas' chained-assignment warning (SettingWithCopyWarning) for the
# whole notebook.
# NOTE(review): this hides the warning globally; later cells add columns to a
# frame that was selected from another frame — confirm that is intended.
pd.options.mode.chained_assignment = None

## Machine learning model performance comparison

For 50 assets over a 5-year testing period and a 7-year training period.

In [2]:
# Load the experiment results table from the CSV file (path is relative to the
# notebook's working directory).
results_df = pd.read_csv("experiments/results/final_results.csv")

In [3]:
# Pick the columns compared in this section. A column is kept when it is the
# "timestamp" or "benchmark" column, or when its name satisfies ALL of:
#   * contains "50" and "SimpleAllocator";
#   * contains "60" unless it is not an MLChoice column;
#   * contains "6" unless it is neither an SMA nor an EMA column;
#   * contains neither "expanding" nor "rolling".
# NOTE(review): these are plain substring tests, so e.g. "6" also matches
# "60" or "16" — confirm no other column names in the CSV collide.
relevant_results = [
    key for key in results_df.columns
    if key in ("timestamp", "benchmark")
    or (
        "50" in key
        and "SimpleAllocator" in key
        and ("60" in key or "MLChoice" not in key)
        and ("6" in key or ("SMA" not in key and "EMA" not in key))
        and "expanding" not in key
        and "rolling" not in key
    )
]

# Take an explicit copy: columns are added to rel_results_df later, and those
# writes must go to an independent frame rather than to a selection of
# results_df (which is what SettingWithCopyWarning would complain about).
rel_results_df = results_df[relevant_results].copy()

In [4]:
# Add a compounded-return column "cumulative_<key>" for every column that is
# treated as a per-period return series: cumprod(1 + r) - 1.
for key in rel_results_df.columns:
    # Skip columns that are not return series ("timestamp", and any column
    # whose name contains "correct" or "alloc"), and columns produced by an
    # earlier run of this cell so that re-running it does not create
    # "cumulative_cumulative_*" columns.
    if (
        key == "timestamp"
        or "correct" in key
        or "alloc" in key
        or key.startswith("cumulative_")
    ):
        continue
    rel_results_df.loc[:, f'cumulative_{key}'] = ((1 + rel_results_df.loc[:, key]).cumprod()) - 1

In [5]:
# Independent working copy for the plotting cell (DataFrame.copy() is a deep
# copy by default), so renaming/selecting there leaves rel_results_df intact.
df = rel_results_df.copy()

In [6]:
# Give the cumulative-return columns short legend names and keep only the
# timestamp plus the six series that are plotted.
df = df.rename(columns={
    'cumulative_benchmark': 'S&P500',
    'cumulative_returns_assets50_NaiveChoice_SimpleAllocator': 'Naive',
    'cumulative_returns_assets50_SMAChoice-6_SimpleAllocator': 'SMA',
    'cumulative_returns_assets50_EMAChoice-6_SimpleAllocator': 'EMA',
    'cumulative_returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': 'XGB',
    'cumulative_returns_assets50_MLChoice-random_forest-60-fixed_SimpleAllocator': 'RF'
})[["timestamp", "S&P500", "Naive", "SMA", "EMA", "XGB", "RF"]]

# One line colour per plotted series, in column order.
color_scheme = [
    '#1f77b4',  # blue
    '#ff7f0e',  # orange
    '#2ca02c',  # green
    '#d62728',  # red
    '#9467bd',   # purple
    '#e5b800'   # yellow
]

# Build the line chart: one trace per series, coloured as it is added.
fig = go.Figure()
for i, column in enumerate(df.columns[1:]):
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df[column],
        mode='lines',
        name=column,
        line=dict(color=color_scheme[i])
    ))

# Axis titles and a horizontal legend placed above the plot area.
fig.update_layout(
    xaxis_title='Vremenska oznaka',
    yaxis_title='Kumulativni povrat',
    legend=dict(
        title='Strategije:',
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

# Show the y-axis values as whole percentages.
fig.update_yaxes(tickformat='.0%')

fig.show()


In [7]:
# Per-period return series under short display names (no timestamp column).
returns_df = (
    rel_results_df
    .copy(deep=True)
    .rename(columns={
        'benchmark': 'S&P500',
        'returns_assets50_NaiveChoice_SimpleAllocator': 'Naive',
        'returns_assets50_SMAChoice-6_SimpleAllocator': 'SMA',
        'returns_assets50_EMAChoice-6_SimpleAllocator': 'EMA',
        'returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': 'XGB',
        'returns_assets50_MLChoice-random_forest-60-fixed_SimpleAllocator': 'RF'
    })
    .loc[:, ["S&P500", "Naive", "SMA", "EMA", "XGB", "RF"]]
)

In [8]:
# Summary statistics per strategy over the full series in returns_df.
# Rows (in order): total compounded return in percent, mean and standard
# deviation of the per-period returns, and the Sharpe and Sortino ratios as
# computed by the portfolio_optimizer helpers.
metrics_by_strategy = {}
for column in returns_df.columns:
    returns = returns_df[column]
    cumulative_return_pct = (((1 + returns).cumprod()).iloc[-1] * 100) - 100
    avg_returns = returns.mean()
    std_returns = returns.std()
    sharpe_ratio = calc_sharpe_ratio(returns)
    sortino_ratio = calc_sortino_ratio(returns)

    metrics_by_strategy[column] = [
        cumulative_return_pct, avg_returns, std_returns, sharpe_ratio, sortino_ratio
    ]

metrics_df = pd.DataFrame(
    metrics_by_strategy,
    index=['Cumulative Returns', 'Average Returns', 'Standard Deviation', 'Sharpe Ratio', 'Sortino Ratio'],
)
print("Metrics calculated monthly for the 5 year testing period")
# Highlight the largest value in each row. Note that for the
# 'Standard Deviation' row the largest value is simply the most volatile one.
metrics_df.style.highlight_max(color = 'darkblue', axis = 1)

Metrics calculated monthly for the 5 year testing period


Unnamed: 0,S&P500,Naive,SMA,EMA,XGB,RF
Cumulative Returns,71.350133,77.713392,117.563011,115.07621,155.463193,122.708347
Average Returns,0.010443,0.011277,0.014653,0.014465,0.019572,0.016783
Standard Deviation,0.053662,0.057653,0.057649,0.057487,0.087481,0.080184
Sharpe Ratio,1.266889,1.291255,1.744854,1.724533,1.585388,1.460266
Sortino Ratio,1.875442,1.930647,2.96082,2.840193,2.56225,2.164907


In [9]:
# Print one LaTeX table row per strategy, each followed by a rule.
for col in metrics_df.columns:
    # The first five rows of metrics_df, in their stored order.
    cum_ret, avg_ret, std_dev, sharpe, sortino = metrics_df[col].iloc[:5]
    print(f"{col} & {cum_ret:.2f} & {avg_ret:.4f} & {std_dev:.4f} & {sharpe:.4f} & {sortino:.4f}\\\\")
    # Raw string: "\h" is not a valid escape sequence in a normal literal.
    print(r"\hline")

S&P500 & 71.35 & 0.0104 & 0.0537 & 1.2669 & 1.8754\\
\hline
Naive & 77.71 & 0.0113 & 0.0577 & 1.2913 & 1.9306\\
\hline
SMA & 117.56 & 0.0147 & 0.0576 & 1.7449 & 2.9608\\
\hline
EMA & 115.08 & 0.0145 & 0.0575 & 1.7245 & 2.8402\\
\hline
XGB & 155.46 & 0.0196 & 0.0875 & 1.5854 & 2.5623\\
\hline
RF & 122.71 & 0.0168 & 0.0802 & 1.4603 & 2.1649\\
\hline


In [10]:
# Per-period return series under short display names, this time keeping the
# timestamp as the first column.
returns_df = (
    rel_results_df
    .copy(deep=True)
    .rename(columns={
        'benchmark': 'S&P500',
        'returns_assets50_NaiveChoice_SimpleAllocator': 'Naive',
        'returns_assets50_SMAChoice-6_SimpleAllocator': 'SMA',
        'returns_assets50_EMAChoice-6_SimpleAllocator': 'EMA',
        'returns_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': 'XGB',
        'returns_assets50_MLChoice-random_forest-60-fixed_SimpleAllocator': 'RF'
    })
    .loc[:, ["timestamp", "S&P500", "Naive", "SMA", "EMA", "XGB", "RF"]]
)

In [11]:
# Metrics over consecutive, non-overlapping windows of rows.
# NOTE(review): 12 rows per window is presumably one year of monthly
# returns — confirm against the data frequency.
ROWS_PER_WINDOW = 12
# Integer division: a trailing partial window is ignored.
n_windows = len(returns_df) // ROWS_PER_WINDOW

# Timestamp of the last row of each window. It is the same for every
# strategy, so it is computed once rather than inside the column loop.
timestamp = [
    returns_df['timestamp'].iloc[(i + 1) * ROWS_PER_WINDOW - 1]
    for i in range(n_windows)
]

sharpe_df = pd.DataFrame()
# Holds Sortino ratios (calc_sortino_ratio); the name is kept because later
# cells read `calmar_df`.
calmar_df = pd.DataFrame()
cumret_df = pd.DataFrame()
std_df = pd.DataFrame()

# The first column of returns_df is the timestamp; the rest are strategies.
for column in returns_df.columns[1:]:
    series = returns_df[column]
    windows = [
        series.iloc[i * ROWS_PER_WINDOW:(i + 1) * ROWS_PER_WINDOW]
        for i in range(n_windows)
    ]

    sharpe_df[column] = [calc_sharpe_ratio(w) for w in windows]
    calmar_df[column] = [calc_sortino_ratio(w) for w in windows]
    # Compounded return within the window.
    cumret_df[column] = [((1 + w).cumprod()).iloc[-1] - 1 for w in windows]
    std_df[column] = [w.std() for w in windows]

# The timestamp goes last so the strategy columns keep positions 0..n-1.
sharpe_df['timestamp'] = timestamp
calmar_df['timestamp'] = timestamp
cumret_df['timestamp'] = timestamp
std_df['timestamp'] = timestamp

In [12]:
from plotly.subplots import make_subplots

# One line colour per strategy, shared by all four panels.
color_scheme = [
    '#1f77b4',  # blue
    '#ff7f0e',  # orange
    '#2ca02c',  # green
    '#d62728',  # red
    '#9467bd',  # purple
    '#e5b800'   # yellow
]

# 2x2 grid of panels.
fig = make_subplots(rows=2, cols=2)

# (data frame, y-axis title, grid row, grid column) for each panel.
# calmar_df is plotted under the Sortino label.
panels = [
    (sharpe_df, 'Sharpe-ov omjer', 1, 1),
    (calmar_df, 'Sortino-ov omjer', 1, 2),
    (cumret_df, 'Kumulativni povrat', 2, 1),
    (std_df, 'Standardna devijacija', 2, 2),
]

for panel_df, y_title, row, col in panels:
    # Every column except the timestamp is one strategy line; each panel
    # iterates the columns of the frame it actually plots.
    strategy_columns = [c for c in panel_df.columns if c != 'timestamp']
    for i, column in enumerate(strategy_columns):
        fig.add_trace(go.Scatter(
            x=panel_df['timestamp'],
            y=panel_df[column],
            mode='lines',
            name=column,
            line=dict(color=color_scheme[i]),
            # Names and colours repeat across panels, so only the first
            # panel contributes legend entries.
            showlegend=(row, col) == (1, 1)
        ), row=row, col=col)

    fig.update_xaxes(title_text='Vremenska oznaka', row=row, col=col)
    fig.update_yaxes(title_text=y_title, row=row, col=col)

# Cumulative returns are shown as whole percentages.
fig.update_yaxes(tickformat='.0%', row=2, col=1)

# Figure size and a horizontal legend placed above the plot area.
fig.update_layout(
    height=800,
    width=1200,
    legend=dict(
        title='Strategije:',
        orientation='h',
        yanchor='bottom',
        y=1.02,
        xanchor='right',
        x=1
    )
)

fig.show()

In [13]:
# The "correct_*" columns of each strategy under short display names.
correct_df = (
    rel_results_df
    .copy(deep=True)
    .rename(columns={
        'correct_assets50_NaiveChoice_SimpleAllocator': 'Naive',
        'correct_assets50_SMAChoice-6_SimpleAllocator': 'SMA',
        'correct_assets50_EMAChoice-6_SimpleAllocator': 'EMA',
        'correct_assets50_MLChoice-xgboost-60-fixed_SimpleAllocator': 'XGB',
        'correct_assets50_MLChoice-random_forest-60-fixed_SimpleAllocator': 'RF'
    })
    .loc[:, ["Naive", "SMA", "EMA", "XGB", "RF"]]
)

In [14]:
# Descriptive statistics of every column in correct_df, one column per
# strategy and one row per statistic.
correct_metric = pd.DataFrame(
    {
        name: [
            values.mean(),
            values.std(),
            values.median(),
            values.min(),
            values.max(),
        ]
        for name, values in correct_df.items()
    },
    index=["Average", "STD", "Median", "Minimum", "Maximum"],
)
correct_metric.head()

Unnamed: 0,Naive,SMA,EMA,XGB,RF
Average,6.133333,5.283333,5.183333,8.516667,7.6
STD,3.372203,1.957962,1.827211,3.666114,4.038795
Median,6.0,5.0,5.0,8.0,7.0
Minimum,0.0,1.0,1.0,2.0,1.0
Maximum,14.0,11.0,9.0,18.0,18.0


In [15]:
# Print one LaTeX table row per strategy, each followed by a rule.
for col in correct_metric.columns:
    # The first five rows of correct_metric, in their stored order.
    average, std_dev, median, minimum, maximum = correct_metric[col].iloc[:5]
    print(f"{col} & {average:.2f} & {std_dev:.2f} & {median:.0f} & {minimum:.0f} & {maximum:.0f}\\\\")
    # Raw string: "\h" is not a valid escape sequence in a normal literal.
    print(r"\hline")

Naive & 6.13 & 3.37 & 6 & 0 & 14\\
\hline
SMA & 5.28 & 1.96 & 5 & 1 & 11\\
\hline
EMA & 5.18 & 1.83 & 5 & 1 & 9\\
\hline
XGB & 8.52 & 3.67 & 8 & 2 & 18\\
\hline
RF & 7.60 & 4.04 & 7 & 1 & 18\\
\hline
