In [1]:
import mlflow
import quantstream as qs
import numpy as np
import pandas as pd
import polars as pl

from plotly.io import show

from sklearn import set_config
from sklearn.model_selection import (
    GridSearchCV,
    KFold,
    RandomizedSearchCV,
    train_test_split,
)
from sklearn.pipeline import Pipeline
from scipy.stats import loguniform

from skfolio import RatioMeasure, RiskMeasure, Population
from skfolio.datasets import load_factors_dataset, load_sp500_dataset
from skfolio.model_selection import (
    CombinatorialPurgedCV,
    WalkForward,
    cross_val_predict,
)
from skfolio.moments import (
    DenoiseCovariance,
    DetoneCovariance,
    EWMu,
    GerberCovariance,
    ShrunkMu,
)
from skfolio.optimization import (
    MeanRisk,
    NestedClustersOptimization,
    InverseVolatility,
    ObjectiveFunction,
    RiskBudgeting,
)
from skfolio.pre_selection import SelectKExtremes
from skfolio.preprocessing import prices_to_returns
from skfolio.prior import BlackLitterman, EmpiricalPrior, FactorModel
from skfolio.uncertainty_set import BootstrapMuUncertaintySet

### Pull portfolio data for EDA

In [2]:
# Load the sample S&P 500 price dataset shipped with skfolio and preview the first rows.
# NOTE(review): `prices` is rebound a few cells below to the quantstream
# "adjClose" data, so this frame is only used for the preview shown here.
prices = load_sp500_dataset()
prices.head()

Unnamed: 0_level_0,AAPL,AMD,BAC,BBY,CVX,GE,HD,JNJ,JPM,KO,LLY,MRK,MSFT,PEP,PFE,PG,RRC,UNH,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1990-01-02,0.264,4.125,4.599,0.144,4.991,14.391,1.117,3.438,3.394,2.235,6.658,4.215,0.384,4.738,1.021,3.86,3.322,0.31,3.653,4.068
1990-01-03,0.266,4.0,4.636,0.161,4.91,14.364,1.121,3.452,3.508,2.203,6.658,4.282,0.386,4.692,1.024,3.853,3.322,0.304,3.653,4.027
1990-01-04,0.267,3.938,4.537,0.159,4.847,14.283,1.128,3.459,3.522,2.192,6.621,4.215,0.397,4.646,1.041,3.777,3.322,0.301,3.634,3.987
1990-01-05,0.268,3.812,4.438,0.159,4.775,14.148,1.113,3.423,3.536,2.174,6.549,4.128,0.387,4.581,1.032,3.709,3.322,0.288,3.595,3.966
1990-01-08,0.269,3.812,4.463,0.147,4.82,14.229,1.102,3.481,3.536,2.22,6.549,4.181,0.393,4.664,1.023,3.777,3.322,0.282,3.644,4.027


In [3]:
# Build the quantstream portfolio by registering each ticker in turn.
# The tuple keeps the exact insertion order of the securities.
portfolio = qs.Portfolio()
for ticker in (
    "AAPL",
    "ABBV",
    "CVX",
    "BRK-B",
    "XOM",
    "GM",
    "JPM",
    "META",
    "NVDA",
    "VZ",
    "WMT",
):
    portfolio.add_security(ticker)

In [4]:
# Display the portfolio's data container; its "adjClose" and "volume"
# entries are extracted in the next cell.
portfolio.data

In [5]:
# Pull the adjusted-close and volume fields out of the portfolio data.
# NOTE: this rebinds `prices`, replacing the S&P 500 sample frame loaded earlier.
prices = portfolio.data["adjClose"]
volume = portfolio.data["volume"]

In [6]:
# Reshape the adjusted-close data into a wide frame:
# one row per date, one column per security.
df_prices = (
    prices.to_dataframe()
    .unstack()  # innermost index level moves into the columns
    .droplevel(0, axis=1)  # discard the outer column level left behind by unstack
    .T
)
df_prices

security,AAPL,ABBV,BRK-B,CVX,GM,JPM,META,NVDA,VZ,WMT,XOM
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-10-21,58.18,62.38,211.490005,93.40,34.31,106.92,189.19,4.88,46.11,36.95,53.73
2019-10-22,58.05,62.84,210.619995,94.35,34.68,108.00,181.79,4.87,46.13,36.90,54.01
2019-10-23,58.83,62.74,210.960007,94.50,34.96,108.28,185.59,4.85,46.21,36.83,54.52
2019-10-24,58.92,61.98,210.259995,94.18,34.21,108.21,185.82,4.90,45.98,36.75,54.01
2019-10-25,59.65,61.76,211.800003,95.05,35.09,109.07,187.33,5.09,45.82,36.73,54.13
...,...,...,...,...,...,...,...,...,...,...,...
2024-10-14,231.30,194.10,459.980000,151.78,48.63,221.48,590.42,138.07,43.19,80.29,124.08
2024-10-15,233.85,191.86,462.680000,147.73,47.85,222.39,586.27,131.60,43.74,81.65,120.35
2024-10-16,231.78,190.46,465.690000,148.73,49.01,223.64,576.79,135.72,43.91,81.22,120.66
2024-10-17,232.15,188.57,465.850000,151.20,49.38,224.42,576.93,136.93,43.85,80.89,120.35


In [7]:
# Reshape the volume data into a wide frame:
# one row per date, one column per security.
df_volume = (
    volume.to_dataframe()
    .unstack()  # innermost index level moves into the columns
    .droplevel(0, axis=1)  # discard the outer column level left behind by unstack
    .T
)
df_volume

security,AAPL,ABBV,BRK-B,CVX,GM,JPM,META,NVDA,VZ,WMT,XOM
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-10-21,89469932.0,7882560.0,3337300.0,4153612.0,7429278.0,13272678.0,8580744.0,266967240.0,11441991.0,10417549.0,10473346.0
2019-10-22,90736004.0,6863955.0,2716800.0,4130577.0,6773553.0,12657085.0,19854192.0,337369120.0,9802244.0,7789555.0,9503140.0
2019-10-23,79730180.0,6195542.0,2413800.0,4159941.0,6468788.0,9397328.0,12370819.0,285770000.0,10461906.0,8606657.0,8722963.0
2019-10-24,71665020.0,6332465.0,3147200.0,3893194.0,6813476.0,8057164.0,11505406.0,239037320.0,11215278.0,8767829.0,9069090.0
2019-10-25,73477184.0,5497019.0,2539200.0,3369139.0,8735581.0,9055700.0,8061240.0,423092000.0,15036400.0,7692979.0,7607192.0
...,...,...,...,...,...,...,...,...,...,...,...
2024-10-14,39882100.0,3624123.0,2969800.0,4214721.0,6774134.0,9048925.0,8252000.0,232347700.0,9923500.0,9914500.0,9808122.0
2024-10-15,64751400.0,6553712.0,3929900.0,6704610.0,12532100.0,9235825.0,9564236.0,377831021.0,15444500.0,13276221.0,15771317.0
2024-10-16,34082240.0,5576000.0,3166300.0,4303737.0,10152811.0,6382515.0,11268400.0,264879720.0,9459000.0,11123300.0,8426800.0
2024-10-17,32993810.0,4419002.0,2520903.0,6593136.0,8333573.0,6470186.0,8701158.0,306435921.0,10539459.0,9424705.0,11204857.0


### Train Mean Risk Model

In [8]:
# Convert the wide price frame to returns, then split it without shuffling:
# rows keep their original order and the final 33% become the test set.
X = prices_to_returns(df_prices)
X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False)

In [9]:
# Baseline mean-risk optimizer with every parameter left at the library default.
model = MeanRisk()

In [10]:
# Fit on the training returns and print the resulting weight vector
# (one weight per security column of X_train).
model.fit(X_train)
print(model.weights_)

[9.73411244e-07 1.95372778e-01 8.53442419e-02 8.41094737e-07
 1.14044646e-06 4.75810791e-07 1.12496635e-02 4.34553009e-07
 4.55634538e-01 2.52244258e-01 1.50655877e-04]


In [11]:
# Apply the fitted baseline weights to the held-out returns.
port1 = model.predict(X_test)

# Headline Sharpe ratio, a 50-dash rule, then the full summary table,
# each on its own line.
print(
    port1.annualized_sharpe_ratio,
    "-" * 50,
    port1.summary(),
    sep="\n",
)

1.8218065176406282
--------------------------------------------------
Mean                                              0.094%
Annualized Mean                                   23.77%
Variance                                         0.0068%
Annualized Variance                                1.70%
Semi-Variance                                    0.0034%
Annualized Semi-Variance                           0.85%
Standard Deviation                                 0.82%
Annualized Standard Deviation                     13.04%
Semi-Deviation                                     0.58%
Annualized Semi-Deviation                          9.21%
Mean Absolute Deviation                            0.59%
CVaR at 95%                                        1.78%
EVaR at 95%                                        2.39%
Worst Realization                                  3.47%
CDaR at 95%                                        7.76%
MAX Drawdown                                      10.27%
Average Drawdown  

### Model

We create a Maximum Sharpe Ratio model and then fit it on the training set. `portfolio_params` are parameters passed to the `Portfolio` returned by the `predict` method. They can be omitted; here we use them to give a name to our maximum Sharpe Ratio portfolio:

In [12]:
# Maximum-ratio optimizer using annualized variance as the risk measure.
# The portfolio_params name labels the portfolio returned by predict().
model = MeanRisk(
    objective_function=ObjectiveFunction.MAXIMIZE_RATIO,
    risk_measure=RiskMeasure.ANNUALIZED_VARIANCE,
    portfolio_params={"name": "Max Sharpe"},
)
model.fit(X_train)
model.weights_


The annualized risk measure Annualized Variance will be convertedto its non-annualized version Variance



array([3.34966176e-03, 5.84626279e-01, 2.12809131e-11, 1.93307329e-11,
       8.93733155e-12, 1.18914899e-11, 7.24519191e-12, 3.01683828e-01,
       9.69659330e-12, 4.84028007e-11, 1.10340231e-01])

In [13]:
# Inverse-volatility benchmark fitted on the same training returns,
# used as the comparison point for the Max Sharpe model.
benchmark = InverseVolatility(portfolio_params={"name": "Inverse Vol"})
benchmark.fit(X_train)
benchmark.weights_

array([0.08389328, 0.11547621, 0.12192735, 0.07321198, 0.06313561,
       0.08267092, 0.06155309, 0.05419695, 0.14644521, 0.12084116,
       0.07664825])

In [14]:
# Evaluate both fitted estimators on the held-out returns; each predict()
# call yields a portfolio object that is inspected in the cells below.
pred_model = model.predict(X_test)
pred_bench = benchmark.predict(X_test)

In [15]:
# Out-of-sample annualized Sharpe ratios: optimized model first, benchmark second.
for prediction in (pred_model, pred_bench):
    print(prediction.annualized_sharpe_ratio)

2.680034743732841
2.8490664336909


In [16]:
# Group the two test-set portfolios so they can be compared side by side,
# then plot the composition of each.
population = Population([pred_model, pred_bench])
population.plot_composition()

In [17]:
# Keep a handle on the cumulative-returns figure; later cells re-display it
# and serialize it to JSON.
fig = population.plot_cumulative_returns()
# Explicitly render the figure with plotly's show().
# NOTE(review): the original remark said this call is only needed for the
# documentation build — presumably carried over from an upstream example; confirm
# whether it is required here.
show(fig)

In [18]:
# Side-by-side table of summary statistics for both portfolios.
population.summary()

Unnamed: 0,Max Sharpe,Inverse Vol
Mean,0.19%,0.12%
Annualized Mean,48.65%,31.31%
Variance,0.013%,0.0048%
Annualized Variance,3.29%,1.21%
Semi-Variance,0.0064%,0.0025%
Annualized Semi-Variance,1.60%,0.64%
Standard Deviation,1.14%,0.69%
Annualized Standard Deviation,18.15%,10.99%
Semi-Deviation,0.80%,0.50%
Annualized Semi-Deviation,12.67%,7.99%


In [22]:
import mlflow

In [29]:
# Preview the Max Sharpe summary as a plain dict; values are formatted
# strings (e.g. '48.65%'), not numbers.
pred_model.summary().to_dict()

{'Mean': '0.19%',
 'Annualized Mean': '48.65%',
 'Variance': '0.013%',
 'Annualized Variance': '3.29%',
 'Semi-Variance': '0.0064%',
 'Annualized Semi-Variance': '1.60%',
 'Standard Deviation': '1.14%',
 'Annualized Standard Deviation': '18.15%',
 'Semi-Deviation': '0.80%',
 'Annualized Semi-Deviation': '12.67%',
 'Mean Absolute Deviation': '0.85%',
 'CVaR at 95%': '2.30%',
 'EVaR at 95%': '3.11%',
 'Worst Realization': '4.23%',
 'CDaR at 95%': '7.57%',
 'MAX Drawdown': '9.72%',
 'Average Drawdown': '2.08%',
 'EDaR at 95%': '8.22%',
 'First Lower Partial Moment': '0.42%',
 'Ulcer Index': '0.030',
 'Gini Mean Difference': '1.23%',
 'Value at Risk at 95%': '1.60%',
 'Drawdown at Risk at 95%': '6.57%',
 'Entropic Risk Measure at 95%': '2.99',
 'Fourth Central Moment': '0.000010%',
 'Fourth Lower Partial Moment': '0.000004%',
 'Skew': '24.26%',
 'Kurtosis': '611.16%',
 'Sharpe Ratio': '0.17',
 'Annualized Sharpe Ratio': '2.68',
 'Sortino Ratio': '0.24',
 'Annualized Sortino Ratio': '3.84',

In [43]:
# Capture the summary as a dict of formatted strings.
# NOTE: `metrics` is rebuilt with numeric values in the next cell before
# being logged to MLflow.
metrics = pred_model.summary().to_dict()
metrics

{'Mean': '0.19%',
 'Annualized Mean': '48.65%',
 'Variance': '0.013%',
 'Annualized Variance': '3.29%',
 'Semi-Variance': '0.0064%',
 'Annualized Semi-Variance': '1.60%',
 'Standard Deviation': '1.14%',
 'Annualized Standard Deviation': '18.15%',
 'Semi-Deviation': '0.80%',
 'Annualized Semi-Deviation': '12.67%',
 'Mean Absolute Deviation': '0.85%',
 'CVaR at 95%': '2.30%',
 'EVaR at 95%': '3.11%',
 'Worst Realization': '4.23%',
 'CDaR at 95%': '7.57%',
 'MAX Drawdown': '9.72%',
 'Average Drawdown': '2.08%',
 'EDaR at 95%': '8.22%',
 'First Lower Partial Moment': '0.42%',
 'Ulcer Index': '0.030',
 'Gini Mean Difference': '1.23%',
 'Value at Risk at 95%': '1.60%',
 'Drawdown at Risk at 95%': '6.57%',
 'Entropic Risk Measure at 95%': '2.99',
 'Fourth Central Moment': '0.000010%',
 'Fourth Lower Partial Moment': '0.000004%',
 'Skew': '24.26%',
 'Kurtosis': '611.16%',
 'Sharpe Ratio': '0.17',
 'Annualized Sharpe Ratio': '2.68',
 'Sortino Ratio': '0.24',
 'Annualized Sortino Ratio': '3.84',

In [51]:
def _metric_to_number(raw):
    """Convert one summary value to a finite number, or return None if it is not numeric.

    Args:
        raw: A value from ``pred_model.summary().to_dict()``; either already
            numeric or a formatted string such as ``'48.65%'``, ``'-0.5%'``
            or ``'1e-05%'``.

    Returns:
        ``raw`` unchanged when it is an int/float, the parsed float for a
        numeric string (percent signs removed), or ``None`` when the value
        cannot be parsed or is not finite.
    """
    import math

    if isinstance(raw, (int, float)):
        return raw
    if not isinstance(raw, str):
        return None
    try:
        # Let float() decide what is numeric: a digits-only check rejects
        # signs and exponents, silently dropping negative metrics.
        number = float(raw.replace("%", "").strip())
    except ValueError:
        return None
    # 'nan' / 'inf' parse successfully but are not usable metric values.
    return number if math.isfinite(number) else None


# Build the numeric metrics dict: spaces in names become underscores,
# "%" becomes "percent", and non-numeric summary entries are skipped.
metrics = {
    label.replace(" ", "_").replace("%", "percent"): number
    for label, number in (
        (label, _metric_to_number(raw))
        for label, raw in pred_model.summary().to_dict().items()
    )
    if number is not None
}
metrics

{'Mean': 0.19,
 'Annualized_Mean': 48.65,
 'Variance': 0.013,
 'Annualized_Variance': 3.29,
 'Semi-Variance': 0.0064,
 'Annualized_Semi-Variance': 1.6,
 'Standard_Deviation': 1.14,
 'Annualized_Standard_Deviation': 18.15,
 'Semi-Deviation': 0.8,
 'Annualized_Semi-Deviation': 12.67,
 'Mean_Absolute_Deviation': 0.85,
 'CVaR_at_95percent': 2.3,
 'EVaR_at_95percent': 3.11,
 'Worst_Realization': 4.23,
 'CDaR_at_95percent': 7.57,
 'MAX_Drawdown': 9.72,
 'Average_Drawdown': 2.08,
 'EDaR_at_95percent': 8.22,
 'First_Lower_Partial_Moment': 0.42,
 'Ulcer_Index': 0.03,
 'Gini_Mean_Difference': 1.23,
 'Value_at_Risk_at_95percent': 1.6,
 'Drawdown_at_Risk_at_95percent': 6.57,
 'Entropic_Risk_Measure_at_95percent': 2.99,
 'Fourth_Central_Moment': 1e-05,
 'Fourth_Lower_Partial_Moment': 4e-06,
 'Skew': 24.26,
 'Kurtosis': 611.16,
 'Sharpe_Ratio': 0.17,
 'Annualized_Sharpe_Ratio': 2.68,
 'Sortino_Ratio': 0.24,
 'Annualized_Sortino_Ratio': 3.84,
 'Mean_Absolute_Deviation_Ratio': 0.23,
 'First_Lower_Part

In [64]:
import mlflow

# Point the client at the local tracking server and select the experiment.
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("mean-risk2")

# Hard-coded labels mirroring the RiskMeasure / ObjectiveFunction members
# passed to MeanRisk earlier in the notebook; they are not read from `model`,
# so keep them in sync by hand if the model configuration changes.
run_params = {
    "risk_measure": "ANNUALIZED_VARIANCE",
    "objective_function": "MAXIMIZE_RATIO",
}

# Record parameters, the numeric metrics dict and the fitted model in one run.
with mlflow.start_run(run_name="mean-risk4"):
    mlflow.log_params(run_params)
    mlflow.log_metrics(metrics)
    mlflow.sklearn.log_model(model, "model")

2024/10/20 17:21:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run mean-risk4 at: http://127.0.0.1:5000/#/experiments/721934320915735068/runs/1f1938bf5f5c4328b7d343c171868ba9.
2024/10/20 17:21:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/721934320915735068.


In [54]:
# Re-display the cumulative-returns figure created earlier.
fig.show()

In [57]:
# Serialize the cumulative-returns figure to a JSON string.
fig_json = fig.to_json()

# Write with an explicit UTF-8 encoding: without it, open() uses the
# platform's locale encoding, so the file's bytes would differ between
# machines and non-ASCII characters could fail to encode.
with open("fig.json", "w", encoding="utf-8") as f:
    f.write(fig_json)

In [65]:
# Register the model artifact logged under a given run in the MLflow model registry.
model_name = "mean-risk"

# Pasted IDs often carry stray whitespace, which would end up inside the
# runs:/ URI, so strip it. A blank answer falls back to the most recent run
# of this session so the cell can be re-run without copying the ID by hand.
run_id = input("Enter the run ID (leave blank for the most recent run): ").strip()
if not run_id:
    last_run = mlflow.last_active_run()
    if last_run is None:
        raise ValueError(
            "No run ID was entered and no MLflow run has been recorded in this session."
        )
    run_id = last_run.info.run_id

# "model" is the artifact path used when the model was logged.
model_uri = f"runs:/{run_id}/model"

result = mlflow.register_model(model_uri, model_name)

Registered model 'mean-risk' already exists. Creating a new version of this model...
2024/10/20 17:22:25 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: mean-risk, version 3
Created version '3' of model 'mean-risk'.


In [66]:
# NOTE(review): model_version is not referenced anywhere in this cell — the URI
# below selects the model by alias, not by version number.
model_version = 2
# Resolve the registered model through its "challenger" alias
# (assumes that alias has already been assigned in the registry — TODO confirm).
model_uri = f"models:/{model_name}@challenger"

# Load the registered model back from MLflow and apply it to the held-out returns.
loaded_model = mlflow.sklearn.load_model(model_uri)
y_pred = loaded_model.predict(X_test)

In [74]:
# Plot the cumulative returns of the portfolio predicted by the model
# reloaded from the registry (y_pred is the reloaded model's predict() result).
y_pred.plot_cumulative_returns()