In [1]:
import pandas as pd
import numpy as np

Data generation

In [2]:
# Fix the global NumPy seed so every run produces the same simulated prices.
np.random.seed(42)

# Daily calendar covering 2023-01-01 through 2024-12-31 (two years).
dates = pd.date_range(start="2023-01-01", end="2024-12-31", freq="D")
n_days = len(dates)

# Twenty synthetic assets, labelled asset_1 ... asset_20.
feature_names = [f"asset_{k}" for k in range(1, 21)]

# Geometric random walk parameters.
start_price = 150   # common scale applied to every simulated path
volatility = 0.02   # standard deviation of the daily log return
drift = 0.0005      # mean of the daily log return (slight upward trend)

# Build one price path per asset. Assets are simulated one after another so
# the random draws are consumed in asset order.
data = {}
for name in feature_names:
    log_returns = np.random.normal(loc=drift, scale=volatility, size=n_days)
    # Accumulate log returns and exponentiate to obtain the price level.
    price_series = start_price * np.exp(log_returns.cumsum())
    # Keep prices inside the [100, 300] band; values outside are pinned
    # to the nearest bound.
    data[name] = price_series.clip(100, 300)

# Assemble the date-indexed price table (one column per asset).
df = pd.DataFrame(data, index=dates)
df.index.name = "date"
df

Unnamed: 0_level_0,asset_1,asset_2,asset_3,asset_4,asset_5,asset_6,asset_7,asset_8,asset_9,asset_10,asset_11,asset_12,asset_13,asset_14,asset_15,asset_16,asset_17,asset_18,asset_19,asset_20
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-01-01,151.573337,147.166977,149.448945,146.038723,150.350667,151.171976,148.029224,148.316165,147.334612,144.788458,148.729650,149.526015,147.728236,147.030534,145.668041,149.564961,148.197139,150.330501,145.509780,148.694785
2023-01-02,151.230368,148.447728,147.001739,146.667180,151.186648,151.269297,143.919812,143.767540,145.750815,144.029141,146.571028,145.348191,149.756842,152.642881,146.168290,151.553050,144.660791,149.687099,148.976461,150.884874
2023-01-03,153.278735,143.549686,145.377521,144.810544,150.911047,145.159352,144.412388,146.208608,146.534531,144.809114,147.302557,148.198231,154.190612,155.775411,149.678355,148.548407,145.979741,148.946096,148.807813,153.884480
2023-01-04,158.098564,146.608290,147.172522,146.122314,151.636292,145.698863,146.185886,142.297858,145.235569,144.101423,141.749041,151.126056,150.076276,155.476544,149.200517,150.447710,141.860889,153.096883,147.794615,153.659877
2023-01-05,157.438608,148.074612,152.246910,146.251390,156.576889,152.367068,147.772906,141.132525,147.098685,139.790988,138.684230,153.606756,149.123820,157.833125,144.846737,151.958951,143.656004,157.958331,148.417835,153.487657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-27,179.226543,300.000000,277.079904,293.058093,210.050982,100.000000,202.551742,100.000000,149.116329,135.646631,300.000000,300.000000,172.012433,148.115235,109.322430,282.567577,231.025942,266.848235,300.000000,227.287653
2024-12-28,180.567062,300.000000,280.500917,289.019515,212.483901,100.000000,197.040754,100.000000,152.680403,138.516009,298.144350,300.000000,177.926333,151.048104,106.643219,291.224817,235.172159,258.064893,300.000000,224.589403
2024-12-29,178.717626,300.000000,271.813048,293.504895,214.489697,100.000000,196.610964,100.000000,150.908998,142.624210,297.794039,300.000000,177.064539,147.547139,105.112063,288.649434,233.785808,262.275839,300.000000,231.709510
2024-12-30,176.045231,300.000000,274.474390,292.510444,211.082411,100.000000,198.763197,100.063466,147.971492,139.729657,300.000000,300.000000,181.902051,145.005458,105.779869,285.902159,231.077910,255.850306,300.000000,237.924953


In [3]:
# Daily simple returns: today's price divided by yesterday's price, minus one.
simple_returns = df / df.shift(1) - 1

# Lag the returns by one day before rolling, so the statistics on a given date
# are built only from returns up to the previous day.
trailing_window = simple_returns.shift(1).rolling(window=252)
mean = trailing_window.mean()  # 252-day (1-year) rolling average return
std = trailing_window.std()    # 252-day (1-year) rolling volatility

Assets are ranked by their trailing 252-day (1-year) mean daily return as of the last date, and the top 10 are selected.

In [4]:
# Trailing 252-day mean of the one-day-lagged returns, taken on the last date.
latest_mean = simple_returns.shift(1).rolling(window=252).mean().iloc[-1]

# Rank the assets on that date (a higher mean return gets a higher rank), then
# keep the labels of the ten highest-ranked assets, best first.
latest_rank = latest_mean.rank()
numpy_top_10 = latest_rank.sort_values(ascending=False).head(10).index.to_numpy()
numpy_top_10

array(['asset_16', 'asset_12', 'asset_18', 'asset_11', 'asset_7',
       'asset_5', 'asset_4', 'asset_17', 'asset_20', 'asset_10'],
      dtype=object)

Each day, the top 10 assets with the highest 1-year expected return are selected. A portfolio is formed by assigning weights to these assets, proportional to the inverse of their 1-year volatility. The weights are then normalized so that they sum to one.

In [5]:
# Cross-sectional rank of each asset's trailing 252-day mean return on every
# date (higher mean return -> higher rank). Rows are all-NaN until the rolling
# window has filled.
ranked = simple_returns.shift(1).rolling(window=252).mean().rank(axis=1)

# Boolean mask of the 10 highest-ranked assets per date. Re-ranking in
# descending order with method="first" breaks ties deterministically by column
# order; NaN ranks stay NaN and therefore never satisfy the comparison, so
# warm-up rows select nothing.
in_top_10 = ranked.rank(axis=1, ascending=False, method="first") <= 10

# Inverse-volatility score for the selected assets only (NaN elsewhere).
# A zero rolling volatility would produce an infinite score and, after
# normalization, NaN/0 weights for the whole row; such scores are dropped so
# the remaining selected assets are still normalized to sum to one.
inv_vol = (1 / std).where(in_top_10).replace([np.inf, -np.inf], np.nan)

# Normalize each date's scores so the weights sum to one. Built in a single
# vectorized step, which keeps `weights` as float64 (the previous row-by-row
# fill of an empty frame left it as object dtype). Unselected assets and
# warm-up dates remain NaN, exactly as before.
weights = inv_vol.div(inv_vol.sum(axis=1), axis=0)

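Portfolio performance calculation: the expected daily portfolio return on each date, computed as the weighted sum of the selected assets' trailing 252-day mean returns.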

In [6]:
# Weighted sum of the trailing mean returns per date (NaN weights of
# unselected assets are skipped by the sum), shown from 2023-12-31 onward
# as a single-column table named 'return'.
(
    weights.mul(mean)
    .sum(axis=1)
    .loc['2023-12-31':]
    .to_frame(name='return')
)

Unnamed: 0_level_0,return
date,Unnamed: 1_level_1
2023-12-31,0.001742
2024-01-01,0.001722
2024-01-02,0.001724
2024-01-03,0.001715
2024-01-04,0.001704
...,...
2024-12-27,0.001204
2024-12-28,0.001168
2024-12-29,0.001246
2024-12-30,0.001216
