In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
metrics_df = pd.read_csv("portfolio_metrics.csv")

In [3]:
metrics_df.head()

Unnamed: 0,Port_IDs,ROI,PnL,Total Positions,Win Rate,Win Positions,Sharpe Ratio,MDD
0,3672754654734989568,0.476385,566.59766,474.0,24.683544,210.0,0.185274,-0.460781
1,3733192481840423936,0.251099,2923.9772,689.0,79.245283,553.0,0.060265,-0.760355
2,3768170840939476993,8.779089,243.668899,14.0,42.857143,6.0,0.424277,0.0
3,3784403294629753856,0.339819,2521.814305,6050.0,23.53719,1829.0,0.106585,-0.177261
4,3786761687746711808,0.332072,205.0214,82.0,40.243902,37.0,0.21515,-2.318212


In [4]:
metrics_df.isna().sum()

Port_IDs           0
ROI                0
PnL                0
Total Positions    0
Win Rate           0
Win Positions      0
Sharpe Ratio       0
MDD                0
dtype: int64

In [5]:
metrics_df.set_index(["Port_IDs"], inplace=True)

In [6]:
metrics_df.head()

Unnamed: 0_level_0,ROI,PnL,Total Positions,Win Rate,Win Positions,Sharpe Ratio,MDD
Port_IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3672754654734989568,0.476385,566.59766,474.0,24.683544,210.0,0.185274,-0.460781
3733192481840423936,0.251099,2923.9772,689.0,79.245283,553.0,0.060265,-0.760355
3768170840939476993,8.779089,243.668899,14.0,42.857143,6.0,0.424277,0.0
3784403294629753856,0.339819,2521.814305,6050.0,23.53719,1829.0,0.106585,-0.177261
3786761687746711808,0.332072,205.0214,82.0,40.243902,37.0,0.21515,-2.318212


In [7]:
metrics_df.isna().sum()

ROI                0
PnL                0
Total Positions    0
Win Rate           0
Win Positions      0
Sharpe Ratio       0
MDD                0
dtype: int64

In [8]:
df = metrics_df.copy()

In [9]:
pd.set_option("display.max_rows", None)
df["MDD"]

Port_IDs
3672754654734989568   -4.607808e-01
3733192481840423936   -7.603550e-01
3768170840939476993    0.000000e+00
3784403294629753856   -1.772611e-01
3786761687746711808   -2.318212e+00
3788465932399412480   -2.242626e-01
3818233536529843712   -9.319387e-01
3819545518395756033   -1.159995e-16
3826087012661391104   -5.034773e-02
3858510226868015873   -6.255268e+00
3865845304835489536   -1.143587e-03
3878631538480067329   -2.903858e-01
3879821005658659073   -4.258665e+01
3880920727367689984   -8.084070e+00
3886752488982104320   -3.057095e-02
3887577207880438784   -6.817661e-01
3891020560590657281    0.000000e+00
3907081197088384000   -2.429080e-01
3910887259807777281   -3.573145e-01
3910983669796782848   -2.410805e-01
3911997667197443328   -0.000000e+00
3914654298562589441   -1.332848e+00
3919174299855478272   -1.059745e+00
3923766029921022977   -3.546186e-01
3925368433214965504   -4.033724e-02
3926423286576838657   -5.423807e-01
3930688517098515713   -8.991871e-01
3931992636670880512

In [10]:
print(df.isin([float('inf'), float('-inf')]).any())

ROI                False
PnL                False
Total Positions    False
Win Rate           False
Win Positions      False
Sharpe Ratio       False
MDD                 True
dtype: bool


In [11]:
df.replace([np.inf, -np.inf], 0, inplace=True)

In [12]:
df.isna().sum()

ROI                0
PnL                0
Total Positions    0
Win Rate           0
Win Positions      0
Sharpe Ratio       0
MDD                0
dtype: int64

In [13]:
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier

# Clustering
kmeans = KMeans(n_clusters=7, random_state=42)
df['cluster'] = kmeans.fit_predict(df[['ROI', 'PnL', 'Total Positions', "Win Rate", "Win Positions", "Sharpe Ratio", "MDD"]])

# Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(df[['ROI', 'PnL', 'Total Positions', "Win Rate", "Win Positions", "Sharpe Ratio", "MDD"]], df['cluster'])

# Feature importance
feature_importance = rf.feature_importances_
weights = feature_importance / feature_importance.sum()
print(weights)


[0.05790243 0.4252408  0.16836323 0.05983642 0.19288766 0.0446331
 0.05113636]


In [14]:
feature_weights = {"ROI": 0.05790243, 
                    "PnL": 0.4252408,
                    "Total Positions": 0.16836323,
                    "Win Rate": 0.05983642,
                    "Win Positions": 0.19288766,
                    "Sharpe Ratio": 0.0446331,
                    "MDD": 0.05113636
                  }

In [15]:
df["Score"] = (
    df["ROI"] * feature_weights["ROI"] +
    df["PnL"] * feature_weights["PnL"] +
    df["Total Positions"] * feature_weights["Total Positions"] +
    df["Win Rate"] * feature_weights["Win Rate"] +
    df["Win Positions"] * feature_weights["Win Positions"] +
    df["Sharpe Ratio"] * feature_weights["Sharpe Ratio"] +
    df["MDD"] * feature_weights["MDD"]
)

In [16]:
df["Rank"] = df["Score"].rank(ascending=False)

In [17]:
df.head()

Unnamed: 0_level_0,ROI,PnL,Total Positions,Win Rate,Win Positions,Sharpe Ratio,MDD,cluster,Score,Rank
Port_IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3672754654734989568,0.476385,566.59766,474.0,24.683544,210.0,0.185274,-0.460781,2,362.740287,137.0
3733192481840423936,0.251099,2923.9772,689.0,79.245283,553.0,0.060265,-0.760355,2,1470.783646,72.0
3768170840939476993,8.779089,243.668899,14.0,42.857143,6.0,0.424277,0.0,2,110.224054,143.0
3784403294629753856,0.339819,2521.814305,6050.0,23.53719,1829.0,0.106585,-0.177261,5,2445.191154,39.0
3786761687746711808,0.332072,205.0214,82.0,40.243902,37.0,0.21515,-2.318212,2,110.444429,142.0


In [18]:
df.sort_values(by="Rank", ascending=True, inplace=True)

In [19]:
df.drop(columns=["cluster"], axis=1, inplace=True)

In [20]:
top20_ranks = df[:20]

In [21]:
top20_ranks

Unnamed: 0_level_0,ROI,PnL,Total Positions,Win Rate,Win Positions,Sharpe Ratio,MDD,Score,Rank
Port_IDs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
4020204877254599680,0.311561,71998.855953,6050.0,16.859504,1993.0,0.060748,-8.028348,32020.492776,1.0
3999240873283311617,0.543673,42574.473679,4522.0,51.680672,2366.0,0.227505,-0.052816,19325.245296,2.0
4021669203289716224,0.40442,26427.331592,1682.0,8.739596,528.0,0.068251,-0.161568,11623.552413,3.0
3960874214179953664,0.233296,19567.471286,2391.0,46.716855,1233.0,0.162948,-34.53621,8962.324206,4.0
3907081197088384000,1.099242,18015.99737,4137.0,59.898477,2540.0,0.222729,-0.242908,8851.235752,5.0
3956076827719377409,1.03223,16790.012238,3916.0,46.297242,2019.0,0.099528,-0.246143,8191.370716,6.0
3986814617275053313,0.871628,16337.461881,3554.0,78.165447,2780.0,0.329787,-0.000149,8086.688296,7.0
4028701921959171840,0.40648,17601.401398,687.0,52.838428,373.0,0.275724,-0.045177,7675.641843,8.0
3788465932399412480,0.836806,13960.966457,3069.0,38.774845,1369.0,0.152912,-0.224263,6719.906462,9.0
3987739404272887297,0.557013,12464.606315,6050.0,1.272727,1381.0,0.049883,-0.310502,6585.529318,10.0


In [22]:
top20_ranks.to_csv("top20_portfolios.csv", index=True)