In [31]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [32]:
# Column groups used to assemble the model's predictors.
# Numeric data-property columns:
data_properties = [
    'nfeature',
    'hlr',
    'Correlation',
    'Transition',
    'Shifting',
    'Seasonality',
    'Trend',
    'Stationarity',
]
# Flag columns that are cast to int before training:
binary_features = ['norm', 'sd']
# Columns that are cast to the pandas category dtype before training:
categorical_features = ['fusion', 'embed', 'ff']
# Column the regressor learns to predict:
target = 'rank'

In [33]:
# Load the processed results table.
df = pd.read_csv("res_m_processed.csv")

# Full predictor list: data properties first, then binary flags, then
# categorical columns.
all_features = data_properties + binary_features + categorical_features

# Cast the binary flags to int and the categorical columns to the pandas
# category dtype in one pass. Note this only converts dtypes; it does not
# validate that the binary columns actually contain 0/1.
df = df.astype({
    **{col: int for col in binary_features},
    **{col: "category" for col in categorical_features},
})

In [34]:
# Inspect the loaded frame after the dtype casts above.
df

Unnamed: 0,dataset,nfeature,hlr,Correlation,Transition,Shifting,Seasonality,Trend,Stationarity,norm,sd,fusion,embed,ff,rank
0,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,0,1,feature,invert,rnn,1.5
1,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,1,1,feature,invert,trans,2.0
2,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,0,0,feature,invert,rnn,2.5
3,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,0,0,feature,none,rnn,4.0
4,PEMS03,358,0.125,0.800413,0.006313,0.071985,0.870186,0.095749,2.260248e-29,0,1,feature,none,rnn,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,weather,21,7.500,0.662655,0.054499,0.229994,0.652002,0.648986,1.036509e-08,1,0,temporal,invert,mlp,32.0
1436,weather,21,7.500,0.662655,0.054499,0.229994,0.652002,0.648986,1.036509e-08,1,0,feature,none,trans,33.0
1437,weather,21,7.500,0.662655,0.054499,0.229994,0.652002,0.648986,1.036509e-08,1,1,temporal,patch,mlp,33.0
1438,weather,21,7.500,0.662655,0.054499,0.229994,0.652002,0.648986,1.036509e-08,1,1,feature,token,rnn,33.0


In [35]:
# Split the frame into the predictor matrix and the regression target.
X, y = df[all_features], df[target]

In [36]:
# Columns LightGBM is told to treat as categorical.
categorical_feature = categorical_features

# Regressor settings, gathered in one place.
lgbm_params = dict(
    objective='regression',
    metric='rmse',
    boosting_type='gbdt',
    n_estimators=100,
    learning_rate=0.05,
)
model = lgb.LGBMRegressor(**lgbm_params)

# Fit on every row of X / y; no hold-out split is made in this cell.
model.fit(X, y, categorical_feature=categorical_feature)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000383 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 116
[LightGBM] [Info] Number of data points in the train set: 1440, number of used features: 13
[LightGBM] [Info] Start training from score 16.688889


In [37]:
import pandas as pd
import itertools

In [38]:
# Data-property values for the dataset to score (presumably ETT, going by the
# variable name). The keys match the entries of `data_properties`.
ett_data_prop = dict(
    nfeature=7,
    hlr=0.25,
    Correlation=0.6301528502237842,
    Transition=0.01983808182713343,
    Shifting=0.2049245530588814,
    Seasonality=0.7306899764703657,
    Trend=0.8534989671282699,
    Stationarity=0.0011999625289986515,
)

In [39]:
# Every 0/1 assignment of the binary flags, one tuple per assignment.
binary_values = [*itertools.product((0, 1), repeat=len(binary_features))]

In [40]:
# Candidate levels for each categorical design choice.
fusion_categories = ['temporal', 'feature']
embed_categories = ['token', 'patch', 'invert', 'freq', 'none']
ff_categories = ['mlp', 'rnn', 'trans']

# Cartesian product of the three level lists as (fusion, embed, ff) tuples;
# the last position varies fastest.
categorical_values = [
    (fusion, embed, ff)
    for fusion in fusion_categories
    for embed in embed_categories
    for ff in ff_categories
]

In [41]:
# Pair every binary-flag assignment with every categorical assignment.
all_combinations = [
    (binary_combo, cat_combo)
    for binary_combo in binary_values
    for cat_combo in categorical_values
]

In [42]:
# One record per candidate configuration: the fixed data properties, followed
# by the binary flags and then the categorical choices for that candidate.
rows = [
    {
        **ett_data_prop,
        **dict(zip(binary_features, binary_combo)),
        **dict(zip(categorical_features, cat_combo)),
    }
    for binary_combo, cat_combo in all_combinations
]

df_combinations = pd.DataFrame(rows)

In [43]:
# Apply the same dtype casts used on the training frame: int for the binary
# flags, pandas category for the categorical columns.
df_combinations = df_combinations.astype({
    **{col: int for col in binary_features},
    **{col: "category" for col in categorical_features},
})

In [44]:
# Predict a rank for every candidate configuration. The training columns are
# selected explicitly so the model always receives exactly the features it was
# fitted on, in the same order. This also keeps the cell safe to re-run: the
# 'predicted_rank' column added here is never passed back into predict().
df_combinations['predicted_rank'] = model.predict(df_combinations[all_features])

# Optional: sort by best score (e.g., lowest rank)
df_combinations_sorted = df_combinations.sort_values('predicted_rank')

In [45]:
# Show the ten configurations with the lowest predicted rank.
df_combinations_sorted.head(10)

Unnamed: 0,nfeature,hlr,Correlation,Transition,Shifting,Seasonality,Trend,Stationarity,norm,sd,fusion,embed,ff,predicted_rank
78,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,1,0,feature,patch,mlp,10.621027
72,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,1,0,temporal,none,mlp,10.959217
74,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,1,0,temporal,none,trans,10.96026
64,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,1,0,temporal,patch,rnn,10.993266
94,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,1,1,temporal,patch,rnn,11.058578
63,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,1,0,temporal,patch,mlp,12.241054
108,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,1,1,feature,patch,mlp,12.896969
55,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,0,1,feature,freq,rnn,12.897241
89,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,1,0,feature,none,trans,13.33985
25,7,0.25,0.630153,0.019838,0.204925,0.73069,0.853499,0.0012,0,0,feature,freq,rnn,13.35244
