In [15]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [16]:
# Column groups used throughout the notebook.
# These columns receive no dtype cast in this notebook.
data_properties = [
    'hlr',
    'Transition',
    'Shifting',
    'Seasonality',
    'Trend',
    'Stationarity',
]
# Columns cast to int before training and prediction.
binary_features = [
    'norm',
    'sd',
]
# Columns cast to pandas "category" dtype before training and prediction.
categorical_features = [
    'fusion',
    'embed',
    'ff',
]
# Column the regressor is fitted against.
target = 'rank'

In [17]:
# Ordered model inputs: data properties first, then binary flags, then categoricals.
all_features = [*data_properties, *binary_features, *categorical_features]

# Target dtype per column: int for the binary flags, pandas "category" for the
# categorical columns. Columns not listed here keep the dtype read_csv inferred.
dtype_by_column = {
    **{name: int for name in binary_features},
    **{name: "category" for name in categorical_features},
}

# Load the processed results table and apply all casts in one step.
df = pd.read_csv("res_s_processed.csv").astype(dtype_by_column)

In [18]:
# Split the table into the model inputs (X) and the regression target (y).
X = df.loc[:, all_features]
y = df.loc[:, target]

In [19]:
# Hyperparameters for the gradient-boosted regressor, gathered in one place.
lgbm_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'n_estimators': 100,
    'learning_rate': 0.05,
}

# Build the model from the parameter dict.
model = lgb.LGBMRegressor(**lgbm_params)

# Columns LightGBM is told to treat as categorical during fitting.
categorical_feature = categorical_features

# Fit on the full table (no hold-out split is made in this cell).
model.fit(X, y, categorical_feature=categorical_feature)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000378 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 122
[LightGBM] [Info] Number of data points in the train set: 1620, number of used features: 11
[LightGBM] [Info] Start training from score 15.753068


In [20]:
import pandas as pd
import itertools

In [21]:
# Alternative property values left disabled by the author (what they describe
# is not stated in this notebook -- TODO confirm before re-enabling):
# social_data_prop = {
#     'hlr': 0.5,
#     'Transition': 0.036458333333333336,
#     'Shifting': 0.5126103010200889,
#     'Seasonality': 0.5359111459455013,
#     'Trend': 0.3438775740197786,
#     'Stationarity': 4.76691279545103e-12
# }

# Fixed data-property values shared by every candidate row built below.
# Keys are listed in the same order as `data_properties`.
social_data_prop = dict(
    hlr=1.0 / 3.0,
    Transition=0.07407407407407407,
    Shifting=0.33045622688039455,
    Seasonality=0.538328791786597,
    Trend=0.915250129474845,
    Stationarity=0.509793082612691,
)

In [22]:
# Every 0/1 assignment for the binary flags, one tuple per assignment,
# with positions following the order of `binary_features`.
binary_values = [
    flags for flags in itertools.product([0, 1], repeat=len(binary_features))
]

In [23]:
# Candidate levels for each categorical column.
fusion_categories = ['temporal', 'feature']
embed_categories = ['token', 'patch', 'invert', 'freq', 'none']
ff_categories = ['mlp', 'rnn', 'trans']

# Cartesian product of the three level lists as (fusion, embed, ff) tuples;
# the last position varies fastest.
categorical_values = [
    (fusion, embed, ff)
    for fusion in fusion_categories
    for embed in embed_categories
    for ff in ff_categories
]

In [24]:
# Pair every binary-flag assignment with every categorical assignment.
# Each element is (binary_tuple, categorical_tuple); the categorical part varies fastest.
all_combinations = [
    (flags, categories)
    for flags in binary_values
    for categories in categorical_values
]

In [25]:
# One record per candidate: the fixed data properties, then the binary flag
# values, then the categorical values (later keys would override earlier ones).
rows = [
    {
        **social_data_prop,
        **dict(zip(binary_features, flag_values)),
        **dict(zip(categorical_features, category_values)),
    }
    for flag_values, category_values in all_combinations
]

# Build the frame once from the full list of records.
df_combinations = pd.DataFrame(rows)

In [26]:
# Apply the same casts used on the training table: int for binary flags,
# pandas "category" for the categorical columns.
df_combinations = df_combinations.astype(
    {
        **{name: int for name in binary_features},
        **{name: "category" for name in categorical_features},
    }
)

In [27]:
# Score every candidate configuration with the fitted model.
# Only the training feature columns are passed, in training order. Passing the
# whole frame would include `predicted_rank` on a second run of this cell, so
# the column count would no longer match what the model was fitted on.
df_combinations['predicted_rank'] = model.predict(df_combinations[all_features])

# Order candidates by ascending predicted rank
# (presumably a lower rank is better -- confirm against how `rank` was built).
df_combinations_sorted = df_combinations.sort_values('predicted_rank')

In [28]:
# Show the candidates with the lowest predicted rank.
TOP_N = 10
df_combinations_sorted.head(TOP_N)

Unnamed: 0,hlr,Transition,Shifting,Seasonality,Trend,Stationarity,norm,sd,fusion,embed,ff,predicted_rank
109,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,1,1,feature,patch,rnn,9.526457
97,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,1,1,temporal,invert,rnn,9.676755
0,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,0,0,temporal,token,mlp,10.112537
49,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,0,1,feature,patch,rnn,10.155894
57,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,0,1,feature,none,mlp,10.160278
36,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,0,1,temporal,invert,mlp,10.348724
26,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,0,0,feature,freq,trans,10.393618
42,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,0,1,temporal,none,mlp,10.879278
55,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,0,1,feature,freq,rnn,10.910396
30,0.333333,0.074074,0.330456,0.538329,0.91525,0.509793,0,1,temporal,token,mlp,10.921711
