## Primo + Clara-MS

### Clara-MS Original Model

In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_absolute_error

# Feature-importance metric handed to XGBoost; one of
# "gain", "weight", "cover", "total_gain" or "total_cover".
importance_type = "gain"

# NOTE(review): read_pickle unpickles arbitrary Python objects, so these files
# must come from a trusted source.
features_train, tags_train, features_test, tags_test = pd.read_pickle("dataset.pickle")
features_nf, tags_nf = pd.read_pickle("testset.pickle")

# Column labels (LaTeX, for plotting) applied to every feature matrix below.
nf_features = [
    "$R_{state}$",  # "Stateful Ratio",
    "$A_{i}$",  # "IMEM Access",
    "$A_{e}$",  # "EMEM Access",
    "$I_{c}$",  # "Compute Intensity",
    "$R_{ic}$",  # "IMEM-Compute Ratio",
    "$R_{ec}$",  # "EMEM-Compute Ratio",
    "$R_{i}$",  # "IMEM Ratio",
    "$R_{e}$",  # "EMEM Ratio",
    "$R_{sum}$",  # "MEMSum-Compute Ratio",
    "$R_{res}$",  # "MEMRes-Compute Ratio",
]

# Wrap the raw feature matrices in DataFrames so all models see named columns.
features_train = pd.DataFrame(features_train, columns=nf_features)
features_test = pd.DataFrame(features_test, columns=nf_features)
features_nf = pd.DataFrame(features_nf, columns=nf_features)

# Baseline: gradient-boosted regressor fitted on the training split.
xgb_r = xgb.XGBRegressor(n_estimators=20, seed=42, importance_type=importance_type)
xgb_r.fit(features_train, tags_train)
pred_test = xgb_r.predict(features_test)
pred_nf = xgb_r.predict(features_nf)


print("Clara-MS (GBDT) Performance: ")
# scikit-learn metrics take the ground truth first: (y_true, y_pred).
print("Clara Test MAE: ", mean_absolute_error(tags_test, pred_test))
print("Clara NF MAE: ", mean_absolute_error(tags_nf, pred_nf), "\n")

# A plain list guarantees that the negative indices below are positional.
tags_nf = list(tags_nf)

print("Performance on Click NFs: ")
# The reported Click NFs are read from the last four entries of the NF set.
click_nfs = ("MazuNAT", "DNSProxy", "UDPCount", "WebGen")
for offset, nf_name in enumerate(click_nfs, start=-len(click_nfs)):
    print(nf_name + " optimal: ", tags_nf[offset], ", prediction: ", pred_nf[offset])


### Primo Model

In [None]:
from primo.model import PrimoRegressor

# For fast result reproduction, HPO and model selection are disabled:
# a specific model type ("PrAM") and a fixed configuration are used instead.
config = {"max_bins": 32}
pram = PrimoRegressor(model="PrAM", model_config=config, hpo=None)
pram.fit(features_train, tags_train)

pred_test = pram.predict(features_test)
pred_nf = pram.predict(features_nf)

print("Primo Performance: ")
# scikit-learn metrics take the ground truth first: (y_true, y_pred).
print("PrAM Test MAE: ", mean_absolute_error(tags_test, pred_test))
print("PrAM NF MAE: ", mean_absolute_error(tags_nf, pred_nf), "\n")

# A plain list guarantees that the negative indices below are positional.
tags_nf = list(tags_nf)

print("Performance on Click NFs: ")
# The reported Click NFs are read from the last four entries of the NF set.
click_nfs = ("MazuNAT", "DNSProxy", "UDPCount", "WebGen")
for offset, nf_name in enumerate(click_nfs, start=-len(click_nfs)):
    print(nf_name + " optimal: ", tags_nf[offset], ", prediction: ", pred_nf[offset])


### Global Interpretation

In [None]:
# Render the global interpretation view of the fitted PrAM model.
# NOTE(review): n_features=12 is larger than the 10 columns in nf_features;
# presumably it is an upper bound or also counts derived terms - confirm
# against the primo API.
pram.visualize(n_features=12)

### Local Interpretation

In [None]:
# Render the local interpretation view for one sample of the NF test set.
# NOTE(review): idx=2 presumably selects the sample by position; tags_nf is a
# plain list at this point if an earlier evaluation cell has run - confirm
# that local_visualize accepts both forms.
pram.local_visualize(features_nf, tags_nf, idx=2)

### Monotone Constraint

In [None]:
from primo.post_optim import add_monotone_constraint

# Build a monotonicity-constrained model from the fitted PrAM model.
# NOTE(review): feature=[1, 5] presumably are column positions in nf_features
# (that would be "$A_{i}$" and "$R_{ec}$"); whether `pram` itself is modified
# in place is not visible here - confirm against the primo API.
mono_pram = add_monotone_constraint(pram, feature=[1, 5])

In [None]:
# Re-evaluate on both sets with the monotonicity-constrained model.
pred_test = mono_pram.predict(features_test)
pred_nf = mono_pram.predict(features_nf)

print("Primo Performance: ")
# scikit-learn metrics take the ground truth first: (y_true, y_pred).
print("PrAM Test MAE: ", mean_absolute_error(tags_test, pred_test))
print("PrAM NF MAE: ", mean_absolute_error(tags_nf, pred_nf), "\n")

# A plain list guarantees that the negative indices below are positional.
tags_nf = list(tags_nf)

print("Performance on Click NFs: ")
# The reported Click NFs are read from the last four entries of the NF set.
click_nfs = ("MazuNAT", "DNSProxy", "UDPCount", "WebGen")
for offset, nf_name in enumerate(click_nfs, start=-len(click_nfs)):
    print(nf_name + " optimal: ", tags_nf[offset], ", prediction: ", pred_nf[offset])
