In [4]:
# %pip install scikit-learn  # uncomment if scikit-learn is missing; %pip installs into this kernel's environment

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

In [6]:
# 1. Load Data + Train Models
# CSV format: label,CPI,Math_Intensity,Stall_Ratio,System_BW_Proxy,Branch_MPKI,GFLOPS_Approx,Clock_Ratio

# Each row is (name, training CSV to read, C source file the trained model is
# exported to); rows are expanded into dicts with those three keys.
datasets = [
    dict(zip(("name", "csv_path", "model_out"), row))
    for row in (
        (
            "AMDzen4c_edp",
            "../scripts/AMDzen4c/csv/training_dataset_dvfs_edp_labels.csv",
            "../dvfs/model_amdzen4c_edp.c",
        ),
        (
            "AMDzen4c_energy",
            "../scripts/AMDzen4c/csv/training_dataset_dvfs_energy_labels.csv",
            "../dvfs/model_amdzen4c_energy.c",
        ),
        (
            "IntelSPR_edp",
            "../scripts/IntelSPR/csv/training_dataset_dvfs_edp_labels.csv",
            "../dvfs/model_intelspr_edp.c",
        ),
        (
            "IntelSPR_energy",
            "../scripts/IntelSPR/csv/training_dataset_dvfs_energy_labels.csv",
            "../dvfs/model_intelspr_energy.c",
        ),
    )
]

# 2. Feature Selection
# Columns selected from each CSV as model inputs. The order fixes the column
# order of X and therefore the feature indices stored in the fitted tree.
# Must stay a list: it is used directly as a DataFrame column selector.
features = [
    "CPI", "Math_Intensity",
    "Stall_Ratio", "System_BW_Proxy",
    "Branch_MPKI", "GFLOPS_Approx",
    "Clock_Ratio",
]

# Fitted classifier, feature list and export path for every dataset, keyed by
# dataset name.
trained_models = {}

for ds in datasets:
    print(f"=== Training {ds['name']} ===")

    # Load the CSV and discard every row holding a NaN or +/-inf in any column
    # (common in Likwid startup/shutdown samples).
    df = (
        pd.read_csv(ds["csv_path"])
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )

    X, y = df[features], df['label']

    # Hold out 20% of the rows, keeping the label proportions in both parts.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Depth-limited decision tree with class weights balanced across labels.
    clf = DecisionTreeClassifier(max_depth=5, random_state=42, class_weight='balanced')
    clf.fit(X_train, y_train)

    # Score on the held-out rows.
    y_pred = clf.predict(X_test)

    print("=== Classification Report ===")
    print(classification_report(y_test, y_pred))

    print("=== Confusion Matrix ===")
    print(confusion_matrix(y_test, y_pred))

    # Per-feature importance, most important first.
    importances = pd.DataFrame({'feature': features, 'importance': clf.feature_importances_})
    print("=== Feature Importances ===")
    print(importances.sort_values(by='importance', ascending=False))

    # Human-readable dump of the learned split rules.
    tree_rules = export_text(clf, feature_names=features)
    print("=== C-Code Logic (Copy this to your Daemon) ===")
    print(tree_rules)

    # Keep what is needed to export this model later.
    trained_models[ds['name']] = {
        "clf": clf,
        "features": features,
        "model_out": ds["model_out"],
    }


=== Training AMDzen4c_edp ===
=== Classification Report ===
              precision    recall  f1-score   support

    HighFreq       1.00      0.99      0.99      2880
     LowFreq       0.97      1.00      0.99      1300
     MedFreq       1.00      0.99      1.00       125

    accuracy                           0.99      4305
   macro avg       0.99      0.99      0.99      4305
weighted avg       0.99      0.99      0.99      4305

=== Confusion Matrix ===
[[2846   34    0]
 [   0 1300    0]
 [   0    1  124]]
=== Feature Importances ===
           feature  importance
2      Stall_Ratio    0.465550
4      Branch_MPKI    0.387387
1   Math_Intensity    0.084758
3  System_BW_Proxy    0.062305
0              CPI    0.000000
5    GFLOPS_Approx    0.000000
6      Clock_Ratio    0.000000
=== C-Code Logic (Copy this to your Daemon) ===
|--- Stall_Ratio <= 0.14
|   |--- Branch_MPKI <= 0.06
|   |   |--- Math_Intensity <= 1.12
|   |   |   |--- System_BW_Proxy <= 0.00
|   |   |   |   |--- cla

In [7]:
from sklearn.tree import _tree
import numpy as np

def generate_model_c_file(clf, feature_names, filename="../dvfs/model.c"):
    """Export a fitted decision tree as a C file of nested if/else statements.

    The generated file defines ``FreqLevel predict_phase_level(...)`` with one
    ``double`` parameter per entry of ``feature_names`` (in that order). Each
    internal tree node becomes ``if (<feature> <= <threshold>) {...} else {...}``
    and each leaf becomes a ``return`` of the enumerator mapped to the leaf's
    majority class.

    Parameters
    ----------
    clf : fitted estimator exposing ``tree_`` and ``classes_``
        For example a fitted ``DecisionTreeClassifier``.
    feature_names : sequence of str
        Name of each input column, indexed by ``clf.tree_.feature``. Names are
        emitted verbatim as C parameter names, so they must be valid C
        identifiers.
    filename : str
        Path of the C file to write; an existing file is overwritten.

    Notes
    -----
    * ``FreqLevel`` and the ``FREQ_LEVEL_*`` enumerators are presumably declared
      in ``model.h`` -- verify against that header.
    * Class labels without an entry in the label-to-enum mapping fall back to
      ``FREQ_LEVEL_HIGH``; a warning is issued so the fallback is not silent.
    * Thresholds are written with ``repr(float(...))`` so the C literal parses
      back to exactly the stored split value; a fixed ``.6f`` format would turn
      any threshold below ~5e-7 into ``0.000000``.
    * NOTE(review): the generated code compares ``double`` inputs; confirm this
      matches how the estimator treats inputs at predict time for values that
      sit extremely close to a threshold.
    """
    import warnings  # local import: only needed for the unknown-label warning

    tree_ = clf.tree_

    # Mapping: Ensure these keys match your training labels exactly!
    label_to_enum = {
        "LowFreq":  "FREQ_LEVEL_LOW",
        "MedFreq":  "FREQ_LEVEL_MED",
        "HighFreq": "FREQ_LEVEL_HIGH"
    }
    fallback_enum = "FREQ_LEVEL_HIGH"

    model_classes = list(clf.classes_)

    # Surface label/enum mismatches instead of silently emitting the fallback.
    unmapped = [str(label) for label in model_classes if label not in label_to_enum]
    if unmapped:
        warnings.warn(
            f"No FreqLevel mapping for label(s) {unmapped}; "
            f"leaves predicting them will return {fallback_enum}."
        )

    # Build the C parameter list from the actual feature names so the function
    # signature always matches the identifiers used in the generated conditions.
    c_params = ", ".join(f"double {feat}" for feat in feature_names) or "void"

    with open(filename, "w") as f:
        # Write Includes (the header comment needs its own line)
        f.write("/* Auto-generated by Python ML Exporter */\n")
        f.write("#include \"model.h\"\n")
        f.write("#include <stdlib.h>\n\n")

        # Write Function Signature
        f.write(f"FreqLevel predict_phase_level({c_params}) {{\n")

        # Recursive Tree Walker
        def recurse(node, depth):
            indent = "    " * depth
            left_child = tree_.children_left[node]
            right_child = tree_.children_right[node]
            # A leaf stores the same sentinel in both child slots; a split node
            # has two distinct children.
            if left_child != right_child:
                # Decision Node
                feat_name = feature_names[tree_.feature[node]]
                threshold = repr(float(tree_.threshold[node]))
                f.write(f"{indent}if ({feat_name} <= {threshold}) {{\n")
                recurse(left_child, depth + 1)
                f.write(f"{indent}}} else {{\n")
                recurse(right_child, depth + 1)
                f.write(f"{indent}}}\n")
            else:
                # Leaf Node: predict the class with the largest stored value
                class_idx = np.argmax(tree_.value[node])
                label_str = model_classes[class_idx]
                enum_val = label_to_enum.get(label_str, fallback_enum)

                f.write(f"{indent}// Prediction: {label_str}\n")
                f.write(f"{indent}return {enum_val};\n")

        # Start Recursion at the root, indented one level inside the function
        recurse(0, 1)

        # Close Function
        f.write("}\n")

    print(f"Successfully generated {filename}")

# Emit one C source file per trained classifier, at the path recorded for it.
for name, info in trained_models.items():
    print(f"\n=== Exporting {name} ===")
    generate_model_c_file(
        clf=info["clf"],
        feature_names=info["features"],
        filename=info["model_out"],
    )



=== Exporting AMDzen4c_edp ===
Successfully generated ../dvfs/model_amdzen4c_edp.c

=== Exporting AMDzen4c_energy ===
Successfully generated ../dvfs/model_amdzen4c_energy.c

=== Exporting IntelSPR_edp ===
Successfully generated ../dvfs/model_intelspr_edp.c

=== Exporting IntelSPR_energy ===
Successfully generated ../dvfs/model_intelspr_energy.c
