In [5]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text

# from scipy.stats import t

# --- Configuration ---
# Input CSVs, one per implementation being compared; replace with your actual
# file paths.
csv1_path = "sklinear_tc_fc_htorch_allcuda.csv"
csv2_path = "sklinear_tc_fc_htorch_htorch.csv"

# Columns that identify a benchmark configuration.
feature_cols = ["in_features", "out_features", "num_terms", "low_rank"]

# Decision-tree settings.
max_depth = 3
random_state = 42

# Relative runtime gap that counts as a real speedup (0.10 means ±10%).
speedup_threshold = 0.10
# Optional cap on the per-row timing std; None disables the filter
# (e.g., 5.0 drops runs with a std above 5 ms).
max_std = None

# --- Confidence-based z-threshold ---
# Confidence level required before a runtime difference is trusted
# (e.g., 0.95 for 95% confidence).
confidence_level = 0.95
alpha = 1 - confidence_level
# Two-sided critical value for normally distributed differences:
# z = norm.ppf(1 - alpha/2).
# Typical values: 68%→1.0, 90%→1.645, 95%→1.96, 99%→2.576
upper_quantile = 1 - alpha / 2
z_thresh = norm.ppf(upper_quantile)
print(f"Using z_threshold = {z_thresh:.3f} for {confidence_level*100:.0f}% confidence.")

# If sample sizes (n_runs) are known and small (<30), consider using Student's t
# instead of the normal quantile:
# df = n_runs - 1; t_threshold = t.ppf(1 - alpha/2, df)

# --- Load and clean ---
# Each CSV holds the timing results of one implementation. The shared timing
# columns are renamed so the two result sets stay distinguishable after the
# merge below.
df1 = pd.read_csv(csv1_path).rename(
    columns={"forward_mean_ms": "time_model1", "forward_std_ms": "std_model1"}
)
df2 = pd.read_csv(csv2_path).rename(
    columns={"forward_mean_ms": "time_model2", "forward_std_ms": "std_model2"}
)

# Drop rows with a missing mean or std; they cannot be compared.
df1 = df1.dropna(subset=["time_model1", "std_model1"])
df2 = df2.dropna(subset=["time_model2", "std_model2"])

# Optionally filter out high-variance runs if max_std is set
if max_std is not None:
    df1 = df1[df1["std_model1"] <= max_std]
    df2 = df2[df2["std_model2"] <= max_std]

# The merge keys are expected to identify one row per CSV. If a feature
# combination is repeated, the inner merge pairs every repeat from one file
# with every repeat from the other, silently inflating the dataset. Warn
# instead of failing so existing data still loads.
for _csv_path, _frame in ((csv1_path, df1), (csv2_path, df2)):
    _n_repeated = int(_frame.duplicated(subset=feature_cols).sum())
    if _n_repeated:
        print(
            f"Warning: {_csv_path} has {_n_repeated} rows repeating an earlier "
            f"{feature_cols} combination; the merge will multiply these rows."
        )

# Merge on feature columns; only configurations present in both files survive.
df = pd.merge(df1, df2, on=feature_cols, how="inner")
print(f"Merged dataset has {len(df)} records.")

# Standard error of the difference between the two mean runtimes:
# sqrt((std1^2 + std2^2) / num_runs).
# NOTE(review): assumes both means were averaged over the same num_runs
# independent runs -- confirm that 200 matches the benchmark setup.
num_runs = 200
variance_sum = df["std_model1"] ** 2 + df["std_model2"] ** 2
df["combined_std"] = np.sqrt(variance_sum / num_runs)

# --- Define runtime categories (0=model1 faster, 1=similar, 2=model2 faster) ---
# Band of ±speedup_threshold around model2's runtime, and the signed gap
# (negative when model1 is the quicker one).
lower = (1 - speedup_threshold) * df["time_model2"]
upper = (1 + speedup_threshold) * df["time_model2"]
diff = df["time_model1"] - df["time_model2"]

# A row leaves the 'similar' class only if the gap clears BOTH the relative
# band and z_thresh standard errors.
significance_margin = z_thresh * df["combined_std"]
cond_fast1 = (diff <= -significance_margin) & (df["time_model1"] < lower)
cond_fast2 = (diff >= significance_margin) & (df["time_model1"] > upper)
conditions = [cond_fast1, cond_fast2]
choices = [0, 2]
df["runtime_category"] = np.select(
    condlist=conditions, choicelist=choices, default=1
)

# Display distribution
category_labels = {0: "model1 faster", 1: "similar", 2: "model2 faster"}
dist = df["runtime_category"].map(category_labels)
print("Category distribution:\n", dist.value_counts())

# --- Feature preparation ---
X = df[feature_cols]
y = df["runtime_category"]

# --- Train multi-class decision tree ---
# A stratified split raises ValueError when any class has a single row, which
# can happen once the speedup/confidence thresholds are tightened. Stratify
# only when every class present has at least two rows; otherwise fall back to
# a plain random split and say so.
class_counts = y.value_counts()
can_stratify = (not class_counts.empty) and bool(class_counts.min() >= 2)
if not can_stratify:
    print(
        "Warning: at least one runtime category has fewer than 2 rows; "
        "using a non-stratified train/test split."
    )
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state,
    stratify=y if can_stratify else None,
)
clf = DecisionTreeClassifier(max_depth=max_depth, random_state=random_state)
clf.fit(X_train, y_train)

# Evaluate on both splits; a large gap between them would suggest overfitting.
print(f"Training accuracy: {clf.score(X_train,y_train):.3f}")
print(f"Test accuracy: {clf.score(X_test,y_test):.3f}")

# Print the learned rules as indented text
tree_rules = export_text(clf, feature_names=feature_cols)
print("\nDecision rules (0=model1 faster, 1=similar, 2=model2 faster):\n")
print(tree_rules)

# Feature importances, most influential first
importances = pd.Series(clf.feature_importances_, index=feature_cols)
print("\nFeature importances:")
print(importances.sort_values(ascending=False))

# --- Considerations for the 'similar' category ---
# - Exclude 'similar' rows to reduce the problem to a binary (model1 vs. model2) analysis.
# - Tune `confidence_level` to adjust how readily a runtime difference counts as significant.
# - Add memory usage as another feature for better classification.
# - If the number of runs (n_runs) per record is available, use Student's t for small-sample corrections.

Using z_threshold = 1.960 for 95% confidence.
Merged dataset has 575 records.
Category distribution:
 runtime_category
similar          324
model2 faster    175
model1 faster     76
Name: count, dtype: int64
Training accuracy: 0.846
Test accuracy: 0.835

Decision rules (0=model1 faster, 1=similar, 2=model2 faster):

|--- num_terms <= 1.50
|   |--- in_features <= 4608.00
|   |   |--- out_features <= 24576.00
|   |   |   |--- class: 0
|   |   |--- out_features >  24576.00
|   |   |   |--- class: 1
|   |--- in_features >  4608.00
|   |   |--- in_features <= 12288.00
|   |   |   |--- class: 2
|   |   |--- in_features >  12288.00
|   |   |   |--- class: 2
|--- num_terms >  1.50
|   |--- in_features <= 12288.00
|   |   |--- low_rank <= 96.00
|   |   |   |--- class: 1
|   |   |--- low_rank >  96.00
|   |   |   |--- class: 2
|   |--- in_features >  12288.00
|   |   |--- out_features <= 4608.00
|   |   |   |--- class: 1
|   |   |--- out_features >  4608.00
|   |   |   |--- class: 1


Feature im