In [None]:
import os
import yaml
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# PATHS / CONFIG
BASE = "/eagle/projects/argonne_tpc/siebenschuh/domain_shift_data"
EMB_PATH = os.path.join(BASE, "embeddings", "SimSiam_moderate_best_embeddings.npy")
LBL_PATH = os.path.join(BASE, "embeddings", "SimSiam_moderate_best_labels.npy")
DATASETS = ["uniform", "moderate", "heavy"]
N_CLASSES = 10          # digit labels 0..9
TEST_PER_CLASS = 1000   # N_CLASSES * TEST_PER_CLASS = balanced 10k test set
SEED = 42


def load_split_config(ds):
    """Parse and return the YAML file `datasets/MNIST_<ds>.yaml` located under BASE."""
    cfg_path = os.path.join(BASE, "datasets", f"MNIST_{ds}.yaml")
    with open(cfg_path) as f:
        return yaml.safe_load(f)


# ─── 1) Load embeddings + labels for the SSL subset ────────────────────────────
X_all = np.load(EMB_PATH)
y_all = np.load(LBL_PATH).ravel()  # flatten (n, 1) labels to (n,)

moderate_cfg = load_split_config("moderate")
train_idx = np.array(moderate_cfg["not_subsampled_indices"], dtype=int)
X_train, y_train = X_all[train_idx], y_all[train_idx]

# ─── 2) Build EXCLUDED index-set (union of all three splits) ───────────────────
excluded = set()
for ds in DATASETS:
    # The moderate config is already parsed above; do not read it a second time.
    cfg = moderate_cfg if ds == "moderate" else load_split_config(ds)
    excluded.update(cfg["not_subsampled_indices"])
excluded = np.array(sorted(excluded), dtype=int)

# ─── 3) Fetch FULL MNIST (raw) ────────────────────────────────────────────────
mnist = fetch_openml("mnist_784", version=1, as_frame=False)
X_full, y_full = mnist["data"], mnist["target"].astype(int)

# ─── 4) Select a balanced 10k test-set outside ALL subsets ────────────────────
# NOTE(review): this assumes the YAML indices refer to rows in the same order as
# the OpenML "mnist_784" arrays fetched above — confirm against the code that
# wrote the YAML files.
mask = np.ones(len(y_full), dtype=bool)
mask[excluded] = False

rng = np.random.RandomState(SEED)
test_indices = []
for cls in range(N_CLASSES):
    candidates = np.where(mask & (y_full == cls))[0]
    # Fail with an explicit message instead of the generic error that
    # rng.choice raises when asked for more samples than are available.
    if len(candidates) < TEST_PER_CLASS:
        raise ValueError(
            f"class {cls}: only {len(candidates)} samples remain outside the "
            f"excluded subsets, need {TEST_PER_CLASS}"
        )
    chosen = rng.choice(candidates, size=TEST_PER_CLASS, replace=False)
    test_indices.append(chosen)
test_indices = np.concatenate(test_indices)

y_test = y_full[test_indices]

# ─── 5) LOAD or COMPUTE EMBEDDINGS for TEST set ────────────────────────────────
# X_test stays None until one of the two options below is filled in.
X_test = None

# If you've precomputed embeddings for full MNIST, load them here:
# full_emb = np.load("/path/to/SimSiam_full_embeddings.npy")
# X_test = full_emb[test_indices]

# Otherwise compute via your SSL encoder:
# def encode(images): ...
# X_test = encode(X_full[test_indices])

# Only abort while the placeholder is unfilled; previously the raise was
# unconditional, which made step 6 unreachable even after X_test was provided.
if X_test is None:
    raise NotImplementedError(
        "Fill in either loading or computing embeddings for the held-out test set"
    )
if len(X_test) != len(y_test):
    raise ValueError(
        f"X_test has {len(X_test)} rows but y_test has {len(y_test)} labels"
    )

# ─── 6) STANDARDIZE & TRAIN/EVALUATE ─────────────────────────────────────────
# Scaling statistics come from the training embeddings only.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

classifiers = {
    # probability=True is omitted: only predict() is used below, and enabling it
    # makes fit() run an additional internal 5-fold cross-validation.
    "SVC": SVC(kernel="rbf", random_state=SEED),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=SEED),
    "MLP": MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=SEED),
}

for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    print(f"\n{name} Accuracy: {accuracy_score(y_test, preds):.4f}")
    print(classification_report(y_test, preds))



SVC Accuracy: 0.9246
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      1185
           1       0.97      0.98      0.98      1348
           2       0.91      0.92      0.92      1192
           3       0.89      0.89      0.89      1226
           4       0.93      0.91      0.92      1168
           5       0.92      0.89      0.90      1084
           6       0.95      0.96      0.95      1184
           7       0.95      0.92      0.94      1253
           8       0.88      0.90      0.89      1170
           9       0.89      0.90      0.89      1190

    accuracy                           0.92     12000
   macro avg       0.92      0.92      0.92     12000
weighted avg       0.92      0.92      0.92     12000


RandomForest Accuracy: 0.8301
              precision    recall  f1-score   support

           0       0.88      0.89      0.88      1185
           1       0.96      0.96      0.96      1348
           2       0.84  

In [7]:
# Show the shape of the array `X`.
# NOTE(review): no cell above this one assigns `X` in the visible notebook; it is
# first assigned in a cell further down, so on a fresh kernel run top-to-bottom
# this line raises NameError. Move this cell below the cell that loads `X`.
X.shape

(60000, 128)

In [3]:
# 1) Load data
# Both files are derived from one prefix so the embedding/label pair cannot drift apart.
EMB_PREFIX = os.path.join(
    "/eagle/projects/argonne_tpc/siebenschuh/domain_shift_data/embeddings",
    "SimSiam_moderate_worst",
)
X = np.load(f"{EMB_PREFIX}_embeddings.npy")
# If labels are shape (n,1), flatten to (n,)
y = np.load(f"{EMB_PREFIX}_labels.npy").ravel()

# 2) Train/test split (stratified 80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 3) Standardize features (important for SVC + MLP)
# Statistics are fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 4) Define classifiers
classifiers = {
    # probability=True is omitted: only predict() is used below, and enabling it
    # makes fit() run an additional internal 5-fold cross-validation.
    "SVC": SVC(kernel="rbf", random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42),
    "MLP": MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42),
}

# 5) Train + evaluate
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(f"\n{name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds))



SVC Accuracy: 0.9096
              precision    recall  f1-score   support

           0       0.96      0.97      0.96      1185
           1       0.96      0.97      0.97      1348
           2       0.91      0.92      0.92      1192
           3       0.88      0.89      0.89      1226
           4       0.93      0.91      0.92      1168
           5       0.88      0.85      0.86      1084
           6       0.96      0.94      0.95      1184
           7       0.93      0.89      0.91      1253
           8       0.86      0.87      0.86      1170
           9       0.82      0.89      0.85      1190

    accuracy                           0.91     12000
   macro avg       0.91      0.91      0.91     12000
weighted avg       0.91      0.91      0.91     12000


RandomForest Accuracy: 0.8393
              precision    recall  f1-score   support

           0       0.88      0.91      0.89      1185
           1       0.95      0.96      0.95      1348
           2       0.83  

In [4]:
# 1) Load data
# Both files are derived from one prefix so the embedding/label pair cannot drift apart.
EMB_PREFIX = os.path.join(
    "/eagle/projects/argonne_tpc/siebenschuh/domain_shift_data/embeddings",
    "SimCLR_heavy_best",
)
X = np.load(f"{EMB_PREFIX}_embeddings.npy")
# If labels are shape (n,1), flatten to (n,)
y = np.load(f"{EMB_PREFIX}_labels.npy").ravel()

# 2) Train/test split (stratified 80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 3) Standardize features (important for SVC + MLP)
# Statistics are fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 4) Define classifiers
classifiers = {
    # probability=True is omitted: only predict() is used below, and enabling it
    # makes fit() run an additional internal 5-fold cross-validation.
    "SVC": SVC(kernel="rbf", random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42),
    "MLP": MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42),
}

# 5) Train + evaluate
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(f"\n{name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds))



SVC Accuracy: 0.9574
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1185
           1       0.99      0.99      0.99      1348
           2       0.94      0.94      0.94      1192
           3       0.91      0.93      0.92      1226
           4       0.98      0.99      0.99      1168
           5       0.96      0.90      0.93      1084
           6       0.96      0.96      0.96      1184
           7       0.97      0.96      0.96      1253
           8       0.93      0.95      0.94      1170
           9       0.95      0.94      0.95      1190

    accuracy                           0.96     12000
   macro avg       0.96      0.96      0.96     12000
weighted avg       0.96      0.96      0.96     12000


RandomForest Accuracy: 0.9114
              precision    recall  f1-score   support

           0       0.97      0.98      0.98      1185
           1       0.99      0.99      0.99      1348
           2       0.90  

In [5]:
# 1) Load data
# Both files are derived from one prefix so the embedding/label pair cannot drift apart.
EMB_PREFIX = os.path.join(
    "/eagle/projects/argonne_tpc/siebenschuh/domain_shift_data/embeddings",
    "BYOL_heavy_best",
)
X = np.load(f"{EMB_PREFIX}_embeddings.npy")
# If labels are shape (n,1), flatten to (n,)
y = np.load(f"{EMB_PREFIX}_labels.npy").ravel()

# 2) Train/test split (stratified 80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 3) Standardize features (important for SVC + MLP)
# Statistics are fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 4) Define classifiers
classifiers = {
    # probability=True is omitted: only predict() is used below, and enabling it
    # makes fit() run an additional internal 5-fold cross-validation.
    "SVC": SVC(kernel="rbf", random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42),
    "MLP": MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42),
}

# 5) Train + evaluate
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(f"\n{name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds))



SVC Accuracy: 0.9597
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      1185
           1       0.98      0.98      0.98      1348
           2       0.96      0.96      0.96      1192
           3       0.95      0.94      0.94      1226
           4       0.96      0.95      0.95      1168
           5       0.96      0.94      0.95      1084
           6       0.97      0.98      0.97      1184
           7       0.97      0.97      0.97      1253
           8       0.91      0.95      0.93      1170
           9       0.95      0.94      0.94      1190

    accuracy                           0.96     12000
   macro avg       0.96      0.96      0.96     12000
weighted avg       0.96      0.96      0.96     12000


RandomForest Accuracy: 0.8909
              precision    recall  f1-score   support

           0       0.93      0.93      0.93      1185
           1       0.96      0.97      0.97      1348
           2       0.90  

In [6]:
# 1) Load data
# Both files are derived from one prefix so the embedding/label pair cannot drift apart.
EMB_PREFIX = os.path.join(
    "/eagle/projects/argonne_tpc/siebenschuh/domain_shift_data/embeddings",
    "BYOL_uniform_best",
)
X = np.load(f"{EMB_PREFIX}_embeddings.npy")
# If labels are shape (n,1), flatten to (n,)
y = np.load(f"{EMB_PREFIX}_labels.npy").ravel()

# 2) Train/test split (stratified 80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 3) Standardize features (important for SVC + MLP)
# Statistics are fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 4) Define classifiers
classifiers = {
    # probability=True is omitted: only predict() is used below, and enabling it
    # makes fit() run an additional internal 5-fold cross-validation.
    "SVC": SVC(kernel="rbf", random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=200, random_state=42),
    "MLP": MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=500, random_state=42),
}

# 5) Train + evaluate
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    preds = clf.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(f"\n{name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds))



SVC Accuracy: 0.9685
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1185
           1       0.99      0.99      0.99      1348
           2       0.96      0.97      0.96      1192
           3       0.96      0.96      0.96      1226
           4       0.98      0.96      0.97      1168
           5       0.97      0.96      0.96      1084
           6       0.98      0.98      0.98      1184
           7       0.97      0.96      0.97      1253
           8       0.95      0.96      0.95      1170
           9       0.94      0.96      0.95      1190

    accuracy                           0.97     12000
   macro avg       0.97      0.97      0.97     12000
weighted avg       0.97      0.97      0.97     12000


RandomForest Accuracy: 0.9068
              precision    recall  f1-score   support

           0       0.92      0.96      0.94      1185
           1       0.96      0.97      0.97      1348
           2       0.90  