In [1]:
import pandas as pd
import numpy as np
import os
import branched_resnet_v2 as br
from transformers import Trainer, TrainingArguments, set_seed
import datetime
from safetensors.torch import load_file

import torch

print(torch.__version__)

# Set device and verify CUDA availability
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("Device count:", torch.cuda.device_count())
print("Current device:", torch.cuda.current_device())
print("Device name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')


2.6.0+cu118
CUDA available: True
CUDA version: 11.8
Device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 4070
Using device: cuda


In [2]:
D21_MODELS = ["data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_0_final_model_2025-09-26_01-45-15\model.safetensors",
             "data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_1_final_model_2025-09-26_01-45-15\model.safetensors",
             "data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_2_final_model_2025-09-26_01-45-15\model.safetensors",
             "data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_3_final_model_2025-09-26_01-45-15\model.safetensors",
             "data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_4_final_model_2025-09-26_21-33-12\model.safetensors"]

D20_MODELS = ["data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_0_final_model_2025-09-27_02-25-33\model.safetensors",
              "data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_1_final_model_2025-09-27_02-25-33\model.safetensors",
              "data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_2_final_model_2025-09-27_02-25-33\model.safetensors",
              "data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_3_final_model_2025-09-27_02-25-33\model.safetensors",
              "data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_4_final_model_2025-09-27_02-25-33\model.safetensors"]

In [3]:
metrics_df = pd.DataFrame(columns=['eval_loss', 'eval_accuracy_branch1', 'eval_accuracy_branch2', 'eval_f1_branch1', 'eval_f1_branch2', 'eval_precision_branch1', 'eval_precision_branch2', 'eval_recall_branch1', 'eval_recall_branch2'])

def build_row(results, dataset_name):
    row = {'Dataset': dataset_name,
           'eval_loss': None,
           'eval_accuracy_branch1': None,
           'eval_accuracy_branch2': None,
           'eval_f1_branch1': None,
           'eval_f1_branch2': None,
           'eval_precision_branch1': None,
           'eval_precision_branch2': None,
           'eval_recall_branch1': None,
           'eval_recall_branch2': None}
    row.update(results)
    return row

In [4]:
# load test datasets

test_ds1 = br.dataset_load('data/test/preprocessed_undistorted_test_v2.npz')
test_ds2 = br.dataset_load('data/test/preprocessed_all_distortions_test_v2.npz')
test_ds3 = br.dataset_load('data/test/preprocessed_ring_artifact_test_v2.npz')
test_ds4 = br.dataset_load('data/test/preprocessed_rotate_test_v2.npz')
test_ds5 = br.dataset_load('data/test/preprocessed_uniform_rotate_test_v2.npz')
test_ds6 = br.dataset_load('data/test/preprocessed_uniform_test_v2.npz')

test_names = ['undistorted', 'all_distortions', 'ring_artifact', 'rotate', 'uniform_rotate', 'uniform']

In [5]:
# from safetensors.torch import load_file

# fold = 1
# for model_path in D21_MODELS:
#     print(f"Evaluating D21 model from fold {fold}: {model_path}")
#     fold += 1

#     state_dict = load_file(model_path, device="cpu")

#     config = br.ResNetConfig()
#     model = br.ResNetForMultiLabel(config, num_d1_classes=11, num_d2_classes=2)
#     model.load_state_dict(state_dict)
#     model.eval()

#     with torch.no_grad():
#         outputs = model(test_ds1)
#         results = br.compute_metrics(outputs, test_ds1)
#         metrics_df = metrics_df.append(build_row(results, 'test_ds1'), ignore_index=True)

#     with torch.no_grad():
#         outputs = model(test_ds2)
#         results = br.compute_metrics(outputs, test_ds2)
#         metrics_df = metrics_df.append(build_row(results, 'test_ds2'), ignore_index=True)

#     with torch.no_grad():
#         outputs = model(test_ds3)
#         results = br.compute_metrics(outputs, test_ds3)
#         metrics_df = metrics_df.append(build_row(results, 'test_ds3'), ignore_index=True)

#     with torch.no_grad():
#         outputs = model(test_ds4)
#         results = br.compute_metrics(outputs, test_ds4)
#         metrics_df = metrics_df.append(build_row(results, 'test_ds4'), ignore_index=True)

#     with torch.no_grad():
#         outputs = model(test_ds5)
#         results = br.compute_metrics(outputs, test_ds5)
#         metrics_df = metrics_df.append(build_row(results, 'test_ds5'), ignore_index=True)

#     with torch.no_grad():
#         outputs = model(test_ds6)
#         results = br.compute_metrics(outputs, test_ds6)
#         metrics_df = metrics_df.append(build_row(results, 'test_ds6'), ignore_index=True)

# # Save metrics to CSV
# metrics_df.to_csv('data/D21_cv_results/D21_cv_test_metrics.csv', index=False)

In [6]:
def parabolic_increasing_lambda_scheduler(epoch, total_epochs, start_value=0.0, end_value=1.0):
    progress = epoch / total_epochs
    return start_value + (end_value - start_value) * (progress ** 2)

In [7]:
from safetensors.torch import load_file
from transformers import Trainer, TrainingArguments

lambda_scheduler = parabolic_increasing_lambda_scheduler
NUM_EPOCHS = 50
fold = 1
for model_path in D21_MODELS:
    print(f"Evaluating D21 model from fold {fold}: {model_path}")
    fold += 1

    # Load safetensors weights
    state_dict = load_file(model_path)

    # Rebuild model and load weights
    config = br.ResNetConfig()
    model = br.ResNetForMultiLabel(config, num_d1_classes=11, num_d2_classes=2)
    model.load_state_dict(state_dict)
    model.eval()

    # TrainingArguments (we only care about eval here)
    training_args = TrainingArguments(
        output_dir=f"./results/fold_{fold}",
        per_device_eval_batch_size=32,
        dataloader_drop_last=False,
        report_to="none",   # don’t log to W&B unless you want to
    )

    # Wrap with Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        tokenizer=None,  # if you need preprocessing, pass tokenizer
        compute_metrics=br.make_metrics_fn(model),
        callbacks=[br.LambdaUpdateCallback(model, lambda_scheduler, NUM_EPOCHS)]
    )

    # Evaluate on each test dataset
    for i, dataset in enumerate([test_ds1, test_ds2, test_ds3, test_ds4, test_ds5, test_ds6]):
        print(f"Evaluating on test dataset {i+1}")
        results = trainer.evaluate(eval_dataset=dataset)
        # Store results in your DataFrame
        row = build_row(results, f'Fold_{fold}_{test_names[i]}')
        metrics_df = pd.concat([metrics_df, pd.DataFrame([row])], ignore_index=True)

# Save metrics to CSV
metrics_df.to_csv("data/D21_cv_results/D21_cv_test_metrics_v2.csv", index=False)


Evaluating D21 model from fold 1: data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_0_final_model_2025-09-26_01-45-15\model.safetensors


  trainer = Trainer(


Evaluating on test dataset 1


Evaluating on test dataset 2


  metrics_df = pd.concat([metrics_df, pd.DataFrame([row])], ignore_index=True)


Evaluating on test dataset 3
Evaluating on test dataset 4
Evaluating on test dataset 5
Evaluating on test dataset 6
Evaluating D21 model from fold 2: data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_1_final_model_2025-09-26_01-45-15\model.safetensors
Evaluating on test dataset 1


  trainer = Trainer(


Evaluating on test dataset 2
Evaluating on test dataset 3
Evaluating on test dataset 4
Evaluating on test dataset 5
Evaluating on test dataset 6
Evaluating D21 model from fold 3: data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_2_final_model_2025-09-26_01-45-15\model.safetensors
Evaluating on test dataset 1


  trainer = Trainer(


Evaluating on test dataset 2
Evaluating on test dataset 3
Evaluating on test dataset 4
Evaluating on test dataset 5
Evaluating on test dataset 6
Evaluating D21 model from fold 4: data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_3_final_model_2025-09-26_01-45-15\model.safetensors
Evaluating on test dataset 1


  trainer = Trainer(


Evaluating on test dataset 2
Evaluating on test dataset 3
Evaluating on test dataset 4
Evaluating on test dataset 5
Evaluating on test dataset 6
Evaluating D21 model from fold 5: data\D21_cv_results\parabolic_increasing_lambda_scheduler_fold_4_final_model_2025-09-26_21-33-12\model.safetensors
Evaluating on test dataset 1


  trainer = Trainer(


Evaluating on test dataset 2
Evaluating on test dataset 3
Evaluating on test dataset 4
Evaluating on test dataset 5
Evaluating on test dataset 6


In [8]:
metrics_df = None
metrics_df = pd.DataFrame(columns=['eval_loss', 'eval_accuracy_branch1', 'eval_accuracy_branch2', 'eval_f1_branch1', 'eval_f1_branch2', 'eval_precision_branch1', 'eval_precision_branch2', 'eval_recall_branch1', 'eval_recall_branch2'])
fold = 1
for model_path in D20_MODELS:
    print(f"Evaluating D20 model from fold {fold}: {model_path}")
    fold += 1

    # Load safetensors weights
    state_dict = load_file(model_path)

    # Rebuild model and load weights
    config = br.ResNetConfig()
    model = br.ResNetForMultiLabel(config, num_d1_classes=11, num_d2_classes=2)
    model.load_state_dict(state_dict)
    model.eval()

    # TrainingArguments (we only care about eval here)
    training_args = TrainingArguments(
        output_dir=f"./results/fold_{fold}",
        per_device_eval_batch_size=32,
        dataloader_drop_last=False,
        report_to="none",   # don’t log to W&B unless you want to
    )

    # Wrap with Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        tokenizer=None,  # if you need preprocessing, pass tokenizer
        compute_metrics=br.make_metrics_fn(model),
        callbacks=[br.LambdaUpdateCallback(model, lambda_scheduler, NUM_EPOCHS)]
    )

    # Evaluate on each test dataset
    for i, dataset in enumerate([test_ds1, test_ds2, test_ds3, test_ds4, test_ds5, test_ds6]):
        results = trainer.evaluate(eval_dataset=dataset)
        # Store results in your DataFrame
        row = build_row(results, f'Fold_{fold}_{test_names[i]}')
        metrics_df = pd.concat([metrics_df, pd.DataFrame([row])], ignore_index=True)

# Save metrics to CSV
metrics_df.to_csv("data/D20_cv_results/D20_cv_test_metrics_v2.csv", index=False)


Evaluating D20 model from fold 1: data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_0_final_model_2025-09-27_02-25-33\model.safetensors


  trainer = Trainer(


  metrics_df = pd.concat([metrics_df, pd.DataFrame([row])], ignore_index=True)


Evaluating D20 model from fold 2: data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_1_final_model_2025-09-27_02-25-33\model.safetensors


  trainer = Trainer(


Evaluating D20 model from fold 3: data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_2_final_model_2025-09-27_02-25-33\model.safetensors


  trainer = Trainer(


Evaluating D20 model from fold 4: data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_3_final_model_2025-09-27_02-25-33\model.safetensors


  trainer = Trainer(


Evaluating D20 model from fold 5: data\D20_cv_results\parabolic_increasing_lambda_scheduler_fold_4_final_model_2025-09-27_02-25-33\model.safetensors


  trainer = Trainer(
