In [18]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import pandas as pd
import os
from tqdm import tqdm
import torch

In [None]:
# config
# Number of best-scoring models that are averaged for each (subject, parcel) pair.
top_k = 5
# Directory that is listed for model folders; each folder is expected to hold a
# 'ckpt.pt' checkpoint and a 'fmri_predictions_ood.npy' prediction file.
all_models_path = './output_ensemble'
# Validation movies: their accuracy maps are read from ckpt['metrics']['val_<movie>']
# and averaged to score a model.
eval_set = ['life', 'bourne']
# Directory the ensembled predictions (.npy) and the zip archive are written to.
# NOTE(review): 'ensenmble' looks like a typo of 'ensemble'; left unchanged because
# it is a runtime path that existing outputs may already live under.
output_path = 'ensenmble_results/'

In [20]:
# os.listdir returns entries in arbitrary order; sort them so that model indices
# (and therefore the selection results) are reproducible across runs and machines.
all_models = sorted(os.listdir(all_models_path))
print(f"Found {len(all_models)} models in {all_models_path}")
# Filled by load_all_accuracies() with the names of the models whose checkpoint
# could be loaded, in the same order as the rows of the returned array.
finished_all_models = []
def load_all_accuracies(subjects=(1, 2, 3, 5)):
    """Load the validation accuracy maps of every model listed in ``all_models``.

    For each model folder under ``all_models_path`` the checkpoint ``ckpt.pt`` is
    loaded and, for every subject, the ``accmap_sub-<id>`` entries of the
    ``val_<movie>`` metrics are averaged over the movies in ``eval_set``.
    Models without a checkpoint, or whose checkpoint cannot be read, are reported
    with a message and skipped.

    Side effect: the module-level list ``finished_all_models`` is replaced (in
    place) with the names of the successfully loaded models, in the same order as
    the first axis of the returned array.

    Args:
        subjects: Subject numbers used to build the ``accmap_sub-<id>`` keys.

    Returns:
        np.ndarray: one row per loaded model, stacked as
        (model, subject, <accuracy-map axes>).

    Raises:
        ValueError: if no checkpoint could be loaded at all.
    """
    # Iterate over a snapshot: `all_models` may be the very same list object as
    # `finished_all_models`, and mutating a list while iterating it would never end.
    model_names = list(all_models)
    accuracies = []
    loaded_models = []
    for model in tqdm(model_names):
        ckpt_path = os.path.join(all_models_path, model, 'ckpt.pt')

        if not os.path.exists(ckpt_path):
            print(f"Checkpoint not found for model {model}, skipping.")
            continue

        try:
            # NOTE: weights_only=False unpickles arbitrary objects; only load
            # checkpoints from a trusted source.
            ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
            metrics = ckpt['metrics']
            all_acc_per_subject = []
            for i in subjects:
                mean_acc = 0
                for eval_movie in eval_set:
                    mean_acc = mean_acc + metrics['val_' + eval_movie][f'accmap_sub-{i}']
                all_acc_per_subject.append(mean_acc / len(eval_set))

            accuracies.append(np.stack(all_acc_per_subject, axis=0))
            loaded_models.append(model)

        except Exception as e:
            # Best effort: one unreadable or incomplete checkpoint must not abort the scan.
            print(f"Error loading checkpoint for model {model}: {e}")
            continue

    # Replace instead of append so that calling the function again cannot leave
    # duplicated names that no longer line up with the returned rows.
    finished_all_models[:] = loaded_models

    if not accuracies:
        raise ValueError(f"No usable checkpoints found in {all_models_path}")

    return np.stack(accuracies, axis=0)


accuracies_per_parcel = load_all_accuracies()
# From here on `all_models` only contains the models that were actually loaded, so
# index i of `all_models` corresponds to row i of `accuracies_per_parcel`.
# Copy the list instead of aliasing it: load_all_accuracies() iterates `all_models`
# while filling `finished_all_models`, which must not be the same list object.
all_models = list(finished_all_models)
assert len(all_models) == accuracies_per_parcel.shape[0], "model names and accuracy rows are out of sync"

Found 53 models in /home/cesar/algo_sub/algonauts2025/output_2


100%|██████████| 53/53 [00:00<00:00, 369.35it/s]

Checkpoint not found for model feature_encoding_lr0.001_weight_decay0.1_encoder_kernel_size11_num_samples4000_sample_length32_batch_size32_epochs15_embed_dim128_transformer_depth4_pool_num_heads3_layers.20_layers.30, skipping.
Checkpoint not found for model feature_encoding_lr0.0001_weight_decay0.1_encoder_kernel_size33_num_samples4000_sample_length64_batch_size32_epochs10_embed_dim128_transformer_depth4_pool_num_heads2_layers.20_layers.20, skipping.
Checkpoint not found for model feature_encoding_lr0.001_weight_decay0.1_encoder_kernel_size11_num_samples4000_sample_length32_batch_size32_epochs15_embed_dim128_transformer_depth4_pool_num_heads3_layers.30, skipping.
Checkpoint not found for model feature_encoding_lr0.0001_weight_decay0.1_encoder_kernel_size11_num_samples4000_sample_length64_batch_size16_epochs15_embed_dim128_transformer_depth4_pool_num_heads3_layers.11_layers.12_layers.20_layers.20_encoder.layernorm_avg, skipping.





In [21]:
# Rank models by their overall score: average the last axis (parcels) to get one
# value per subject, then average the subjects to get one value per model.
acurracies_per_subject = accuracies_per_parcel.mean(axis=-1)
acurracies_per_model = acurracies_per_subject.mean(axis=-1)
best_model_index = acurracies_per_model.argmax()

print(
    f"Best model is {all_models[best_model_index]} "
    f"with accuracy {acurracies_per_subject[best_model_index]}, "
    f"mean: {acurracies_per_model[best_model_index]}"
)

Best model is feature_encoding_lr0.001_weight_decay0.3_encoder_kernel_size45_num_samples1000_sample_length128_batch_size32_epochs20_embed_dim256_transformer_depth6_pool_num_heads4_layers.11_layers.12_layers.20_layers.20_layers.20_encoder.layernorm_avg with accuracy [0.2181748  0.19460419 0.23472248 0.1770398 ], mean: 0.2061353176832199


In [22]:
# Unpack the dimensions of the stacked accuracy array: one entry per loaded model,
# per subject, and per parcel (last axis). Fails if the array is not 3-dimensional.
num_models, num_subjects, num_parcels = accuracies_per_parcel.shape

In [23]:
if top_k < 1:
    # With top_k == 0 the `[-top_k:]` slice below would select *every* model, and a
    # negative value would select an unintended subset, so fail loudly instead.
    raise ValueError(f"top_k must be a positive integer, got {top_k}")

# One entry per (subject, parcel): which models to average and the mean validation
# accuracy of those models.
ensemble_results = []

# Loop through each subject
for subject_id in range(num_subjects):
    # Loop through each parcel for the current subject
    for parcel_id in range(num_parcels):
        # Extract the accuracy scores for the current subject and parcel across all models
        model_accuracies = accuracies_per_parcel[:, subject_id, parcel_id]

        # np.argsort places NaN after every number, so a model with an undefined
        # accuracy would be ranked as the best one. Rank NaN as -inf instead.
        ranking_scores = np.where(np.isnan(model_accuracies), -np.inf, model_accuracies)

        # Indices of the top-k models, in ascending order of accuracy.
        top_model_indices = np.argsort(ranking_scores)[-top_k:]

        # Retrieve the names of the top k models using the indices
        top_model_names = [all_models[i] for i in top_model_indices]

        # Expected performance = mean accuracy of the selected models (this is NaN
        # only if fewer than top_k models have a defined accuracy for this parcel).
        expected_performance = np.mean(model_accuracies[top_model_indices])

        # Store the results
        ensemble_results.append({
            "subject_id": subject_id,
            "parcel_id": parcel_id,
            "top_models": top_model_names,
            "expected_performance": expected_performance,
        })


In [24]:
# replace subject_id with the actual subject number in the results
subject_map = {
    0: 1,  # position along the subject axis -> subject number
    1: 2,
    2: 3,
    3: 5
}
for result in ensemble_results:
    # Remember the positional index the first time through and always map from it.
    # Mapping the current 'subject_id' directly is not idempotent: re-running this
    # cell would turn subject 1 into 2, 2 into 3 and 3 into 5.
    subject_index = result.setdefault('subject_index', result['subject_id'])
    result['subject_id'] = subject_map.get(subject_index, subject_index)

# calculate the expected_performance for each subject
expected_performance_per_subject = {}
for result in ensemble_results:
    expected_performance_per_subject.setdefault(result['subject_id'], []).append(result['expected_performance'])

# Average expected performance across parcels for each subject
average_expected_performance = {subject_id: np.mean(perfs) for subject_id, perfs in expected_performance_per_subject.items()}
print("\n--- Average Expected Performance per Subject ---")
for subject_id, avg_perf in average_expected_performance.items():
    print(f"Subject {subject_id}: {avg_perf:.4f}")
print(" Final average expected performance across all subjects: ", np.mean(list(average_expected_performance.values())))


--- Average Expected Performance per Subject ---
Subject 1: 0.2248
Subject 2: 0.2024
Subject 3: 0.2420
Subject 5: 0.1836
 Final average expected performance across all subjects:  0.21318185


In [8]:
# Preview only the first few entries; the full list holds one dict per
# (subject, parcel) pair and would flood the cell output.
ensemble_results[:5]

[{'subject_id': 1,
  'parcel_id': 0,
  'top_models': ['feature_encoding_lr0.001_weight_decay0.3_encoder_kernel_size45_num_samples1000_sample_length128_batch_size32_epochs20_embed_dim256_transformer_depth6_pool_num_heads4_layers.20_encoder.layernorm_avg_layers.12'],
  'expected_performance': 0.23615408},
 {'subject_id': 1,
  'parcel_id': 1,
  'top_models': ['feature_encoding_lr0.0001_weight_decay0.1_encoder_kernel_size45_num_samples1000_sample_length128_batch_size8_epochs15_embed_dim256_transformer_depth4_pool_num_heads2_layers.20_encoder.layernorm_avg_layers.12'],
  'expected_performance': 0.3767295},
 {'subject_id': 1,
  'parcel_id': 2,
  'top_models': ['feature_encoding_lr0.001_weight_decay0.3_encoder_kernel_size45_num_samples1000_sample_length128_batch_size32_epochs20_embed_dim256_transformer_depth6_pool_num_heads4_layers.11_layers.12_layers.20_layers.20_layers.20_encoder.layernorm_avg'],
  'expected_performance': 0.37897623},
 {'subject_id': 1,
  'parcel_id': 3,
  'top_models': ['f

In [9]:
def load_all_model_submissions():
    """Collect the saved OOD predictions of every model in ``all_models``.

    For each model name the file ``fmri_predictions_ood.npy`` is looked up inside
    that model's folder under ``all_models_path``. Missing files and files that
    fail to load are reported with a message and skipped.

    Returns:
        dict: maps model name to the object stored in its prediction file.
    """
    submissions_by_model = {}
    for model_name in tqdm(all_models):
        npy_path = os.path.join(all_models_path, model_name, 'fmri_predictions_ood.npy')
        if os.path.exists(npy_path):
            try:
                # allow_pickle=True unpickles the file content (trusted files only);
                # .item() extracts the single object stored in the loaded array.
                submissions_by_model[model_name] = np.load(npy_path, allow_pickle=True).item()
            except Exception as load_error:
                print(f"Error loading submission for model {model_name}: {load_error}")
        else:
            print(f"Submission file not found for model {model_name}, skipping.")

    return submissions_by_model

# Load the predictions of every model once; the ensembling cell looks models up by
# name in this dict.
all_model_submissions = load_all_model_submissions()

100%|██████████| 49/49 [00:03<00:00, 13.67it/s]


In [10]:
# One model's predictions serve as a template for the subject -> movie layout of a
# submission. Read it from the configured models directory (not a hard-coded
# 'output_2' folder) so the template always matches the models loaded above.
# allow_pickle=True unpickles the file content, so only trusted files should be read.
example_submission = np.load(
    os.path.join(all_models_path, all_models[-1], 'fmri_predictions_ood.npy'),
    allow_pickle=True,
).item()

In [11]:
# Build an all-zero template with the same subject -> movie -> array layout (and
# the same shapes/dtypes) as the example submission.
empty_submission = {
    subject_key: {
        movie_key: np.zeros_like(movie_pred)
        for movie_key, movie_pred in subject_preds.items()
    }
    for subject_key, subject_preds in example_submission.items()
}

# Report the shape of every template array and confirm it really starts at zero.
for subject, preds in empty_submission.items():
    for movie, pred in preds.items():
        shape = pred.shape
        print(f"Subject: {subject}, Movie: {movie}, Prediction Shape: {shape}")
        is_zero = np.allclose(pred, np.zeros_like(pred))
        assert is_zero, "Predictions are not initialized to zero."

Subject: sub-01, Movie: chaplin1, Prediction Shape: (432, 1000)
Subject: sub-01, Movie: chaplin2, Prediction Shape: (405, 1000)
Subject: sub-01, Movie: mononoke1, Prediction Shape: (423, 1000)
Subject: sub-01, Movie: mononoke2, Prediction Shape: (426, 1000)
Subject: sub-01, Movie: passepartout1, Prediction Shape: (422, 1000)
Subject: sub-01, Movie: passepartout2, Prediction Shape: (436, 1000)
Subject: sub-01, Movie: planetearth1, Prediction Shape: (433, 1000)
Subject: sub-01, Movie: planetearth2, Prediction Shape: (418, 1000)
Subject: sub-01, Movie: pulpfiction1, Prediction Shape: (468, 1000)
Subject: sub-01, Movie: pulpfiction2, Prediction Shape: (378, 1000)
Subject: sub-01, Movie: wot1, Prediction Shape: (353, 1000)
Subject: sub-01, Movie: wot2, Prediction Shape: (324, 1000)
Subject: sub-02, Movie: chaplin1, Prediction Shape: (432, 1000)
Subject: sub-02, Movie: chaplin2, Prediction Shape: (405, 1000)
Subject: sub-02, Movie: mononoke1, Prediction Shape: (423, 1000)
Subject: sub-02, Mo

In [12]:
# List the movie names present for subject 'sub-01' in the zero-initialised template.
empty_submission['sub-01'].keys()

dict_keys(['chaplin1', 'chaplin2', 'mononoke1', 'mononoke2', 'passepartout1', 'passepartout2', 'planetearth1', 'planetearth2', 'pulpfiction1', 'pulpfiction2', 'wot1', 'wot2'])

In [13]:
# Final ensemble predictions
# Start from a real copy of the zero template. dict.copy() is shallow: the inner
# dicts and arrays would be shared, so filling the ensemble would silently write
# into `empty_submission` as well.
ensemble_predictions = {
    subject: {movie: pred.copy() for movie, pred in preds.items()}
    for subject, preds in empty_submission.items()
}

# Index the selections by (subject number, parcel) once instead of scanning the
# whole result list for every parcel. setdefault keeps the first entry per key.
top_models_by_key = {}
for result in ensemble_results:
    top_models_by_key.setdefault((result['subject_id'], result['parcel_id']), result['top_models'])

for subject_id in range(num_subjects):
    # `ensemble_results` stores subject numbers (1, 2, 3, 5), not positional indices.
    subject_number = subject_map[subject_id]
    subject_key = f'sub-0{subject_number}'
    for parcel_id in tqdm(range(num_parcels)):
        # Get the top k models for the current subject and parcel
        top_models = top_models_by_key.get((subject_number, parcel_id))

        if top_models is None:
            print(f"No top models found for subject {subject_number}, parcel {parcel_id}. Skipping.")
            continue

        # Collect the per-movie predictions of every selected model for this subject
        predictions = []
        for model in top_models:
            if model in all_model_submissions:
                predictions.append(all_model_submissions[model][subject_key])
            else:
                print(f"Model {model} not found in submissions, skipping.")

        if predictions:
            for movie in example_submission[subject_key].keys():
                # Average only the column of the current parcel; averaging the full
                # arrays for every parcel repeats the same work once per parcel.
                ensemble_predictions[subject_key][movie][:, parcel_id] = np.mean(
                    [model_preds[movie][:, parcel_id] for model_preds in predictions], axis=0
                )

100%|██████████| 1000/1000 [00:08<00:00, 111.89it/s]
100%|██████████| 1000/1000 [00:09<00:00, 108.74it/s]
100%|██████████| 1000/1000 [00:08<00:00, 116.73it/s]
100%|██████████| 1000/1000 [00:08<00:00, 115.31it/s]


In [14]:
# Sanity check: report, for every subject and movie, whether any exact zero is left
# in the ensembled predictions.
for subject, preds in ensemble_predictions.items():
    for movie, pred in preds.items():
        zero_mask = pred == 0
        if not np.any(zero_mask):
            print(f"No zeros in predictions for {subject}, {movie}.")
            continue
        print(f"Found zeros in predictions for {subject}, {movie}.")
        # Count how many zeros are in the predictions
        zero_count = np.sum(zero_mask)
        print(f"Number of zeros in predictions for {subject}, {movie}: {zero_count}")
ensemble_predictions

No zeros in predictions for sub-01, chaplin1.
No zeros in predictions for sub-01, chaplin2.
No zeros in predictions for sub-01, mononoke1.
No zeros in predictions for sub-01, mononoke2.
No zeros in predictions for sub-01, passepartout1.
No zeros in predictions for sub-01, passepartout2.
No zeros in predictions for sub-01, planetearth1.
No zeros in predictions for sub-01, planetearth2.
No zeros in predictions for sub-01, pulpfiction1.
No zeros in predictions for sub-01, pulpfiction2.
No zeros in predictions for sub-01, wot1.
No zeros in predictions for sub-01, wot2.
No zeros in predictions for sub-02, chaplin1.
No zeros in predictions for sub-02, chaplin2.
No zeros in predictions for sub-02, mononoke1.
No zeros in predictions for sub-02, mononoke2.
No zeros in predictions for sub-02, passepartout1.
No zeros in predictions for sub-02, passepartout2.
No zeros in predictions for sub-02, planetearth1.
No zeros in predictions for sub-02, planetearth2.
No zeros in predictions for sub-02, pulp

{'sub-01': {'chaplin1': array([[-0.09750009, -0.1972368 , -0.29033697, ...,  0.03508082,
          -0.06536325, -0.00117031],
         [-0.12326148, -0.2798772 , -0.2781374 , ...,  0.10312617,
          -0.00573307,  0.05987582],
         [-0.13214545, -0.33786988, -0.23733516, ...,  0.05759467,
           0.14454304,  0.12735866],
         ...,
         [ 0.05571396,  0.17526698,  0.3312317 , ...,  0.00092853,
           0.27909514,  0.21525803],
         [-0.11932096, -0.36749566, -0.02338677, ...,  0.07904674,
           0.392294  ,  0.2895323 ],
         [-0.27589047, -0.87652075, -0.28853065, ...,  0.20491427,
           0.5332804 ,  0.41263485]], dtype=float32),
  'chaplin2': array([[-0.02521061, -0.11490208, -0.10303237, ..., -0.01441721,
          -0.03883632, -0.01306938],
         [-0.02931923, -0.19078416, -0.20218645, ..., -0.01194705,
          -0.03531433, -0.01653899],
         [-0.10861367, -0.39401174, -0.3144456 , ...,  0.004867  ,
           0.18365416,  0.11503664],

In [15]:
import time
# exist_ok=True creates the directory when needed and is a no-op when it already
# exists, without the race of a separate exists() check.
os.makedirs(output_path, exist_ok=True)
# Save the ensemble predictions under a name that records top_k and the save time.
timestamp = time.strftime("%Y%m%d-%H%M%S")
output_file_name = f'new_fmri_predictions_ood_top_{top_k}_{timestamp}.npy'
ensemble_output_path = os.path.join(output_path, output_file_name)
# The nested dict is stored as a pickled object array; read it back with
# np.load(path, allow_pickle=True).item(), as done for the per-model files.
np.save(ensemble_output_path, ensemble_predictions)


In [16]:
# zip the npy file
import zipfile
# Swap only the extension of the saved file name.
zip_file_name = os.path.splitext(output_file_name)[0] + '.zip'
zip_file_path = os.path.join(output_path, zip_file_name)
# ZipFile defaults to ZIP_STORED (no compression); request DEFLATE so the archive
# is actually compressed.
with zipfile.ZipFile(zip_file_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    # arcname stores the file at the archive root instead of under output_path.
    zipf.write(ensemble_output_path, arcname=output_file_name)
    

In [17]:
# Show where the ensemble predictions were written.
ensemble_output_path

'ensenmble_results/new_fmri_predictions_ood_top_1_20250712-231704.npy'

: 

: 

: 

: 