 == Stitching Model Performances Based on Test Movies == 

In [29]:
import numpy as np
import os
def make_ensemble_weights(target_filename="fmri_predictions_ood.npy", foldername="", filter_movies=("chaplin",)):
    """
    Create a dictionary of ensemble weights for one model, formatted the same way as the submission file.

    The predictions stored in ``./output/<foldername>/<target_filename>`` are
    loaded as a ``{subject: {episode: array}}`` dictionary. Every episode whose
    name contains one of ``filter_movies`` gets an all-ones weight array, every
    other episode an all-zeros array of the same shape and dtype. The result is
    saved next to the predictions as ``ensemble_weights.npy`` and returned.

    Args:
        target_filename: Name of the pickled predictions file to mirror.
        foldername: Sub-folder of ``./output`` that holds the predictions.
        filter_movies: Movie-name substring(s) that select the episodes this
            model should contribute to. A single string is accepted as well.

    Returns:
        ``{subject: {episode: weight_array}}``. The four default subjects are
        always present; any additional subject found in the file is added.
    """
    print(os.getcwd())

    # A bare string would otherwise be iterated character by character and
    # match almost every episode; a generator would be exhausted after the
    # first episode. Normalise both to a tuple of names.
    if isinstance(filter_movies, str):
        filter_movies = (filter_movies,)
    else:
        filter_movies = tuple(filter_movies)

    model_dir = os.path.join("./output", foldername)
    # NOTE: allow_pickle=True executes pickled code; only load trusted files.
    submission_predictions = np.load(os.path.join(model_dir, target_filename), allow_pickle=True).item()

    weights = {'sub-01': {}, 'sub-02': {}, 'sub-03': {}, 'sub-05': {}}
    for subject, episodes_dict in submission_predictions.items():
        # setdefault keeps the function working for subjects outside the default set.
        subject_weights = weights.setdefault(subject, {})
        for episode, values in episodes_dict.items():
            if any(fm in episode for fm in filter_movies):
                subject_weights[episode] = np.ones_like(values)
            else:
                subject_weights[episode] = np.zeros_like(values)

    save_path = os.path.join(model_dir, "ensemble_weights.npy")
    np.save(save_path, weights, allow_pickle=True)
    print(f'Saving weights to {save_path}')
    print('Saving weights to file done!')
    return weights

# Chaplin episodes: weight 1 for the model trained without llama features
make_ensemble_weights(
    target_filename="fmri_predictions_ood.npy",
    foldername="feature_encoding_data-all-but-life_replace_internvl3-14B_remove_llama_add_qwen3b_vjepa2_enc-kernel-45_2507121123",
    filter_movies=["chaplin"],
)
# Planet Earth episodes: weight 1 for the model with extra training on "life"
make_ensemble_weights(
    target_filename="fmri_predictions_ood.npy",
    foldername="feature_encoding_data-life_replace_internvl3-14B_llama-3B_qwen3b_vjepa2_enc-kernel-45_2507121216",
    filter_movies=["planetearth"],
)
# All remaining movies: weight 1 for the canonical model
make_ensemble_weights(
    target_filename="fmri_predictions_ood.npy",
    foldername="feature_encoding_data-all_replace_internvl3-14B_llama-3B_add_qwen3b_vjepa2_enc-kernel-45_2507112307",
    filter_movies=["mononoke", "passepartout", "pulpfiction", "wot"],
)


/home/cindy/algonauts2025
Saving weights to ./output/feature_encoding_data-all-but-life_replace_internvl3-14B_remove_llama_add_qwen3b_vjepa2_enc-kernel-45_2507121123/ensemble_weights.npy
Saving weights to file done!
/home/cindy/algonauts2025
Saving weights to ./output/feature_encoding_data-life_replace_internvl3-14B_llama-3B_qwen3b_vjepa2_enc-kernel-45_2507121216/ensemble_weights.npy
Saving weights to file done!
/home/cindy/algonauts2025
Saving weights to ./output/feature_encoding_data-all_replace_internvl3-14B_llama-3B_add_qwen3b_vjepa2_enc-kernel-45_2507112307/ensemble_weights.npy
Saving weights to file done!


{'sub-01': {'chaplin1': array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
  'chaplin2': array([[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
  'mononoke1': array([[1., 1., 1., ..., 1., 1., 1.],
         [1., 1., 1., ..., 1., 1., 1.],
         [1., 1., 1., ..., 1., 1., 1.],
         ...,
         [1., 1., 1., ..., 1., 1., 1.],
         [1., 1., 1., ..., 1., 1., 1.],
         [1., 1., 1., ..., 1., 1., 1.]], dtype=float32),
  'mononoke2': array([[1., 1., 1., ..., 1., 1., 1.],
         [1., 1., 1., ..., 1., 1., 1.],
         [1., 1., 1., ..., 1., 1., 1.],
       

In [36]:
import numpy as np
import os
import zipfile
from datetime import datetime
import pytz

def save_weighted_predictions(weights_file, predictions_file, foldernames=(), output_file_stem='_'):
    """
    Load weights and predictions, apply weights, and save the weighted predictions.

    For every folder in ``foldernames`` the files
    ``./output/<folder>/<weights_file>`` and ``./output/<folder>/<predictions_file>``
    are loaded as ``{subject: {episode: array}}`` dictionaries. Predictions are
    multiplied element-wise by their weights, summed over models and divided by
    the summed weights. The blended dictionary is saved to
    ``./output/ensemble_output<output_file_stem><yymmddHHMM>/<predictions_file>``
    (timestamp in America/New_York time) and zipped next to it.

    Args:
        weights_file: File name of the pickled weights inside each model folder.
        predictions_file: File name of the pickled predictions inside each
            model folder; also used as the name of the saved result.
        foldernames: Model folders (relative to ``./output``) to blend. A single
            string is treated as one folder.
        output_file_stem: Text inserted between ``ensemble_output`` and the
            timestamp in the output folder name.

    Returns:
        The path of the output folder that was written.

    Raises:
        ValueError: If ``foldernames`` is empty.
    """
    if isinstance(foldernames, str):
        foldernames = [foldernames]
    foldernames = list(foldernames)
    if not foldernames:
        # Previously this surfaced much later as an UnboundLocalError.
        raise ValueError("foldernames must contain at least one model output folder")

    all_submissions = None
    all_weights = None
    for foldername in foldernames:
        model_dir = os.path.join("./output", foldername)
        # NOTE: allow_pickle=True executes pickled code; only load trusted files.
        weights = np.load(os.path.join(model_dir, weights_file), allow_pickle=True).item()
        predictions = np.load(os.path.join(model_dir, predictions_file), allow_pickle=True).item()

        if all_submissions is None:
            # The first model defines the subject/episode layout of the accumulators.
            all_submissions = {subj: {ep: np.zeros_like(vals) for ep, vals in eps.items()}
                               for subj, eps in predictions.items()}
            all_weights = {subj: {ep: np.zeros_like(vals) for ep, vals in eps.items()}
                           for subj, eps in predictions.items()}

        # Accumulate the weighted predictions and the weights for every model (including the first).
        for subject, episodes_dict in predictions.items():
            for episode, values in episodes_dict.items():
                episode_weights = weights[subject][episode]
                all_submissions[subject][episode] += np.multiply(values, episode_weights)
                all_weights[subject][episode] += episode_weights

    # Normalize by the summed weights. Elements whose weights sum to exactly
    # zero are divided by 1 instead, which avoids a division by zero without
    # biasing every other element the way an additive epsilon would.
    for subject, episodes_dict in all_submissions.items():
        for episode, values in episodes_dict.items():
            total = all_weights[subject][episode]
            safe_total = np.where(total == 0, 1, total)
            all_submissions[subject][episode] = np.divide(values, safe_total)

    ny_time = datetime.now(pytz.timezone("America/New_York"))
    now = ny_time.strftime("%y%m%d%H%M")
    ensemble_output_path = './output/ensemble_output' + output_file_stem + now
    os.makedirs(ensemble_output_path, exist_ok=True)
    saved_file = os.path.join(ensemble_output_path, predictions_file)
    np.save(saved_file, all_submissions, allow_pickle=True)
    if not saved_file.endswith(".npy"):
        # np.save appends the extension when it is missing; track the real file name.
        saved_file += ".npy"
    print(f'Saving weighted predictions to {ensemble_output_path}')

    # Zip the saved file for submission. Only the extension is swapped, so an
    # "npy" elsewhere in the file name is left untouched.
    zip_file = os.path.splitext(saved_file)[0] + ".zip"
    with zipfile.ZipFile(zip_file, 'w') as zipf:
        zipf.write(saved_file, os.path.basename(saved_file))
    print(f"Submission file successfully zipped as: {zip_file}")
    return ensemble_output_path

In [41]:

# Load the stitched ensemble output and the three per-model prediction files it was built from.
final = np.load("./output/ensemble_output_weighted-prediction-by-movies_2507121402/fmri_predictions_ood.npy",allow_pickle=True).item()
nollama = np.load("./output/feature_encoding_data-all-but-life_replace_internvl3-14B_remove_llama_add_qwen3b_vjepa2_enc-kernel-45_2507121123/fmri_predictions_ood.npy",allow_pickle=True).item()
extralife = np.load("./output/feature_encoding_data-life_replace_internvl3-14B_llama-3B_qwen3b_vjepa2_enc-kernel-45_2507121216/fmri_predictions_ood.npy",allow_pickle=True).item()
other = np.load("./output/feature_encoding_data-all_replace_internvl3-14B_llama-3B_add_qwen3b_vjepa2_enc-kernel-45_2507112307/fmri_predictions_ood.npy",allow_pickle=True).item()
# Spot-check sub-01: each movie in the stitched file should match the model selected for it.
print(np.allclose(final['sub-01']['chaplin1'],nollama['sub-01']['chaplin1']))
print(np.allclose(final['sub-01']['planetearth1'],extralife['sub-01']['planetearth1']))
print(np.allclose(final['sub-01']['mononoke1'],other['sub-01']['mononoke1']))

True
True
True


In [35]:
# Weights saved for the no-llama model: print the first 10 rows for sub-01 / chaplin1.
nollama_weights = np.load("./output/feature_encoding_data-all-but-life_replace_internvl3-14B_remove_llama_add_qwen3b_vjepa2_enc-kernel-45_2507121123/ensemble_weights.npy", allow_pickle=True).item()
print(nollama_weights['sub-01']['chaplin1'][:10])  # Should be all 1s

# Weights saved for the canonical ("other") model: same episode, first 10 rows.
other_weights = np.load("./output/feature_encoding_data-all_replace_internvl3-14B_llama-3B_add_qwen3b_vjepa2_enc-kernel-45_2507112307/ensemble_weights.npy", allow_pickle=True).item()
print(other_weights['sub-01']['chaplin1'][:10])  # Should be all 0s

[[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


Select the best one for submission among many submissions

In [None]:
from pathlib import Path
import json
import os

def find_scores_json_parents(root_dir="."):
    """Return, as strings, the folders under ``root_dir`` that contain a ``scores.json`` file.

    The search is recursive and includes ``root_dir`` itself.
    """
    search_root = Path(root_dir)
    return [str(score_file.parent) for score_file in search_root.glob("**/scores.json")]

all_submission_dirs = find_scores_json_parents(".")

# Best score seen so far and the index of the submission that produced it,
# both keyed as [subject][movie]. Created before the loop so the summary
# below still works when no submission folder is found.
acc = {}
index = {}
for ii, submission_dir in enumerate(all_submission_dirs):
    print(f"Submission{ii}:{submission_dir}")
    json_path = os.path.join(submission_dir, "scores.json")
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)  # Returns a Python dict/list

    for key, values in data.items():
        if '_' not in key:
            continue
        # Split into ['sub-01', 'movie-chaplin']; only the first underscore
        # separates the subject, so extra underscores stay in the movie part.
        subject, movie_with_prefix = key.split('_', 1)

        # Remove 'movie-' prefix to get 'chaplin'
        movie = movie_with_prefix.replace('movie-', '')

        subject_best = acc.setdefault(subject, {})
        subject_index = index.setdefault(subject, {})

        # Keep the best accuracy. The first score for a movie is always
        # recorded, so a movie whose scores are all negative still gets its
        # true best value and a submission index.
        if movie not in subject_best or values > subject_best[movie]:
            subject_best[movie] = values
            subject_index[movie] = ii

grand_mean = []
for subject, subject_acc in acc.items():
    movie_mean = np.mean(list(subject_acc.values()))
    print(f"Subject: {subject}, Expected OOD Mean Accuracy: {movie_mean:.4f}")
    grand_mean.append(movie_mean)

if grand_mean:
    grand_mean = np.mean(grand_mean)
    print(f"Across-subject Expected OOD Mean Accuracy: {grand_mean:.4f}")
else:
    # Nothing to average; avoid numpy's empty-mean warning.
    grand_mean = float("nan")
    print("No per-subject movie scores were found in any scores.json file.")
print(acc)
print(index)


Submission0:output/feature_encoding_data-all_replace_internvl3-14B_llama-3B_add_qwen3b_vjepa2_enc-kernel-45_2507112307
Submission1:output/Mihirs_model_submission1
Submission2:output/feature_encoding_remove_llama_qwen3b_vjepa2_enc-kernel-45_2507102030
Submission3:output/ensenmble_results-Cesar44models
Subject: sub-01, Expected OOD Mean Accuracy: 0.2315
Subject: sub-02, Expected OOD Mean Accuracy: 0.1972
Subject: sub-03, Expected OOD Mean Accuracy: 0.2295
Subject: sub-05, Expected OOD Mean Accuracy: 0.1700
Across-subject Expected OOD Mean Accuracy: 0.2070
{'sub-01': {'chaplin': 0.23129592835903168, 'mononoke': 0.28672534227371216, 'passepartout': 0.2574006915092468, 'planetearth': 0.12944163382053375, 'pulpfiction': 0.2536613345146179, 'wot': 0.2303098440170288}, 'sub-02': {'chaplin': 0.14667394757270813, 'mononoke': 0.2444467693567276, 'passepartout': 0.24178491532802582, 'planetearth': 0.17622965574264526, 'pulpfiction': 0.22582681477069855, 'wot': 0.1479874849319458}, 'sub-03': {'chap

In [None]:
# First let's just take the average of several model predictions regardless of their architecture

import numpy as np
import os
import zipfile
from datetime import datetime
import pytz

target_filename = "fmri_predictions_friends_s7.npy" # CHANGE THIS

ny_time = datetime.now(pytz.timezone("America/New_York"))
now = ny_time.strftime("%y%m%d%H%M") 
ensemble_output_path = './output/ensemble_output_' + now
foldernames = [] # ["feature_encoding_enc_kernel-20_2507061542","feature_encoding_default_2507061512","feature_encoding_default_2506300839"] # CHANGE THIS 

# Collect every folder under ./output that contains the target file.
# NOTE(review): ensemble outputs are written under ./output with the same file
# name, so folders produced by earlier runs of this cell are matched too —
# list `foldernames` explicitly if they should not be averaged in.
matching_folders = []
for root, dirs, files in os.walk("./output/"):
    if target_filename in files:
        # Keep the path relative to ./output (not just the last component) so
        # nested folders can be joined back to a valid path below.
        matching_folders.append(os.path.relpath(root, "./output/"))

if not foldernames:
    foldernames = matching_folders
print(foldernames)
if not foldernames:
    raise FileNotFoundError(f"No folder under ./output/ contains {target_filename}")

os.makedirs(ensemble_output_path, exist_ok=True)
all_submissions = None
for foldername in foldernames:
    # NOTE: allow_pickle=True executes pickled code; only load trusted files.
    submission_predictions = np.load(os.path.join("./output", foldername, target_filename), allow_pickle=True).item()
    if all_submissions is None:
        # The first model's arrays become the accumulator.
        all_submissions = submission_predictions
        continue
    # Add everything
    for subject, episodes_dict in submission_predictions.items():
        for episode, values in episodes_dict.items():
            all_submissions[subject][episode] += values

# Divide by total number of model predictions. Iterate the accumulator itself
# so every summed episode is normalised, whatever the last loaded model held.
for subject, episodes_dict in all_submissions.items():
    for episode in episodes_dict:
        episodes_dict[episode] /= len(foldernames)

# Save the output
saved_file = os.path.join(ensemble_output_path, target_filename)
np.save(saved_file, all_submissions, allow_pickle=True)

# Zip the saved file for submission (swap only the extension)
zip_file = os.path.splitext(saved_file)[0] + ".zip"
with zipfile.ZipFile(zip_file, 'w') as zipf:
    zipf.write(saved_file, os.path.basename(saved_file))
print(f"Submission file successfully zipped as: {zip_file}")


['feature_encoding_default_2507061512', 'feature_encoding_enc_kernel-38_2507061602', 'feature_encoding_default_2506300839', 'ensemble_output_2507061554', 'feature_encoding_enc_kernel-45_2507061610', 'feature_encoding_enc_kernel-20_2507061542']
Submission file successfully zipped as: ./output/ensemble_output_2507061621/fmri_predictions_friends_s7.zip


==== Previous Notes that are unused

In [None]:
# # from sklearn.linear_model import LinearRegression
# import numpy as np
# import re
# import time
# from collections import defaultdict
# from pathlib import Path

# import h5py
# import numpy as np
# import torch
# from torch.utils.data import IterableDataset
# from tqdm import tqdm

In [None]:
# Compare the 'sub-01' / 's07e01a' array in submission_predictions with the one in all_submissions.
# NOTE(review): presumably a sanity check on the variables left by the averaging cell — confirm intent.
np.array_equal(submission_predictions['sub-01']['s07e01a'],all_submissions['sub-01']['s07e01a'])

True

In [None]:
class Algonauts2025Dataset(IterableDataset):
    def __init__(
        self,
        fmri_data: dict[str, np.ndarray],
        feat_data: list[dict[str, np.ndarray]] | None = None,
        sample_length: int | None = 128,
        num_samples: int | None = None,
        shuffle: bool = True,
        seed: int | None = None,
    ):
        self.fmri_data = fmri_data
        self.feat_data = feat_data

        self.episode_list = list(fmri_data)
        self.sample_length = sample_length
        self.num_samples = num_samples
        self.shuffle = shuffle
        self.seed = seed

        self._rng = np.random.default_rng(seed)
    
    def _iter_shuffle(self):
        sample_idx = 0
        while True:
            episode_order = self._rng.permutation(len(self.episode_list))

            for ii in episode_order:
                episode = self.episode_list[ii]
                feat_episode = episode[0] if isinstance(episode, tuple) else episode

                fmri = torch.from_numpy(self.fmri_data[episode]).float()
    
                if self.feat_data:
                    feats = [torch.from_numpy(data[feat_episode]).float() for data in self.feat_data]
                else:
                    feats = feat_samples = None

                # Nb, fmri and feature length often off by 1 or 2.
                # But assuming time locked to start.
                length = fmri.shape[1]
                if feats:
                    length = min(length, min(feat.shape[0] for feat in feats))

                if self.sample_length:
                    # Random segment of run
                    offset = int(self._rng.integers(0, length - self.sample_length + 1))
                    fmri_sample = fmri[:, offset: offset + self.sample_length]
                    if feats:
                        feat_samples = [
                            feat[offset: offset + self.sample_length] for feat in feats
                        ]
                else:
                    # Take full run
                    # Nb this only works for batch size 1 since runs are different length
                    fmri_sample = fmri[:, :length]
                    if feats:
                        feat_samples = [feat[:length] for feat in feats]

                if feat_samples:
                    yield episode, fmri_sample, feat_samples
                else:
                    yield episode, fmri_sample

                sample_idx += 1
                if self.num_samples and sample_idx >= self.num_samples:
                    return

    def _iter_ordered(self):
        sample_idx = 0
        for episode in self.episode_list:
            feat_episode = episode[0] if isinstance(episode, tuple) else episode
            fmri = torch.from_numpy(self.fmri_data[episode]).float()
            if self.feat_data:
                feats = [torch.from_numpy(data[feat_episode]).float() for data in self.feat_data]
            else:
                feats = feat_samples = None

            length = fmri.shape[1]
            if feats:
                length = min(length, min(feat.shape[0] for feat in feats))

            sample_length = self.sample_length or length

            for offset in range(0, length - sample_length + 1, sample_length):
                fmri_sample = fmri[:, offset: offset + sample_length]
                if feats:
                    feat_samples = [feat[offset: offset + sample_length] for feat in feats]

                if feat_samples:
                    yield episode, fmri_sample, feat_samples
                else:
                    yield episode, fmri_sample

                sample_idx += 1
                if self.num_samples and sample_idx >= self.num_samples:
                    return

    def __iter__(self):
        if self.shuffle:
            yield from self._iter_shuffle()
        else:
            yield from self._iter_ordered()

In [None]:
# Validation dataset: whole runs (sample_length=None) in fixed episode order (shuffle=False).
# NOTE(review): friends_val_fmri, stimuli_features_friends and DataLoader are not
# defined or imported in the visible cells — confirm where they come from.
val_dataset = Algonauts2025Dataset(
    friends_val_fmri,
    list(stimuli_features_friends.values()),
    sample_length=None,
    shuffle=False,
)
# batch_size=1: the dataset notes that whole runs have different lengths.
val_loader = DataLoader(val_dataset, batch_size=1)

@torch.no_grad()
def validate(
    *,
    epoch: int,
    model: torch.nn.Module,
    val_loader: DataLoader,
    device: torch.device,
):
    """Run the model over the validation loader and report loss and encoding accuracy.

    Each batch is expected to unpack as ``(_, sample, feats)`` where ``sample``
    is a 4-D tensor ``(N, S, L, C)`` and ``feats`` is a list of tensors. The
    model is called as ``model(feats)`` and compared with ``sample`` by MSE.
    Targets and outputs are gathered per index of the ``S`` axis and scored
    with ``pearsonr_score`` for every entry of ``SUBJECTS``.

    Args:
        epoch: Epoch number, used only in the printed summary.
        model: Model to evaluate; switched to eval mode.
        val_loader: Loader yielding validation batches.
        device: Device the batches are moved to.

    Returns:
        ``(acc, metrics)``: the accuracy averaged over subjects, and a dict with
        per-subject and averaged accuracies and accuracy maps.

    Raises:
        ValueError: If ``val_loader`` yields no batches.
        AssertionError: If a batch does not have ``N == 1`` and ``S == 4``.
    """
    model.eval()

    use_cuda = device.type == "cuda"

    loss_m = AverageMeter()
    data_time_m = AverageMeter()
    step_time_m = AverageMeter()

    samples = []
    outputs = []

    end = time.monotonic()
    for _, sample, feats in val_loader:
        sample = sample.to(device)
        feats = [feat.to(device) for feat in feats]
        batch_size = sample.size(0)
        data_time = time.monotonic() - end

        # forward pass
        output = model(feats)
        loss = F.mse_loss(output, sample)
        loss_item = loss.item()

        # end of iteration timing
        if use_cuda:
            torch.cuda.synchronize()
        step_time = time.monotonic() - end

        loss_m.update(loss_item, batch_size)
        data_time_m.update(data_time, batch_size)
        step_time_m.update(step_time, batch_size)

        N, S, L, C = sample.shape
        # Compare the pair as a tuple: `assert N, S == (1, 4)` only checked
        # that N was truthy and used the comparison as the failure message.
        assert (N, S) == (1, 4), f"expected N=1 and S=4, got N={N}, S={S}"
        samples.append(sample.cpu().numpy().swapaxes(0, 1).reshape((S, N*L, C)))
        outputs.append(output.cpu().numpy().swapaxes(0, 1).reshape((S, N*L, C)))

        # Reset timer
        end = time.monotonic()

    if not samples:
        raise ValueError("val_loader yielded no batches")

    # (S, N, C)
    samples = np.concatenate(samples, axis=1)
    outputs = np.concatenate(outputs, axis=1)

    metrics = {}

    # Encoding accuracy metrics
    dim = samples.shape[-1]
    acc = 0.0
    acc_map = np.zeros(dim)
    for ii, sub in enumerate(SUBJECTS):
        y_true = samples[ii].reshape(-1, dim)
        y_pred = outputs[ii].reshape(-1, dim)
        metrics[f"acc_map_sub-{sub}"] = acc_map_i = pearsonr_score(y_true, y_pred)
        metrics[f"acc_sub-{sub}"] = acc_i = np.mean(acc_map_i)
        acc_map += acc_map_i / len(SUBJECTS)
        acc += acc_i / len(SUBJECTS)

    metrics["acc_map_avg"] = acc_map
    metrics["acc_avg"] = acc
    accs_fmt = ",".join(
        f"{val:.3f}" for key, val in metrics.items() if key.startswith("acc_sub-")
    )

    # Throughput uses the size of the last batch and the mean step time.
    tput = batch_size / step_time_m.avg
    print(
        f"Val: {epoch:>3d}"
        f"  Loss: {loss_m.avg:#.3g}"
        f"  Acc: {accs_fmt} ({acc:.3f})"
        f"  Time: {data_time_m.avg:.3f},{step_time_m.avg:.3f} {tput:.0f}/s"
    )

    return acc, metrics


def pearsonr_score(
    y_true: np.ndarray, y_pred: np.ndarray, eps: float = 1e-7
) -> np.ndarray:
    """Return the column-wise Pearson correlation between two 2-D arrays.

    Every column is mean-centred and scaled by its L2 norm plus ``eps``, so a
    constant column scores 0 instead of dividing by zero. The result has one
    value per column.
    """
    assert y_true.ndim == y_pred.ndim == 2

    def _unit_columns(columns: np.ndarray) -> np.ndarray:
        # Centre each column, then scale it to (almost) unit length.
        centered = columns - columns.mean(axis=0)
        return centered / (np.linalg.norm(centered, axis=0) + eps)

    return (_unit_columns(y_true) * _unit_columns(y_pred)).sum(axis=0)

In [None]:
# Sketch of a stacked ensemble that learns one weight per model.
# NOTE(review): model_preds, y_val, models, new_data and LinearRegression are not
# defined or imported in the visible cells — confirm before running.
# Assume:
# - X_val: Validation features (shape [n_samples, ...])
# - y_val: Validation labels (shape [n_samples, ...])
# - model_preds: Predictions from 5 models (shape [5, n_samples, ...])

# Stack the five predictions along a new last axis
# (with inputs of shape [n_samples, ...] the result is [n_samples, ..., 5])
X_stack = np.stack([model_preds[i] for i in range(5)], axis=-1)

# Train a regressor to learn weights
ensemble = LinearRegression()
ensemble.fit(X_stack, y_val)  # Learns weights w1, ..., w5

# For new data, combine predictions: final_pred = w1 * m1 + ... + w5 * m5
# (each model's prediction is stacked along the last axis, as above)
new_preds = np.stack([model.predict(new_data) for model in models], axis=-1)
final_pred = ensemble.predict(new_preds)