In [4]:
!pip install --no-index colorama

Looking in links: /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2023/x86-64-v3, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/gentoo2023/generic, /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic
Processing /cvmfs/soft.computecanada.ca/custom/python/wheelhouse/generic/colorama-0.4.6+computecanada-py2.py3-none-any.whl
Installing collected packages: colorama
Successfully installed colorama-0.4.6+computecanada


In [1]:
# Enable autoreload so that edits to imported project modules are picked up
# live, without having to restart the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

# Make the project directory importable from this notebook.
# NOTE: despite its name, `script_dir` is the *parent* of the current working
# directory (the name is kept because it is echoed below); `parent_dir` is two
# further levels up, i.e. three levels above the working directory.
script_dir = os.path.dirname(os.getcwd())
parent_dir = os.path.dirname(os.path.dirname(script_dir))

print(f"{script_dir = }")
print(f"{parent_dir = }")

# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

script_dir = '/lustre06/project/6067616/soroush1/idiosyncrasy/notebooks/test'
parent_dir = '/lustre06/project/6067616/soroush1/idiosyncrasy'


In [3]:
from lightning import LightningDataModule
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms

import argparse
import numpy as np
import h5py as h5


from cka_reg import DATA_PATH
from cka_reg.datamodules.datamodules_utils import *

2025-01-16 03:15:26,845 - INFO - ImageNet module loaded.


In [5]:
class NeuralDataConstructor:
    """Base class for dataset-specific neural data constructors.

    Stores the hyperparameters, builds the stimulus ``Partition`` and declares
    the accessor methods (``get_stimuli``, ``get_neural_responses``,
    ``get_labels``) that subclasses are expected to override.
    """

    def __init__(self, hparams, partition_scheme, *args, **kwargs):
        """Store ``hparams`` and build the partition object.

        Args:
            hparams: object exposing at least ``seed`` and ``verbose``.
            partition_scheme: iterable unpacked positionally into ``Partition``.
            *args, **kwargs: accepted for signature compatibility; unused here.
        """
        self.hparams = hparams
        self.partition = Partition(*partition_scheme, seed=hparams.seed)
        self.verbose = hparams.verbose

    def get_stimuli(self, *args, **kwargs):
        """Return the stimuli; must be overridden with dataset-specific logic.

        Raises:
            NameError: always, in this base implementation.
        """
        raise NameError("Method not implemented")

    def get_neural_responses(self, *args, **kwargs):
        """Return the neural responses; must be overridden by subclasses.

        Raises:
            NameError: always, in this base implementation.
        """
        raise NameError("Method not implemented")

    def get_labels(self, *args, **kwargs):
        """Return the labels; must be overridden by subclasses.

        Raises:
            NameError: always, in this base implementation.
        """
        raise NameError("Method not implemented")

    @staticmethod
    def partition_neurons(X, ntrain, seed=0):
        """Split the columns (axis 1) of ``X`` into two randomly chosen groups.

        Args:
            X: array with at least two dimensions; axis 1 is shuffled and split.
            ntrain: number of shuffled columns placed in the first group.
            seed: seed for the shuffle; the same seed yields the same split.

        Returns:
            Tuple ``(first, second)``: the first ``ntrain`` shuffled columns
            and the remaining ones.
        """
        # A private RandomState produces the same draw that
        # ``np.random.seed(seed)`` + ``np.random.choice`` would, but leaves the
        # process-wide NumPy RNG state untouched.
        rng = np.random.RandomState(seed)
        n_columns = X.shape[1]
        idx = rng.choice(n_columns, n_columns, replace=False)
        return X[:, idx[:ntrain]], X[:, idx[ntrain:]]

class _ManyMonkeysDataConstructer(NeuralDataConstructor):
    """Data constructor backed by the ``many_monkeys2.h5`` recordings file.

    Reads stimuli, labels and per-animal firing rates from the HDF5 file,
    optionally restricted to one value of the ``var`` dataset, and returns the
    requested stimulus partition of each.
    """

    # Opened once, when the class body is executed, and shared by all instances.
    data = h5.File(f"{DATA_PATH}/neural_data/many_monkeys2.h5", "r")
    # Echo the top-level keys of the file at class-definition time.
    print(f"{data.keys() = }")

    def __init__(
        self,
        hparams,
        variations="All",
        partition_scheme=(640, 540, 100, 0),
        *args,
        **kwargs,
    ):
        """Select which stimuli are used and set up the partition.

        Args:
            hparams: object exposing ``seed`` and ``verbose`` (read by the base
                class).
            variations: ``"All"`` to keep every stimulus, or ``3`` / ``6`` to
                keep only the entries whose ``var`` value equals that number.
            partition_scheme: forwarded to the base class; its first element
                must be 640 for ``"All"`` and 320 for ``3`` / ``6``.

        Raises:
            ValueError: if ``variations`` is not ``"All"``, ``3`` or ``6``.
            AssertionError: if ``partition_scheme[0]`` does not match the
                selected variation.
        """
        super().__init__(hparams, partition_scheme, *args, **kwargs)

        if variations == "All":
            # integer index of every stimulus
            self.idxs = np.array(range(len(self.data["var"][()])))
            assert partition_scheme[0] == 640, "'All' expects partition_scheme[0] == 640"
        elif variations in (3, 6):
            # boolean mask over the stimuli with the requested variation
            self.idxs = self.data["var"][()] == variations
            assert partition_scheme[0] == 320, "variations 3/6 expect partition_scheme[0] == 320"
        else:
            # Fail here instead of leaving ``self.idxs`` undefined and hitting
            # an AttributeError on the first data access.
            raise ValueError(
                f"Unsupported variations {variations!r}; expected 'All', 3 or 6"
            )

        self.n_heldout_neurons = 0

    def get_stimuli(self, stimuli_partition):
        """Return the selected stimuli for one partition.

        Args:
            stimuli_partition: key into the dict returned by ``self.partition``
                (e.g. ``"train"``).
        """
        # Move the last axis to position 1.
        # NOTE(review): presumably channels-last -> channels-first; confirm.
        X = self.data["stimuli"][()][self.idxs].transpose(0, 3, 1, 2)
        if self.verbose:
            # Report the shape only; printing the array itself floods the output.
            print(f"{stimuli_partition = }")
            print(f"stimuli shape: {X.shape}")
        # partition the stimuli
        return self.partition(X)[stimuli_partition]

    def get_labels(self, stimuli_partition, class_type):
        """Return the selected labels for one partition.

        Args:
            stimuli_partition: key into the dict returned by ``self.partition``.
            class_type: accepted but not used by this implementation.
        """
        # get label data -- already converted into integers corresponding to HVM category labels
        X = self.data["category_name_HVM_aligned"][()][self.idxs]
        return self.partition(X)[stimuli_partition]

    def get_neural_responses(
        self,
        animals,
        n_neurons_animal,
        n_trials,
        neuron_partition,
        stimuli_partition,
        hparams,
    ):
        """Return trial-averaged responses, concatenated across animals.

        Args:
            animals: list of ``"<animal>.<region>"`` strings, or ``["All"]``.
            n_neurons_animal: per-animal neuron counts (strings or ints), or
                ``["All"]``.
            n_trials: number of trials to average over, or ``"All"``.
            neuron_partition: index into the tuple returned by
                ``partition_neurons`` (0 = fitting set, 1 = held-out set).
            stimuli_partition: key into the dict returned by ``self.partition``.
            hparams: object exposing ``seed`` and ``seed_select_neurons``.

        Returns:
            Array for the requested stimulus partition, with the selected sites
            of every animal concatenated along axis 1.
        """
        if self.verbose:
            # str() each entry so integer counts do not break the join
            n_neurons_str = "+".join(map(str, n_neurons_animal))
            print(
                f"constructing {stimuli_partition} data with\n"
                + f"animals:{animals}\n"
                + f"neurons:{n_neurons_str}\n"
                + f"trials:{n_trials}\n"
            )
        # transform "All" to all dataset's animals
        animals = self.expand(animals)
        # only return [:n_neurons] if it's not the heldout set of neurons
        if n_neurons_animal == ["All"] or neuron_partition != 0:
            # a cap larger than any site count keeps every neuron
            n_neurons_animal = [int(1e10)] * len(animals)
        n_trials = int(1e10) if n_trials == "All" else int(n_trials)

        # NOTE(review): zip() stops at the shorter list, so a count list shorter
        # than ``animals`` silently drops the remaining animals.
        neural_responses = []
        for animal, n_neurons in zip(animals, n_neurons_animal):
            r = self._get_neural_responses(animal, n_trials, neuron_partition, hparams)
            # seeded permutation -> the same neurons are picked on every call
            selected_neurons = np.random.RandomState(
                hparams.seed_select_neurons
            ).permutation(r.shape[1])[: int(n_neurons)]
            neural_responses.append(r[:, selected_neurons])
        X = np.concatenate(neural_responses, axis=1)

        if self.verbose:
            print(f"Neural data shape:\n(stimuli, sites) : {X.shape}")

        # Partition once and pick the requested split.
        return self.partition(X)[stimuli_partition]

    def _get_neural_responses(self, animal, n_trials, neuron_partition, hparams):
        """Load one animal/region and average its rates over trials.

        Args:
            animal: ``"<animal>.<region>"`` string.
            n_trials: number of leading trials (axis 2) to average over.
            neuron_partition: index into the tuple returned by
                ``partition_neurons``; only used when ``n_heldout_neurons != 0``.
            hparams: object exposing ``seed``.

        Returns:
            2-D array of trial-averaged rates (NaN trials are ignored).

        Raises:
            AssertionError: if the averaged array still contains NaN.
        """
        animal, region = animal.split(".")
        X = self.data[animal][region]["rates"][()][self.idxs]

        if self.verbose:
            print(f"{animal} {region} shape:\n(stimuli, sites, trials) : {X.shape}")

        # Get the subset of neurons to fit/test on:
        # neuron_partition == 0 => fitting set,
        # neuron_partition == 1 => heldout set
        if self.n_heldout_neurons != 0:
            X = self.partition_neurons(
                X, X.shape[1] - self.n_heldout_neurons, seed=hparams.seed
            )[neuron_partition]

        if self.verbose:
            print(f"(stimuli, sites, trials) : {X.shape}")

        # take mean over trials
        X = X[:, :, :n_trials]
        X = np.nanmean(X, axis=2)

        if self.verbose:
            print(f"(stimuli, sites) : {X.shape}")

        # ``not`` instead of ``~``: ``~`` is only a logical negation for NumPy
        # booleans and would make the check vacuous on a plain Python bool.
        assert not np.isnan(np.sum(X)), "NaN left in trial-averaged responses"
        return X

    @staticmethod
    def expand(animals):
        """Replace a leading ``"All"`` with the full list of animal/region names.

        Any other list is returned unchanged.
        """
        if animals[0] == "All":
            return [
                "nano.right",
                "nano.left",
                "magneto.right",
                "magneto.left",
                "bento.right",
                "bento.left",
                "solo.left",
                "tito.right",
                "tito.left",
                "chabo.left",
            ]
        return animals

def ManyMonkeysDataConstructer(hparams):
    """Build the many-monkeys constructor with its default settings.

    ``hparams`` is passed through unchanged; ``variations`` and
    ``partition_scheme`` keep the defaults of ``_ManyMonkeysDataConstructer``.
    """
    constructor = _ManyMonkeysDataConstructer(hparams)
    return constructor

def ManyMonkeysValDataConstructer(hparams):
    """Build the many-monkeys constructor configured for validation.

    Restricts the stimuli to ``variations=6`` and uses the partition scheme
    ``(320, 0, 320, 0)``.
    """
    val_settings = {
        "variations": 6,
        "partition_scheme": (320, 0, 320, 0),
    }
    return _ManyMonkeysDataConstructer(hparams, **val_settings)

data.keys() = <KeysViewHDF5 ['bento', 'category_name_HVM_aligned', 'chabo', 'magneto', 'nano', 'solo', 'stimuli', 'tito', 'var']>


In [6]:
import h5py as h5
import numpy as np
from types import SimpleNamespace

# Minimal stand-in for the experiment hyperparameters: only the attributes the
# data constructor reads (seed, seed_select_neurons, verbose) are provided.
hparams = SimpleNamespace(
    seed=42,
    seed_select_neurons=42,
    verbose=True,
)

# Initialize data constructor
data_constructor = ManyMonkeysDataConstructer(hparams)

# Get neural responses for the train split of a single animal/region
responses = data_constructor.get_neural_responses(
    animals=["nano.right"],
    n_neurons_animal=["All"],
    n_trials="All",
    neuron_partition=0,
    stimuli_partition="train",
    hparams=hparams,
)

print(f"Neural responses shape: {responses.shape}")

constructing train data with
animals:['nano.right']
neurons:All
trials:All

nano right shape:
(stimuli, sites, trials) : (640, 139, 47)
(stimuli, sites, trials) : (640, 139, 47)
(stimuli, sites) : (640, 139)
Neural data shape:
(stimuli, sites) : (640, 139)
type(temp) = <class 'dict'>
temp.keys() = dict_keys(['train', 'test', 'val'])
Neural responses shape: (540, 139)
