<a href="https://colab.research.google.com/github/IanQS/neuromatch_project/blob/main/steinmetz_modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Modeling of the Steinmetz dataset

- uses [Neuromatch Load Steinmetz Decisions](https://colab.research.google.com/github/NeuromatchAcademy/course-content/blob/main/projects/neurons/load_steinmetz_decisions.ipynb#scrollTo=DJ-jzsE5eLxX) as a base

In [11]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import zscore
from sklearn.decomposition import PCA
import concurrent.futures
from multiprocessing import Pool

!pip install -q ipython-autotime
%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 4.32 s (started: 2023-07-19 15:21:07 +00:00)


# Data Pipeline

1) Loads and processes the data

2) Filters out brain areas

In [2]:
import os, requests

fname = []
for j in range(3):
  fname.append('steinmetz_part%d.npz'%j)
url = ["https://osf.io/agvxh/download"]
url.append("https://osf.io/uv3mw/download")
url.append("https://osf.io/ehmw2/download")

for j in range(len(url)):
  if not os.path.isfile(fname[j]):
    try:
      r = requests.get(url[j])
    except requests.ConnectionError:
      print("!!! Failed to download data !!!")
    else:
      if r.status_code != requests.codes.ok:
        print("!!! Failed to download data !!!")
      else:
        with open(fname[j], "wb") as fid:
          fid.write(r.content)



time: 373 ms (started: 2023-07-19 14:54:10 +00:00)


In [3]:
all_datasets = np.array([])
for j in range(len(fname)):
  all_datasets = np.hstack((all_datasets,
                      np.load('steinmetz_part%d.npz'%j,
                              allow_pickle=True)['dat']))

time: 46.1 s (started: 2023-07-19 14:54:11 +00:00)


In [4]:
regions = ["vis ctx", "thal", "hipp", "other ctx", "midbrain", "basal ganglia", "cortical subplate", "other"]
region_colors = ['blue', 'red', 'green', 'darkblue', 'violet', 'lightblue', 'orange', 'gray']
brain_groups = [["VISa", "VISam", "VISl", "VISp", "VISpm", "VISrl"],  # visual cortex
                ["CL", "LD", "LGd", "LH", "LP", "MD", "MG", "PO", "POL", "PT", "RT", "SPF", "TH", "VAL", "VPL", "VPM"], # thalamus
                ["CA", "CA1", "CA2", "CA3", "DG", "SUB", "POST"],  # hippocampal
                ["ACA", "AUD", "COA", "DP", "ILA", "MOp", "MOs", "OLF", "ORB", "ORBm", "PIR", "PL", "SSp", "SSs", "RSP","TT"],  # non-visual cortex
                ["APN", "IC", "MB", "MRN", "NB", "PAG", "RN", "SCs", "SCm", "SCig", "SCsg", "ZI"],  # midbrain
                ["ACB", "CP", "GPe", "LS", "LSc", "LSr", "MS", "OT", "SNr", "SI"],  # basal ganglia
                ["BLA", "BMA", "EP", "EPd", "MEA"]  # cortical subplate
                ]

# Assign each area an index
area_to_index = dict(root=0)
counter = 1
for group in brain_groups:
    for area in group:
        area_to_index[area] = counter
        counter += 1

# Figure out which areas are in each dataset
areas_by_dataset = np.zeros((counter, len(all_datasets)), dtype=bool)
for j, d in enumerate(all_datasets):
    for area in np.unique(d['brain_area']):
        i = area_to_index[area]
        areas_by_dataset[i, j] = True


time: 5.2 ms (started: 2023-07-19 14:54:57 +00:00)


In [5]:
DATASET_IDX = 11
specific_dataset = all_datasets[DATASET_IDX]

dt = specific_dataset["bin_size"]
NUM_SUBJECTS = specific_dataset["spks"].shape[0]

NUM_BINNED_TIMES = specific_dataset["spks"].shape[2]

if DATASET_IDX != 11:
    raise Exception("Code is only meant for DATASET_IDX=11")
else:
    NUM_REGIONS = 4
    NUM_SUBREGIONS = len(specific_dataset["brain_area"])

brain_subregions = NUM_REGIONS * np.ones(NUM_SUBREGIONS, )  # last one is "other"
for j in range(NUM_REGIONS):
  brain_subregions[
      np.isin(specific_dataset['brain_area'], brain_groups[j])
      ] = j  # assign a number to each region


time: 2.85 ms (started: 2023-07-19 14:54:57 +00:00)


# Creating the dataset

In [16]:
LABELS = specific_dataset["response"]  # RIGHT - NO_GO - LEFT (-1, 0, 1)
y = LABELS

time: 660 µs (started: 2023-07-19 15:23:01 +00:00)


In [17]:
def log_shapes():
    _ds = specific_dataset['spks']
    print(f"All spikes shape: {_ds.shape}")
    _ds_brain_region = _ds[brain_subregions == 0]
    print(f"\t- Spike shape for sample brain region (0-th): {_ds_brain_region.shape}")

    _ds_0th_left_response = _ds_brain_region[:, y >= 0]
    print(f"\t- Spike shape for sample brain region (0-th) left responses: {_ds_0th_left_response.shape}")

    averaged_over_left_response = _ds_0th_left_response.mean(axis=(0, 1))
    print(f"\t- Averaged brain region (0-th) left responses: {averaged_over_left_response.shape}")

log_shapes()


All spikes shape: (698, 340, 250)
	- Spike shape for sample brain region (0-th): (145, 340, 250)
	- Spike shape for sample brain region (0-th) left responses: (145, 199, 250)
	- Averaged brain region (0-th) left responses: (250,)
time: 17.2 ms (started: 2023-07-19 15:23:02 +00:00)


In [18]:
def create_design_matrix():
    spike_partitioned = {}  # brain region to spike mapping
    for i in range(NUM_REGIONS):
        spks = specific_dataset["spks"][brain_subregions == j]
        spikes_for_left_response = spks[:, y < 0]
        spikes_for_right_response = spks[:, y > 0]
        spikes_for_no_response = spks[:, y == 0]

        spike_partitioned[regions[i]] = [
            spikes_for_left_response,
            spikes_for_no_response,
            spikes_for_right_response
        ]
    return spike_partitioned

Xs = create_design_matrix()

time: 18.1 ms (started: 2023-07-19 15:23:02 +00:00)


In [23]:
print(f"Left-Resp Visual Cortex shape: {Xs['vis ctx'][0].shape}")
print(f"No-Resp Visual Cortex shape: {Xs['vis ctx'][1].shape}")
print(f"Right-Resp Visual Cortex shape: {Xs['vis ctx'][2].shape}")

Left-Resp Visual Cortex shape: (78, 141, 250)
No-Resp Visual Cortex shape: (78, 64, 250)
Right-Resp Visual Cortex shape: (78, 135, 250)
time: 1.06 ms (started: 2023-07-19 15:24:47 +00:00)
