In [1]:
import os
import os.path as op
import nibabel as nib
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw, ImageFont, ImageEnhance
import imageio.v3 as imageio
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from tqdm import tqdm
import h5py
import pickle
from scipy.io import loadmat
# Move the working directory up one level; every data path used below is relative.
# NOTE(review): not idempotent — re-running this cell moves up one more directory each time.
os.chdir('../')

  warn(
  from .autonotebook import tqdm as notebook_tqdm


# Data Preprocessing

In [14]:
# Convert the NSD-synthetic stimulus bricks (HDF5 dataset 'imgBrick') into float
# tensors and cache them as .pt files next to the source HDF5 files.
to_pil = transforms.ToPILImage()
to_tensor = transforms.ToTensor()

# Part 1: the 220 stimuli stored in the single, non-subject-specific file.
file_path = 'nsddata_stimuli/stimuli/nsdsynthetic/nsdsynthetic_stimuli.hdf5'
with h5py.File(file_path, 'r') as sf:  # closes the handle even if a conversion fails
    sdataset = sf.get('imgBrick')
    stim_part1 = torch.zeros(220, 3, 714, 1360)
    for i in tqdm(range(220)):
        # Round-trip through PIL so ToTensor yields a channel-first float image.
        # NOTE(review): assumes each imgBrick entry is an HxWx3 uint8 array — confirm.
        stim_part1[i] = to_tensor(to_pil(sdataset[i]))
torch.save(stim_part1, "nsddata_stimuli/stimuli/nsdsynthetic/nsd_synthetic_stim_part1.pt")

# Part 2: 64 colour stimuli per subject, one HDF5 file and one output file per subject.
for sub in range(1,9):
    file_path = f'nsddata_stimuli/stimuli/nsdsynthetic/nsdsynthetic_colorstimuli_subj0{sub}.hdf5'
    with h5py.File(file_path, 'r') as sf:
        sdataset = sf.get('imgBrick')
        stim_part2 = torch.zeros(64, 3, 714, 1360)
        for i in tqdm(range(64)):
            # Fix: write into stim_part2. The previous code assigned to stim_part1[i],
            # so every saved part-2 file contained only zeros and the first 64 entries
            # of stim_part1 were overwritten in memory.
            stim_part2[i] = to_tensor(to_pil(sdataset[i]))
    torch.save(stim_part2, f"nsddata_stimuli/stimuli/nsdsynthetic/nsd_synthetic_stim_part2_sub{sub}.pt")


100%|██████████| 220/220 [00:00<00:00, 278.38it/s]
100%|██████████| 64/64 [00:00<00:00, 231.38it/s]
100%|██████████| 64/64 [00:00<00:00, 227.90it/s]
100%|██████████| 64/64 [00:00<00:00, 268.94it/s]
100%|██████████| 64/64 [00:00<00:00, 271.82it/s]
100%|██████████| 64/64 [00:00<00:00, 269.74it/s]
100%|██████████| 64/64 [00:00<00:00, 220.36it/s]
100%|██████████| 64/64 [00:00<00:00, 255.79it/s]
100%|██████████| 64/64 [00:00<00:00, 273.67it/s]


In [7]:
def zscore(x, mean=None, stddev=None, return_stats=False):
    """Standardize ``x`` along its first dimension.

    Args:
        x: Tensor to standardize.
        mean: Optional precomputed mean. When None, the mean of ``x`` over
            dim 0 (dimension kept) is used.
        stddev: Optional precomputed standard deviation. When None,
            ``torch.std`` of ``x`` over dim 0 (dimension kept) is used.
        return_stats: When True, also return the mean and stddev that were applied.

    Returns:
        ``(x - mean) / (stddev + 1e-6)``, or the tuple
        ``(standardized, mean, stddev)`` when ``return_stats`` is True.
    """
    center = torch.mean(x, axis=0, keepdims=True) if mean is None else mean
    spread = torch.std(x, axis=0, keepdims=True) if stddev is None else stddev
    # The small epsilon keeps constant columns (zero spread) from dividing by zero.
    standardized = (x - center) / (spread + 1e-6)
    if not return_stats:
        return standardized
    return standardized, center, spread
    
def create_whole_region_synthetic_unnormalized(subject = 1):
    """Extract the nsdgeneral-ROI betas of the NSD-synthetic data and cache them.

    Loads the subject's ``nsdgeneral`` ROI volume, keeps the voxels whose value
    equals 1.0 (NaNs are treated as 0), loads the synthetic betas volume, moves
    its last axis to the front, flattens the three remaining axes and keeps
    only the ROI voxels. The result is saved to
    ``preprocessed_data/subject{subject}/nsd_synthetic_unnormalized.pt``.

    Args:
        subject: Subject number, inserted into the ``subj0{subject}`` paths.

    Returns:
        A float32 tensor with one row per entry of the betas' last axis and one
        column per ROI voxel.
    """
    roi = nib.load(f"nsddata/ppdata/subj0{subject}/func1pt8mm/roi/nsdgeneral.nii.gz").get_fdata()
    # Boolean voxel mask in flattened volume order; NaN -> 0 so it never matches 1.0.
    voxel_mask = (np.nan_to_num(roi) == 1.0).flatten()

    os.makedirs("preprocessed_data/subject{}/".format(subject), exist_ok=True)

    beta_file = f"nsddata_betas/ppdata/subj0{subject}/func1pt8mm/nsdsyntheticbetas_fithrf_GLMdenoise_RR/betas_nsdsynthetic.nii.gz"

    # Bring the last axis to the front so that each row is one flattened volume.
    # NOTE(review): presumably the last axis indexes trials — confirm.
    imagery_betas = nib.load(beta_file).get_fdata().transpose((3,0,1,2))
    flat_betas = imagery_betas.reshape((len(imagery_betas), -1))
    whole_region = torch.from_numpy(flat_betas[:, voxel_mask].astype(np.float32))

    torch.save(whole_region, "preprocessed_data/subject{}/nsd_synthetic_unnormalized.pt".format(subject))
    return whole_region

def create_whole_region_synthetic_normalized(subject = 1):
    """Z-score the cached unnormalized synthetic betas and save the result.

    Reads ``preprocessed_data/subject{subject}/nsd_synthetic_unnormalized.pt``,
    divides by 300, z-scores along dim 0 with ``zscore`` and writes
    ``preprocessed_data/subject{subject}/nsd_synthetic.pt``.

    Args:
        subject: Subject number used in the input and output paths.

    Returns:
        The normalized tensor that was saved (same shape as the input tensor).
    """
    unnormalized_file = f"preprocessed_data/subject{subject}/nsd_synthetic_unnormalized"
    whole_region = torch.load(unnormalized_file + ".pt")
    # NOTE(review): presumably undoes the x300 scaling NSD applies to stored betas — confirm.
    # A constant rescale is cancelled by the z-score below, except for its effect
    # relative to the 1e-6 epsilon that zscore adds to the standard deviation.
    whole_region = whole_region / 300.

    # Normalize the data using Z scoring method for each voxel
    whole_region_norm = zscore(whole_region)

    # Save the tensor of normalized data
    torch.save(whole_region_norm, f"preprocessed_data/subject{subject}/nsd_synthetic.pt")
    return whole_region_norm

In [12]:
# Load the NSD-synthetic experiment design once and inspect its contents.
# NOTE(review): `data` was not defined by any visible cell (hidden kernel state);
# it is assumed to be this same expdesign .mat file — confirm.
data = loadmat('nsddata/experiments/nsdsynthetic/nsdsynthetic_expdesign.mat')
stimuli_order = data['masterordering'][0]
print(stimuli_order)
print(data['stimpattern'])

[198 231 196 152  53  18  23 180  25  34 221 240 202 141 108  52  14 249
  24 203 149 149  12  23 112 264 264 189 136  90  27  23  87 226  86  21
 216 216 255 224  43  77 272  28  22  59 155  33 283  27  95 195  26  26
 244  21  55 148 110 131  23 273 197 260 126  22  22 150  20  20 140 251
 215  17  16 127 127  69  83  68  68 132  67  37 165 186 186 104  10  10
  21  72  91 261 230 230 217  27 190 256  58  41  41 234 278 278  16  28
 225  81  80  14  26  29  29 182  28  28 239  20  35 138  98 211 199 147
 238 271 271 169 209 129  92 268   3 161  44 222 227 245  66 269 210  99
 252 262 100  25 277  17  13  25 107  50  22  15  89 276   6 124 167  60
  60  17 153  15  15  26  84  11  15 177 270  25 103 103 113  18  82 172
 130 171 178  40 170 170 219 118 188  46 114 114  20  63  63  54 102  19
   2  32  19  19  42  42 258  28 119 119 105 109 237 101 259  76  76 156
 183 248 248 163 243   9 181 173 254 139 175 160  13  71  13   1 218   4
 116  70  79 151 274 213  39  85  22 120 235 275  9

In [30]:
def condition_average(x, y, cond, nest=False):
    """Group per-trial responses by condition and average (or stack) them.

    Args:
        x: 2-D tensor; row ``t`` holds the response recorded on trial ``t``.
        y: Tensor indexed along dim 0 by condition id (this is how
            ``load_nsd_synthetic`` passes the stimulus bank).
        cond: Sequence with one integer condition id per row of ``x``.
        nest: When False, repeats of a condition are averaged into a single
            row. When True, repeats are stacked along dim 1 and zero-padded up
            to the largest repeat count.

    Returns:
        A tuple ``(avg_x, arranged_y, n_conditions)``. ``avg_x`` has shape
        ``(n_conditions, 1, n_features)``, or
        ``(n_conditions, max_repeats, n_features)`` when ``nest`` is True.
        ``arranged_y`` holds the rows of ``y`` for the conditions present in
        ``cond``, in ascending condition order, so row ``i`` of ``avg_x`` and
        row ``i`` of ``arranged_y`` describe the same condition.
    """
    cond = np.asarray(cond)
    # np.unique already returns the ids sorted, so no extra sort is needed.
    idx, idx_count = np.unique(cond, return_counts=True)

    if nest:
        width = int(idx_count.max()) if idx_count.size else 0
    else:
        width = 1
    avg_x = torch.zeros((len(idx), width, x.shape[1]), dtype=torch.float32)

    for i, (cond_id, n_rep) in enumerate(zip(idx, idx_count)):
        trials = x[torch.from_numpy(cond == cond_id)]
        if nest:
            # Rows beyond this condition's repeat count keep their zero padding.
            avg_x[i, :int(n_rep)] = trials
        else:
            avg_x[i] = torch.mean(trials, axis=0)

    # Fix: select stimuli by condition id and return them. The previous code
    # indexed y with the first element of the boolean trial mask (a single
    # bool, not an id) and then returned the unaligned input y.
    arranged_y = y[torch.as_tensor(idx, dtype=torch.long)]

    return avg_x, arranged_y, len(idx)

def load_nsd_synthetic(subject, average=False, nest=False):
    """Load one subject's cached synthetic betas together with their stimuli.

    The stimulus bank has 284 entries: the first 220 come from the
    ``part1`` file and the remaining 64 from the subject's ``part2`` file.
    Trial order is read from ``masterordering`` in the experiment-design .mat
    file and shifted by -1 before it is used as an index
    (presumably converting 1-based ids to 0-based — confirm).

    Args:
        subject: Subject number used in the file paths.
        average: When True (or when ``nest`` is True), trials are grouped per
            condition through ``condition_average``.
        nest: Forwarded to ``condition_average``.

    Returns:
        ``(x, y)``. Without grouping, ``x`` gets a singleton middle dimension
        and ``y`` holds one stimulus per trial. With grouping, both are
        whatever ``condition_average`` returns. The two shapes are printed
        before returning.
    """
    shared_stim = torch.load("nsddata_stimuli/stimuli/nsdsynthetic/nsd_synthetic_stim_part1.pt")
    y = torch.zeros((284, 3, 714, 1360))
    y[:220] = shared_stim
    y[220:] = torch.load(f"nsddata_stimuli/stimuli/nsdsynthetic/nsd_synthetic_stim_part2_sub{subject}.pt")

    betas = torch.load("preprocessed_data/subject{}/nsd_synthetic.pt".format(subject))
    x = betas.requires_grad_(False).to("cpu")

    design = loadmat('nsddata/experiments/nsdsynthetic/nsdsynthetic_expdesign.mat')
    conditionals = design['masterordering'][0].astype(int) - 1

    if not (average or nest):
        # Keep every trial: add a singleton middle dim and look up each trial's stimulus.
        n_trials, n_features = x.shape[0], x.shape[1]
        x = x.reshape((n_trials, 1, n_features))
        y = y[conditionals]
    else:
        x, y, _ = condition_average(x, y, conditionals, nest=nest)

    print(x.shape, y.shape)
    return x, y

In [31]:
# Show the working directory: every data path used by the functions below is relative to it.
print(os.getcwd())
# For subjects 1-8: rebuild the cached unnormalized and z-scored beta tensors,
# then load the subject once as a smoke test (load_nsd_synthetic prints the shapes).
for subject in tqdm(range(1,9)):
    create_whole_region_synthetic_unnormalized(subject)
    create_whole_region_synthetic_normalized(subject)
    # x and y are overwritten on every iteration; only the last subject's tensors remain afterwards.
    x, y = load_nsd_synthetic(subject=subject, average=False, nest=False)

/export/raid1/home/kneel027/Second-Sight/data


  0%|          | 0/8 [00:00<?, ?it/s]

torch.Size([744, 1, 15724]) torch.Size([744, 3, 714, 1360])


 12%|█▎        | 1/8 [00:02<00:20,  2.97s/it]

torch.Size([744, 1, 14278]) torch.Size([744, 3, 714, 1360])


 25%|██▌       | 2/8 [00:05<00:17,  2.98s/it]

torch.Size([744, 1, 15226]) torch.Size([744, 3, 714, 1360])


 38%|███▊      | 3/8 [00:08<00:14,  2.98s/it]

torch.Size([744, 1, 13153]) torch.Size([744, 3, 714, 1360])


 50%|█████     | 4/8 [00:11<00:11,  2.98s/it]

torch.Size([744, 1, 13039]) torch.Size([744, 3, 714, 1360])


 62%|██████▎   | 5/8 [00:14<00:08,  2.98s/it]

torch.Size([744, 1, 17907]) torch.Size([744, 3, 714, 1360])


 75%|███████▌  | 6/8 [00:17<00:05,  2.98s/it]

torch.Size([744, 1, 12682]) torch.Size([744, 3, 714, 1360])


 88%|████████▊ | 7/8 [00:20<00:02,  2.97s/it]

torch.Size([744, 1, 14386]) torch.Size([744, 3, 714, 1360])


100%|██████████| 8/8 [00:24<00:00,  3.08s/it]
