## Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import random
import math
import scipy.io as sio
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


## Load data

### Fmri data

In [2]:
NUM_SUBJS = 8
subjects_fmri = [] #stores all 8 subject fmri np arrays

fMRI_folder = Path('./doi_10.5061_dryad.gt413__v1')
assert fMRI_folder.exists(), f"Foldder: {fMRI_folder} does not exist."

for subj_id in range(8):
    fmri_file_name = str(subj_id) + '_masked_2d.npy'
    fmri = np.load(fMRI_folder / fmri_file_name)
    assert isinstance(fmri, np.ndarray), f"Imported fmri_scan for subject {subj_id} is not of type numpy.ndarray"
    assert(fmri.ndim) == 2, f"Imported fmri_scan for subject {subj_id} is not 2 dimensional"
    subjects_fmri.append(fmri)

### Word features

In [3]:
feature_matrix = np.zeros((5176,195)) #stores the feature vectors as a row for each word
feature_names = [] #stores the names of all features in order
feature_types = {} #stores the types of features and all the names of the features for each type

features = sio.loadmat(fMRI_folder / 'story_features.mat')
feature_count = 0
for feature_type in features['features'][0]:
    feature_types[feature_type[0][0]] = []
    if isinstance(feature_type[1][0], str):
        feature_types[feature_type[0][0]].append(feature_type[1][0])
        feature_names.append(feature_type[1][0])
    else:
        for feature in feature_type[1][0]:
            feature_types[feature_type[0][0]].append(feature[0])
            feature_names.append(feature[0])
    feature_matrix[:, feature_count:feature_count+feature_type[2].shape[1]] = feature_type[2] #adds the (5176xN) feature values to the feature matrix for the current feature group
    feature_count += feature_type[2].shape[1]

### Word values and timings

In [4]:
words_info = [] #stores tuples of (word, time, features) sorted by time appeared

mat_file = fMRI_folder / 'subject_1.mat' #only looks at the first subject file, somewhere it said all the timings were the same so this should be safe
mat_contents = sio.loadmat(mat_file)
for count, row in enumerate(mat_contents['words'][0]):
    word_value = row[0][0][0][0]
    time = row[1][0][0]
    word_tuple = (word_value, time, feature_matrix[count,:])
    words_info.append(word_tuple)

## Align fmri and word features

Still working on this part - Harrison

In [7]:
#class for storing all the information for each sample

class sample: 
    def __init__(self, subj_id, time, input_voxels, output_voxels, input_words, input_word_features, output_words, output_word_features):
        self.subj_id = subj_id #id of subject
        self.time = time #time at which the scan occurred
        self.input_voxels = voxels #2d array of 4 TRs of voxels at time of scan
        self.output_voxels = 
        self.words = words #list of 4 words associated with scan
        self.word_features = word_features #np array of size (4,nFeatures) storing the features for the 4 words
    
    def get_subj_id(self):
        return self.subj_id
    
    def get_time(self):
        return self.time
    
    def get_voxels(self):
        return self.voxels
    
    def get_words(self):
        return self.words
    
    def get_word_features(self):
        return self.word_features

In [24]:
subjects_samples = [[] for i in range(NUM_SUBJS)] #stores lists of all the samples for each subject

#still working on this, need to deal with the issue where a rest happens 
for word_count in range(0,len(words_info),4):
    #gets 4 words and their features
    print(word_count)
    scan_words = []
    start_time = words_info[word_count][1]
    for i in range(4):
        word_info = words_info[word_count + i]
        print(word_info[0])
        print(word_info[1])
        assert word_info[1] == start_time + 0.5*i, "Words are not 0.5 seconds apart"
        scan_words.append(word_info[0])
    #print(scan_words)
    word_features = feature_matrix[word_count:word_count+4, :]
    #gets index of associated fmri
    fmri_time = start_time + 6 #peak is assumed 6 seconds after reading words
    #print(fmri_time)
    fmri_index = fmri_time//2
    print(type(fmri_index))
    assert isinstance(fmri_index, np.int32), "fmri_time is not an exact number"
    for count, subject in enumerate(subjects_fmri):
        #print(fmri_index)
        #print(subject.shape)
        new_sample = sample(count, fmri_time, subject[fmri_index,:], scan_words, word_features)
        subjects_samples[count].append(new_sample)

0
Harry
20
had
20.5
never
21
believed
21.5
<class 'numpy.int32'>
4
he
22
would
22.5
meet
23
a
23.5
<class 'numpy.int32'>
8
boy
24
he
24.5
hated
25
more
25.5
<class 'numpy.int32'>
12
than
26
Dudley,
26.5
but
27
that
27.5
<class 'numpy.int32'>
16
was
28
before
28.5
he
29
met
29.5
<class 'numpy.int32'>
20
Draco
30
Malfoy.
30.5
Still,
31
first-year
31.5
<class 'numpy.int32'>
24
Gryffindors
32
only
32.5
had
33
Potions
33.5
<class 'numpy.int32'>
28
with
34
the
34.5
Slytherins,
35
so
35.5
<class 'numpy.int32'>
32
they
36
didn't
36.5
have
37
to
37.5
<class 'numpy.int32'>
36
put
38
up
38.5
with
39
Malfoy
39.5
<class 'numpy.int32'>
40
much.
40
Or
40.5
at
41
least,
41.5
<class 'numpy.int32'>
44
they
42
didn't
42.5
until
43
they
43.5
<class 'numpy.int32'>
48
spotted
44
a
44.5
notice
45
pinned
45.5
<class 'numpy.int32'>
52
up
46
in
46.5
the
47
Gryffindor
47.5
<class 'numpy.int32'>
56
common
48
room
48.5
that
49
made
49.5
<class 'numpy.int32'>
60
them
50
all
50.5
groan.
51
Flying
51.5
<class 'numpy.

AssertionError: Words are not 0.5 seconds apart