In [1]:
import argparse
import json
import os
import random

import numpy as np
import torch

import config

from utils.utils_stim import get_stim
from utils.utils_resp import get_resp
from utils.utils_ridge.ridge import ridge, bootstrap_ridge
from utils.GPT import GPT
from utils.LMFeatures import LMFeatures

# Seed every random number generator available here (stdlib `random`, PyTorch, NumPy)
# from a single constant, so that stochastic steps are repeatable whichever generator
# a helper happens to draw from.
# NOTE(review): which generator the imported helpers actually use is not visible in
# this notebook -- confirm against utils/.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
# Kept last on purpose: np.random.seed returns None, so the cell displays no output.
np.random.seed(SEED)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Torch device handed to the GPT wrapper when it is constructed; fixed to the CPU.
device_name = "cpu"
device = torch.device(device_name)

In [3]:
# Load the GPT language model. Semantic features taken from one of its intermediate
# layers (config.GPT_LAYER) are what the encoding model is trained on.
lm_dir = os.path.join("./data_lm/", "perceived")
# JSON text is UTF-8 by specification; pass the encoding explicitly so that reading
# the vocabulary does not depend on the platform's locale default.
with open(os.path.join(lm_dir, "vocab.json"), "r", encoding="utf-8") as f:
    gpt_vocab = json.load(f)
gpt = GPT(path = os.path.join(lm_dir, "model"), vocab = gpt_vocab, device = device)
# Feature extractor built on the model above; layer and context size come from config.
features = LMFeatures(model = gpt, layer = config.GPT_LAYER, context_words = config.GPT_WORDS)

In [4]:
# Smoke check: run the feature extractor on a short word sequence and show the
# shape of what comes back.
sample_words = ["Hello", "Bob", "how", "are", "you", "today"]
output = features.make_stim(sample_words)
output.shape

(6, 768)

In [5]:
# Session numbers whose stories make up the training set (looked up in
# sess_to_story.json further down).
sessions = [
    2, 3, 4, 5, 6, 7, 8, 9,
    11, 12, 14, 15, 18, 20,
]

In [6]:
# Training stories: every story listed for each selected session, in session order.
# The encoding is given explicitly (JSON is UTF-8 by specification) so the read does
# not depend on the platform's locale default.
with open(os.path.join(config.DATA_TRAIN_DIR, "sess_to_story.json"), "r", encoding="utf-8") as f:
    sess_to_story = json.load(f)
# JSON object keys are always strings, so the integer session numbers are converted
# with str() before the lookup.
stories = [story for sess in sessions for story in sess_to_story[str(sess)]]

In [7]:
# Estimate the encoding model.

# Stimulus side: features for the training stories plus two statistics objects.
# NOTE(review): the exact contents of tr_stats / word_stats are defined in
# utils.utils_stim and are not visible here.
rstim, tr_stats, word_stats = get_stim(stories, features)
print("Pass 1")

# Response side for subject "S1", stacked across the training stories.
rresp = get_resp("S1", stories, stack = True)
print("Pass 2")

# Number of chunks of length config.CHUNKLEN that together span one fifth of the
# response rows, rounded up.
# NOTE(review): presumably the portion held out per bootstrap -- confirm in ridge.py.
n_resp_rows = rresp.shape[0]
nchunks = int(np.ceil(n_resp_rows / 5 / config.CHUNKLEN))

# Bootstrapped ridge regression over the candidate regularisation values in config.ALPHAS.
weights, alphas, bscorrs = bootstrap_ridge(
    rstim,
    rresp,
    use_corr = False,
    alphas = config.ALPHAS,
    nboots = config.NBOOTS,
    chunklen = config.CHUNKLEN,
    nchunks = nchunks,
)

# Collapse the bootstrap scores: mean over axis 2, then max over axis 0.
# NOTE(review): presumably axis 2 indexes bootstraps and axis 0 indexes alphas -- confirm.
bscorrs = bscorrs.mean(2).max(0)

# Indices of the config.VOXELS highest-scoring entries, returned in ascending index order.
top_scoring = np.argsort(bscorrs)[-config.VOXELS:]
vox = np.sort(top_scoring)

Pass 1
Pass 2


18it [50:31, 168.19s/it]