In [1]:
import os
import time
import numpy as np

import pymc3 as pm
import theano
import matplotlib.pyplot as plt

from utils.helpers import *
from utils.plotting import *
from utils.glm_models import *

import sampling.seq_gen as sg
import sampling.seq_analysis as sa

# Import relevant SBL modules
import sbl_agents.sbl_cat_dir as sbl_cd
import sbl_agents.sbl_hmm as sbl_hmm

# Seed NumPy's global RNG so that runs are repeatable.
np.random.seed(1234)

# Output/input folders, all resolved against the current working directory.
results_dir, fig_dir, data_dir = [
    os.getcwd() + suffix for suffix in ("/results/", "/figures/", "/data/")
]

In [92]:
# ---- General model settings ----
model_types = ["SP", "AP", "TP"]
save_results, verbose = True, True

# One inner list per subject; each entry is a run file stem (no extension).
sample_files = [
    ["sub-01/sub-01_ses-1_run-{}".format(run) for run in range(1, 6)]
]
subject_list = range(len(sample_files))

# One EEG file stem per subject, index-aligned with `sample_files`.
eeg_files = ["sub-01/sub-01_sbl"]

# Electrode name -> numeric electrode id.
elec_of_interest = dict(
    FCz=47, FC2=46, FC4=45,
    Cz=48, C2=49, C4=50,
    C6=51, CPz=32, CP2=56,
    CP4=55, CP6=54,
)

# Numeric trial code -> human readable description.
trial_coding_lookup = {
    11: "First Regime - Low Intensity",
    12: "First Regime - High Intensity",
    21: "Second Regime - Low Intensity",
    22: "Second Regime - High Intensity",
    33: "Catch Trial",
}

# ---- Regression settings (the prototyping cell below overrides some) ----
sampling_rate = 0.3
inter_stim_interval = np.array((-0.05, 0.65))
reg_model_type = "OLS"

# Progress line printed after every fitted model.
template = "Subject {} | Block {} | Electrode {} | Regressor: {} | Time: {:.2f}"

In [93]:
# Reduced settings for quick prototyping: these deliberately override the
# values from the general settings cell.
eoi = dict(Cz=48, C2=49, C4=50)
inter_stim_interval = np.array((0.1, 0.5))
sampling_rate = 0.1

**Loops to Run**
- ~15 subjects, 5 blocks, ~10 electrodes
- ~30 different models, ~359 sample points per event

**TODO**
- [x] HDF5 logging
- [ ] Check whether $\tau$ is working
- [ ] Get CS running
- [ ] Get HMM running
- [ ] Write a general function to get regressors

In [94]:
import h5py
import tables

class ExperimentLog():
    """HDF5-backed store for trial-by-trial regression results.

    Constructing the log (re)creates the HDF5 file at ``save_fname`` and
    pre-populates it with one empty group per subject / block / electrode::

        /subject_<i>/block_<j>/elec_<name>

    where ``i`` and ``j`` are zero-based. ``dump_data`` then stores one array
    per regressor inside the matching electrode group.

    Parameters
    ----------
    num_subjects : int
        Number of ``subject_<i>`` groups to create.
    num_blocks : int
        Number of ``block_<j>`` groups to create under every subject.
    elec_of_interest : dict
        Container whose keys are electrode names; only the names are used.
    save_fname : str
        Path of the HDF5 file. An existing file at that path is deleted.

    Raises
    ------
    ValueError
        If ``save_fname`` is not provided.
    """

    def __init__(self, num_subjects, num_blocks, elec_of_interest,
                 save_fname=None):
        if save_fname is None:
            # Fail early with a clear message instead of an opaque TypeError
            # from os.path.exists(None).
            raise ValueError("save_fname is required: path of the HDF5 log file")
        self.save_fname = save_fname

        # Make sure the target folder exists, then start from a clean file.
        out_dir = os.path.dirname(self.save_fname)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        if os.path.exists(self.save_fname):
            os.remove(self.save_fname)

        # Initialize all the groups in the hdf5 object. The context manager
        # closes (and thereby flushes) the file even if group creation fails.
        with tables.open_file(self.save_fname, mode="a") as h5f:
            for i in range(num_subjects):
                # Use the loop index here - the original referenced an
                # unrelated global ``subject_id``.
                h5f.create_group("/", "subject_" + str(i))
                for j in range(num_blocks):
                    h5f.create_group("/subject_" + str(i), "block_" + str(j))
                    for elec_name in elec_of_interest:
                        h5f.create_group("/subject_" + str(i) + "/block_" + str(j),
                                         "elec_" + elec_name)

    @staticmethod
    def _group_path(subject_id, block_id, elec_name):
        """Return the path (relative to the file root) of an electrode group."""
        return ("subject_" + str(subject_id)
                + "/block_" + str(block_id)
                + "/elec_" + elec_name)

    def dump_data(self, subject_id, block_id,
                  elec_name, regressor_type, results):
        """Store ``results`` as a new array inside an electrode group.

        Parameters
        ----------
        subject_id, block_id : int
            Zero-based indices selecting ``subject_<id>/block_<id>``.
        elec_name : str
            Electrode name selecting the ``elec_<name>`` group.
        regressor_type : str
            Name of the array node created inside the group.
        results : array-like with ``dtype`` and ``shape`` attributes
            Data to write; the stored array copies its dtype and shape.
        """
        group_name = self._group_path(subject_id, block_id, elec_name)

        # The file is closed on exit of the ``with`` block even when the
        # group lookup or array creation raises.
        with tables.open_file(self.save_fname, mode="a") as h5f:
            group = h5f.root[group_name]
            atom = tables.Atom.from_dtype(results.dtype)
            node = h5f.create_carray(group, regressor_type, atom,
                                     results.shape)
            node[...] = results[...]

In [98]:
# Create Logging object. The group layout is derived from `sample_files` so
# that it always covers every subject/block the processing loop iterates over
# (blocks are sized by the subject with the most runs).
log = ExperimentLog(num_subjects=len(sample_files),
                    num_blocks=max(len(runs) for runs in sample_files),
                    elec_of_interest=eoi,
                    save_fname=results_dir + "trial_by_trial.hdf5")

In [99]:
# Peek at the freshly created group layout. The file is opened read-only
# inside a context manager so that no handle stays open while later cells
# re-open the same file to append results.
with tables.open_file(results_dir + "trial_by_trial.hdf5", mode="r") as h5f:
    print(repr(h5f.root["subject_0/block_0"]))

/subject_0/block_0 (Group) ''
  children := ['elec_C2' (Group), 'elec_C4' (Group), 'elec_Cz' (Group)]

In [None]:
# Trial-by-trial analysis: for every subject / block / electrode fit a null
# model plus one model per surprise regressor and log each result to HDF5.
#
# NOTE(review): `sio` is not imported in the notebook's import cell; it is
# presumably provided by one of the `utils` star imports - confirm, or add
# `import scipy.io as sio` to the import cell.

# Loop over SUBJECTS
for subject_id in subject_list:
    eeg_data = sio.loadmat(data_dir + eeg_files[subject_id] + ".mat")

    # Loop over BLOCKS
    for block_id in range(len(sample_files[subject_id])):
        # Load in the specific trial/stimuli sequence
        sample, meta = load_obj(data_dir + sample_files[subject_id][block_id] + ".mat")
        seq, hidden = sample[:, 2], sample[:, 1]

        # Compute Surprise Regressors for every model type. Keys use
        # underscores throughout so that dict lookups and HDF5 node names
        # agree. The third value returned by `sbl_cd.main` (bound to
        # CD_CS_* in earlier versions) is not logged yet.
        regressors = {}
        for model_type in model_types:
            ps, bs, _cs = sbl_cd.main(seq, hidden, tau=0, model_type=model_type)
            regressors["CD_PS_" + model_type] = ps
            regressors["CD_BS_" + model_type] = bs

        # Loop over ELECTRODES OF INTEREST
        for elec_name, elec_id in eoi.items():
            # Get the block- and electrode-specific eeg data
            y_elec, y_tw = get_electrode_data(eeg_data, block_id, elec_id,
                                              inter_stim_interval, sampling_rate,
                                              verbose=False)

            # Fit the null model once per block/electrode pair.
            # (CD_PS_AP only acts as filler - parallelize does not work with None)
            start = time.time()
            null_model_lme = parallelize_over_samples(y_elec,
                                                      regressor=regressors["CD_PS_AP"],
                                                      reg_model_type="Null")
            t_time = time.time() - start

            log.dump_data(subject_id, block_id, elec_name, "Null", null_model_lme)
            log.dump_data(subject_id, block_id, elec_name, "Sample_Points", y_tw)
            print(template.format(subject_id+1, block_id+1, elec_name, "Null", t_time))

            # Loop over Different SURPRISE REGRESSORS
            for regressor_type, regressor in regressors.items():
                start = time.time()
                results = parallelize_over_samples(y_elec, regressor, reg_model_type)
                t_time = time.time() - start

                log.dump_data(subject_id, block_id, elec_name, regressor_type, results)
                print(template.format(subject_id+1, block_id+1, elec_name, regressor_type, t_time))

Subject 1 | Block 1 | Electrode Cz | Regressor: Null | Time: 80.56
Subject 1 | Block 1 | Electrode Cz | Regressor: CD-PS-SP | Time: 85.11
Subject 1 | Block 1 | Electrode Cz | Regressor: CD-BS-SP | Time: 85.83




In [None]:
# Heatmap of log model evidences (rows: regressors, columns: blocks).
#
# NOTE(review): `log_model_evidences` is not defined in any visible cell of
# this notebook. This cell presumably expects a dict mapping regressor name
# -> per-block log model evidences; confirm and define it before running.
heat_data = np.array(list(log_model_evidences.values()))

# Derive the tick labels from the data itself so they cannot drift out of
# sync with the number of regressors (rows) or blocks (columns).
y_labels = list(log_model_evidences.keys())
x_labels = ["Block {}".format(block + 1) for block in range(heat_data.shape[1])]

fig, ax = plt.subplots()
im = ax.imshow(heat_data, cmap="Reds")
# Without a colorbar the colours carry no scale.
fig.colorbar(im, ax=ax)

# We want to show all ticks...
ax.set_xticks(np.arange(len(x_labels)))
ax.set_yticks(np.arange(len(y_labels)))
# ... and label them with the respective list entries
ax.set_xticklabels(x_labels)
ax.set_yticklabels(y_labels)

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

ax.set_title("Log Model Evidences: FCz");