This notebook runs the event segmentation analyses.

## Import libraries

In [None]:
import numpy as np
import pandas as pd
import brainiak.eventseg.event
import hypertools as hyp
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import cdist
import pickle

def score_model(mcorr, model, k, s):
    """Score an event segmentation by its within/across correlation ratio.

    A timepoint belongs to an event when its entry in ``model.segments_[0]``
    rounds to 1. The score is the mean of ``mcorr`` over timepoint pairs that
    share an event divided by the mean over pairs that do not, minus the
    penalty ``k / s``.

    Parameters
    ----------
    mcorr : array indexed by (timepoint, timepoint), e.g. a correlation matrix
    model : object whose ``segments_[0]`` is a (timepoints x events) array
    k : numerator of the penalty term
    s : denominator of the penalty term

    Returns
    -------
    The penalised within/across ratio.
    """
    segments = np.asarray(model.segments_[0])
    # Hard 0/1 assignment matrix, kept in the dtype of the segment matrix.
    assigned = (np.round(segments) == 1).astype(segments.dtype)
    # same_event[a, b] is True when timepoints a and b share an event.
    same_event = np.dot(assigned, assigned.T).astype(bool)
    within = mcorr[same_event].mean()
    across = mcorr[~same_event].mean()
    return within / across - k / s
    
def reduce_model(m, ev):
    """Collapse a timepoint-level model into one averaged row per event.

    Parameters
    ----------
    m : (timepoints x features) array
    ev : object whose ``segments_[0]`` is a (timepoints x events) array; a
        timepoint belongs to an event when its entry rounds to 1

    Returns
    -------
    Array with one row per event holding the mean of the rows of ``m``
    assigned to that event.
    """
    in_event = np.round(ev.segments_[0]) == 1
    event_rows = []
    # Each column of the assignment matrix selects the timepoints of one event.
    for event_mask in in_event.T:
        event_rows.append(m[event_mask, :].mean(0))
    return np.array(event_rows)

# IPython magic: show matplotlib figures inline in the notebook output.
%matplotlib inline

## Setting paths

In [None]:
# Relative path to the directory holding the processed data files that the
# cells below load from and save to.
datadir = '../../../data/processed/'

## Load data

In [None]:
# Load the video model and the collection of recall models that are stored
# together in one .npy file.
# allow_pickle=True is needed on NumPy >= 1.16.3 whenever the file holds an
# object array; without it np.load raises ValueError.
# NOTE(review): the file is presumably an object array, since two
# differently-shaped items are unpacked from it -- confirm.
# Unpickling can execute arbitrary code, so only load trusted files this way.
video_model, recall_models = np.load(datadir + 'models_t100_v50_r10.npy', allow_pickle=True)

## Find optimal k for video model

In [None]:
# For every candidate number of events (1-50), fit an event segmentation model
# to the video model and record the mean correlation between timepoints within
# the same event, across different events, and their ratio.
m = []
corrmat = np.corrcoef(video_model)
for events in range(1, 51):
    # Use the fully qualified name: only `import brainiak.eventseg.event` is in
    # scope, so a bare `event` would be a NameError.
    ev = brainiak.eventseg.event.EventSegment(events)
    ev.fit(video_model)
    # Hard-assign each timepoint to the event(s) whose entry rounds to 1.
    t = np.round(ev.segments_[0]).astype(int)
    # mask[a, b] is True when timepoints a and b share an event; t @ t.T equals
    # the sum of the outer products of the columns of t.
    mask = np.dot(t, t.T).astype(bool)
    within = corrmat[mask].mean()
    across = corrmat[~mask].mean()
    m.append((within, across, within/across))

## Plot the within and across correlation values as a function of k

In [None]:
# Plot the within- and across-event correlations against the number of events.
# Entry j of m was fit with k = j + 1, so k is passed explicitly; without it
# matplotlib plots against the 0-based list index and the k axis is off by one.
k_values = np.arange(1, len(m) + 1)
plt.plot(k_values, [entry[0] for entry in m], label='Within-event correlation')
plt.plot(k_values, [entry[1] for entry in m], label='Across-event correlation')
plt.legend()
plt.ylabel('Correlation')
plt.xlabel('Number of events (k)')

## Plot the within/across correlation ratio as a function of k

In [None]:
# Build a penalised within/across score for each candidate and pick its peak.
# NOTE(review): min(m[1]) is the smallest of the three values stored in the
# second entry of m (within, across, ratio), not a minimum taken over all
# entries -- confirm this offset is what was intended.
# The trailing [1:] drops the score computed from the first entry of m.
t=list(map(lambda x: x[0]/(x[1]-min(m[1])), m))[1:]
# Normalise by the maximum; dividing the list by a NumPy scalar yields an ndarray.
t/=np.max(t)
# Subtract a penalty that grows linearly with position in t (1/250 per step).
for i, v in enumerate(t):
    t[i]-=i/250
plt.plot(t, label='Within-event correlation')
plt.ylabel('abs(within/across) ratio')
plt.xlabel('Number of events (k)')
# NOTE(review): np.argmax returns a position in t, and t[i] was derived from
# m[i+1]. If m[j] corresponds to k = j + 1, this value is k - 2 rather than k,
# and the plot's x axis is shifted the same way -- confirm before relying on it.
maxk_video = np.argmax(t)

## Fit event segmentation model to video

In [None]:
# Fit an event segmentation model to the video model with maxk_video events,
# then average the video model within each event (one row per event).
ev = brainiak.eventseg.event.EventSegment(maxk_video)
ev.fit(video_model)
video_events = reduce_model(video_model, ev)

## Save models

In [None]:
# with open('../data/video_eventseg_model', 'wb') as f:
#     pickle.dump(ev, f)
# np.save('../data/video_events', video_events)

## Get video event times

In [None]:
# For every event (column of the segment matrix), record the first and last
# timepoint index whose entry rounds to 1, then save the (start, end) pairs.
rounded_segments = np.round(ev.segments_[0])
video_event_times = [
    (idx[0], idx[-1])
    for idx in (np.where(col == 1)[0] for col in rounded_segments.T)
]
np.save(datadir + 'video_event_times', video_event_times)

## Fit event segmentation model to recall

In [None]:
# For each recall model (`sub`), choose the number of events k (2-29) that
# maximises the penalised within/across-event correlation ratio.
ks = list(range(2, 30))
maxk = []
for i, sub in enumerate(recall_models):
    mcorr = np.corrcoef(sub)
    cs = []
    for k in ks:
        ev = brainiak.eventseg.event.EventSegment(k)
        ev.fit(sub)
        # Reuse the shared scoring helper (same formula, penalty k/50) rather
        # than repeating its body inline.
        cs.append(score_model(mcorr, ev, k, 50))
    # Named best_k so the list `m` built by the video cells is not overwritten.
    best_k = ks[np.argmax(cs)]
    maxk.append(best_k)
    print(i, best_k)

## Fit model to recall using best k

In [None]:
# Refit every recall model with its selected number of events and keep, per
# model: the event-averaged model, the (first, last) timepoint index of each
# event, and the fitted segmentation object.
recall_events = []
recall_event_times = []
recall_eventseg_models = []
for k, recall_model in zip(maxk, recall_models):
    ev = brainiak.eventseg.event.EventSegment(k)
    ev.fit(recall_model)
    recall_eventseg_models.append(ev)
    recall_events.append(reduce_model(recall_model, ev))
    # A timepoint belongs to an event when its segment entry rounds to 1.
    rounded_segments = np.round(ev.segments_[0])
    recall_event_times.append([
        (idx[0], idx[-1])
        for idx in (np.where(col == 1)[0] for col in rounded_segments.T)
    ])

## Create average recall model

In [None]:
# Match every recall event to the video event it correlates with most strongly
# (cdist 'correlation' is a distance, so 1 - distance is the correlation).
# dtype=object keeps one label array per recall model even when the models have
# different numbers of events; NumPy >= 1.24 raises on such ragged input
# without it.
matches = np.array(
    [np.argmax(1 - cdist(video_events, r, 'correlation'), 0) for r in recall_events],
    dtype=object,
)
# Collect, for each video event, every recall event vector matched to it.
avg_recalls = [[] for _ in video_events]
for match, r in zip(matches, recall_events):
    for i, video_idx in enumerate(match):
        avg_recalls[video_idx].append(r[i, :])
# Average the matched recall events. Video events that nothing matched get a
# zero vector sized from video_events rather than a hard-coded 100.
n_features = video_events.shape[1]
avg_recall_events = np.array([
    np.mean(r, 0) if len(r) > 0 else np.zeros((n_features,))
    for r in avg_recalls
])
# avg_recall_events = np.array([a.reshape(100,) for a in avg_recall_events if a.shape==(100,)])

## Create 2D embeddings

In [None]:
# Seed NumPy's global RNG before the reduction (presumably so the embedding is
# reproducible across runs).
np.random.seed(10)
# Reduce all recall event models, then the video events, then the average
# recall events, to 2 dimensions with UMAP in a single call.
embeddings = hyp.reduce(
    recall_events + [video_events, avg_recall_events],
    reduce='UMAP',
    ndims=2,
)

## Save models

In [None]:
# np.save(datadir+'avg_recall_events', avg_recall_events)
# np.save(datadir+'embeddings', [embeddings[:-2], embeddings[-2], embeddings[:-1]])
# np.save(datadir+'labels', matches)
# np.save(datadir+'recall_events', recall_events)
# np.save(datadir+'recall_event_times', recall_event_times)
# with open(datadir+'recall_eventseg_models', 'wb') as f:
#     pickle.dump(recall_eventseg_models, f)