# GLM data prep

Create a table of data for each recording.
Each row is a millisecond (data only from bouts).
Variables include speeds + shifted speeds, curvature of the track, firing rate...


In [2]:
# imports
import sys
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import pandas as pd
from scipy import interpolate
from fcutils.progress import track

# Make the project package importable: the current working directory and a
# local checkout of the repository are appended to the module search path so
# that `analysis.ephys.utils` (imported below) can be resolved.
# NOTE(review): the checkout path is an absolute, machine-specific location;
# the notebook will not import on another machine without editing it.
sys.path.append("./")
sys.path.append(r"C:\Users\Federico\Documents\GitHub\pysical_locomotion")


from analysis.ephys.utils import get_recording_names, get_data, get_session_bouts, trim_bouts

# Absolute, machine-specific folder; presumably where the per-recording
# tables are meant to be written - TODO confirm against the saving code.
save_folder = Path(r"D:\Dropbox (UCL)\Rotation_vte\Locomotion\analysis\ephys")


Connecting root@127.0.0.1:3306


## Parameters

In [3]:
# --- firing rate ---
# value handed to `calc_firing_rate` as its `dt` argument
firing_rate_gaussian = 100 # width in ms

# --- track ---
# only every N-th row of the track table is kept when it is loaded
track_downsample_factor = 25

# --- curvature look-ahead ---
# furthest distance ahead at which curvature is sampled, and the step
# between consecutive sample distances
curvature_horizon = 30
curvature_sampling_spacing = 5

# sample distances from 0 up to and including the horizon
curv_sample_points = np.arange(
    0,
    curvature_horizon + curvature_sampling_spacing,
    curvature_sampling_spacing,
)

### Track curvature
For each position along the track, sample the track curvature at N positions further ahead.

In [4]:
# Read the track description, keep every `track_downsample_factor`-th row
# and renumber the rows from 0 so labels and positions coincide.
track_data = (
    pd.read_json(r"C:\Users\Federico\Documents\GitHub\pysical_locomotion\analysis\ephys\track.json")
    .iloc[::track_downsample_factor]
    .reset_index(drop=True)
)

# S value of the last track sample (end of the track)
S_f = track_data["S"].values[-1]
track_data

Unnamed: 0,S,X,Y,curvature,width,θ
0,0.000000,20.005419,32.926672,1.397851e-08,3.600000,-1.562696
1,0.034248,20.005696,32.892425,2.407125e-04,3.596612,-1.562692
2,0.068464,20.005974,32.858210,4.834723e-04,3.590541,-1.562680
3,0.102616,20.006251,32.824059,7.303402e-04,3.584550,-1.562659
4,0.136672,20.006529,32.790004,9.834296e-04,3.578645,-1.562630
...,...,...,...,...,...,...
3996,265.273224,3.999664,7.342840,3.530815e-05,3.600000,-1.570627
3997,265.334935,3.999675,7.281129,2.648609e-05,3.600000,-1.570625
3998,265.396642,3.999685,7.219422,1.766084e-05,3.600000,-1.570624
3999,265.458347,3.999696,7.157717,8.831522e-06,3.600000,-1.570623


In [5]:
# For every track sample, look up the curvature `k` units of S further along
# the track, for k = 0 .. curvature_horizon. Two columns are added per k:
#   k_{k}   -> curvature at the first track sample with S >= s + k
#   idx_{k} -> row label of that sample
# Both are NaN when s + k reaches or passes the end of the track (S_f).
k_shifts = np.arange(curvature_horizon + 1)

track_S = track_data.S.values
if np.any(np.diff(track_S) < 0):
    # searchsorted below is only equivalent to "first row with S >= target"
    # when S never decreases
    raise ValueError("track_data.S must be sorted in increasing order")
track_curvature = track_data["curvature"].values
track_labels = track_data.index.values
last_position = len(track_S) - 1

shifted_curvature, shifted_index = {}, {}
for k in k_shifts:
    target_S = track_S + k
    within_track = target_S < S_f

    # first position whose S is >= target; clipped only so that the fancy
    # indexing is valid for the out-of-track entries, which are masked to NaN
    first_position = np.minimum(
        np.searchsorted(track_S, target_S, side="left"), last_position
    )
    shifted_curvature[f"k_{k}"] = np.where(
        within_track, track_curvature[first_position], np.nan
    )
    shifted_index[f"idx_{k}"] = np.where(
        within_track, track_labels[first_position], np.nan
    )

# same key order as before: all k_* entries, then all idx_* entries
curv_shifted = {**shifted_curvature, **shifted_index}

# drop columns left over from a previous run of this cell, otherwise
# DataFrame.insert raises because the column already exists
track_data = track_data.drop(
    columns=[name for name in curv_shifted if name in track_data.columns]
)
for column_name, column_values in curv_shifted.items():
    track_data.insert(2, column_name, column_values)
track_data.head()

## Process data

In [None]:

def upsample_frames_to_ms(var, fps=60):
    """
        Linearly interpolates a variable sampled once per video frame
        onto a 1 ms time grid.

        Arguments:
            var: sequence with one value per frame.
            fps: frame rate at which `var` was sampled (default: 60).

        Returns:
            np.ndarray with one value per millisecond covering the
            half-open interval [0, (len(var) - 1) / fps) seconds.
            Empty when `var` has fewer than two samples, since there is
            nothing to interpolate between.
    """
    n_frames = len(var)
    if n_frames < 2:
        return np.array([], dtype=float)

    t_frames = np.arange(n_frames) / fps
    f = interpolate.interp1d(t_frames, var)

    # Build the millisecond grid from an integer sample count: np.arange with
    # a fractional step (1/1000) can gain or lose a sample through rounding.
    n_ms = int(np.ceil((n_frames - 1) * 1000 / fps))
    t_ms = np.arange(n_ms) / 1000
    return f(t_ms)


In [None]:

def gaussian(x, s):
    """
        Value at `x` of a zero-mean normal probability density
        with standard deviation `s`.
    """
    normalisation = np.sqrt(2.0 * np.pi * s ** 2)
    exponent = -(x ** 2) / (2.0 * s ** 2)
    return 1.0 / normalisation * np.exp(exponent)


def calc_firing_rate(spikes_train: np.ndarray, dt: int = 10):
    """
        Computes the firing rate (spikes / second) given a spikes train
        (whether there is a spike or not at each ms), by smoothing it with
        a gaussian kernel with standard deviation = dt/2 [dt is in ms].

        The kernel is sampled once per millisecond over +/- 2*dt (i.e.
        +/- 4 standard deviations) so that its width in samples matches the
        1 ms bins of the spikes train, and it is normalised to unit sum so
        that the number of spikes is preserved by the smoothing.

        Arguments:
            spikes_train: 1D array with one entry per millisecond.
            dt: kernel width parameter in ms, must be > 0.

        Returns:
            np.ndarray of the same length as `spikes_train`.

        Raises:
            ValueError: if `dt` is not positive.
    """
    if dt <= 0:
        raise ValueError(f"dt must be positive, got {dt}")

    spikes_train = np.asarray(spikes_train, dtype=float)
    if spikes_train.size == 0:
        return np.zeros(0)

    # create kernel at 1 ms resolution, normalised to unit area
    sigma = dt / 2
    half_width = int(np.ceil(2 * dt))
    offsets_ms = np.arange(-half_width, half_width + 1)
    k = np.exp(-(offsets_ms ** 2) / (2.0 * sigma ** 2))
    k /= k.sum()

    # get firing rate; take the centred part of the full convolution by hand
    # because mode="same" returns the kernel's length when the train is
    # shorter than the kernel
    smoothed = np.convolve(spikes_train, k, mode="full")
    frate = (
        smoothed[half_width : half_width + len(spikes_train)] * 1000
    )  # times 1000 to go from ms to seconds
    return frate


def make_shuffled_units(
    units,
    n_shuffles=100,
    min_shift_ms=10 * 1000,
    max_shift_ms=30 * 1000,
    rng=None,
):
    """
        For each unit make shuffled copies in which
        the firing rate is offset by some amount, looping
        around the start/end of the session.

        Arguments:
            units: DataFrame with (at least) the columns `unit_id` and
                `firing_rate_ms` (one array per unit).
            n_shuffles: number of shuffled copies per unit.
            min_shift_ms, max_shift_ms: the offset of each copy is drawn
                uniformly from [min_shift_ms, max_shift_ms) samples.
            rng: optional np.random.Generator for reproducible offsets;
                numpy's global random state is used when omitted.

        Returns:
            DataFrame with the original rows followed by the shuffled
            copies (named "<unit_id>_shuffle_<n>"), with a fresh index.
    """
    draw_shift = np.random.randint if rng is None else rng.integers

    shuffled_units = dict(unit_id=[], firing_rate_ms=[])
    for unit_id, frate in zip(units["unit_id"], units["firing_rate_ms"]):
        frate = np.asarray(frate)
        for n in range(n_shuffles):
            shift = int(draw_shift(min_shift_ms, max_shift_ms))
            shuffled_units["unit_id"].append(f"{unit_id}_shuffle_{n}")
            # np.roll wraps around, also when shift exceeds the trace length
            shuffled_units["firing_rate_ms"].append(np.roll(frate, -shift))

    # merge units and shuffle units in a single dataframe
    shuffled_units = pd.DataFrame(shuffled_units)
    units = pd.concat([units, shuffled_units], ignore_index=True)
    return units

NameError: name 'np' is not defined

#### main data loader

In [None]:
def _shift_forward(signal, shift_ms):
    """
        Returns `signal` advanced by `shift_ms` samples (sample i holds the
        value originally at i + shift_ms); the tail is padded by holding the
        last value so that the length is unchanged.
    """
    signal = np.asarray(signal)
    if len(signal) == 0:
        return signal.copy()
    shift = min(int(shift_ms), len(signal))
    return np.hstack([signal[shift:], np.full(shift, signal[-1])])


def load_get_recording_data(REC):
    """
        Loads one recording and prepares millisecond-resolution variables.

        Steps:
            1. load units and tracking data with `get_data` and the bouts
               with `get_session_bouts` (two calls, the second one with
               direction="inbound");
            2. upsample body speed and angular velocity from frames to ms;
            3. build copies of both signals shifted 250, 500 and 1000 ms
               into the future;
            4. compute a firing rate trace (one value per ms) for each unit
               whose `brain_region` is one of the MOs labels listed below;
            5. append shuffled copies of the units.

        Returns:
            units, body, bouts (both sets of bouts concatenated), v, omega,
            v_250ms, omega_250ms, v_500ms, omega_500ms, v_1000ms,
            omega_1000ms
    """
    # load data
    units, left_fl, right_fl, left_hl, right_hl, body = get_data(REC)

    out_bouts = get_session_bouts(REC, complete=None)
    in_bouts = get_session_bouts(REC, direction="inbound", complete=None)

    # upsample from frames to ms
    v = upsample_frames_to_ms(body.speed)
    omega = upsample_frames_to_ms(body.thetadot)

    # get time shifted signals. v and omega have one sample per ms at this
    # point, so the shift is expressed in ms (not in 60 fps frames)
    v_250ms = _shift_forward(v, 250)
    omega_250ms = _shift_forward(omega, 250)
    v_500ms = _shift_forward(v, 500)
    omega_500ms = _shift_forward(omega, 500)
    v_1000ms = _shift_forward(v, 1000)
    omega_1000ms = _shift_forward(omega, 1000)

    # get unit firing rate in milliseconds
    # .copy() so that adding a column below does not write into a slice
    units = units.loc[
        units.brain_region.isin(["MOs", "MOs1", "MOs2/3", "MOs5", "MOs6a", "MOs6b"])
    ].copy()
    n_ms = len(v)
    frates = []
    for spikes_ms in units.spikes_ms:
        spike_idx = np.round(np.asarray(spikes_ms)).astype(np.int64)
        # ignore spikes outside the tracked time window: they have no
        # matching kinematics and would raise (or wrap, if negative)
        spike_idx = spike_idx[(spike_idx >= 0) & (spike_idx < n_ms)]

        spikes_train = np.zeros(n_ms)
        spikes_train[spike_idx] = 1
        frates.append(calc_firing_rate(spikes_train, dt=firing_rate_gaussian))
    units["firing_rate_ms"] = frates
    units = units[["unit_id", "firing_rate_ms"]]

    # add shuffled units
    units = make_shuffled_units(units)

    return units, body, pd.concat([out_bouts, in_bouts]), v, omega, v_250ms, omega_250ms, v_500ms, omega_500ms, v_1000ms, omega_1000ms

### Collect data for all bouts

In [None]:
# pick the recording to process
recordings = get_recording_names()
doing = 0
REC = recordings[doing]

(
    units,
    body,
    bouts,
    v,
    omega,
    v_250ms,
    omega_250ms,
    v_500ms,
    omega_500ms,
    v_1000ms,
    omega_1000ms,
) = load_get_recording_data(REC)

# one (initially empty) list per column of the output table:
# kinematic variables first, then curvature samples, then one per unit
data = dict(
    v=[],
    dv_250ms=[],
    dv_500ms=[],
    dv_1000ms=[],
    omega=[],
    domega_250ms=[],
    domega_500ms=[],
    domega_1000ms=[],
)
data.update({f"curv_{k}cm": [] for k in curv_sample_points})
data.update({unit: [] for unit in units.unit_id.values})

data.keys()


3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484
3224484 3224484


In [None]:
# Fill `data` with one row per millisecond of every bout.

# millisecond-resolution signals, keyed by the column they populate
kinematic_columns = {
    "v": v,
    "dv_250ms": v_250ms,
    "dv_500ms": v_500ms,
    "dv_1000ms": v_1000ms,
    "omega": omega,
    "domega_250ms": omega_250ms,
    "domega_500ms": omega_500ms,
    "domega_1000ms": omega_1000ms,
}

# assumed to be sorted in increasing order (needed by searchsorted below)
track_S = track_data.S.values

for _, bout in track(bouts.iterrows(), total=len(bouts)):
    S = upsample_frames_to_ms(bout.s)
    start_ms = int(bout.start_frame / 60 * 1000)
    end_ms = start_ms + len(S)
    bout_window = slice(start_ms, end_ms)

    for column_name, signal in kinematic_columns.items():
        data[column_name].extend(signal[bout_window])

    for _, unit in units.iterrows():
        data[unit.unit_id].extend(unit.firing_rate_ms[bout_window])

    # position of the track sample closest to each S value, computed once per
    # bout and reused for every look-ahead distance. The two neighbours that
    # bracket each value are compared; ties go to the lower position.
    upper = np.clip(np.searchsorted(track_S, S), 1, len(track_S) - 1)
    lower = upper - 1
    nearest = np.where(S - track_S[lower] <= track_S[upper] - S, lower, upper)
    for k_cm in curv_sample_points:
        data[f"curv_{k_cm}cm"].extend(track_data[f"k_{k_cm}"].values[nearest])

    # every column must have received the same number of samples
    lengths = set([len(column) for column in data.values()])
    if len(lengths) > 1:
        lns = {name: len(column) for name, column in data.items()}
        raise ValueError(f"Lengths of data are not the same:\n{lns}")

{k:len(v) for k,v in data.items()}
