# GLM data prep

Create one table of data for each recording.
Each row corresponds to one millisecond (only data from within bouts is included).
Variables include speeds and shifted speeds, the curvature of the track, firing rate, etc.


In [1]:
# imports
import sys
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import pandas as pd
from scipy import interpolate
from fcutils.progress import track
from loguru import logger
from scipy import stats
from fcutils.maths.signals import rolling_mean
import warnings
# silence pandas PerformanceWarning messages for the rest of the session
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# extend the module search path (presumably so the `analysis` package imported below resolves)
# NOTE(review): the second entry is an absolute, machine-specific path
sys.path.append("./")
sys.path.append(r"C:\Users\Federico\Documents\GitHub\pysical_locomotion")


from analysis.ephys.utils import get_recording_names, get_data, get_session_bouts, trim_bouts

# NOTE(review): not referenced in the visible part of the notebook; presumably an output folder — confirm
save_folder = Path(r"D:\Dropbox (UCL)\Rotation_vte\Locomotion\analysis\ephys")

# folder the per-bout .feather files are read from and the per-recording .h5 tables are written to
cache = Path(r"D:\GLM\data")

# region name passed to get_recording_names to select which recordings to process
REGION = "MOs"
recordings = get_recording_names(region=REGION)
recordings  # bare expression: displayed as the cell output

Connecting root@127.0.0.1:3306


## Parameters

In [2]:
# --- curvature sample points ---
# Evenly spaced points from 0 up to and including `curvature_horizon`,
# `curvature_sampling_spacing` apart.
curvature_sampling_spacing = 5
curvature_horizon = 30
curv_sample_points = np.arange(
    0,
    curvature_horizon + curvature_sampling_spacing,  # +spacing so the horizon itself is included
    curvature_sampling_spacing,
)

# --- bout selection ---
# bouts with |s[-1] - s[0]| below this value are dropped
minimum_bout_ds = 100

# --- signal processing ---
firing_rate_gaussian = 250  # width in ms
track_downsample_factor = 25

### Collect data for all bouts

In [6]:
# For every recording: gather the cached per-bout tables (one .feather file per
# bout), stack them into a single table and store it as `<REC>_bouts.h5` in
# `cache`. Recordings that already have an .h5 file, that are missing bout
# files, or that yield too few rows are skipped.
recordings = get_recording_names(region=REGION)

for REC in recordings:
    savepath = cache / f"{REC}_bouts.h5"
    if savepath.exists():
        # output already produced by a previous run
        print(f"{REC}_bouts.h5 already exists")
        continue

    print(f"Doing   {REC}")

    bouts = trim_bouts(get_session_bouts(REC, complete=None, direction=None))
    n = len(bouts)  # bout count before the length filter (kept: notebook global)

    # keep only bouts whose absolute change in `s` from start to end is large enough
    bouts["ds"] = [abs(b.s[-1] - b.s[0]) for _, b in bouts.iterrows()]
    bouts = bouts.loc[bouts.ds >= minimum_bout_ds]

    # per-bout tables for this recording; files with "xy." in their name are excluded
    bouts_files = [f for f in cache.glob(f"{REC}_bout_*.feather") if "xy." not in f.name]

    if len(bouts_files) < len(bouts):
        print(f"    Not all bouts were saved for {REC}: {len(bouts_files)}/{len(bouts)}")
        continue

    bouts_data = []
    for i, bout in bouts.iterrows():
        # build the path outside the `try` so it is always defined when logging a failure
        f = cache / f"{REC}_bout_{bout.start_frame}.feather"
        try:
            _data = pd.read_feather(f)
        except Exception as err:
            # `Exception` (not a bare except) so Ctrl-C / kernel interrupts still propagate
            logger.warning(f"Failed to read file for bout {i}: {f} ({err})")
            continue
        bouts_data.append(_data.astype(np.float32))

    # only save recordings for which every selected bout could be loaded
    if not bouts_data:
        print(f"    No bouts were saved for {REC}")
        continue
    if len(bouts_data) < len(bouts):
        print(f"    Could not load all bouts for {REC}: {len(bouts_data)}/{len(bouts)}")
        continue
    bouts_data = pd.concat(bouts_data)

    if bouts_data.shape[0] < 10000:
        print(f"    Too few frames for {REC}: {bouts_data.shape[0]}")
        continue

    print(f" Saving data (shape: {bouts_data.shape})")
    bouts_data.to_hdf(savepath, key="data")
    print(" Saved all data")


FC_220408_BAA1101192_hairpin_bouts.h5 already exists
FC_220409_BAA1101192_hairpin_bouts.h5 already exists
FC_220410_BAA1101192_hairpin_bouts.h5 already exists
Doing   FC_220411_BAA1101192_hairpin
 Saving data (shape: (108126, 4045))
 Saved all data
FC_220412_BAA1101192_hairpin_bouts.h5 already exists
FC_220413_BAA1101192_hairpin_bouts.h5 already exists
FC_220414_BAA1101192_hairpin_bouts.h5 already exists
FC_220415_BAA1101192_hairpin_bouts.h5 already exists
FC_220432_BAA1101192_hairpin_bouts.h5 already exists
FC_220433_BAA1101192_hairpin_bouts.h5 already exists
FC_220434_BAA1101192_hairpin_bouts.h5 already exists
FC_220435_BAA1101192_hairpin_bouts.h5 already exists
