# Tuning curves data generation
Get data saved by the GLM data generation script. 
Each recording has a .h5 file where each row is a millisecond and each column is the value of a behavioral variable or the firing rate of a (shuffled/real) unit. 

This notebook bins the data based on the values of the behavioral variables and then takes chunks of equal duration from each bin.

In [1]:
# imports
import sys

from pathlib import Path
import numpy as np
import pandas as pd
import warnings 
import matplotlib.pyplot as plt

# silence pandas PerformanceWarning messages for the rest of the notebook
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# make the project package importable: the current directory plus a local checkout of the repository
# NOTE(review): the second entry is an absolute, machine-specific path — adjust when running elsewhere
sys.path.append("./")
sys.path.append(r"C:\Users\Federico\Documents\GitHub\pysical_locomotion")

from analysis.ephys.utils import get_recording_names, get_data, get_session_bouts, trim_bouts

# NOTE(review): absolute, machine-specific folders — adjust when running elsewhere
# output folder: the per-recording / per-variable .h5 files produced below are written here
cache = Path(r"D:\Dropbox (UCL)\Rotation_vte\Locomotion\analysis\ephys\tuning_curves\cache")
# input folder: the "<recording>_bouts.h5" files are read from here
glm_cache = Path(r"D:\Dropbox (UCL)\Rotation_vte\Locomotion\analysis\ephys\GLM\data")



Connecting root@127.0.0.1:3306


### Bin data

First bin the data based on the values of each selected variable. Then, for each bin, split the data into chunks of N frames and take the mean of every column (behavioral variables and firing rates) in each chunk, as well as the mean and variance over the whole bin.

In [2]:
def sample_and_bin(rec_data, var, bins, chunk_size=250):
    """Bin a recording by one variable and average fixed-length chunks in each bin.

    Every frame (row) of ``rec_data`` is assigned to a bin of ``var``. Within each
    bin the frames are cut into consecutive chunks of ``chunk_size`` frames and each
    complete chunk is averaged; the mean and variance over all frames of each bin
    are computed as well.

    Parameters
    ----------
    rec_data : pd.DataFrame
        One row per frame, one numeric column per variable / unit. Must contain
        the column ``var``. The input is not modified (a copy is used).
    var : str
        Name of the column to bin on; also the key used to look up ``bins``.
    bins : dict
        Maps variable names to the bin edges passed to ``pd.cut``.
    chunk_size : int, optional
        Number of frames per chunk. The default (250) is the value that was
        previously hard-coded, described there as 2.5 seconds.

    Returns
    -------
    samples_mean : pd.DataFrame
        One row per complete chunk holding the mean of every column, including
        the added ``bin`` column (mid-point of the chunk's bin). Empty if no bin
        holds a complete chunk.
    groups_mean, groups_var : pd.DataFrame
        Mean / variance of every column over all frames of each bin, one row per
        bin in bin order (positional index). Bins without frames are kept, as
        rows of NaN, so that row position always identifies the bin.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.

    Notes
    -----
    Bins holding fewer than ``chunk_size`` frames contribute nothing to
    ``samples_mean`` (incomplete chunks are discarded) but are still part of
    ``groups_mean`` / ``groups_var``.
    """
    chunk_size = int(chunk_size)
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    print(f"    binning {var}")
    data = rec_data.copy().reset_index()
    if "sdot" in data.columns:
        # NOTE(review): factor 60 kept from the original code; the unit
        # conversion it implements is not documented here — confirm
        data["sdot"] = data["sdot"] * 60

    # assign every frame to a bin of the variable of interest
    _bins = pd.cut(data[var], bins[var])

    # numeric bin label for every frame: mid-point of its interval, NaN when the
    # value falls outside all bins (category code -1 picks the trailing NaN)
    mids = np.append(np.asarray(_bins.cat.categories.mid, dtype=float), np.nan)
    data["bin"] = mids[_bins.cat.codes.to_numpy()]

    # group on the categorical itself (not stored as a column, so that only
    # numeric columns are aggregated); observed=False keeps empty bins
    groups = data.groupby(_bins, observed=False)
    print(f"        n groups: {len(groups)}")

    binned_sampled_mean = []
    for n, (i, group) in enumerate(groups):
        group = group.reset_index(drop=True)
        # consecutive runs of chunk_size frames within this bin
        splits = group.groupby(group.index // chunk_size)
        print(f"            group [{n+1}] {i}: n splits {len(splits)}")
        if group.empty:
            continue

        for _, split in splits:
            # the trailing chunk is usually incomplete: discard it
            if len(split) < chunk_size:
                continue
            binned_sampled_mean.append(split.mean())

    # get mean/var in each bin over all of its frames
    groups_mean = groups.mean().reset_index(drop=True)
    groups_var = groups.var().reset_index(drop=True)
    samples_mean = pd.concat(binned_sampled_mean, axis=1).T if len(binned_sampled_mean) > 0 else pd.DataFrame()
    return samples_mean, groups_mean, groups_var


# Bin edges for every variable that gets a tuning curve: 21 evenly spaced
# edges (i.e. 20 bins) between the (low, high) limits listed for each variable.
bins = {
    name: np.linspace(low, high, 21)
    for name, (low, high) in {
        "s": (0, 260),
        "v": (10, 80),
        "dv_250ms": (-30, 30),
        "dv_500ms": (-30, 30),
        "dv_1000ms": (-30, 30),
        "omega": (-350, 350),
        "domega_250ms": (-250, 250),
        "domega_500ms": (-250, 250),
        "domega_1000ms": (-250, 250),
        "curv_0cm": (-.20, .20),
        "curv_10cm": (-.20, .20),
        "curv_20cm": (-.20, .20),
        "curv_30cm": (-.20, .20),
    }.items()
}



In [3]:


# For every recording: load the GLM data, down-sample it, then bin it by each
# variable listed in `bins` and save the three resulting tables to `cache`.
for rec in get_recording_names():
    data_file = glm_cache / (rec + "_bouts.h5")
    try:
        rec_data = pd.read_hdf(data_file, key="data").reset_index(drop=True)
    except (FileNotFoundError, KeyError):
        # no file for this recording, or the file has no "data" table
        print(f"Doing {rec} - no data")
        continue
    except Exception as err:
        # keep going with the other recordings, but say why this one was
        # skipped instead of reporting every failure as "no data"
        print(f"Doing {rec} - could not load data ({err!r})")
        continue
    print(f"Doing {rec} - got data")

    # bin every 10ms: average each run of 10 consecutive rows
    rec_data = rec_data.groupby(rec_data.index // 10).mean()

    # optional diagnostic: plot histograms of variables distributions
    # f, axes = plt.subplots(4, 5, figsize=(20, 10))
    # rec_data_ = rec_data[rec_data.columns[:17]]
    # _ = rec_data_.hist(bins=100, ax=axes.flatten()[:17])

    # bin the recording data for each variable and save the chunk means and
    # the per-bin mean / variance as separate files
    for k in bins.keys():
        mu, groups_mean, groups_var = sample_and_bin(rec_data, k, bins)
        mu.to_hdf(cache / (f"{rec}_{k}_mu.h5"), key="hdf")
        groups_mean.to_hdf(cache / (f"{rec}_{k}_groups_mean.h5"), key="hdf")
        groups_var.to_hdf(cache / (f"{rec}_{k}_groups_var.h5"), key="hdf")


Doing FC_220408_BAA1101192_hairpin - got data
    binning s
        n groups: 20
            group [1] (0.0, 13.0]: n splits 1
            group [2] (13.0, 26.0]: n splits 1
            group [3] (26.0, 39.0]: n splits 1
            group [4] (39.0, 52.0]: n splits 2
            group [5] (52.0, 65.0]: n splits 2
            group [6] (65.0, 78.0]: n splits 2
            group [7] (78.0, 91.0]: n splits 3
            group [8] (91.0, 104.0]: n splits 2
            group [9] (104.0, 117.0]: n splits 2
            group [10] (117.0, 130.0]: n splits 2
            group [11] (130.0, 143.0]: n splits 3
            group [12] (143.0, 156.0]: n splits 3
            group [13] (156.0, 169.0]: n splits 2
            group [14] (169.0, 182.0]: n splits 2
            group [15] (182.0, 195.0]: n splits 2
            group [16] (195.0, 208.0]: n splits 2
            group [17] (208.0, 221.0]: n splits 2
            group [18] (221.0, 234.0]: n splits 2
            group [19] (234.0, 247.0]: n spl