In [1]:
'''
## Overview
Here I extend the analysis previously done on HD cells to all other recorded ROIs (the HD cells themselves are removed from the traces).

## Author
Siyuan Mei (mei@bio.lmu.de)

## Last update
2025-9-11: add docstring for the notebook
'''
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
from statsmodels.stats.multitest import multipletests

from tqdm import tqdm

from lotr import LotrExperiment
from lotr.default_vals import TURN_BIAS
from lotr.behavior import get_bouts_props_array
from lotr.utils import convolve_with_tau
from HD_utils.HD_functions import *
from HD_utils.network import *
from HD_utils.IO import *
from HD_utils.defaults import *

from functions_12 import *

# Number of recordings; fish are addressed below by their index into DATA_FOLDERS.
fish_num = len(DATA_FOLDERS)
# NOTE(review): bin_num is not used in the cells visible here — confirm it is still needed.
bin_num = 4
# Multiplied by exp.fn and passed to convolve_with_tau when smoothing the turn trace;
# presumably a time constant in seconds — TODO confirm against lotr.utils.
tau = 3

# Transform to dataframe

In [2]:
# Fish indices (positions in DATA_FOLDERS) that every per-fish loop below skips.
# The selection is based on the result of the previous notebook.
fish_exclude = [12, 20, 26, 27]

In [3]:
# Create the long-format tables used for the linear regression:
#   df     - one row per (fish, non-HD cell, time point) holding the cell's activity
#   df_vel - one row per (fish, time point) holding the smoothed turning signal
# Per-fish frames are collected in lists and concatenated once at the end, so the
# growing table is not re-copied on every iteration.
activity_frames = []
velocity_frames = []
for fish in tqdm(range(fish_num)):
    if fish in fish_exclude:
        continue
    exp = LotrExperiment(DATA_FOLDERS[fish])
    theta_rotation = get_bouts_props_array(exp.n_pts, exp.bouts_df, min_bias=TURN_BIAS, selection="all", value="bias")
    velocity = convolve_with_tau(theta_rotation, tau * exp.fn, n_kernel_pts=20*exp.fn)
    # Remove the HD-cell columns so that only the remaining ROIs are analysed.
    trace = np.delete(exp.noZ_traces, exp.hdn_indexes, axis=1)
    time_num = trace.shape[0]
    cell_num = trace.shape[1]

    # generate the fish id and cell id for the df
    # (compact integer dtypes keep the very long table small)
    fish_tpr = np.full(time_num * cell_num, fish, dtype=np.byte)
    fish_vel_tpr = np.full(time_num, fish, dtype=np.byte)
    cell_tpr = np.repeat(np.arange(cell_num), time_num).astype(np.short)
    # Transposing first lays the samples out cell after cell, matching cell_tpr.
    trace_tpr = trace.T.flatten()

    # NOTE(review): NaNs are dropped independently in the two tables, while the
    # regression cells pair activity and velocity rows by position. If a trace has
    # NaNs at only some time points the pairing would shift — confirm the traces
    # are NaN-free or carry an explicit time index.
    df_tpr = pd.DataFrame({"fish": fish_tpr, "cell": cell_tpr, "activity": trace_tpr}).dropna(ignore_index=True)
    df_vel_tpr = pd.DataFrame({"fish": fish_vel_tpr, "angVel": velocity}).dropna(ignore_index=True)

    activity_frames.append(df_tpr)
    velocity_frames.append(df_vel_tpr)

# Fall back to an empty frame when every fish was excluded (pd.concat rejects an empty list).
df = pd.concat(activity_frames, ignore_index=True) if activity_frames else pd.DataFrame()
df_vel = pd.concat(velocity_frames, ignore_index=True) if velocity_frames else pd.DataFrame()

100%|██████████| 31/31 [00:19<00:00,  1.55it/s]


In [4]:
# Cache both tables on disk so the regression section can start from the pickles.
for table, file_name in ((df, "data_ruben_noHD_acv.pkl"), (df_vel, "data_ruben_noHD_vel.pkl")):
    table.to_pickle(FISH_RESULT_PATH / file_name)
df

Unnamed: 0,fish,cell,activity
0,0,0,0.712625
1,0,0,0.998196
2,0,0,1.049545
3,0,0,1.049545
4,0,0,1.049545
...,...,...,...
214681895,30,711,1.522629
214681896,30,711,1.503818
214681897,30,711,1.496625
214681898,30,711,1.481557


# Linear regression

In [5]:
# Reload the cached tables and derive the two regressors used in the OLS formula:
# the magnitude of the turning signal and its sign (-1, 0 or +1).
df = pd.read_pickle(FISH_RESULT_PATH / "data_ruben_noHD_acv.pkl")
df_vel = pd.read_pickle(FISH_RESULT_PATH / "data_ruben_noHD_vel.pkl").assign(
    angVel_abs=lambda vel: np.abs(vel['angVel']),
    rotation_direction=lambda vel: np.sign(vel['angVel']),
)

In [6]:
# Linear regression
# For every non-HD cell fit
#     activity = offset + beta1 * |angVel| + beta2 * |angVel| * sign(angVel)
# models[fish] / results[fish] hold one object array per fish, indexed by cell id.
# Entries stay 0 for excluded fish and for cells without any samples.
models = np.zeros((fish_num), dtype=object)
results = np.zeros((fish_num), dtype=object)
for fish in tqdm(range(fish_num)):
    if fish in fish_exclude:
        continue
    df_fish = df.loc[df['fish'] == fish]
    # The velocity trace is shared by all cells of a fish, so slice it once per fish
    # instead of once per cell.
    df_cell_vel = df_vel.loc[df_vel['fish'] == fish].reset_index(drop=True)
    cell_num = len(df_fish['cell'].unique())
    models_cell = np.zeros((cell_num), dtype=object)
    results_cell = np.zeros((cell_num), dtype=object)
    for celli in range(cell_num):
        df_cell_temp = df_fish.loc[df_fish['cell'] == celli].reset_index(drop=True)
        # Activity and velocity are paired by row position; the inner join silently
        # truncates to the shorter of the two tables.
        df_cell = pd.merge(df_cell_temp, df_cell_vel, left_index=True, right_index=True)
        if len(df_cell) > 0:
            model = smf.ols("activity ~ angVel_abs + angVel_abs : rotation_direction", data=df_cell)
            result = model.fit()

            models_cell[celli] = model
            results_cell[celli] = result

    models[fish] = models_cell
    results[fish] = results_cell

100%|██████████| 31/31 [11:19<00:00, 21.90s/it]


# Dataframe of statistical results

In [7]:
# Fit the per-cell OLS model and collect, for every cell, the coefficients, their
# p-values and 95% confidence intervals in one table (one row per fish/cell).
#   beta1: angVel_abs coeff
#   beta2: angVel_abs X rotation_direction coeff
# NOTE(review): these fits repeat the ones computed in the previous regression cell;
# consider reusing `results` from there if the cells are always run in order.
stat_columns = ['fish', 'cell', 'offset',
                'beta1', 'beta1_p', 'beta1_u_95ci', 'beta1_l_95ci',
                'beta2', 'beta2_p', 'beta2_u_95ci', 'beta2_l_95ci']
# Rows are gathered in a list and turned into a DataFrame once; growing arrays with
# np.append would copy every column on every cell.
stat_rows = []

for fish in tqdm(range(fish_num)):
    if fish in fish_exclude:
        continue
    # data for lm
    df_fish = df.loc[df['fish'] == fish]
    cell_num = len(df_fish['cell'].unique())
    # The velocity trace is shared by all cells of a fish, so slice it once per fish.
    df_cell_vel = df_vel.loc[df_vel['fish'] == fish].reset_index(drop=True)
    for celli in range(cell_num):
        df_cell_temp = df_fish.loc[df_fish['cell'] == celli].reset_index(drop=True)
        df_cell = pd.merge(df_cell_temp, df_cell_vel, left_index=True, right_index=True)
        if len(df_cell) == 0:
            # Without samples there is nothing to fit; fail with a clear message
            # instead of an AttributeError on a placeholder value further down.
            raise ValueError(f"fish {fish}, cell {celli}: no samples available for the regression")
        result = smf.ols("activity ~ angVel_abs + angVel_abs : rotation_direction", data=df_cell).fit()

        # Store result (conf_int columns: 0 = lower bound, 1 = upper bound)
        conf_int = result.conf_int()
        stat_rows.append((
            fish, celli, result.params['Intercept'],
            result.params['angVel_abs'],
            result.pvalues['angVel_abs'],
            conf_int.loc['angVel_abs', 1],
            conf_int.loc['angVel_abs', 0],
            result.params['angVel_abs:rotation_direction'],
            result.pvalues['angVel_abs:rotation_direction'],
            conf_int.loc['angVel_abs:rotation_direction', 1],
            conf_int.loc['angVel_abs:rotation_direction', 0],
        ))

# All columns (including the fish and cell ids) are stored as float64.
stat_df = pd.DataFrame(stat_rows, columns=stat_columns, dtype=float)

# correct for multiple comparisons (applied jointly over all rows of the table)
for coef in ('beta1', 'beta2'):
    stat_df[f'{coef}_p_corrected'] = multipletests(stat_df[f'{coef}_p'], alpha=0.05, method='Bonferroni')[1]

# Label each cell by its direction term: 'right' / 'left' when beta2 is significant
# after correction and positive / negative, 'center' otherwise.
stat_df['ring'] = 'center'
beta2_significant = stat_df['beta2_p_corrected'] < 0.05
stat_df.loc[beta2_significant & (stat_df['beta2'] > 0), 'ring'] = 'right'
stat_df.loc[beta2_significant & (stat_df['beta2'] < 0), 'ring'] = 'left'

# Project helpers; the categorical conversion below relies on them providing
# the 'speed_modu' column and the 'x' coordinate.
stat_df = add_coor_to_lm_noHD(stat_df)
stat_df = add_speed_modu_2_lm_df(stat_df)

stat_df['speed_modu'] = pd.Categorical(stat_df['speed_modu'], categories=['Increase', 'None', 'Decrease'], ordered=True)
stat_df['ring'] = pd.Categorical(stat_df['ring'], categories=['center', 'left', 'right'], ordered=True)

stat_df['anatomical_loc'] = np.where(stat_df.x > 0, 'right', 'left')
stat_df.to_pickle(FISH_RESULT_PATH / "ruben_lm_result_noHD.pkl")

100%|██████████| 31/31 [21:12<00:00, 41.05s/it]
