In [None]:
'''
## Overview
Here I apply linear regression.
I predict each ROI's activity using the angular head velocity and 
the angular head speed to detect the three types of rings: left, right,
or symmetric.

See Manuscript for more implementation details.

## Author
Siyuan Mei (mei@bio.lmu.de)

## Last update
2025-9-11
'''

import numpy as np
import pandas as pd
from tqdm import tqdm
import statsmodels.formula.api as smf

from HD_utils.IO import *
from HD_utils.lm import *

# Linear Regression

In [2]:
# Load the per-ROI table and derive the regressors used by the linear model.
df = pd.read_csv(FLY_RESULT_PATH / 'data_PB_roi.csv')
df['angVel_abs'] = np.abs(df['angVel'])  # unsigned angular velocity
df['cos_diff'] = np.cos(df['preferred_angle'] - df['netPhase'])

# Exclude the rows recorded with gain '2x', then drop every remaining row that
# has a missing value in any column.
# A boolean filter is used instead of overwriting whole rows with NaN: the
# overwrite relied on implicit dtype upcasting of non-float columns, which
# recent pandas versions deprecate. As a consequence, integer/boolean columns
# now keep their dtype instead of being converted to float/object.
df = df.loc[df['gain'] != '2x'].dropna()

In [4]:
# Linear regression: fit one OLS model per (fly, ROI).
#
# `models` and `results` are object arrays indexed by fly_id; each element is
# itself an object array indexed by roi_id holding the statsmodels model and
# its fitted result. Slots that are never filled keep the integer 0 placeholder
# created by np.zeros.
#
# NOTE: this indexing assumes fly_id (and roi_id within each fly) are the
# consecutive integers 0..n-1. The counts are therefore taken from the same
# id columns that are used to select the rows.

fly_num = df['fly_id'].nunique()
models = np.zeros((fly_num), dtype=object)
results = np.zeros((fly_num), dtype=object)
for fly_id in tqdm(range(fly_num)):
    df_fly = df.loc[df['fly_id'] == fly_id]
    roi_num = df_fly['roi_id'].nunique()
    models_roi = np.zeros((roi_num), dtype=object)
    results_roi = np.zeros((roi_num), dtype=object)
    for roi_id in range(roi_num):
        df_roi = df_fly.loc[df_fly['roi_id'] == roi_id]
        if df_roi.empty:
            # Nothing to fit for this id; leave the 0 placeholder in place.
            continue
        # The formula string is continued with a backslash inside the literal.
        model = smf.ols("activity ~ angVel_abs + angVel + cos_diff \
            + gain + gain:angVel_abs", data=df_roi)
        result = model.fit()
        models_roi[roi_id] = model
        results_roi[roi_id] = result

    models[fly_id] = models_roi
    results[fly_id] = results_roi

100%|██████████| 14/14 [00:10<00:00,  1.31it/s]


In [7]:
# NOTE(review): `multipletests` is not referenced in this cell; presumably it is
# needed elsewhere. Consider moving it to the import cell at the top.
from statsmodels.stats.multitest import multipletests

# Build one row per fitted ROI model with the intercept and, for three
# regressors, the estimate, p-value and 95% confidence bounds. Then correct the
# p-values, label each ROI with a ring, attach cell-type metadata and save.

ALPHA = 0.05  # significance threshold applied to the corrected p-values

# Output column prefix -> regressor name in the fitted model.
coef_terms = {
    'beta1': 'angVel_abs',  # coefficient of the unsigned angular velocity
    'beta2': 'angVel',      # coefficient of the signed angular velocity
    'beta3': 'cos_diff',    # coefficient of cos(preferred_angle - netPhase)
}
stat_columns = ['fly_id', 'roi_id', 'offset'] + [
    prefix + field
    for prefix in coef_terms
    for field in ('', '_p', '_u_95ci', '_l_95ci')
]

# Rows are gathered in a list and converted once, instead of growing many
# arrays with np.append on every iteration.
records = []
for fly_id in tqdm(range(fly_num)):
    for roi_id, result in enumerate(results[fly_id]):
        # Unfitted slots still hold the integer 0 placeholder from np.zeros.
        if isinstance(result, (int, np.integer)):
            continue

        ci = result.conf_int()  # computed once; column 0 = lower, 1 = upper
        # Ids are stored as floats so the saved table keeps its previous format.
        record = {
            'fly_id': float(fly_id),
            'roi_id': float(roi_id),
            'offset': result.params['Intercept'],
        }
        for prefix, term in coef_terms.items():
            record[prefix] = result.params[term]
            record[prefix + '_p'] = result.pvalues[term]
            record[prefix + '_u_95ci'] = ci.loc[term, 1]
            record[prefix + '_l_95ci'] = ci.loc[term, 0]
        records.append(record)

# Explicit columns and float dtype keep the table well-formed even when no
# model was fitted.
stat_df = pd.DataFrame(records, columns=stat_columns).astype(float)

# Multiple-comparison correction of each p-value column. `correct_pval` comes
# from the project helpers; it is called for its effect on `stat_df`.
p_names = ['beta1_p', 'beta2_p', 'beta3_p']
corp_suffix = ['_bonf', '_fdrbh']
methods = ['Bonferroni', 'fdr_bh']
for p_name in p_names:
    for suffix, method in zip(corp_suffix, methods):
        correct_pval(stat_df, p_name, p_name + suffix, method)

# Assign ring based on the corrected p-value and the sign of beta2:
# significant and positive -> 'right', significant and negative -> 'left',
# everything else -> 'central'.
for suffix in corp_suffix:
    ring_col = 'ring' + suffix
    significant = stat_df['beta2_p' + suffix] < ALPHA
    stat_df[ring_col] = 'central'
    stat_df.loc[significant & (stat_df['beta2'] > 0), ring_col] = 'right'
    stat_df.loc[significant & (stat_df['beta2'] < 0), ring_col] = 'left'

# Add cell type and preferred angle (one value per fly/ROI, reduced with max)
df_reduce = df.groupby(['fly_id', 'roi_id'])[['cell_type', 'preferred_angle']].max().reset_index()
stat_df = stat_df.merge(df_reduce, on=['fly_id', 'roi_id'], how='left')

# Simplify cell type names: both EPG sides are pooled under 'EPG'
stat_df['cell_type_s'] = stat_df['cell_type'].copy()
stat_df.loc[stat_df['cell_type'].isin(['EPG_L', 'EPG_R']), 'cell_type_s'] = 'EPG'

stat_df.to_csv(FLY_RESULT_PATH / "lm_PB_roi_netcosdif.csv", index=False)

  0%|          | 0/14 [00:00<?, ?it/s]

100%|██████████| 14/14 [00:01<00:00, 11.90it/s]


In [6]:
# Cross-tabulate the FDR-BH ring label (rows) against the simplified cell
# type (columns); the bare name on the last line displays the table.
confusion_matrix = pd.crosstab(
    index=stat_df['ring_fdrbh'],
    columns=stat_df['cell_type_s'],
)
confusion_matrix

cell_type_s,EPG,PEN_L,PEN_R
ring_fdrbh,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
central,138,31,30
left,33,77,3
right,53,4,79
