In [None]:
import os
import sys
import numpy as np
import pandas as pd

In [None]:
from joblib import Parallel, delayed

In [None]:
sys.path.append('../utils/')
from dlc_helper import DLC_tracking

In [None]:
import itertools

In [None]:
from scipy.stats import mannwhitneyu

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Load arena information

In [None]:
# Per-recording metadata table; get_thigmo_info reads 'arena_x', 'arena_y', 'arena_r',
# 'dlc_result_file' and 'filename_video' from each of its rows.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
df_meta = pd.read_pickle('../../data/amphioxus_metadata_arena.pickle')

In [None]:
# Inspect which metadata columns are available.
df_meta.columns

In [None]:
# Peek at one video filename (the entry with index label 0) to see the naming scheme.
df_meta['filename_video'][0]

In [None]:
def calc_distance(x1,y1,x2,y2):
    """Return the Euclidean distance between the points (x1, y1) and (x2, y2).

    The arithmetic is plain numpy, so scalars and array-likes that support
    subtraction and ``**`` are both accepted.
    """
    delta_x = x2 - x1
    delta_y = y2 - y1
    squared_length = delta_x**2 + delta_y**2
    return np.sqrt(squared_length)

In [None]:
def calc_displacements(df_xy):
    """Return the frame-to-frame displacement for a two-column (x, y) table.

    The first row is duplicated in front of the data before differencing, so
    the output has one value per input row and the first displacement is 0.
    """
    coords = df_xy.values
    # Repeat the first position so that row 0 gets a zero-length step.
    first_position = coords[0, :].reshape((1, 2))
    padded = np.vstack([first_position, coords])
    steps = padded[1:] - padded[:-1]
    return np.linalg.norm(steps, axis=1)
    

In [None]:
def remove_outlier_IQR(df_in, col, factor=1.7):
    """Replace IQR outliers in ``df_in[col]`` with NaN.

    A value is an outlier when it lies below ``Q1 - factor * IQR`` or above
    ``Q3 + factor * IQR``, with the quartiles taken over the whole column.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Table to filter. It is modified in place and also returned.
    col : str
        Column to test; the same column receives the NaN values.
    factor : float, optional
        Multiplier applied to the IQR to build the fences (default 1.7).

    Returns
    -------
    pandas.DataFrame
        ``df_in`` itself, with outliers in ``col`` set to NaN.
    """
    q1 = df_in[col].quantile(0.25)
    q3 = df_in[col].quantile(0.75)
    iqr = q3 - q1

    thresh_low = q1 - factor * iqr
    thresh_high = q3 + factor * iqr

    is_outlier = (df_in[col] < thresh_low) | (df_in[col] > thresh_high)
    # Write into the tested column (previously hard-coded to 'displacement').
    df_in.loc[is_outlier, col] = np.nan

    return df_in

In [None]:
def get_thigmo_info(meta_row):
    """Build the per-frame thigmotaxis table for one recording.

    Parameters
    ----------
    meta_row : mapping (e.g. a row of ``df_meta``)
        Must provide 'arena_x', 'arena_y', 'arena_r', 'dlc_result_file' and
        'filename_video'.

    Returns
    -------
    pandas.DataFrame
        Columns 'frame', 'NT_x', 'NT_y' from the DLC result plus:
        'filename' (video name up to '.avi'), 'dist_from_center',
        'is_in_outer_zone' (1/0) and 'displacement' (frame-to-frame step).
    """
    # arena data
    x0 = meta_row['arena_x']
    y0 = meta_row['arena_y']
    radius = meta_row['arena_r']
    # A circle of radius r/sqrt(2) encloses half the arena area, so the inner
    # disc and the outer ring are equal in size.
    thresh_rad = radius / np.sqrt(2)

    # data from DLC
    dlc_path = meta_row['dlc_result_file']
    dlc_folder, dlc_filename = os.path.split(dlc_path)
    dlc_obj = DLC_tracking(dlc_filename, dlc_folder)

    # Copy so the new columns are added to an independent frame, not a slice of df_data.
    df_thigmo = dlc_obj.df_data[['frame', 'NT_x', 'NT_y']].copy()

    df_thigmo['filename'] = meta_row['filename_video'].split('.avi')[0]
    df_thigmo['dist_from_center'] = calc_distance(x0, y0, df_thigmo['NT_x'], df_thigmo['NT_y'])
    # The outer zone starts at thresh_rad; the previous comparison against the
    # full radius only flagged points lying on or beyond the arena edge.
    df_thigmo['is_in_outer_zone'] = (df_thigmo['dist_from_center'] >= thresh_rad).astype(int)

    # Outliers are not removed here; the notebook filters them later on the combined table.
    df_thigmo['displacement'] = calc_displacements(df_thigmo[['NT_x', 'NT_y']])

    return df_thigmo

In [None]:
# One thigmotaxis table per metadata row, computed by 40 joblib workers.
list_df_thigmos = Parallel(n_jobs=40, verbose=5)(
    delayed(get_thigmo_info)(meta_row) for _, meta_row in df_meta.iterrows()
)

In [None]:
# Stack the per-recording tables into one; the row index is not reset, so
# index labels coming from different recordings are kept as they were.
df_thigmo_combined = pd.concat(list_df_thigmos)

In [None]:
# Display the combined table before outlier removal.
df_thigmo_combined

In [None]:
# Outlier fences are computed on the displacements pooled over all recordings.
# NOTE: remove_outlier_IQR edits the frame in place, so re-running this cell
# re-estimates the fences on already filtered data.
df_thigmo_combined = remove_outlier_IQR(df_thigmo_combined, 'displacement')
df_thigmo_combined

In [None]:
# Persist the filtered per-frame table under the HDF5 key 'thigmo'.
df_thigmo_combined.to_hdf('../../results/metadata_thigmotaxis_all.h5', key='thigmo')

# Control data

In [None]:
# Clustering results for the control data; later cells use its 'filename', 'frame'
# and 'hdbscan_wv_scaled' columns. No key is passed to read_hdf here.
df_results_control = pd.read_hdf('../../results/UMAP_HDBSCANclustering_withWV_31072023_1135.h5')

In [None]:
# Inspect the columns of the clustering results.
df_results_control.columns

In [None]:
# Attach the thigmotaxis columns to every clustered frame; frames without a
# match on (filename, frame) keep NaN in the thigmotaxis columns.
df_results_thigmo = pd.merge(
    df_results_control,
    df_thigmo_combined,
    how='left',
    on=['filename', 'frame'],
)
df_results_thigmo

In [None]:
# Keep only frames with a valid displacement (drops unmatched frames and removed outliers).
# .copy() makes this an independent frame so that columns can be added to it later
# without pandas' SettingWithCopyWarning.
df_results_thigmo_corr = df_results_thigmo.loc[df_results_thigmo['displacement'].notna()].copy()
df_results_thigmo_corr

## Thigmotaxis and acclimatization

In [None]:
# Flag = 1 when the 4th underscore-separated token of the filename is '15m0s' or '15m3s', else 0.
# The column keeps the spelling 'acclimitization' because the following cells refer to it by that name.
# assign() returns a new frame, so this cell does not write into a slice of df_results_thigmo.
df_results_thigmo_corr = df_results_thigmo_corr.assign(
    acclimitization=df_results_thigmo_corr['filename']
    .str.split('_')
    .str[3]
    .isin(['15m0s', '15m3s'])
    .astype(int)
)

In [None]:
# Fraction of frames spent in the outer zone per acclimitization group.
# The mean of the 0/1 flag equals sum/len and avoids the deprecated
# groupby.apply over whole frames.
df_results_thigmo_corr.groupby('acclimitization')['is_in_outer_zone'].mean().reset_index(name='frac_to')

In [None]:
# Median distance from the arena centre and median displacement per acclimitization group.
# The string alias 'median' replaces np.median, which pandas deprecates as an agg argument.
df_results_thigmo_corr.groupby('acclimitization').agg({'dist_from_center': 'median', 'displacement': 'median'})

### Per file

#### fraction of time in outer zone

In [None]:
# Fraction of frames in the outer zone for every recording, keyed by acclimitization flag.
# The mean of the 0/1 flag equals sum/len and avoids the deprecated
# groupby.apply over whole frames.
df_frac_to_ = (
    df_results_thigmo_corr
    .groupby(['acclimitization', 'filename'])['is_in_outer_zone']
    .mean()
    .reset_index(name='frac_to')
)
df_frac_to_

In [None]:
# Per-recording outer-zone fractions, split by acclimitization flag.
frac_to_acc_1 = df_frac_to_.loc[df_frac_to_['acclimitization'].eq(1), 'frac_to']
frac_to_acc_0 = df_frac_to_.loc[df_frac_to_['acclimitization'].eq(0), 'frac_to']

In [None]:
# Two-sided Mann-Whitney U test on the per-recording outer-zone fractions.
# The alternative is spelled out to match get_results_mwu and to not depend on the scipy default.
mannwhitneyu(frac_to_acc_0, frac_to_acc_1, alternative='two-sided')

#### median distance from center

In [None]:
# Median distance from the arena centre for every recording.
# The string alias 'median' replaces np.median, which pandas deprecates as an agg argument.
df_md = (
    df_results_thigmo_corr
    .groupby(['acclimitization', 'filename'])
    .agg({'dist_from_center': 'median'})
    .reset_index()
)
df_md

In [None]:
# Per-recording median distances, split by acclimitization flag.
md_acc_1 = df_md.loc[df_md['acclimitization'].eq(1), 'dist_from_center']
md_acc_0 = df_md.loc[df_md['acclimitization'].eq(0), 'dist_from_center']

In [None]:
# Two-sided Mann-Whitney U test on the per-recording median distances.
# The alternative is spelled out to match get_results_mwu and to not depend on the scipy default.
mannwhitneyu(md_acc_0, md_acc_1, alternative='two-sided')

#### Fraction of distance travelled in outer zone

In [None]:
# Per (acclimitization, filename) group: summed displacement of the outer-zone frames
# divided by the number of frames in the group.
# NOTE(review): the denominator is the frame count, not the total displacement, so this is
# outer-zone distance per frame rather than a fraction of distance - confirm this is intended.
df_acc_fracDO = (
    df_results_thigmo_corr
    .groupby(['acclimitization', 'filename'])
    .apply(lambda grp: grp['displacement'][grp['is_in_outer_zone'].eq(1)].sum() / grp.shape[0])
    .reset_index(name='frac_do')
)
df_acc_fracDO

In [None]:
# Per-recording 'frac_do' values, split by acclimitization flag.
fracDO_acc_1 = df_acc_fracDO.loc[df_acc_fracDO['acclimitization'].eq(1), 'frac_do']
fracDO_acc_0 = df_acc_fracDO.loc[df_acc_fracDO['acclimitization'].eq(0), 'frac_do']

In [None]:
# Two-sided Mann-Whitney U test on the per-recording 'frac_do' values.
# The alternative is spelled out to match get_results_mwu and to not depend on the scipy default.
mannwhitneyu(fracDO_acc_0, fracDO_acc_1, alternative='two-sided')

## Thigmotaxis and clusters

In [None]:
# Fraction of frames in the outer zone for every (cluster, recording) pair.
# The mean of the 0/1 flag equals sum/len and avoids the deprecated
# groupby.apply over whole frames.
df_clus_fracTO = (
    df_results_thigmo_corr
    .groupby(['hdbscan_wv_scaled', 'filename'])['is_in_outer_zone']
    .mean()
    .reset_index(name='frac_to')
)
df_clus_fracTO

### Fraction of time spent in outer zone

In [None]:
# Map each cluster label to the array of its per-recording outer-zone fractions.
dict_clus_fracTO = {
    cluster_label: cluster_rows['frac_to'].values
    for cluster_label, cluster_rows in df_clus_fracTO.groupby('hdbscan_wv_scaled')
}

In [None]:
def get_results_mwu(dict_clus, alt='two-sided'):
    """Run pairwise Mann-Whitney U tests between all groups in ``dict_clus``.

    Parameters
    ----------
    dict_clus : dict
        Maps a cluster label to the sequence of values observed for it.
    alt : str, optional
        Passed to ``mannwhitneyu`` as ``alternative`` (default 'two-sided').

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        U statistics and p-values. Both are square float tables labelled
        ``clus_<label>``; the row is the first sample and the column the
        second sample of each test. Every ordered pair is tested, including
        each cluster against itself.
    """
    labels = [f'clus_{key}' for key in dict_clus]
    # Float-typed tables (not object dtype) so they can be plotted or compared directly.
    df_mwu_stat = pd.DataFrame(np.nan, index=labels, columns=labels, dtype=float)
    df_mwu_pval = pd.DataFrame(np.nan, index=labels, columns=labels, dtype=float)

    for clus1, clus2 in itertools.product(dict_clus, repeat=2):
        mwu_results = mannwhitneyu(dict_clus[clus1], dict_clus[clus2], alternative=alt)
        df_mwu_stat.loc[f'clus_{clus1}', f'clus_{clus2}'] = mwu_results[0]
        df_mwu_pval.loc[f'clus_{clus1}', f'clus_{clus2}'] = mwu_results[1]

    return df_mwu_stat, df_mwu_pval

In [None]:
# Pairwise two-sided p-values between clusters for the outer-zone fraction.
df_mwu_pval_fracTO = get_results_mwu(dict_clus_fracTO, alt='two-sided')[1]
df_mwu_pval_fracTO

In [None]:
# Shows the same p-value table again (duplicates the previous cell's output).
df_mwu_pval_fracTO

In [None]:
# Boolean map of cluster pairs with p < 0.05 (two-sided, no multiple-comparison correction).
sns.heatmap(data=df_mwu_pval_fracTO.lt(0.05), cmap="PiYG", linewidths=0.5)

In [None]:
# One-sided test ('less'): the row cluster is the first sample, the column cluster the second.
df_mwu_pval_fracTO_less = get_results_mwu(dict_clus=dict_clus_fracTO, alt='less')[1]
sns.heatmap(data=df_mwu_pval_fracTO_less.lt(0.05), cmap="PiYG", linewidths=0.5)

In [None]:
# One-sided test ('greater'): the row cluster is the first sample, the column cluster the second.
df_mwu_pval_fracTO_greater = get_results_mwu(dict_clus=dict_clus_fracTO, alt='greater')[1]
sns.heatmap(data=df_mwu_pval_fracTO_greater.lt(0.05), cmap="PiYG", linewidths=0.5)

### Median distance from center

In [None]:
# Median distance from the arena centre for every (cluster, recording) pair.
# The string alias 'median' replaces np.median, which pandas deprecates as an agg argument.
df_clus_MD = (
    df_results_thigmo_corr
    .groupby(['hdbscan_wv_scaled', 'filename'])
    .agg({'dist_from_center': 'median'})
    .reset_index()
)
df_clus_MD

In [None]:
# Map each cluster label to the array of its per-recording median distances.
dict_clus_MD = {
    cluster_label: cluster_rows['dist_from_center'].values
    for cluster_label, cluster_rows in df_clus_MD.groupby('hdbscan_wv_scaled')
}

In [None]:
# Pairwise two-sided p-values between clusters for the median distance from centre.
df_mwu_pval_MD = get_results_mwu(dict_clus_MD, alt='two-sided')[1]
df_mwu_pval_MD

In [None]:
# Boolean map of cluster pairs with p < 0.05 (two-sided, no multiple-comparison correction).
sns.heatmap(data=df_mwu_pval_MD.lt(0.05), cmap='PiYG', linewidths=0.5)

In [None]:
# One-sided test ('less'): the row cluster is the first sample, the column cluster the second.
df_mwu_pval_MD_less = get_results_mwu(dict_clus=dict_clus_MD, alt='less')[1]
sns.heatmap(data=df_mwu_pval_MD_less.lt(0.05), cmap='PiYG', linewidths=0.5)

In [None]:
# One-sided test ('greater'): the row cluster is the first sample, the column cluster the second.
df_mwu_pval_MD_greater = get_results_mwu(dict_clus=dict_clus_MD, alt='greater')[1]
sns.heatmap(data=df_mwu_pval_MD_greater.lt(0.05), cmap='PiYG', linewidths=0.5)

### Total distance in outer zone

In [None]:
# Per (cluster, filename) group: summed displacement of the outer-zone frames
# divided by the number of frames in the group.
# NOTE(review): the denominator is the frame count, not the total displacement, so this is
# outer-zone distance per frame rather than a fraction or a total - confirm this is intended.
df_clus_fracDO = (
    df_results_thigmo_corr
    .groupby(['hdbscan_wv_scaled', 'filename'])
    .apply(lambda grp: grp['displacement'][grp['is_in_outer_zone'].eq(1)].sum() / grp.shape[0])
    .reset_index(name='frac_do')
)
df_clus_fracDO

In [None]:
# Map each cluster label to the array of its per-recording 'frac_do' values.
dict_clus_TDO = {
    cluster_label: cluster_rows['frac_do'].values
    for cluster_label, cluster_rows in df_clus_fracDO.groupby('hdbscan_wv_scaled')
}

In [None]:
# Pairwise two-sided p-values between clusters for the outer-zone distance measure.
df_mwu_pval_TDO = get_results_mwu(dict_clus_TDO, alt='two-sided')[1]
df_mwu_pval_TDO

In [None]:
# Boolean map of cluster pairs with p < 0.05 (two-sided, no multiple-comparison correction).
sns.heatmap(data=df_mwu_pval_TDO.lt(0.05), cmap='PiYG', linewidths=0.5)

In [None]:
# One-sided test ('less'): the row cluster is the first sample, the column cluster the second.
df_mwu_pval_TDO_less = get_results_mwu(dict_clus=dict_clus_TDO, alt='less')[1]
sns.heatmap(data=df_mwu_pval_TDO_less.lt(0.05), cmap='PiYG', linewidths=0.5)

In [None]:
# One-sided test ('greater'): the row cluster is the first sample, the column cluster the second.
df_mwu_pval_TDO_greater = get_results_mwu(dict_clus=dict_clus_TDO, alt='greater')[1]
sns.heatmap(data=df_mwu_pval_TDO_greater.lt(0.05), cmap='PiYG', linewidths=0.5)

# Thigmotaxis control vs drugs