In [1]:
from pathlib import Path
import json
import pyarrow
import math
import re
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import h5py

from scipy.stats import gaussian_kde
from scipy.ndimage import label
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import holoviews as hv
from holoviews import opts

import traceback


hv.extension("bokeh")

from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui

from utils_behavior import (
    Sleap_utils,
    HoloviewsTemplates,
    Utils,
    Processing,
    Ballpushing_utils,
    Seaborn_Templates,
)

import Config

import importlib

  import multipart


Loading BallPushing utils version 20 Jan 2025


In [8]:
# Re-import Config so that edits made to Config.py are picked up without restarting the kernel.
importlib.reload(Config)

<module 'Config' from '/home/durrieu/Tracking_Analysis/MazeRecorder/Analysis/TNT_Sreen_Notebooks/Config.py'>

In [2]:
# Read the pooled coordinates feather file into a DataFrame.
pooled_coordinates_path = "/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Coordinates/240110_coordinates_Data/coordinates/250106_Pooled_coordinates.feather"
BallTrajectories = pd.read_feather(pooled_coordinates_path)

In [None]:
# Re-import Config so that edits made to Config.py are picked up without restarting the kernel.
importlib.reload(Config)

In [3]:
# Run the Config clean-up step, then the split-registry mapping, on the loaded data.
# The result replaces BallTrajectories, so this cell is not idempotent: reload the
# feather file before re-running it.
BallTrajectories = Config.map_split_registry(Config.cleanup_data(BallTrajectories))

BallTrajectories

Unnamed: 0,index,time,frame,adjusted_time,x_fly_0,y_fly_0,distance_fly_0,x_ball_0,y_ball_0,distance_ball_0,...,Brain region,Date,Genotype,Period,FeedingState,Orientation,Light,Crossing,Simplified Nickname,Split
0,0,0.034483,1,,0.000000,0.000000,0.000000,-10.584046,-147.231932,0.000000,...,MB extrinsic neurons,240104,TNTxZ1997,PM16,starved_noWater,std,on,1,PPL1-01,y
1,290,10.034483,291,,-0.258986,0.574956,0.630593,-10.585915,-147.257163,0.025300,...,MB extrinsic neurons,240104,TNTxZ1997,PM16,starved_noWater,std,on,1,PPL1-01,y
2,580,20.034483,581,,0.575676,3.024279,3.078581,-10.581800,-147.247442,0.015671,...,MB extrinsic neurons,240104,TNTxZ1997,PM16,starved_noWater,std,on,1,PPL1-01,y
3,870,30.034483,871,,-1.849664,-1.183738,2.196018,-10.577445,-147.250741,0.019933,...,MB extrinsic neurons,240104,TNTxZ1997,PM16,starved_noWater,std,on,1,PPL1-01,y
4,1160,40.034483,1161,,-2.404914,13.374129,13.588633,-10.588456,-147.235937,0.005957,...,MB extrinsic neurons,240104,TNTxZ1997,PM16,starved_noWater,std,on,1,PPL1-01,y
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1317347,103240,3560.034483,103241,,-13.818304,-283.987073,284.323061,-16.827118,-364.408652,206.816144,...,LH,231212,TNTxZ1884,PM16,starved_noWater,std,on,1,LH1284,y
1317348,103530,3570.034483,103531,,-10.876535,-331.789768,331.967994,-20.815885,-361.031985,203.509421,...,LH,231212,TNTxZ1884,PM16,starved_noWater,std,on,1,LH1284,y
1317349,103820,3580.034483,103821,,-12.146361,-287.939386,288.195462,-19.639780,-366.468499,208.915987,...,LH,231212,TNTxZ1884,PM16,starved_noWater,std,on,1,LH1284,y
1317350,104110,3590.034483,104111,,-22.297176,-337.100888,337.837494,-18.207566,-366.619821,209.042167,...,LH,231212,TNTxZ1884,PM16,starved_noWater,std,on,1,LH1284,y


# Plot one particular Nickname

In [None]:
# Keep only the rows recorded for the TNTxG87 genotype.
is_tntxg87 = BallTrajectories["Genotype"] == "TNTxG87"
MBON08 = BallTrajectories[is_tntxg87]

In [None]:
# List the distinct fly identifiers present in the TNTxG87 selection.
MBON08["fly"].unique()

In [None]:
# Re-import Config so that edits made to Config.py are picked up without restarting the kernel.
importlib.reload(Config)

In [None]:
# Extract the data for one nickname of interest through Config.get_subset_data.
nickname_of_interest = "SS54549 (P-F3-5R patch line)"
subset = Config.get_subset_data(BallTrajectories, "Nickname", nickname_of_interest)

subset

In [None]:
# Number of distinct flies recorded for each Nickname.
# Uses the built-in grouped nunique() instead of apply() with a lambda, which is
# slower and emits a DeprecationWarning on recent pandas because apply() also
# operates on the grouping column.
subset.groupby("Nickname")["fly"].nunique()

In [None]:
# Check the distribution of frame values grouped by Nickname
# (count, mean, std, min, quartiles and max of "frame" for each Nickname).

subset.groupby("Nickname")["frame"].describe()

In [None]:
# Check how many unique "fly" values there are for each Nickname.
# Uses the built-in grouped nunique() instead of apply() with a lambda, which is
# slower and emits a DeprecationWarning on recent pandas because apply() also
# operates on the grouping column.

subset.groupby("Nickname")["fly"].nunique()

In [None]:
# Re-import the Processing module so that edits to its source are picked up without restarting the kernel.
importlib.reload(Processing)

In [None]:
# Low-pass filter distance_ball_0 one fly at a time, so that the filter never
# runs across the boundary between two recordings. The result is stored in a
# new column, distance_ball_0_filtered.

for fly_id in subset["fly"].unique():
    # Build the row selector once and reuse it for both the read and the write.
    fly_rows = subset["fly"] == fly_id
    subset.loc[fly_rows, "distance_ball_0_filtered"] = Processing.savgol_lowpass_filter(
        subset.loc[fly_rows, "distance_ball_0"], 22
    )

In [None]:
# Display the subset, which now includes the distance_ball_0_filtered column.
subset

In [None]:
# Plot distance_ball_0 as a function of time, one line per Brain region.
# Each line is seaborn's default aggregate (the mean) over all rows sharing a
# time value; no error band is drawn.

plt.figure(figsize=(10, 6))

sns.lineplot(
    data=subset,
    x="time",
    y="distance_ball_0",
    hue="Brain region",
    # `ci` is deprecated since seaborn 0.12; `errorbar=None` is its replacement.
    errorbar=None,
    palette=Config.color_dict,
)

# Put the legend outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)

plt.show()

In [None]:
# Plot only one nickname but plot each individual fly separately
# NOTE(review): `subset` is built earlier for the nickname
# "SS54549 (P-F3-5R patch line)"; confirm that it can contain the nickname
# filtered on below, otherwise this figure is empty.

plt.figure(figsize=(10, 6))

sns.lineplot(
    data=subset[subset["Nickname"] == "MBON-08-GaL4  MBON-09-GaL4 "],
    x="time",
    y="distance_ball_0",
    hue="fly",
    # `ci` is deprecated since seaborn 0.12; `errorbar=None` is its replacement.
    errorbar=None,
)

# Put the legend outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)

plt.show()

# Plot all nicknames grouped by brain region

In [5]:
# Re-import both helper modules so that edits to their source are picked up without restarting the kernel.
importlib.reload(Config)
importlib.reload(Processing)

<module 'utils_behavior.Processing' from '/home/durrieu/utils_behavior/utils_behavior/Processing.py'>

In [4]:
# Initialize random generator
rg = np.random.default_rng()

output_dir = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Plots/250130_Coordinates_Downsampled_Signif_2/")

# Make sure the destination folder exists before any figure is written.
output_dir.mkdir(parents=True, exist_ok=True)

# One figure per brain region. Regions with fewer than 5 nicknames are not
# plotted on their own; their nicknames are pooled into a single
# "Miscellaneous" figure after the loop.
miscellaneous_nicknames = []
for brain_region in Config.registries["brain_regions"]:
    output_path = output_dir / f"Full_euclidean_distance_coordinates_line_{brain_region}.png"

    # Figures that already exist on disk are never regenerated.
    if output_path.exists():
        print(f"Skipping Brain region {brain_region} as the plot already exists.")
        continue

    region_data = BallTrajectories[BallTrajectories['Brain region'] == brain_region]
    nicknames = region_data['Nickname'].unique()

    # Control is drawn by its own dedicated call after the loop.
    if brain_region == "Control":
        continue

    # Too few lines for a figure of their own: defer to the pooled plot.
    if len(nicknames) < 5:
        miscellaneous_nicknames.extend(nicknames)
        continue

    # A failure on one region is reported and must not stop the others.
    try:
        Config.create_and_save_plot(BallTrajectories, nicknames, brain_region, output_path, Config.registries, show_signif=True)
        print(f"Processed Brain region {brain_region}")
    except Exception as e:
        print(f"Error processing Brain region {brain_region}: {e}")
        traceback.print_exc()

# Pooled figure for the nicknames of all small regions
if miscellaneous_nicknames:
    try:
        output_path = output_dir / "Full_euclidean_distance_coordinates_line_Miscellaneous.png"
        Config.create_and_save_plot(BallTrajectories, miscellaneous_nicknames, "Miscellaneous", output_path, Config.registries, show_signif=True)
        print("Processed Miscellaneous Brain region")
    except Exception as e:
        print(f"Error processing Miscellaneous Brain region: {e}")
        traceback.print_exc()

# Control figure (always attempted, independently of the loop above)
try:
    control_nicknames = Config.registries["control_nicknames"]
    output_path = output_dir / "Full_euclidean_distance_coordinates_line_Control.png"
    Config.create_control_plot(BallTrajectories, control_nicknames, output_path)
    print("Processed Control Brain region")
except Exception as e:
    print(f"Error processing Control Brain region: {e}")
    traceback.print_exc()

Skipping Brain region CX as the plot already exists.
Skipping Brain region MB extrinsic neurons as the plot already exists.
Processing LC21 vs Empty-Split
Time Bin | Observed Diff | Raw p-value | Corrected p-value
       0 | +5.834 | 0.7570 | 0.9537
       1 | -24.185 | 0.4060 | 0.9537
       2 | -29.548 | 0.2480 | 0.9537
       3 | -11.927 | 0.6600 | 0.9537
       4 | +2.800 | 0.9220 | 0.9720
       5 | +7.938 | 0.7480 | 0.9537
       6 | +5.182 | 0.7630 | 0.9537
       7 | -1.320 | 0.9720 | 0.9720
       8 | +13.214 | 0.4820 | 0.9537
       9 | +14.399 | 0.3760 | 0.9537
Processing LC25 vs Empty-Split
Time Bin | Observed Diff | Raw p-value | Corrected p-value
       0 | -34.721 | 0.0010 | 0.0100
       1 | -69.542 | 0.0050 | 0.0125
       2 | -86.227 | 0.0030 | 0.0125
       3 | -85.179 | 0.0040 | 0.0125
       4 | -53.531 | 0.0510 | 0.1020
       5 | -28.787 | 0.2700 | 0.3375
       6 | -27.259 | 0.2550 | 0.3375
       7 | -34.099 | 0.1530 | 0.2550
       8 | -19.075 | 0.3970 | 0.441

# Load data from individual datasets

In [None]:
# Re-import Config so that edits made to Config.py are picked up without restarting the kernel.
importlib.reload(Config)

In [None]:
# Process brain regions, loading one dataset per region instead of using the
# pooled file. Note that BallTrajectories is overwritten on every iteration.

registries = Config.registries
data_path = Path("/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/240120_short_contacts_no_cutoff_no_downsample_Data/coordinates_regions")

# Regions with fewer than 5 nicknames are pooled into one "Miscellaneous" plot.
# Their rows are kept alongside their nicknames, so that the pooled plot is
# drawn from the data of every small region and not only from whichever region
# happened to be loaded last.
miscellaneous_nicknames = []
miscellaneous_frames = []

for brain_region in registries["brain_regions"]:

    if brain_region != "Control":
        output_path = f"/mnt/upramdya_data/MD/MultiMazeRecorder/Plots/250122_Coordinates_full/Full_euclidean_distance_coordinates_line_{brain_region}.png"

        # Figures that already exist on disk are never regenerated.
        if os.path.exists(output_path):
            print(f"Skipping Brain region {brain_region} as the plot already exists.")
            continue

        BallTrajectories = Config.load_datasets_for_brain_region(brain_region, data_path, registries, downsample_factor=10)

        # NOTE(review): Control rows are dropped before plotting. If
        # create_and_save_plot needs them for the significance comparison
        # (show_signif=True), pass the unfiltered frame instead - confirm.
        region_data = BallTrajectories[BallTrajectories['Brain region'] != "Control"]
        nicknames = region_data['Nickname'].unique()

        if len(nicknames) < 5:
            miscellaneous_nicknames.extend(nicknames)
            miscellaneous_frames.append(region_data)
            continue

        # A failure on one region is reported and must not stop the others.
        try:
            Config.create_and_save_plot(region_data, nicknames, brain_region, output_path, registries, show_signif=True)
            print(f"Processed Brain region {brain_region}")
        except Exception as e:
            print(f"Error processing Brain region {brain_region}: {e}")
            traceback.print_exc()

    else:
        output_path = "/mnt/upramdya_data/MD/MultiMazeRecorder/Plots/250120_coordinates/Full_euclidean_distance_coordinates_line_Control.png"

        if os.path.exists(output_path):
            print("Skipping Control Brain region as the plot already exists.")
            continue

        BallTrajectories = Config.load_datasets_for_brain_region(brain_region, data_path, registries, downsample_factor=10)

        # Plot the Control brain region from the data that was just loaded.
        # The previous code passed `region_data`, which is either undefined
        # (Control listed first) or left over from another region with its
        # Control rows removed.
        try:
            control_nicknames = registries["control_nicknames"]

            Config.create_control_plot(BallTrajectories, control_nicknames, output_path)
            print("Processed Control Brain region")
        except Exception as e:
            print(f"Error processing Control Brain region: {e}")
            traceback.print_exc()

# Process Miscellaneous brain region
if miscellaneous_nicknames:
    try:
        output_path = "/mnt/upramdya_data/MD/MultiMazeRecorder/Plots/250120_coordinates/Full_euclidean_distance_coordinates_line_Miscellaneous.png"
        # Stack the rows of every small region into one frame for the pooled plot.
        miscellaneous_data = pd.concat(miscellaneous_frames, ignore_index=True)
        Config.create_and_save_plot(miscellaneous_data, miscellaneous_nicknames, "Miscellaneous", output_path, registries, show_signif=True)
        print("Processed Miscellaneous Brain region")
    except Exception as e:
        print(f"Error processing Miscellaneous Brain region: {e}")
        traceback.print_exc()



# Permutation test

In [26]:
# Load the coordinates of the CX brain region.
cx_feather = "/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/240120_short_contacts_no_cutoff_no_downsample_Data/coordinates_regions/CX.feather"
CX = pd.read_feather(cx_feather)

In [28]:
# Load the coordinates of the Control group.
control_feather = "/mnt/upramdya_data/MD/MultiMazeRecorder/Datasets/Skeleton_TNT/240120_short_contacts_no_cutoff_no_downsample_Data/coordinates_regions/Control.feather"
control = pd.read_feather(control_feather)

In [29]:
# Stack the CX and Control rows into a single frame with a fresh 0..n-1 index.
frames_to_stack = (CX, control)
CX_ctrl = pd.concat(frames_to_stack, ignore_index=True)

In [None]:
# Pick a random subset
# NOTE(review): presumably "random" makes get_subset_data draw a Nickname at
# random; if so the selection changes on every run unless a seed is fixed
# inside Config - confirm.

Test = Config.get_subset_data(CX_ctrl, "Nickname", "random")

In [None]:
# Preview the first rows of the selected subset.
Test.head()

In [32]:
# Split the data by Brain region: Control rows on one side, every other row
# (the focal line) on the other.

control_rows = Test["Brain region"] == "Control"

Focal = Test[~control_rows]

Control = Test[control_rows]

In [33]:
# Average distance_ball_0 across all rows sharing the same time value,
# giving one Series indexed by time for each group.

Focal_avg = Focal.groupby("time")["distance_ball_0"].mean()

Ctrl_avg = Control.groupby("time")["distance_ball_0"].mean()

In [59]:
# Move "time" out of the index and into a regular column. After this, both
# objects are DataFrames with a positional 0..n-1 index.
# Not idempotent: running this cell a second time adds an extra "index" column.

Focal_avg = Focal_avg.reset_index()

Ctrl_avg = Ctrl_avg.reset_index()

In [35]:
# Step 2: Ensure both DataFrames cover the same time points.
# After reset_index() the index is only a row position, so intersecting the
# indexes would pair rows by position rather than by time and could compare
# different time points whenever the two groups do not share every time value.
# Match on the values of the "time" column instead.
common_times = np.intersect1d(Focal_avg["time"], Ctrl_avg["time"])
# Both frames are sorted by time (groupby order), so after filtering to the
# shared times and renumbering, row i refers to the same time in both.
Focal_avg = Focal_avg[Focal_avg["time"].isin(common_times)].reset_index(drop=True)
Ctrl_avg = Ctrl_avg[Ctrl_avg["time"].isin(common_times)].reset_index(drop=True)

In [36]:
# Step 3: Call the permutation_test function
# NOTE(review): in notebook order Focal_avg and Ctrl_avg are DataFrames that
# still carry the "time" column at this point - confirm that permutation_test
# expects this rather than a time-indexed Series of distances.
# NOTE(review): no seed is passed here, so presumably the p-values vary
# slightly between runs - confirm how permutation_test draws its permutations.
observed_diff, p_values = Processing.permutation_test(Focal_avg, Ctrl_avg, n_permutations=1000)


In [None]:
# Positions of the time points whose raw (uncorrected) p-value is below the threshold
significance_level = 0.05
below_threshold = p_values < significance_level
significant_timepoints = np.where(below_threshold)[0]

print(f"Number of significant time points: {len(significant_timepoints)}")
print(f"Percentage of significant time points: {len(significant_timepoints) / len(p_values) * 100:.2f}%")

# Optional: Benjamini-Hochberg FDR correction across all time points
from statsmodels.stats.multitest import multipletests

fdr_result = multipletests(p_values, method='fdr_bh')
# multipletests returns four items; only the reject flags and the adjusted p-values are used.
rejected, p_values_corrected = fdr_result[0], fdr_result[1]
significant_timepoints_corrected = np.where(rejected)[0]

print(f"Number of significant time points after correction: {len(significant_timepoints_corrected)}")
print(f"Percentage of significant time points after correction: {len(significant_timepoints_corrected) / len(p_values) * 100:.2f}%")

In [None]:

# Observed focal-minus-control difference, with the time points that are
# significant on the raw (uncorrected) p-values highlighted in red.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(observed_diff.index, observed_diff.values, label='Observed Difference')
ax.scatter(
    observed_diff.index[significant_timepoints],
    observed_diff.values[significant_timepoints],
    color='red',
    label='Significant Points',
)
# Reference line at zero difference
ax.axhline(y=0, color='k', linestyle='--')
ax.set_xlabel('Time')
ax.set_ylabel('Difference in distance_ball_0')
ax.set_title('Permutation Test Results: Focal vs Control')
ax.legend()
plt.show()

In [None]:
# Plot the trajectories associated with each Nickname
# (one line per Nickname; each line is the mean over rows sharing a time value).
# `ci` is deprecated since seaborn 0.12; `errorbar=None` is its replacement.

sns.lineplot(data=Test, x="time", y="distance_ball_0", hue="Nickname", errorbar=None)

In [40]:
# Add a column to indicate significant timepoints
# significant_timepoints holds row positions into Focal_avg, so .iloc turns
# them back into time values; every row of Test recorded at one of those
# times is flagged True. These are the raw, uncorrected significant points.
Test['Significant'] = Test['time'].isin(Focal_avg.iloc[significant_timepoints]['time'])

In [None]:
# Re-import Config so that edits made to Config.py are picked up without restarting the kernel.
importlib.reload(Config)

In [43]:
# Time values of all rows flagged as significant (one entry per row, so a
# given time appears once for every row recorded at that time).
significant_times = Test.loc[Test['Significant'], 'time']
            

In [None]:
# Mean distance over time per Brain region, with significant time points shaded.
# sns.lineplot returns the Axes it drew on; keep it so that the spans and the
# labels are applied to that same Axes.
# `ci` is deprecated since seaborn 0.12; `errorbar=None` is its replacement.
ax = sns.lineplot(data=Test, x='time', y='distance_ball_0', hue='Brain region', palette=Config.color_dict, errorbar=None)

# significant_times holds one entry per row of Test, so the same time occurs
# many times. Shade each distinct time once; otherwise the translucent spans
# stack into an opaque band and the loop does a lot of redundant drawing.
for time_point in pd.unique(significant_times):
    ax.axvspan(time_point - 0.5, time_point + 0.5, color='red', alpha=0.3)

# Titles and labels are set once, on the Axes. pyplot has no set_title,
# set_xlabel or set_ylabel functions, so the previous plt.set_* calls raised
# AttributeError, and they also sat inside the loop.
ax.set_title('Test')
ax.set_xlabel('Time (s)')
# NOTE(review): the line drawn is seaborn's default estimator (the mean), not
# a median - confirm whether the label or the estimator should change.
ax.set_ylabel('Median Euclidean Distance')
    