# Table of Contents

1. [Import Packages](#Import-Packages)
2. [Import Mouse Metadata](#Import-Mouse-Metadata)
3. [Concatenating all DLC results](#Concatenating-all-DLC-results)
4. [Preprocessing](#Preprocessing)
5. [Map Grid Nodes to NodeType](#Map-Grid-Nodes-to-NodeType)
6. [Create Velocity column](#Create-Velocity-column)
7. [Save Preprocessed Combined csv & Individual csvs](#Save-Preprocessed-Combined-csv-&-Individual-csvs)


# Import Packages

In [1]:
import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
# autoreload (mainly for testing purposes)
# Loads IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell execution, so edits to the project's .py helper modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Folder holding the project's preprocessing helpers, resolved relative to the
# parent of the notebook's working directory.
CUSTOM_UTILS_PATH = Path.cwd().parent / 'src' / 'behavior' / 'preprocessing'
if CUSTOM_UTILS_PATH.exists():
    # Guarded so that re-running this cell does not keep appending the same
    # entry to sys.path.
    if str(CUSTOM_UTILS_PATH) not in sys.path:
        sys.path.append(str(CUSTOM_UTILS_PATH))
    # Import custom analysis modules.
    # The star import is deliberate: later cells call helpers by bare name
    # (presumably compile_mouse_sessions, preprocess_sessions and
    # ensure_velocity_column live here -- confirm against preprocessing_utils).
    try:
        import preprocessing_utils
        from preprocessing_utils import *
        print("Utils loaded successfully!")
    except ImportError as e:
        # Best effort: warn and continue; later cells will fail with NameError
        # if the helpers are really missing.
        print(f"Warning: Could not import custom modules: {e}")
else:
    print(f"Warning: Custom utils path not found: {CUSTOM_UTILS_PATH}")


# Project configuration. Importing it prints the configured video locations,
# DLC scorer, tracked body parts and experimental groups, and the star import
# exposes its constants to the cells below.
import init_config
from init_config import *

Utils loaded successfully!
Location of Computer 1 Videos: 
Location of Computer 2 Videos: 
Central Video Location: c:\Users\PalopLabPortal\Gladstone Dropbox\Shreya Bangera\Labyrinth Mazes discussions\Code\TEST_COMPASS\videos\original_videos
DLC Scorer: DLC_resnet50_LabyrinthMar13shuffle1_1000000
Tracking bodyparts: nose, belly, sternum, leftflank, rightflank, tailbase
Experimental groups: ['A', 'B', 'C', 'D']


In [4]:
# Path to resources folder (sibling of the notebook's working directory)
RESOURCE_UTILS_PATH = Path(os.getcwd()).parent / "resources"

# Fail early with an explicit message when the folder is missing.
if not RESOURCE_UTILS_PATH.exists():
    raise FileNotFoundError(f"Resources folder not found at {RESOURCE_UTILS_PATH}")

# Only needed if importing .py utils from the resources folder. Guarded so that
# re-running the cell does not add duplicate sys.path entries.
if str(RESOURCE_UTILS_PATH) not in sys.path:
    sys.path.append(str(RESOURCE_UTILS_PATH))

# Full path to the adjacency-matrix CSV
csv_path = RESOURCE_UTILS_PATH / "4step_adjacency_matrix.csv"

# Load as DataFrame (columns Grid0 ... Grid143 in the shipped file) and show
# the first rows as a quick sanity check.
NODE4ADJ = pd.read_csv(csv_path)
print(NODE4ADJ.head())

   Grid0  Grid1  Grid2  Grid3  Grid4  Grid5  Grid6  Grid7  Grid8  Grid9  ...  \
0      1      1      1      0      0      0      0      0      0      0  ...   
1      1      1      0      0      0      0      0      0      0      0  ...   
2      1      0      1      1      1      1      0      0      0      0  ...   
3      0      0      1      1      1      1      0      0      0      0  ...   
4      0      0      1      1      1      1      0      0      0      0  ...   

   Grid134  Grid135  Grid136  Grid137  Grid138  Grid139  Grid140  Grid141  \
0        0        0        0        0        0        0        0        0   
1        0        0        0        0        0        0        0        0   
2        0        0        0        0        0        0        0        0   
3        0        0        0        0        0        0        0        0   
4        0        0        0        0        0        0        0        0   

   Grid142  Grid143  
0        0        0  
1        0  

In [5]:
import matplotlib

# Global figure defaults, applied in one call:
#   - Arial as the font family for all text
#   - SVG text is written as text rather than converted to paths
#   - PDF output embeds TrueType (type 42) fonts
matplotlib.rcParams.update({
    'font.family': 'Arial',
    'svg.fonttype': 'none',
    'pdf.fonttype': 42,
})

# Import Mouse Metadata

In [6]:
# Read in mouse metadata file (one sheet per trial type; the path, file name and
# sheet name come from the star-imported configuration).
mouseinfo = pd.read_excel(os.path.join(METADATA_PATH,METADATA_FILE), sheet_name=TRIAL_TYPE)
print('Metadata columns: ',mouseinfo.columns)

# Consider only non-NA Sessions
mouseinfo = mouseinfo.dropna(subset=['Session #'])

# Drop the trials flagged for exclusion. The flag is hand-entered in the
# spreadsheet, so compare it case- and whitespace-insensitively: 'Yes', 'YES'
# and 'yes ' are all treated as 'yes'. Blank or NaN cells are kept.
exclude_flag = mouseinfo['Exclude Trial'].fillna('').astype(str).str.strip().str.lower()
mouseinfo = mouseinfo.loc[exclude_flag != 'yes'].reset_index(drop=True)

Metadata columns:  Index(['Session #', 'Run Date', 'Time Of Day', 'Rack Location', 'Computer',
       'Noldus Chamber', 'Camera #', 'Noldus Trial', 'Noldus Project Name',
       'DSI Trial', 'ID', 'Name', 'Sex', 'Status', 'Line', 'Genotype',
       'GenotypeOG', 'Birth Date', 'Age (months)', 'Housing ID',
       'Transmitter #', 'Exclude Trial', 'X1', 'X2', 'Y1', 'Y2', 'NOTES'],
      dtype='object')


# Concatenating all DLC results

In [8]:
#--------------- SET THESE VALUES --------------#
# Tracked body part used for every session (one of the body parts listed by the
# configuration printout).
bp = 'sternum'

# Region label -> node list. The lists are not defined in this notebook;
# presumably they come from the star-imported init_config -- confirm.
region_mapping = {
    'Target Zone': Target_Zone,
    'Entry Zone': Entry_Zone,
    'Reward Path': Reward_Path,
    'Dead Ends': Dead_Ends,
    'Neutral Zone': Neutral_Zone,
    'Loops': Loops,
}


#____________________________________________
# Build the combined table from the pose-estimation results of every session
# kept in `mouseinfo`.
df_comb = compile_mouse_sessions(
    mouseinfo=mouseinfo,
    bp=bp,
    region_mapping=region_mapping,
    DLCscorer=DLC_SCORER,
    pose_est_csv_filepath=POSE_EST_CSV_PATH,
)


# Preprocessing

In [10]:
#--------------- SET THESE VALUES --------------#

# Define the list of initial entry nodes (if different)
# NOTE(review): presumably these are grid-node numbers at the maze entrance --
# confirm against the maze layout before changing.
initial_nodes = [47, 46, 34, 22]


#____________________________________________________________
# Run full preprocessing
# Inputs: df_comb (combined table built in the previous section), NODE4ADJ
# (adjacency table loaded from resources/4step_adjacency_matrix.csv) and the
# entry nodes above. preprocess_sessions is not defined in this notebook; it is
# expected to come from the star-imported project utilities.
df_all_csv = preprocess_sessions(df_comb, NODE4ADJ, initial_nodes)


# Map Grid Nodes to NodeType

In [12]:
#--------------- SET THESE VALUES --------------#

# Node-list variable name (as in Initializations) -> label written to NodeType.
# Entries are applied top to bottom, so when a grid node appears in more than
# one list the label of the later entry is the one that remains.
label_mapping = {
    'Decision_Reward': 'Decision (Reward)',
    'NonDecision_Reward': 'Non-Decision (Reward)',
    'Corner_Reward': 'Corner (Reward)',
    'Decision_NonReward': 'Decision (Non-Reward)',
    'NonDecision_NonReward': 'Non-Decision (Non-Reward)',
    'Corner_NonReward': 'Corner (Non-Reward)',
    'Entry_Zone': 'Entry Nodes',
    'Target_Zone': 'Target Nodes'
}


#_______________________________________________________________
# Every row starts as 'Unlabeled'; rows whose grid node is in none of the lists
# keep that value.
df_all_csv['NodeType'] = 'Unlabeled'

# Each key names a list living in the notebook namespace: fetch it by name and
# label the rows whose 'Grid Number' belongs to it.
notebook_vars = globals()
for list_name in label_mapping:
    rows_in_list = df_all_csv['Grid Number'].isin(notebook_vars[list_name])
    df_all_csv.loc[rows_in_list, 'NodeType'] = label_mapping[list_name]

# Create Velocity column

In [14]:
#--------------- SET THESE VALUES --------------#
# NOTE(review): presumably the frame rate (frames per second) of the behaviour
# tracking data -- confirm it matches the recordings.
behav_fps=5


#_____________________________________
# ensure_velocity_column is not defined in this notebook (expected from the
# star-imported project utilities). Judging by its name it adds a velocity
# column when one is missing -- confirm in the helper's source.
df_all_csv = ensure_velocity_column(df_all_csv, fps=behav_fps)

# Save Preprocessed Combined csv & Individual csvs

In [12]:
# Output layout: <BASE_PATH>/csvs/combined and <BASE_PATH>/csvs/individual
csv_dir = os.path.join(BASE_PATH, 'csvs')
combined_dir = os.path.join(csv_dir, 'combined')
individual_dir = os.path.join(csv_dir, 'individual')

# Create both folders if they don't exist yet
for out_dir in (combined_dir, individual_dir):
    os.makedirs(out_dir, exist_ok=True)

# Combined file: every session in a single CSV, written without the index
combined_path = os.path.join(combined_dir, 'Preprocessed_combined_file.csv')
df_all_csv.to_csv(combined_path, index=False)
print(f"Saved combined file: {combined_path}")

# Individual files: one CSV per distinct value of the 'Session' column
session_groups = df_all_csv.groupby('Session')
for session_id, session_rows in session_groups:
    session_path = os.path.join(individual_dir, f'Session-{session_id}_preprocessed.csv')
    session_rows.to_csv(session_path, index=False)

print(f"Saved {df_all_csv['Session'].nunique()} individual session CSVs to: {individual_dir}")

Saved combined file: c:\Users\PalopLabPortal\Gladstone Dropbox\Shreya Bangera\Labyrinth Mazes discussions\Code\TEST_COMPASS\csvs\combined\Preprocessed_combined_file.csv
Saved 5 individual session CSVs to: c:\Users\PalopLabPortal\Gladstone Dropbox\Shreya Bangera\Labyrinth Mazes discussions\Code\TEST_COMPASS\csvs\individual
