# Dewan Lab EPM Analysis

## STEP 1: Always Execute! Load Libraries and User Settings
### STEP 1A: Import Libraries

In [None]:
import os
os.environ['ISX'] = '0'  # Set to zero so we don't try to load the isx module

import cv2
import numpy as np
import pandas as pd

from pathlib import Path
from tqdm import tqdm, trange

from dewan_calcium import plotting
from dewan_calcium.helpers import IO, parse_json, EPM
from dewan_calcium.helpers.project_folder import ProjectFolder

print("Importing required packages complete!")

### STEP 1B: User Configurables

In [None]:
# Session identifiers; combined into the file-name prefix used for every saved/loaded file
animal = 'ANIMAL_GOES_HERE'
date = 'DATE_GOES_HERE'

manual_start_frame = []  
# Leave empty to detect the experiment start from the DLC-tracked LED (STEP 3A)
# If DLC does a poor job tracking the LED, estimate the start frame by multiplying the time stamp by 60(FPS) to get an estimated start frame

EXPERIMENT_TIME = 10 # minutes
LED_CUTOFF = 0.95 # Threshold on the DLC 'led_p' column; frames above it count as LED-on (STEP 3A)
NUM_PSEUDOTRIALS = 12  # Number of pseudotrials to subsample from each cell
TRIM_END_S = 0  # Number of seconds to remove from the end of the data; equivalent to data = data[:-(TRIM_END_S * FPS)]

EXCLUDE_PSEUDOTRIAL_SHORT_CELLS = False  # Set to true to exclude cells that do not satisfy number_of_cell_pseudotrials >= NUM_PSEUDOTRIALS 
SAVE_LOAD_RNG_ENTROPY = False # If we want the "randomly sampled" pseudotrials to be the same every time the notebook is run, the seed for the random number generator needs to be saved

# === CONSTANTS === #
PSEUDOTRIAL_LEN_S = 2 # duration of pseudotrial in seconds
OPEN_ARM_LENGTH_CM = 79  # Measured
MAX_MOUSE_SPEED = 1  # m/s; converted to pixels/frame in STEP 3D to flag tracking jumps

### STEP 1C: Load Project Folder

In [None]:
# Build the ProjectFolder object that gathers and holds all of the file paths for this EPM session
project_folder = ProjectFolder('EPM')

# Prefix ('<animal>-<date>-') passed to every IO save/load call below
file_header = f'{animal}-{date}-'

In [None]:
# If this is the first time the project folder has been created,
# move the files to the appropriate directories and then run this cell; otherwise skip this cell
project_folder.get_data()

In [None]:
# Read the recording settings from the imaging session JSON and show them to the user

session_json_path = project_folder.raw_data_dir.session_json_path
gain, LED_power, ENDOSCOPE_FRAMERATE, focal_planes = parse_json.get_session_settings(session_json_path)

# One setting per line, in the same order as the values are returned above (LED power printed third)
print(
    f'Recording Gain: {gain}',
    f'Endoscope Framerate: {ENDOSCOPE_FRAMERATE}',
    f'LED Power: {LED_power}',
    f'Focal Plane(s): {focal_planes}',
    sep='\n',
)

## 2A: Import and pre-process the raw data

In [None]:
#STEP 2A.1: LOAD DLC DATA

tracked_points = pd.read_hdf(project_folder.raw_data_dir.points_h5_path)  # Load tracked points

labeled_video_path = project_folder.raw_data_dir.labeled_video_path
labeled_video = cv2.VideoCapture(str(labeled_video_path))  # Load Video

# cv2.VideoCapture does not raise when the file is missing or unreadable; it just stays closed.
# Stop here in that case, otherwise VIDEO_FPS comes back as 0 and every later
# frame <-> time conversion is silently wrong.
if not labeled_video.isOpened():
    raise FileNotFoundError(f'Could not open the labeled video: {labeled_video_path}')

VIDEO_FPS = labeled_video.get(cv2.CAP_PROP_FPS)  # Frames per second of the behavior video

In [None]:
#STEP 2A.2: LOAD INSCOPIX DATA
# All four files are located through the project folder's Inscopix directory

cell_trace_data = pd.read_csv(project_folder.inscopix_dir.cell_trace_path, engine='pyarrow')  # Cell traces; first column is time (see STEP 2A.3)
GPIO_data = pd.read_csv(project_folder.inscopix_dir.GPIO_path, header=0, engine='pyarrow')  # GPIO channels; split by 'ChannelName' in STEP 2A.3.5
all_cell_props = pd.read_csv(project_folder.inscopix_dir.props_path, header=0, engine='pyarrow')  # Per-cell properties ('Name', 'NumComponents', 'Status', ...)
cell_outlines = parse_json.get_outline_coordinates(project_folder.inscopix_dir.contours_path)  # Cell outlines, handed to the manual curation GUI

In [None]:
# STEP 2A.3: PREPROCESSING

# STEP 2A.3.1: The first row only holds the Inscopix default 'undecided' labels, so remove it
cell_trace_data = cell_trace_data.drop(index=[0])

# STEP 2A.3.2: Coerce every value to a number (anything non-numeric becomes NaN)
cell_trace_data = cell_trace_data.apply(pd.to_numeric, errors='coerce')

# The first column holds the times: round them to 2 decimal places and make them the index,
# so that every remaining column is a dF/F trace
time_column = cell_trace_data.columns[0]
cell_trace_data[time_column] = cell_trace_data[time_column].round(2)
cell_trace_data = cell_trace_data.set_index(time_column)

# STEP 2A.3.3: Strip spaces from the column names and from the GPIO channel names
cell_trace_data.columns = cell_trace_data.columns.str.replace(" ", "")
GPIO_data.columns = GPIO_data.columns.str.replace(" ", "")
GPIO_data['ChannelName'] = GPIO_data['ChannelName'].str.replace(" ", "")

# STEP 2A.3.4: Keep only the cells made of exactly one component, then drop the 'Status' column
has_single_component = all_cell_props['NumComponents'] == 1
all_cell_props = all_cell_props[has_single_component]
all_cell_props = all_cell_props.drop(columns='Status').reset_index(drop=True)
cell_names = all_cell_props['Name'].values

# STEP 2A.3.5: PARSE GPIO DATA
# Each signal lives on its own GPIO channel
channel_names = GPIO_data['ChannelName']
sniff_data = GPIO_data[channel_names == "GPIO-1"].reset_index(drop=True)
FV_data = GPIO_data[channel_names == "GPIO-2"].reset_index(drop=True)

# OPTIONAL UNUSED DATA
# running_data = GPIO_data[GPIO_data['ChannelName'] == "GPIO-3"]  # Running Wheel Data
# lick_data = GPIO_data[GPIO_data['ChannelName'] == "GPIO-4"]  # Lick Data

In [None]:
# STEP 2A.4: PREPROCESS DLC Data

# Supported DLC table layouts, keyed by column count: each tracked point contributes (x, y, p)
DLC_COLUMN_LAYOUTS = {
    6: ['mouse_x', 'mouse_y', 'mouse_p', 'led_x', 'led_y', 'led_p'],
    9: ['mouse_x', 'mouse_y', 'mouse_p', 'body_x', 'body_y', 'body_p', 'led_x', 'led_y', 'led_p'],
}

num_cols = len(tracked_points.columns)

# Fail with an explicit message for an unexpected layout instead of a NameError on `cols`
if num_cols not in DLC_COLUMN_LAYOUTS:
    raise ValueError(f'Unexpected number of DLC columns: {num_cols}. '
                     f'Supported column counts are {sorted(DLC_COLUMN_LAYOUTS)}.')

cols = DLC_COLUMN_LAYOUTS[num_cols]

# Reset the column names to something sensible
tracked_points.columns = cols

## STEP 2B: Manual Curation

In [None]:
# Launch the manual curation GUI with the traces, properties, and outlines loaded above;
# it returns the cells the user chose to keep
from dewan_manual_curation import dewan_manual_curation

curated_cells = dewan_manual_curation.launch_gui(project_folder_override=project_folder, cell_trace_data_override=cell_trace_data, cell_props_override=all_cell_props, cell_contours_override=cell_outlines)
# An empty selection is only reported, not raised
# NOTE(review): STEP 2C indexes the data with curated_cells, so an empty result probably should stop the notebook here -- confirm
if not curated_cells:
    print('Error, no good cells selected!')

### STEP 2C: Apply Manual Curation Results and Additional Preprocessing

In [None]:
# STEP 2C.1: Restrict the properties table and the traces to the cells kept during manual curation

was_curated = all_cell_props['Name'].isin(curated_cells)
curated_cell_props = all_cell_props[was_curated].reset_index(drop=True)
cell_names = curated_cell_props['Name']

# Traces are selected by column name, in the order given by curated_cells
curated_trace_data = cell_trace_data[curated_cells]

In [None]:
# STEP 2C.2 (Optional): Trim time from end of data if TRIM_END_S is set
if TRIM_END_S > 0:
    num_frames = curated_trace_data.shape[0]
    exp_length_frames = EXPERIMENT_TIME * 60 * ENDOSCOPE_FRAMERATE  # Time in minutes * 60 s/min * frames/s
    # Whole number of frames: the product may be a float, and .iloc only accepts integer bounds
    frames_to_trim = int(round(ENDOSCOPE_FRAMERATE * TRIM_END_S))
    new_total_frames = num_frames - frames_to_trim
    print(f'Total Fluorescence Frames: {num_frames}; Experiment Length Frames: {exp_length_frames}; Frames to Trim: {frames_to_trim}\n'
          f'New Total Fluorescence Frames: {new_total_frames}')
    if new_total_frames < exp_length_frames:
        raise ValueError('You cannot trim the fluorescence data to be shorter than the experiment length. \n'
                         'Reduce EXPERIMENT_TIME to make the total experiment shorter, or reduce TRIM_END_S so less data is trimmed!')

    # Keep the first new_total_frames rows, i.e. remove frames_to_trim rows from the END of the data.
    # (Slicing with [:frames_to_trim] would instead keep only the first frames_to_trim rows.)
    curated_trace_data = curated_trace_data.iloc[:new_total_frames, :]

### STEP 2D: Pickle and Save all preprocessed data

In [None]:
# Save the preprocessed data in case it is needed later:
# curated cell traces, GPIO data, FV data, curated cell properties, sniff data,
# the DLC tracked points, and the video frame rate
# Once these have been saved, this step doesn't need to be re-run unless the data itself changes

folder = project_folder.analysis_dir.preprocess_dir.path

# file name on disk -> object to save (written in this order)
preprocessed_outputs = {
    'curated_trace_data': curated_trace_data,
    'GPIO_data': GPIO_data,
    'FV_data': FV_data,
    'curated_cell_props': curated_cell_props,
    'sniff_table': sniff_data,
    'tracked_points': tracked_points,
    'VIDEO_FPS': VIDEO_FPS,
}

for output_name, output_data in preprocessed_outputs.items():
    IO.save_data_to_disk(output_data, output_name, file_header, folder)

### Checkpoint 1

In [None]:
# Reloads everything saved in STEP 2D.  If the files have already been saved, the notebook
# can be re-run starting from this point (after STEP 1)

folder = project_folder.analysis_dir.preprocess_dir.path

curated_trace_data = IO.load_data_from_disk('curated_trace_data', file_header, folder)
GPIO_data = IO.load_data_from_disk('GPIO_data', file_header, folder)
FV_data = IO.load_data_from_disk('FV_data', file_header, folder)
curated_cell_props = IO.load_data_from_disk('curated_cell_props', file_header, folder)
sniff_data = IO.load_data_from_disk('sniff_table', file_header, folder)

cell_names = curated_cell_props['Name']  # List of cells, referenced periodically

tracked_points = IO.load_data_from_disk('tracked_points', file_header, folder)
VIDEO_FPS = IO.load_data_from_disk('VIDEO_FPS', file_header, folder)

# The video capture object is not saved to disk, so it is reopened from the raw data directory
folder = project_folder.raw_data_dir.path
labeled_video = cv2.VideoCapture(str(project_folder.raw_data_dir.labeled_video_path))  # Load Video

### STEP 3: Process DLC Output and Get EPM Bounds

In [None]:
# STEP 3A: Get True Start/End Time of Experiment
# There may be an instance where the model erroneously identified the LED for very short time periods
# find_index_bins groups the LED-on frames (anywhere led_p > LED_CUTOFF) into on/off bins
# get_true_bin_index ensures that the "true" start bin is selected and the "end" bin is not erroneously selected

if not manual_start_frame:
    LED_indexes = tracked_points.index[tracked_points['led_p'] > LED_CUTOFF].values
    led_bins = EPM.find_index_bins(LED_indexes)
    true_led_bin = EPM.get_true_bin_index(led_bins, len(tracked_points))
    led_on = true_led_bin[0] # The first instance where the LED is 'on'
else:
    # manual_start_frame defaults to a list and the estimate (time stamp * FPS) may be a float.
    # Accept a bare number or a one-element list and reduce it to a whole frame number,
    # otherwise the frame arithmetic and the .iloc slice below raise a TypeError.
    led_on = int(np.ravel(manual_start_frame)[0])

experiment_frames = int(VIDEO_FPS * 60 * EXPERIMENT_TIME)  # FPS * 60 s/min * experiment length in minutes --> number of frames
end_frame = led_on + experiment_frames

# .iloc silently clips a slice that runs past the end of the table, so tell the user
# when the video is too short to cover the requested experiment length
if end_frame > len(tracked_points):
    print(f'Warning: the experiment should end at frame {end_frame}, but only {len(tracked_points)} frames were tracked; '
          f'the position data will be shorter than EXPERIMENT_TIME!')

good_points = tracked_points.iloc[led_on:end_frame] # Subset the frames from LED_ON -> EXPERIMENT_TIME minutes later
good_points = good_points.reset_index(drop=True) # Reset the index

# Get the head X, Y coordinates as integers and pair them up: one (x, y) row per video frame
head_x = good_points['mouse_x'].astype(int)
head_y = good_points['mouse_y'].astype(int)
coordinates = np.column_stack((head_x, head_y))

In [None]:
## STEP 3B: Display ROI Instructions
# Run this before STEP 3C, where the arm ROIs are drawn

EPM.display_roi_instructions()

In [None]:
## STEP 3C.1: Get user to label open and closed arms

%matplotlib qt  
# Opens the matplotlib window using the QT backend

labeled_video.set(cv2.CAP_PROP_POS_FRAMES, led_on - 1) # Pull the frame that is our actual start
_, background_image = labeled_video.read()

arm_coordinates = EPM.get_arm_rois(background_image)

# Switch back to using inline displays
%matplotlib inline

## STEP 3C.2: Split two arms into the five regions
individual_regions, original_regions = EPM.get_region_polygons(arm_coordinates)  
# ([open_arm_1, open_arm_2, closed_arm_1, closed_arm_2, center_polygon], [open_arm, closed_arm, center])

## STEP 3C.3: Display image of EPM
fig, ax = plotting.plot_epm_roi(original_regions, background_image)

### STEP 3D (Optional): Interpolate jumps in animal's position

In [None]:
## Step 3D.1: Calculate Maximum Speed of Mouse in (pixels / frame)

open_arm_length_px = original_regions.loc['open_arm'].Length

# Unit conversion, one step at a time (same left-to-right arithmetic as a single expression):
max_speed_cm_per_s = MAX_MOUSE_SPEED * 100  # m/s * 100 cm/m
max_speed_arms_per_s = max_speed_cm_per_s / OPEN_ARM_LENGTH_CM  # cm/s / (cm per open arm)
max_speed_px_per_s = max_speed_arms_per_s * open_arm_length_px  # open arms/s * (pixels per open arm)
mouse_max_movement_threshold = round(max_speed_px_per_s / VIDEO_FPS, 3)  # pixels/s / (frames/s)

# STEP 3D.2: Interpolate jumps in animal's position
# Movements larger than the threshold between frames are treated as DLC tracking jumps and interpolated
thresh, num_jumps, coordinates = EPM.interpolate_DLC_coordinates(
    coordinates,
    threshold=mouse_max_movement_threshold,
)
print(f"There were {num_jumps} jumps that required interpolation.")
print(f"The interpolation threshold used was {thresh}")

### STEP 3E: Save EPM Regions 

In [None]:
## Save the ROI figure as a PDF, then the ROI data itself

folder = project_folder.analysis_dir.figures_dir.subdir('EPM_ROI')
image_path = folder.joinpath('EPM_ROI.pdf')
fig.savefig(str(image_path), dpi=600)

folder = project_folder.analysis_dir.preprocess_dir.subdir('EPM_ROI')

# file name on disk -> object to save (written in this order)
roi_outputs = {
    'arm_coordinates': arm_coordinates,
    'individual_regions': individual_regions,
    'original_regions': original_regions,
    'background_image': background_image,
}

for output_name, output_data in roi_outputs.items():
    IO.save_data_to_disk(output_data, output_name, file_header, folder)

### STEP 4: Isolate dF/F Data for Experiment

In [None]:
# STEP 4A: Find when the final valve (FV) signal switches on and off, based on the TTL pulse from the Arduino.
# The EPM experiment has no final valve; the same sync signal used in the odor experiments marks when the LED is triggered

FV_values = FV_data['Value'].astype(float).values # Get FV Values
num_values = len(FV_values)

FV_on_indexes = []
FV_off_indexes = []
valve_status = 0  # 0 = closed, 1 = open; the valve starts closed

# Change between each sample and the next one: sample_steps[i] == FV_values[i + 1] - FV_values[i]
sample_steps = np.diff(FV_values)

for i in trange((num_values - 1), desc="Processing: "):
    step = sample_steps[i]

    if valve_status == 0 and step > 10000:
        # Very large positive step while closed: the valve opened at the next sample
        FV_on_indexes.append(i + 1)
        valve_status = 1
    elif valve_status != 0 and step < -10000:
        # Very large negative step while open: the valve closed after this sample
        FV_off_indexes.append(i)
        valve_status = 0

# One row per complete On -> Off pulse
FV_indexes = pd.DataFrame(zip(FV_on_indexes, FV_off_indexes), columns=['On', 'Off'])

In [None]:
# STEP 4B: Trim dF/F data to the FV On and Off Times

# Without at least one complete On -> Off pulse there is no sync point. Fail with a clear
# message instead of the bare KeyError that indexing an empty table would give.
if FV_indexes.empty:
    raise ValueError('No sync pulse was found in FV_data; cannot determine when the experiment started!')

experiment_start_index = FV_indexes['On'][0]
FV_timestamps = FV_data['Time(s)']
trial_start_time = FV_timestamps[experiment_start_index]  # Trial start time in unix time (s)
trial_end_time = trial_start_time + (EXPERIMENT_TIME * 60)  # End time is whatever the duration of the experiment was in minutes

cell_trace_times = curated_trace_data.index.values

# Last trace frame at or before the start / end time
cell_trace_on_index = np.where(cell_trace_times <= trial_start_time)[0][-1]
cell_trace_off_index = np.where(cell_trace_times <= trial_end_time)[0][-1]  # We can't overshoot otherwise the coordinate will not match, so we may drop a single frame

# Take an explicit copy so that re-indexing and the column inserts in later steps work on
# their own table rather than on a slice of curated_trace_data
trimmed_trace_data = curated_trace_data.iloc[cell_trace_on_index:cell_trace_off_index, :].copy()

# Re-express the trace times relative to the first trimmed frame (t = 0), rounded to 2 decimals
trimmed_cell_trace_times = trimmed_trace_data.index.values
shifted_cell_trace_times = np.subtract(trimmed_cell_trace_times, trimmed_cell_trace_times[0])
rounded_cell_trace_times = np.round(shifted_cell_trace_times, 2)

trimmed_trace_data.index = rounded_cell_trace_times

# Give each DLC frame a time stamp: frame number / video FPS, rounded to 2 decimals.
# The frame numbers are rebuilt from the row count instead of read from the current index,
# so re-running this cell does not divide an already-converted index by VIDEO_FPS again.
good_points_time = np.divide(np.arange(len(good_points)), VIDEO_FPS)
good_points_time = np.round(good_points_time, 2)
good_points.index = good_points_time

In [None]:
## Step 4C: Align Cell Traces with the DLC Data
## Since the DLC data is typically recorded at 6X the rate of the neural data, there are typically multiple data points we can choose for the coordinate of a trace
## For simplicity, we pick the last DLC frame at or before the time point of the trace
## In the future we can do some averaging or picking the median, etc.

trace_times = trimmed_trace_data.index.values
point_times = good_points.index.values  # DLC frame times from STEP 4B; ascending, which searchsorted requires

# For every trace time, find the position of the last DLC time that is <= that trace time.
# One binary search per trace time replaces a full scan of the DLC times per trace time.
trace_coordinate_indexes = np.searchsorted(point_times, trace_times, side='right') - 1

# -1 means the trace time lies before the first DLC time; stop rather than wrapping around to the last frame
if (trace_coordinate_indexes < 0).any():
    raise IndexError('A trace time is earlier than the first DLC time point; re-run STEP 4B!')

trace_coordinate_indexes = trace_coordinate_indexes.tolist()

trace_coordinates = coordinates[trace_coordinate_indexes]
trace_coordinates = trace_coordinates.tolist()
trimmed_trace_data.insert(0, 'Coordinate_Index', trace_coordinate_indexes)
trimmed_trace_data.insert(0, 'Coordinates', trace_coordinates)

## Step 4D: Save trace data

In [None]:
## Save the paired coordinates - trace data

folder = project_folder.analysis_dir.preprocess_dir.path

IO.save_data_to_disk(trimmed_trace_data, 'trimmed_trace_data', file_header, folder)

# true_led_bin is only computed in STEP 3A when the start frame is detected from the LED.
# With a manual start frame it is undefined (or left over from an earlier run), so skip it.
if not manual_start_frame:
    IO.save_data_to_disk(true_led_bin, 'true_led_bin', file_header, folder)

## CHECKPOINT 2

In [None]:
## Load the ROIs and image
# Reloads what STEP 3E and STEP 4D saved so the notebook can be resumed from here (after STEP 1)

folder = project_folder.analysis_dir.preprocess_dir.subdir('EPM_ROI')
arm_coordinates = IO.load_data_from_disk('arm_coordinates', file_header, folder)
individual_regions = IO.load_data_from_disk('individual_regions', file_header, folder)
original_regions = IO.load_data_from_disk('original_regions', file_header, folder)
background_image = IO.load_data_from_disk('background_image', file_header, folder)

# Trace data paired with coordinates, plus the curated cell list
folder = project_folder.analysis_dir.preprocess_dir.path
trimmed_trace_data = IO.load_data_from_disk('trimmed_trace_data', file_header, folder)
curated_cell_props = IO.load_data_from_disk('curated_cell_props', file_header, folder)
cell_names = curated_cell_props['Name']

## STEP 5: Associate coordinates with arms

In [None]:
## STEP 5A: Find the arm each coordinate is located in
# Get the region for each time point from the (x, y) coordinate paired with each trace frame in STEP 4C

animal_coordinates = trimmed_trace_data['Coordinates']
coordinate_locations, region_indexes = EPM.get_regions(animal_coordinates, individual_regions) # What region each (x, y) is in

In [None]:
## STEP 5B (Optional): Interpolate the locations classified as "The_Void"
# Run this cell if you would like to interpolate any position that fell outside the 5 regions

void_indexes = np.where(coordinate_locations == "The_Void")[0]  # "The_Void" is the location assigned to points that fall outside of the five regions (open1, open2, closed1, closed2, center). This can occur if the animal looks over the side of the open arms
void_index_bins = EPM.find_index_bins(void_indexes)  # Group the void indexes into bins
coordinate_locations, region_indexes = EPM.replace_the_void(coordinate_locations, region_indexes, void_index_bins)

In [None]:
## STEP 5C (Optional): Get distance into each arm
# Optional: Run this cell if you would like to calculate the "distance" into the arm the animal has traveled

# Pair each (x, y) coordinate with the index of the region it was assigned to
coordinate_pairs = list(zip(animal_coordinates, region_indexes))
distances = EPM.get_distances(individual_regions, coordinate_pairs)  # Return distance from animal -> occupied region
# distances = EPM.normalize_distance(individual_regions, coordinate_locations, distances)
# If desired, the distances can be normalized to the length of the arms. All positions become 'percentages' of the length along the arm
trimmed_trace_data.insert(0, 'Distance', distances)  # Added as the first column

In [None]:
## STEP 5D: Add animal Location to the main list of trace data
# The region names are inserted as the first column, 'Location'

trimmed_trace_data.insert(0, 'Location', coordinate_locations)

## STEP 6: Create 'PSEUDOTRIALS'

In [None]:
## STEP 6A: Find transitions from region -> region
# Gather all visits per each region
# The results feed the pseudotrial segmentation in STEP 6B

animal_locations = trimmed_trace_data['Location']
transitions, arm_indexes = EPM.find_region_transitions(animal_locations)

In [None]:
# Seed handed to the pseudotrial subsampling in STEP 6B; stays None unless SAVE_LOAD_RNG_ENTROPY is set
rng_seed = None

if SAVE_LOAD_RNG_ENTROPY:  # If we are saving/loading
    folder = project_folder.analysis_dir.preprocess_dir.path

    # A missing file may surface either as FileNotFoundError or as a None return value;
    # treat both as "no saved entropy"
    try:
        entropy_value = IO.load_data_from_disk('entropy_value', file_header, folder)
    except FileNotFoundError:
        entropy_value = None

    if entropy_value is not None:
        print('Found entropy file, reloading old seed!')
        rng_seed = np.random.SeedSequence(entropy=entropy_value)
    else:
        print('Entropy file not found. Creating new seed!')
        # No saved entropy: create a new sequence and save its entropy so the next run can reuse it
        rng_seed = np.random.SeedSequence()
        entropy_value = rng_seed.entropy
        IO.save_data_to_disk(entropy_value, 'entropy_value', file_header, folder)

In [None]:
## STEP 6B.1: Segment transitions into trials that meet the PSEUDOTRIAL_LEN_S length criteria

pseudotrials, trial_stats = EPM.get_pseudotrials(arm_indexes, transitions, PSEUDOTRIAL_LEN_S, ENDOSCOPE_FRAMERATE)

# Subsample NUM_PSEUDOTRIALS pseudotrials; rng_seed comes from the previous cell (None unless SAVE_LOAD_RNG_ENTROPY is set)
pseudotrials = EPM.subsample_pseudotrials(pseudotrials, NUM_PSEUDOTRIALS, rng_seed)
## STEP 6B.2: Print PSEUDOTRIAL Stats
# The stats are calculated on the subsampled pseudotrials
pseudotrial_stats = EPM.calc_pseudotrial_stats(pseudotrials, trial_stats)

EPM.print_pseudotrial_stats(pseudotrial_stats)
EPM.save_pseudotrial_stats(pseudotrial_stats, project_folder)

In [None]:
## STEP 6C: Gather the dF/F values for each pseudotrial

# Whole number of imaging frames covered by one pseudotrial
frames_per_pseudotrial = int(np.floor(PSEUDOTRIAL_LEN_S * ENDOSCOPE_FRAMERATE))

# One (initially empty) list of trace windows per arm
pseudotrial_traces = {arm_name: [] for arm_name in ('open1', 'open2', 'closed1', 'closed2')}

for arm, arm_visits in pseudotrials.items():
    for visit in arm_visits:
        start_index = visit['start']
        trial_rows = slice(start_index, start_index + frames_per_pseudotrial)
        # Select the cell columns first, then the rows that belong to this pseudotrial
        pseudotrial_traces[arm].append(trimmed_trace_data[cell_names].iloc[trial_rows])

In [None]:
## STEP 6D: Get mean dF/F values for each trial

# Result per arm: one row per pseudotrial, one column per cell
pseudotrial_means = {}

for arm in ('open1', 'open2', 'closed1', 'closed2'):
    # Mean over the frames of each pseudotrial -> one Series (indexed by cell) per trial
    trial_means = [trial.mean(axis=0) for trial in pseudotrial_traces[arm]]

    # Build the table in one step; each Series becomes a row. This avoids repeatedly
    # concatenating onto an initially empty DataFrame (slow, and deprecated in recent pandas).
    # An arm without pseudotrials yields an empty DataFrame.
    pseudotrial_means[arm] = pd.DataFrame(trial_means).reset_index(drop=True)

## Step 6E: Save PSEUDOTRIALS

In [None]:
# Save every pseudotrial product to the 'pseudotrials' output folder
folder = project_folder.analysis_dir.output_dir.subdir('pseudotrials')

# file name on disk -> object to save (written in this order)
pseudotrial_outputs = {
    'pseudotrials': pseudotrials,
    'trial_stats': trial_stats,
    'transitions': transitions,
    'arm_indexes': arm_indexes,
    'pseudotrial_traces': pseudotrial_traces,
    'pseudotrial_means': pseudotrial_means,
}

for output_name, output_data in pseudotrial_outputs.items():
    IO.save_data_to_disk(output_data, output_name, file_header, folder)

### Checkpoint 3

In [None]:
# Reloads everything saved in Step 6E so the notebook can be resumed from here (after STEP 1)
folder = project_folder.analysis_dir.output_dir.subdir('pseudotrials')

pseudotrials = IO.load_data_from_disk('pseudotrials', file_header, folder)
trial_stats = IO.load_data_from_disk('trial_stats', file_header, folder)
transitions = IO.load_data_from_disk('transitions', file_header, folder)
arm_indexes = IO.load_data_from_disk('arm_indexes', file_header, folder)
pseudotrial_traces = IO.load_data_from_disk('pseudotrial_traces', file_header, folder)
pseudotrial_means = IO.load_data_from_disk('pseudotrial_means', file_header, folder)

# The curated cell list lives with the preprocessed data
folder = project_folder.analysis_dir.preprocess_dir.path
curated_cell_props = IO.load_data_from_disk('curated_cell_props', file_header, folder)
cell_names = curated_cell_props['Name']

## Step 7A: auROC Analysis

In [None]:
from dewan_calcium import AUROC

# The two open arms are pooled into one group and the two closed arms into the other
groups = (['open1', 'open2'], ['closed1', 'closed2'])
# NOTE(review): num_workers is hard-coded to 20; presumably the number of parallel workers -- confirm it suits the machine running the notebook
AUROC_results = AUROC.pooled_EPM_auroc(pseudotrial_means, groups, num_workers=20)

## Step 7B: Save auROC output

In [None]:
# Save the auROC results to the 'AUROC' output folder
folder = project_folder.analysis_dir.output_dir.subdir('AUROC')
IO.save_data_to_disk(AUROC_results, 'AUROC_results', file_header, folder)

### Checkpoint 4

In [None]:
# Reloads everything needed for Step 8 so the notebook can be resumed from here (after STEP 1)
folder = project_folder.analysis_dir.output_dir.subdir('AUROC')
AUROC_results = IO.load_data_from_disk('AUROC_results', file_header, folder)

# Curated cell list and the coordinate-paired trace data
folder = project_folder.analysis_dir.preprocess_dir.path
curated_cell_props = IO.load_data_from_disk('curated_cell_props', file_header, folder)
cell_names = curated_cell_props['Name']
trimmed_trace_data = IO.load_data_from_disk('trimmed_trace_data', file_header, folder)
# Background frame used for the track plot
folder = project_folder.analysis_dir.preprocess_dir.subdir('EPM_ROI')
background_image = IO.load_data_from_disk('background_image', file_header, folder)


## Step 8: Output and Graph Results

In [None]:
## STEP 8A: Output auROC results

# One row per entry of AUROC_results: [auROC, direction index, (lower, upper) bounds, significance]
# The direction index rescales the auROC as 2 * (auROC - 0.5)
auroc_output = [
    [
        round(data['auroc'], 2),
        round(2 * (data['auroc'] - 0.5), 2),
        (data['lb'], data['ub']),
        data['significance'],
    ]
    for data in AUROC_results
]

# Rows are labelled with the cell names, so AUROC_results is expected to be in the same order as cell_names
auroc_output = pd.DataFrame(
    auroc_output,
    index=cell_names,
    columns=['auROC', 'direction_index', 'bounds', 'significant'],
)

# Write the table to an Excel file in the output directory
folder = project_folder.analysis_dir.output_dir.path
file_name = f'{file_header}EPM_data_output.xlsx'
file_path = folder.joinpath(file_name)
auroc_output.to_excel(file_path)

In [None]:
## STEP 8B: Graph shuffle histograms and auROC histograms

# Note: this overwrites `coordinates` with the (x, y) pairs stored alongside the trace data
coordinates = trimmed_trace_data['Coordinates'].values
line_coordinates = EPM.generate_position_lines(coordinates)
plotting.plot_auroc_distribution(AUROC_results, project_folder)
plotting.plot_auroc_shuffles(AUROC_results, project_folder)
# The animal's track is drawn over the background frame
plotting.plot_animal_track(line_coordinates, background_image, project_folder) 