# Dewan Lab HFvFM Analysis

## STEP 1: Always Execute! Load Libraries and User Settings
### STEP 1A: Import Libraries

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.environ['ISX'] = '0'  # Set to zero so we don't try to load the isx module

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm, trange

# TODO: Figure out OASIS numpy complaint
from dewan_calcium import AUROC, plotting, deconv
from dewan_calcium.helpers import IO, parse_json, HFvFM
from dewan_calcium.helpers.project_folder import ProjectFolder

print("Importing required packages complete!")

Importing required packages complete!


### STEP 1B: User Configurables

In [3]:
# Identifiers used to build the `file_header` prefix for the files this notebook saves and loads
animal = 'Combined_HFvFM'
date = '10-24-2024'

HF_first = True  # NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed
TRIAL_DURATION_S = 300  # Trial duration in seconds; multiplied by ENDOSCOPE_FRAMERATE to get the per-trial frame count
PSEUDOTRIAL_LEN_S = 2  # Pseudotrial length in seconds (passed to HFvFM.get_dff_for_pseudotrials)
AUROC_NUM_PSEUDOTRIALS = 20  # NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed
ENDOSCOPE_FRAMERATE = 10  # presumably frames per second — TODO confirm units

DECAY_TIME_S = 0.4  # Time in seconds for the decay of 10 action potentials (0.4 for GCaMP6f)
RISE_TIME_S = 0.08  # Time in seconds for the rise to peak of 10 action potentials (0.08 for GCaMP6f)
INTER_SPIKE_INTERVAL_S = 0.1 # Time in seconds that must elapse before another "spike"
PEAK_MIN_DUR_S = 0.4  # Time in seconds that must elapse for a "peak" to be considered a "spike"

### STEP 1C: Load Project Folder

In [4]:
# Build the ProjectFolder object that gathers and holds all the file paths used below.
# NOTE(review): project_dir is an absolute, machine-specific path; edit it before running elsewhere.
project_folder = ProjectFolder(
    'HFvFM',
    combined=True,
    project_dir=r'C:\Projects\Test_Data\4_Combined\HFvFM',
)

# Prefix placed in front of every file name this notebook saves or loads
file_header = f'{animal}-{date}-'

In [5]:
# First run only: if the project folder has just been created, move the files into the
# appropriate directories and then run this cell. Otherwise, skip this cell.
project_folder.get_data()

In [6]:
# Load the combined traces from the xz-compressed pickle.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
combined_data = pd.read_pickle(project_folder.raw_data_dir.combined_data_path, compression={'method': 'xz'})
# Level 0 of the column index supplies the cell names, level 1 the trial labels.
# NOTE(review): only the trial labels are de-duplicated (np.unique also sorts them). The level-0
# values are taken once per column, so cell_names repeats a name if a cell owns more than one
# column — confirm that is what the downstream helpers expect.
cell_names = combined_data.columns.get_level_values(0).values
trial_labels = np.unique(combined_data.columns.get_level_values(1).values)
# Alternative sources for the same two variables, currently disabled:
#trial_labels = IO.load_data_from_disk('trial_labels', file_header, folder)
#cell_names = curated_cell_props['Name']

In [7]:
# STEP 6A: Smooth raw dF/F values with OASIS
# The smoothing parameters are derived from the frame rate and the decay/rise times set in STEP 1B.
smoothing_kernel = deconv.calc_smoothing_params(ENDOSCOPE_FRAMERATE, DECAY_TIME_S, RISE_TIME_S)
# workers=20 is hard-coded; presumably the size of the worker pool — TODO confirm it suits this machine
smoothed_trace_data = deconv.pooled_deconvolution(combined_data, smoothing_kernel, workers=20)

  return_dicts = process_map(partial_function, iterable, max_workers=workers)


  0%|          | 0/1418 [00:00<?, ?it/s]

In [10]:
# STEP 6B: Identify transients
# z-score the smoothed traces first, then locate peaks in the z-scored data
z_scored_data = deconv.z_score_data(smoothed_trace_data, cell_names)
transient_indexes = deconv.find_peaks(
    z_scored_data,
    cell_names,
    ENDOSCOPE_FRAMERATE,
    INTER_SPIKE_INTERVAL_S,
    PEAK_MIN_DUR_S,
)

AxisError: axis 0 is out of bounds for array of dimension 0

In [9]:
# Save the smoothed traces into the output directory under the name 'combined_smoothed_data'
IO.save_data_to_disk(smoothed_trace_data, 'combined_smoothed_data', file_header, project_folder.analysis_dir.output_dir.path)

Combined_HFvFM-10-24-2024-combined_smoothed_data has been saved!


In [None]:
# STEP 6C: Get Stats for Each Cell
# Each cell's transient entry gains a 'stats' key holding the per-trial statistics
for cell_name in cell_names:
    transients = transient_indexes[cell_name]
    transients['stats'] = HFvFM.calc_transient_stats(transients, trial_labels)
    transient_indexes[cell_name] = transients

In [None]:
# STEP 6D: Stack Trials from each Cell
stacked_indices = HFvFM.stack_trial_indices(trial_labels, cell_names, transient_indexes)

In [None]:
# STEP 6E: Offset each trial's transient indices by the combined length of the trials before it
from functools import partial

trial_length_frames = ENDOSCOPE_FRAMERATE * TRIAL_DURATION_S  # Max trial length

# NOTE: this cell rewrites stacked_indices in place (offset, then transpose).
# Re-run STEP 6D before running it a second time, or the offsets are applied twice.
for i, trial in enumerate(trial_labels):
    if i > 0:
        # Trial i starts after i full-length trials. offset_value already contains the
        # factor i, so it is applied as-is; multiplying by i again would shift the
        # trial by i**2 trial lengths and disagree with the value printed below.
        offset_value = i * trial_length_frames
        print(f'{trial}: {offset_value}')

        add_offset = partial(HFvFM.add_if_num, offset_value)
        stacked_indices[trial] = stacked_indices[trial].map(add_offset)

    # Transpose and label the rows with the cell names
    stacked_indices[trial] = stacked_indices[trial].T
    stacked_indices[trial].index = cell_names

In [None]:
# STEP 6F: Combine Stats into a DataFrame
# Gather every cell's 'stats' entry, then transpose so each cell becomes one row
stats_by_cell = {cell: transient_indexes[cell]['stats'] for cell in cell_names}
cell_stats = pd.DataFrame(stats_by_cell).T

In [None]:
# STEP 6G: Combine Stats, Transients, and Remove Spaces

# Label every column of a trial's frame with the trial name. set_axis returns a new
# frame, so the frames stored in stacked_indices are not modified by this cell.
labelled_trials = [
    stacked_indices[trial].set_axis([trial] * len(stacked_indices[trial].columns), axis=1)
    for trial in trial_labels
]

# Concatenate all trials side by side in one call instead of growing a DataFrame inside
# a loop (which re-copies the accumulated frame on every pass and starts from an empty frame).
all_transients = pd.concat(labelled_trials, axis=1) if labelled_trials else pd.DataFrame()

# Remove the gaps in each row: drop its NaN values and rebuild the row from what is left.
# The rebuilt rows are re-indexed 0..n-1, so the trial column labels do not survive this step.
packed_transients = all_transients.apply(lambda row: pd.Series(row.dropna().values), axis=1)

# Per-cell stats go in the leading columns, followed by the packed transient locations
super_mega_ultra_df = pd.concat((cell_stats, packed_transients), axis=1)

file_name = f'{file_header}TransientLocations.xlsx'
path = project_folder.analysis_dir.output_dir.path.joinpath(file_name)

super_mega_ultra_df.to_excel(path)

In [None]:
# STEP 6H: Save deconv data
folder = project_folder.analysis_dir.output_dir.subdir('deconv')

# Each object is saved under its own variable name, in this order
deconv_outputs = {
    'smoothed_trace_data': smoothed_trace_data,
    'z_scored_data': z_scored_data,
    'transient_indexes': transient_indexes,
}
for output_name, output_data in deconv_outputs.items():
    IO.save_data_to_disk(output_data, output_name, file_header, folder)

## CHECKPOINT 3

In [None]:
# Reload everything the later steps need, so the notebook can resume from this point.
# NOTE(review): the earlier cells take cell_names / trial_labels from combined_data's columns
# and read combined_data from the raw data directory; this checkpoint reads them from
# different files — confirm those files exist for a combined project.
folder = project_folder.analysis_dir.preprocess_dir.path
curated_cell_props = IO.load_data_from_disk('curated_cell_props', file_header, folder)
trial_labels = IO.load_data_from_disk('trial_labels', file_header, folder)
cell_names = curated_cell_props['Name']

folder = project_folder.analysis_dir.combined_dir.path
combined_data = IO.load_data_from_disk('combined_data', file_header, folder)

# Deconvolution outputs, loaded in the same order they were saved
folder = project_folder.analysis_dir.output_dir.subdir('deconv')
smoothed_trace_data, z_scored_data, transient_indexes = (
    IO.load_data_from_disk(saved_name, file_header, folder)
    for saved_name in ('smoothed_trace_data', 'z_scored_data', 'transient_indexes')
)

## Step 7: Pseudotrial creation and auROC Analysis

In [None]:
# STEP 7A: Baseline shift the data so there are no negative numbers
# First .min() gives the minimum of each column, the second the minimum of those values,
# i.e. the smallest value in the whole frame. Its magnitude is added to every entry.
min_value = abs(combined_data.min().min())
combined_data_shift = combined_data + min_value

In [None]:
# STEP 7B: Get Pseudotrials and stack the data
# Split the shifted dF/F data into pseudotrials per cell ...
pseudotrial_dff_per_cell = HFvFM.get_dff_for_pseudotrials(
    combined_data_shift,
    cell_names,
    trial_labels,
    PSEUDOTRIAL_LEN_S,
    ENDOSCOPE_FRAMERATE,
)
# ... then stack them with the same helper used for the transient indices
stacked_pseudotrials = HFvFM.stack_trial_indices(
    trial_labels,
    cell_names,
    pseudotrial_dff_per_cell,
)

In [None]:
# STEP 7C: Get mean dF/F value for each trial
pseudotrial_means = {}

for trial in trial_labels:
    # Transpose the trial's frame, then replace every entry with its mean
    per_cell_means = stacked_pseudotrials[trial].T.map(np.mean)
    # Label the rows with the cell names
    per_cell_means.index = cell_names
    pseudotrial_means[trial] = per_cell_means

In [None]:
# STEP 7D: Run auROC on below groups and save data
hf_trials = ['HF-1', 'HF-2']
fm_trials = ['FM-1', 'FM-2']
groups = [hf_trials, fm_trials]

auroc_returns = AUROC.pooled_HFFM_auroc(
    pseudotrial_means,
    groups,
    num_workers=20,
)

# Store the results in the 'AUROC' subdirectory of the output directory
folder = project_folder.analysis_dir.output_dir.subdir('AUROC')
IO.save_data_to_disk(auroc_returns, 'auroc_returns', file_header, folder)

## CHECKPOINT 4

In [None]:
# Reload the auROC results saved at the end of STEP 7D so the notebook can resume from here
folder = project_folder.analysis_dir.output_dir.subdir('AUROC')
auroc_returns = IO.load_data_from_disk('auroc_returns', file_header, folder)

## Step 8: Create figures and save data

In [None]:
# STEP 8A: Plot auROC Figures
# Both plotting helpers receive the auROC results and the project folder object.
# NOTE(review): presumably the figures are written inside the project folder — confirm in `plotting`.
plotting.plot_auroc_distribution(auroc_returns, project_folder)
plotting.plot_auroc_shuffles(auroc_returns, project_folder)

In [None]:
# STEP 8B: Restructure returned items into a dataframe and save
auroc_return_dict = {}

for cell_data in auroc_returns:
    # Work on a shallow copy so the records in auroc_returns keep their 'name' entry and
    # their unrounded values. Popping from the originals made this cell raise KeyError
    # when it was run a second time.
    cell_record = dict(cell_data)
    cell_name = cell_record.pop('name')

    auroc_val = round(cell_record['auroc'], 3)
    # Direction index = 2 * (auROC - 0.5), so an auROC of 0.5 maps to 0
    direction_index = round(2 * (auroc_val - 0.5), 2)

    cell_record['auroc'] = auroc_val
    cell_record['direction_index'] = direction_index

    auroc_return_dict[cell_name] = cell_record

# One row per cell, one column per field of the record
HFvFM_df = pd.DataFrame(auroc_return_dict).T
folder = project_folder.analysis_dir.output_dir.path
file_name = f'{file_header}HFvFM_data_output.xlsx'
file_path = folder.joinpath(file_name)
HFvFM_df.to_excel(file_path)