
# Dewan Lab Image Analysis

## STEP 1: Always Execute! Load Libraries and User Settings

### STEP 1A: Import Libraries

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm, trange
from dewan_calcium.helpers import data_stores, DewanIOhandler, DewanJSON
from dewan_calcium.helpers.project_folder import ProjectFolder
from dewan_calcium import AUROC, plotting, DewanStats

from dewan_manual_curation import dewan_manual_curation

pd.options.mode.copy_on_write = "warn"

### STEP 1B: User Configurables

In [None]:
# Session identifiers; they are joined into `file_header`, which is used in the names of saved/loaded files
animal = 'ANIMAL_GOES_HERE'
date = 'DATE_GOES_HERE'

# Padding (seconds) applied around each trial in STEP 3B.1:
# pre_trial_time is subtracted from each FV-on time, post_trial_time is added to each FV-off time
pre_trial_time = 3.5  # Imaging time before the final valve opens
post_trial_time = 3.5  # Imaging time after final valve closes

# Configurables for AUROC
baseline_duration = 2  # number of seconds before the FV turns on
response_duration = 2  # number of seconds after the FV turns off
plot_figures = True  # NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed

In [None]:
# Build the ProjectFolder object that gathers and holds all the file paths used below.
# Point `test_path` at the root directory of the experiment being analyzed.

# test_path = Path("C:\\Projects\\Test_Data\\VGLUT-20")  # On Desktop
test_path = Path("/mnt/dev/Test_Data/Odor/VGLUT-20")  # On Fedora

# Prefix shared by the files this notebook saves and loads: '<animal>-<date>-'
file_header = f'{animal}-{date}-'
project_folder = ProjectFolder(project_dir=test_path)

In [None]:
# If this is the first time the project folder has been created,
# move the files to the appropriate directories and then run this cell, otherwise skip this cell
# NOTE(review): presumably get_data() (re)scans the project directories for the expected input files —
# confirm against ProjectFolder

project_folder.get_data()

In [None]:
# Get settings from imaging session and display them for the user.
# The settings are read from the session JSON located through the project folder's raw-data directory.

gain, LED_power, focal_planes = DewanJSON.get_session_settings(project_folder.raw_data_dir.session_json_path)

print(f'Recording Gain: {gain}')
print(f'LED_power: {LED_power}')
print(f'Focal Planes: {focal_planes}')

## STEP 2: Data Import and Preprocessing

### 2A: Import and pre-process the raw data

In [None]:
# STEP 2A.1: LOAD INSCOPIX DATA
# Reads the cell trace, GPIO, and cell property CSVs, the odor list spreadsheet, and the cell contour JSON
# from the paths held by project_folder.

cell_trace_data = pd.read_csv(project_folder.inscopix_dir.cell_trace_path, engine='pyarrow')
GPIO_data = pd.read_csv(project_folder.inscopix_dir.GPIO_path, header=0, engine='pyarrow')
all_cell_props = pd.read_csv(project_folder.inscopix_dir.props_path, header=0, engine='pyarrow')
odor_data = pd.read_excel(project_folder.raw_data_dir.odorlist_path, usecols=[0], header=None, engine='openpyxl') # usecols=[0] because we only care about the first column which has the odornames, row number == trial number
cell_outlines = DewanJSON.get_outline_coordinates(project_folder.inscopix_dir.contours_path)  # TODO: remove cell keys from the json function

In [None]:
# STEP 2A.2: PREPROCESSING

# STEP 2A.2.1: Discard row 0, which holds the Inscopix default 'undecided' labels, then
# STEP 2A.2.2: coerce every remaining value to a number (anything non-numeric becomes NaN)
cell_trace_data = cell_trace_data.drop([0]).apply(pd.to_numeric, errors='coerce')

# Round the time column (first column) to 2 decimal places and make it the index,
# so the remaining columns hold only dF/F values
time_column = cell_trace_data.columns[0]
cell_trace_data[time_column] = cell_trace_data[time_column].round(2)
cell_trace_data = cell_trace_data.set_index(time_column)

# STEP 2A.2.3: Strip spaces from the column names of both tables and from the GPIO channel names
for table in (cell_trace_data, GPIO_data):
    table.columns = table.columns.str.replace(" ", "")
GPIO_data['ChannelName'] = GPIO_data['ChannelName'].str.replace(" ", "")

# STEP 2A.2.4: Keep only the cells made of a single component and drop the 'Status' column
single_component = all_cell_props['NumComponents'] == 1
all_cell_props = all_cell_props[single_component].drop(columns='Status').reset_index(drop=True)
cell_names = all_cell_props['Name'].values

# STEP 2A.2.5: PARSE GPIO DATA
# GPIO-1 carries the sniff signal and GPIO-2 the final valve (FV) signal
channel_names = GPIO_data['ChannelName']
sniff_data = GPIO_data[channel_names == "GPIO-1"].reset_index(drop=True)
FV_data = GPIO_data[channel_names == "GPIO-2"].reset_index(drop=True)

# STEP 2A.2.6: Name the odor column and collect the unique odors (as strings) to reuse for consistency
odor_data = pd.Series(odor_data[0], name='Odors')
odor_list = odor_data.unique().astype(str)

# OPTIONAL UNUSED DATA
# running_data = GPIO_data[GPIO_data['ChannelName'] == "GPIO-3"]  # Running Wheel Data
# lick_data = GPIO_data[GPIO_data['ChannelName'] == "GPIO-4"]  # Lick Data


In [None]:
## Run if someone forgot to put delimiters in the odor names
# Inserts a '-' after the first character of every odor name that starts with a digit
# (e.g. '1Hexanal' -> '1-Hexanal'). All other entries are kept unchanged.

new_odor_data = []

for each in odor_data:
    try:
        int(each[0])  # Raises unless the entry is a string whose first character is a digit
    except (ValueError, TypeError, IndexError):
        # ValueError: the first character is not a digit
        # TypeError: the entry is not a string (e.g. NaN from a blank spreadsheet row)
        # IndexError: the entry is an empty string
        pass
    else:
        # Names that already have the delimiter are left alone, so re-running this cell is harmless
        if each[1:2] != '-':
            each = '-'.join([each[0], each[1:]])

    new_odor_data.append(each)

# Build the Series once after the loop rather than rebuilding it on every iteration
odor_data = pd.Series(new_odor_data, name='Odors')
odor_list = odor_data.unique().astype(str)

### STEP 2B: Manual Curation

In [None]:
# STEP 2B.2: Run ManualCuration GUI
# The GUI is handed the project folder, cell traces, cell contours, and cell names prepared above.
# Its return value is used in STEP 2C.1 to select the curated cells.
curated_cells = dewan_manual_curation.launch_gui(project_folder_override=project_folder, cell_trace_data_override=cell_trace_data, cell_contours_override=cell_outlines, cell_names_override=cell_names)
if curated_cells is None:
    # Only a message is printed; STEP 2C.1 indexes with curated_cells, so re-run this cell before continuing
    print('Error, no good cells selected!')

### STEP 2C: Apply Manual Curation Results and Additional Preprocessing

In [None]:
# STEP 2C.1: Filter all data by the GoodCells identified in ManualCuration
# Properties: keep the rows whose 'Name' is in curated_cells; traces: keep the curated_cells columns
curated_cell_props = all_cell_props[all_cell_props['Name'].isin(curated_cells)].reset_index(drop=True)
curated_trace_data = cell_trace_data[curated_cells]
# From here on cell_names is a pandas Series (it was a numpy array of all single-component cells in STEP 2A.2.4)
cell_names = curated_cell_props['Name']

### STEP 2D: Pickle and Save all preprocessed data

In [None]:
# Save every preprocessed object in case it is needed later:
# curated cell traces, GPIO data, odor data, odor list, FV data, curated cell properties, and sniff data.
# Once saved, these steps do not need to be re-run unless the underlying data changes.

folder = project_folder.analysis_dir.preprocess_dir.path

# Save name -> object, written in this order
preprocessed_outputs = {
    'curated_trace_data': curated_trace_data,
    'GPIO_data': GPIO_data,
    'odor_data': odor_data,
    'odor_list': odor_list,
    'FV_data': FV_data,
    'curated_cell_props': curated_cell_props,
    'sniff_table': sniff_data,
}

for save_name, data in preprocessed_outputs.items():
    DewanIOhandler.save_data_to_disk(data, save_name, file_header, folder)

## Checkpoint 1: Load Preprocessed Data

In [None]:
# Opens the saved pickle files.  If the files have already been saved, code can be re-run
# starting from this point
# Requires the STEP 1 cells to have been run first so that project_folder and file_header exist.

folder = project_folder.analysis_dir.preprocess_dir.path


# Each name below matches the name used when the object was saved in STEP 2D
curated_trace_data = DewanIOhandler.load_data_from_disk('curated_trace_data', file_header, folder)
GPIO_data = DewanIOhandler.load_data_from_disk('GPIO_data', file_header, folder)
odor_data = DewanIOhandler.load_data_from_disk('odor_data', file_header, folder)
odor_list = DewanIOhandler.load_data_from_disk('odor_list', file_header, folder)
FV_data = DewanIOhandler.load_data_from_disk('FV_data', file_header, folder)
curated_cell_props = DewanIOhandler.load_data_from_disk('curated_cell_props', file_header, folder)
sniff_data = DewanIOhandler.load_data_from_disk('sniff_table', file_header, folder)  # Saved under the name 'sniff_table'
cell_names = curated_cell_props['Name']  # List of cells, referenced periodically

## STEP 3: Indexing and Aligning FV/Sniff/CellTrace Data

In [None]:
# STEP 3A: Locate the final valve (FV) open/close events in the TTL pulse from the Arduino.
# While the valve is closed, a sample-to-sample jump above +10000 marks an opening (index of the sample after the jump).
# While the valve is open, a drop below -10000 marks a closing (index of the sample before the drop).
FV_values = FV_data['Value'].astype(float).values # Get FV Values
num_values = len(FV_values)
valve_status = 0  # 0 -> closed, 1 -> open; the valve starts closed
FV_on_indexes = []
FV_off_indexes = []
for i, valve_val_diff in enumerate(tqdm(np.diff(FV_values), desc="Processing: ")):
    if valve_status == 0 and valve_val_diff > 10000:
        FV_on_indexes.append(i + 1)
        valve_status = 1  # Valve is now open
    elif valve_status == 1 and valve_val_diff < -10000:
        FV_off_indexes.append(i)
        valve_status = 0  # Valve is now closed

# zip pairs each opening with its closing; an opening without a matching closing is dropped
FV_indexes = pd.DataFrame(zip(FV_on_indexes, FV_off_indexes), columns=['On', 'Off'])

In [None]:
# STEP 3B.1: Convert the FV on/off sample indexes to times, then widen them by the pre/post trial offsets
time_points = FV_data['Time(s)']

FV_on_times = time_points.iloc[FV_indexes['On']]
FV_off_times = time_points.iloc[FV_indexes['Off']]
FV_times = pd.DataFrame(zip(FV_on_times, FV_off_times), columns=['On', 'Off'])

# A trial runs from pre_trial_time before the valve opens to post_trial_time after it closes
trial_start_times = FV_on_times - pre_trial_time
trial_end_times = FV_off_times + post_trial_time
trial_times = pd.DataFrame(zip(trial_start_times, trial_end_times), columns=['Start', 'End'])

In [None]:
# STEP 3B.2: Find the start/end indexes for the CellTrace data based on the closest time points for each trial
# NOTE: Needed because the sample rate of the GPIO and the Endoscope are different, so the time points do not always perfectly line up
# Occasionally, you will get trials that are 1 frame/sample longer/shorter than each other due to this mismatch
cell_trace_start_indices = []
cell_trace_stop_indices = []

# Trace times are the index of curated_trace_data
# NOTE(review): the [0][-1] / [0][0] lookups below assume these times are sorted ascending — confirm
time_points = curated_trace_data.index.values

for i, each in enumerate(tqdm(trial_times['Start'], desc="Trial: ")):
    if time_points[-1] < trial_times['End'].iloc[-1] and i == len(trial_times['Start'])-1:
        # This is an edge case for when the last trial got cut off early or the experiment crashed
        # It checks to see if the EndTime occurred after the last available time point
        # The last trial is skipped, so cell_trace_indices can end up one row shorter than trial_times
        continue
        
    # Indexing the np.where result raises IndexError if no time point satisfies the comparison
    cell_trace_start_indices.append(np.where(time_points <= each)[0][-1]) # Find first value less than/= the start time. We would always rather start 1 frame early than late
    cell_trace_stop_indices.append(np.where(time_points >= trial_times['End'].iloc[i])[0][0]) # Find the first value greater than/= the end time. We would always rather stop 1 frame late than early

cell_trace_indices = pd.DataFrame(zip(cell_trace_start_indices, cell_trace_stop_indices), columns = ['Start', 'Stop'])

In [None]:
# STEP 3C: Find the start/end indexes for the SNIFF data based on the closest time points for each trial
# Compiles data for sniffing from good trials
sniff_start_indices = []
sniff_end_indices = []

# Same lookup as STEP 3B.2, but against the sniff channel's own time column
time_points = sniff_data['Time(s)']

for i in trange(len(trial_times), desc="Sniff Trial: "):
    if time_points.iloc[-1] < trial_times['End'].iloc[-1] and i == len(trial_times['Start'])-1:
        # This is an edge case for when the last trial got cut off early or the experiment crashed
        # It checks to see if the EndTime occurred after the last available time point
        continue
    # Indexing the np.where result raises IndexError if no time point satisfies the comparison
    sniff_start_indices.append(np.where(time_points <= trial_times['Start'].iloc[i])[0][-1]) # Find first index less than/= the start time. We would always rather start 1 frame early than late
    sniff_end_indices.append(np.where(time_points >= trial_times['End'].iloc[i])[0][0]) # Find the first index greater than/= the end time. We would always rather stop 1 frame late than early
    
# Note the column names here are 'Start'/'End', whereas cell_trace_indices uses 'Start'/'Stop'
sniff_indices = pd.DataFrame(zip(sniff_start_indices, sniff_end_indices), columns=['Start', 'End'])

In [None]:
# STEP 3D: TRIM ODOR LIST
# Count trials from cell_trace_indices rather than FV_indexes: STEP 3B.2 skips the final trial when the
# recording ended before that trial's end time, so cell_trace_indices can hold one fewer trial than FV_indexes.
# STEP 4A labels one row per cell_trace_indices entry with these odors and STEP 4D indexes cell_trace_indices
# with range(num_trials), so both need this count.
num_trials = len(cell_trace_indices)
odor_data = odor_data.iloc[:num_trials] # If any trials on the end are cut off, we need to trim them from the list

# TODO: STEP 3E: SAVE SNIFF DATA INTO SEPARATE FILE, CURRENTLY BROKEN, DON'T USE

## STEP 4: Gather all cell v. time v. trial data into single array

In [None]:
# STEP 4A: COMBINE ALL OF THE CELL TRACE DATA INTO A CELL X TRIAL X FRAMES ARRAY
combined_data = []
num_cells = len(cell_names)
odor_labels = odor_data.astype(str)  # All the odors as strings
trial_bounds = cell_trace_indices[['Start', 'Stop']].values  # One (start, stop) pair per trial

for cell in tqdm(cell_names, desc="Cell: "): # Loop through each cell
    cell_trace = curated_trace_data[cell]
    # Slice out every trial and renumber its frames from 0 so the trials line up frame-by-frame
    trials = [cell_trace.iloc[start:stop].reset_index(drop=True) for start, stop in trial_bounds]
    # One row per trial labeled with its odor, then transposed so columns are trials and rows are frames
    cell_data = pd.DataFrame(trials, index=odor_labels).T.reset_index(drop=True)
    combined_data.append(cell_data)

In [None]:
# STEP 4B: CROP THE ARRAY TO THE SHORTEST TRIAL TO GET RID OF TRAILING NaNs
# The per-cell tables are joined side by side under a column level keyed by cell name.
# Dropping every row (frame) that contains a NaN leaves only the frames present in every trial.
# NOTE(review): the second column level is named 'Frames' but holds the odor labels set in STEP 4A — confirm downstream code expects this name
combined_data = pd.concat(combined_data, axis=1, keys=cell_names, names=['Cells', 'Frames'])
combined_data = combined_data.dropna(axis=0)

In [None]:
# STEP 4C: BASELINE SHIFT THE DATA SO THERE ARE NO NEGATIVE NUMBERS
# The absolute value of the global minimum is added to every value
min_value = abs(combined_data.min().min()) # Get minimum for each column, then the minimum of those values
combined_data_shift = combined_data.add(min_value)

In [None]:
# STEP 4D: GET TIMESTAMPS FOR EACH TRIAL
# Note: There are two lists of timestamps.
# List 1) FinalValveTimeMap ranges from preTrialTime -> Final Valve On Time -> postTrialTime then the Final Valve On Time is subtracted from the whole list to set the FVOnTime to zero (e.g -3.5 -> 0 -> 3.5)
# List 2) All the raw time values in Unix Time Form (Inscopix time output)

FV_timestamps = []
unix_timestamps = []

trace_times = curated_trace_data.index.values

for trial in trange(num_trials, desc="Trial: "): # Loop through each trial
    # Same start/stop frame bounds that were used to slice the traces in STEP 4A
    start_index, end_index = cell_trace_indices.iloc[trial]
    timestamps = trace_times[start_index:end_index].astype(float)

    # Shift the trial's timestamps so that the final valve opening is at time zero
    zero_time = FV_times['On'].iloc[trial]
    FV_time = timestamps - zero_time
    FV_timestamps.append(FV_time)
    unix_timestamps.append(timestamps)
    
# One row per trial; trials with fewer frames are padded with NaN at the end
FV_timestamps = pd.DataFrame(FV_timestamps)
unix_timestamps = pd.DataFrame(unix_timestamps)

# STEP 4E: CROP THE ARRAYs TO THE SHORTEST TRIAL TO GET RID OF TRAILING NaNs
# Drops every frame column that is missing in at least one trial
FV_timestamps = FV_timestamps.dropna(axis=1)
unix_timestamps = unix_timestamps.dropna(axis=1)

# STEP 4F: Transpose and add odor names
# After the transpose, rows are frames and columns are trials labeled by odor
FV_timestamps = FV_timestamps.T
FV_timestamps.columns = odor_labels
unix_timestamps = unix_timestamps.T
unix_timestamps.columns = odor_labels

## STEP 5: SAVE THE COMBINED DATA
##### The combined data file contains a Table of Contents sheet with the properties of each cell (centroid, number of components, size), a TimeMap sheet (rows -> frames; columns -> trials, labeled by odor), and one sheet per cell in which each column is a trial labeled with its odor and each row is a frame.

In [None]:
# STEP 5A: CREATE TABLE OF CONTENTS FOR CELL DESCRIPTORS
column_names = ['Name', 'CentroidX', 'CentroidY', 'NumComponents', 'Size']
toc = curated_cell_props[column_names]
toc = toc.set_index('Name', drop=True)

# STEP 5B: SET FILE PATH AND CREATE EXCEL-SHEET WRITER
file_name = f'{file_header}CombinedData.xlsx'
path = project_folder.analysis_dir.combined_dir.path.joinpath(file_name)

# The context manager closes the workbook even if one of the writes below raises
with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
    # STEP 5C: WRITE TABLE OF CONTENTS
    toc.to_excel(writer, sheet_name='TOC')

    # STEP 5D: WRITE FINAL VALVE TIME MAP FOR ALL TRIALS
    # Rows are frames and columns are trials labeled by odor (see STEP 4D)
    FV_timestamps.to_excel(writer, sheet_name='TimeMap')

    # STEP 5E: WRITE ALL CELL TRACE DATA
    # One sheet per cell, holding that cell's baseline-shifted traces
    for cell in tqdm(cell_names, desc="Writing Cell: "):
        combined_data_shift[cell].to_excel(writer, sheet_name=f'Cell {cell}')

# STEP 5F: ORGANIZE AND WRITE SNIFF DATA TO FILE TODO: Keep or discard?
# headers = []
# for i in range(len(SniffData[0,:])):
#     headers.append('Trial ' + str(i))
#     SniffDF = pd.DataFrame(SniffData)
#     sniff_path = f'./CombinedData/{file_header}/SniffData.xlsx'
#     SniffDF.to_excel(sniff_path, sheet_name = 'Data', header=headers)

### STEP 5H: Save information needed for AUROC
##### The combined data excel sheet is saved to __./ImagingAnalysis/CombinedData/__
##### Any data needed for the AUROC analysis that has not been saved will be saved in __./ImagingAnalysis/AUROCImports__

In [None]:
# The combined trace data (raw and baseline-shifted) goes in the combined-data directory
folder = project_folder.analysis_dir.combined_dir.path
DewanIOhandler.save_data_to_disk(combined_data, 'combined_data', file_header, folder)
DewanIOhandler.save_data_to_disk(combined_data_shift, 'combined_data_shifted', file_header, folder)

# The FV indexes and both timestamp tables go in the preprocessed-data directory
folder = project_folder.analysis_dir.preprocess_dir.path
DewanIOhandler.save_data_to_disk(FV_indexes, 'FV_indexes', file_header, folder)
DewanIOhandler.save_data_to_disk(unix_timestamps, 'unix_timestamps', file_header, folder)
DewanIOhandler.save_data_to_disk(FV_timestamps, 'FV_timestamps', file_header, folder)

### Checkpoint 2: Load Data for AUROC

In [None]:
# Reload everything the AUROC step needs. Requires the STEP 1 cells to have been run first.
# cell_names is not reloaded here; run Checkpoint 1 as well when resuming from this point.
folder = project_folder.analysis_dir.combined_dir.path
combined_data_shift = DewanIOhandler.load_data_from_disk('combined_data_shifted', file_header, folder)

folder = project_folder.analysis_dir.preprocess_dir.path
FV_data = DewanIOhandler.load_data_from_disk('FV_data', file_header, folder)
FV_indexes =  DewanIOhandler.load_data_from_disk('FV_indexes', file_header, folder)
unix_timestamps = DewanIOhandler.load_data_from_disk('unix_timestamps', file_header, folder)
FV_timestamps = DewanIOhandler.load_data_from_disk('FV_timestamps', file_header, folder)
odor_data = DewanIOhandler.load_data_from_disk('odor_data', file_header, folder)
odor_list = DewanIOhandler.load_data_from_disk('odor_list', file_header, folder)

## STEP 6: AUROC
### STEP 6A: RUN AUROC

In [None]:
# NOTE(review): testing leftover — this restricts the AUROC run and every later cell to the first cell only.
# Confirm whether this cell should be skipped for a full analysis run.
cell_names = cell_names[:1]  ## For testing
combined_data_shift = combined_data_shift[cell_names]

In [None]:
# STEP 6A.1: RUN AUROC FOR ON-TIME CELLS
# Note: On time cells are those that respond during the stimulus window (0s-2s)
# NOTE(review): the positional arguments 20 and False are not explained in this notebook; presumably 20 is a
# worker count and False selects the on-time window (the latent call below passes True) — confirm against
# AUROC.new_pooled_auroc
on_time_AUROC_return = AUROC.new_pooled_auroc(combined_data_shift, FV_timestamps, baseline_duration, 20, False) # This takes a long time!

# # STEP 6A.2: RUN AUROC FOR LATENT CELLS
# Note: Latent cells are those that respond immediately after the stimulus window (2s-4s)
# latent_AUROC_return = AUROC.new_pooled_auroc(combined_data_shift, FV_timestamps, baseline_duration, 20, True) # This takes a long time!

### STEP 6B: PARSE AUROC OUTPUT

In [None]:
# STEP 6B.1: Turn each on-time AUROC result into a table indexed by odor, then join the tables side by side
# under a column level keyed by cell name.
# NOTE(review): assumes new_pooled_auroc returns one result per cell, in cell_names order — confirm
ontime_dataframes = [pd.DataFrame(return_dict).set_index(odor_list) for return_dict in on_time_AUROC_return]
# Key by the full cell_names (matching the latent version) instead of a hard-coded single-cell slice
ontime_AUROC_data = pd.concat(ontime_dataframes, axis = 1, keys=cell_names)

In [None]:
# Latent parsing is disabled together with the latent AUROC run in STEP 6A.2: latent_AUROC_return is only
# assigned by that (commented-out) call, so running these lines would raise a NameError.
# Uncomment them together with STEP 6A.2.
# latent_dataframes = [pd.DataFrame(return_dict).set_index(odor_list) for return_dict in latent_AUROC_return]
# latent_AUROC_data = pd.concat(latent_dataframes, axis = 1, keys=cell_names)

In [None]:
# STEP 6B.2: Collect each cell's 'significance_chart' column into one table with one column per cell.
# The columns are gathered first and concatenated once; growing a DataFrame with pd.concat inside the loop
# re-copies the whole table on every iteration.
ontime_significance_columns = [ontime_AUROC_data[cell]['significance_chart'] for cell in cell_names]

if ontime_significance_columns:
    ontime_significance_table = pd.concat(ontime_significance_columns, axis=1)
else:
    # pd.concat rejects an empty list; fall back to an empty table when there are no cells
    ontime_significance_table = pd.DataFrame()

ontime_significance_table.columns=cell_names

# Latent equivalent, disabled together with the latent AUROC run in STEP 6A.2:
# latent_significance_columns = [latent_AUROC_data[cell]['significance_chart'] for cell in cell_names]
# latent_significance_table = pd.concat(latent_significance_columns, axis=1)
# latent_significance_table.columns=cell_names

### STEP 6C: Save AUROC Output

In [None]:
# STEP 6C.1: SAVE SIGNIFICANCE TABLE TO XLSX
# Both the spreadsheet and the saved objects below go in the analysis output directory
folder = project_folder.analysis_dir.output_dir.path

ontime_file_name = f'{file_header}ontime_significance_table.xlsx'
ontime_file_path = folder.joinpath(ontime_file_name)
ontime_significance_table.to_excel(ontime_file_path)

# latent_file_name = f'{file_header}latent_significance_table.xlsx'
# latent_file_path = folder.joinpath(latent_file_name)
# latent_significance_table.to_excel(latent_file_path)

# STEP 6C.2: PICKLE DATA
# These are reloaded under the same names at Checkpoint 3
DewanIOhandler.save_data_to_disk(ontime_AUROC_data, 'ontime_AUROC_data', file_header, folder)
DewanIOhandler.save_data_to_disk(ontime_significance_table, 'ontime_significance_table', file_header, folder)
# DewanIOhandler.save_data_to_disk(latent_AUROC_data, 'latent_AUROC_data', file_header, folder)
# DewanIOhandler.save_data_to_disk(latent_significance_table, 'latent_significance_table', file_header, folder)

### Checkpoint 3: Load Data for Plotting

In [None]:
# Reload the AUROC results saved in STEP 6C. Requires the STEP 1 cells to have been run first.
folder = project_folder.analysis_dir.output_dir.path

ontime_AUROC_data = DewanIOhandler.load_data_from_disk('ontime_AUROC_data', file_header, folder)
ontime_significance_table = DewanIOhandler.load_data_from_disk('ontime_significance_table', file_header, folder)
# latent_AUROC_data = DewanIOhandler.load_data_from_disk('latent_AUROC_data', file_header, folder)
# latent_significance_table = DewanIOhandler.load_data_from_disk('latent_significance_table', file_header, folder)

# The odor data lives with the preprocessed data, not with the AUROC output
folder = project_folder.analysis_dir.preprocess_dir.path
odor_data = DewanIOhandler.load_data_from_disk('odor_data', file_header, folder)

## STEP 7: Plotting

In [None]:
# STEP 7A: Plot AUROC Distributions (Optional)
# Uses the on-time AUROC data and odor data loaded at Checkpoint 3
plotting.plot_auroc_distributions(ontime_AUROC_data, odor_data, project_folder)
# plotting.plot_auroc_distributions(latent_AUROC_data, odor_data, project_folder)

In [None]:
# STEP 7B: Plot Cell v Odor Significance Matrices
# Note: One Optional Argument: 1) latent_cells_only
# False by default, pass True to create matrix for latent cells
# DewanPlotting.plot_significance_matricies(onTimePlottingData) # OnTime
# DewanPlotting.plot_significance_matricies(latentPlottingData, True) # Latent

In [None]:
# STEP 7C: Plot Significant Cell Traces
# Plot significant cells v odors
# Note: two optional arguments 1) latent_cells_only 2) plot_all
# Both are False by default; example lines for plotting all cells are included below
# NOTE(review): DewanPlotting, onTimePlottingData, and latentPlottingData are not imported or assigned anywhere
# in the visible notebook (the import cell brings in `plotting`) — this cell looks like it predates the
# current API; confirm before running
DewanPlotting.pooled_cell_plotting(onTimePlottingData)
DewanPlotting.pooled_cell_plotting(latentPlottingData, True)

# Uncomment to plot All Cells
# DewanPlotting.plot_cells(onTimePlottingData, num_workers = 8, latent_cells_only = False, plot_all = True) 
# DewanPlotting.plot_cells(latentPlottingData, num_workers = 8, latent_cells_only = True, plot_all = True) 

# Note 2: Parameter names are included for clarity, but only the boolean values are required when running the function

In [None]:
# STEP 7D: Plot Trial Variances
# Creates vertical scatter plots showing the change in fluorescence for all trials for each cell-odor pairing
# NOTE(review): DewanDataStore and DewanPlotting are not imported in the visible import cell (it brings in
# `data_stores` and `plotting`). Combined_Baseline_Shift_Data, CellList, OdorData, FinalValveOnIndex,
# UnixTimeArray, and onTimeAUROCSignificanceTable are only assigned by the Checkpoint 4 cell further down —
# confirm before running
trialScatterData = DewanDataStore.AUROCdataStore(Combined_Baseline_Shift_Data, CellList, OdorData, FV_data, file_header, FinalValveOnIndex, UnixTimeArray, baseline_duration, response_duration, False)

# Note: One Optional Argument: 1) latent_cells_only
# False by default, pass True to create plots for latent cells
DewanPlotting.plot_trial_variances(trialScatterData, onTimeAUROCSignificanceTable)
# NOTE(review): the latent call below is passed the on-time significance table — confirm this is intended
DewanPlotting.plot_trial_variances(trialScatterData, onTimeAUROCSignificanceTable, True)

## STEP 8. Statistics
### Checkpoint 4: Load Data for Statistics

In [None]:
# Load the inputs for the statistics cells (STEP 8 and STEP 9).
# NOTE(review): this cell passes folders as lists of path parts and loads names ('Combined_Baseline_Shift_Data',
# 'FVonIdx', 'UnixTimeArray', ...) that differ from the names saved earlier in this notebook
# ('combined_data_shifted', 'FV_indexes', 'unix_timestamps', ...) — confirm these files exist for the session
folder = ['ImagingAnalysis', 'AUROCImports']

Combined_Baseline_Shift_Data = DewanIOhandler.load_data_from_disk('Combined_Baseline_Shift_Data', file_header, folder)
FinalValveOnIndex = DewanIOhandler.load_data_from_disk('FVonIdx', file_header, folder)
UnixTimeArray = DewanIOhandler.load_data_from_disk('UnixTimeArray', file_header, folder)
OdorData = DewanIOhandler.load_data_from_disk('OdorData', file_header, folder)

folder = ['ImagingAnalysis', 'AUROCData']
onTimeAUROCSignificanceTable = DewanIOhandler.load_data_from_disk('onTimeAUROCSignificanceTable', file_header, folder)

folder = ['ImagingAnalysis', 'PreProcessedData']

CellList = DewanIOhandler.load_data_from_disk('CellList', file_header, folder)
# Bound as FV_Data because STEP 8A and STEP 9A pass `FV_Data` to AUROCdataStore
FV_Data = DewanIOhandler.load_data_from_disk('FV_Data', file_header, folder)
GoodCellProps = DewanIOhandler.load_data_from_disk('GoodCellProperties', file_header, folder)

### STEP 8A: Chemotopic Correlations
#### Figure plotting pairwise distance correlation (y axis) v. physical cell-cell distance (x-axis)

In [None]:
#8A.1 Remove Buzzer from odors (the MO removal is currently commented out)
buzzer_index = np.where(OdorData == 'Buzzer')[0]
#mo_index = np.where(OdorData == 'MO')[0]
#indices_to_delete = np.hstack((buzzer_index, mo_index))

StatsOdorData = np.delete(OdorData, buzzer_index) # Only remove buzzer trials

# Row indexes of the significance table that contain at least one value > 0
# NOTE(review): assumes rows of onTimeAUROCSignificanceTable are cells — confirm
significant_ontime_cells = np.unique(np.nonzero(onTimeAUROCSignificanceTable > 0)[0])  # Get indexes of only the cells that had >= 1 significant response
# NOTE(review): DewanDataStore and DewanPlotting are not imported in the visible import cell — confirm
stats_data = DewanDataStore.AUROCdataStore(Combined_Baseline_Shift_Data, CellList, StatsOdorData, FV_Data, file_header, FinalValveOnIndex, UnixTimeArray, baseline_duration, response_duration, False)

#8A.2 Gather trial-averaged odor responses per cell
trial_averaged_responses_matrix = DewanStats.trial_averaged_odor_responses(stats_data, significant_ontime_cells)

#8A.3 Z-score trial-averaged responses and calculated pairwise distances
odor_pairwise_distances, cell_pairwise_distances = DewanStats.calculate_pairwise_distances(trial_averaged_responses_matrix, stats_data.unique_odors)


#8A.4 Plot correlation matrices
DewanPlotting.pairwise_correlation_distances(odor_pairwise_distances, cell_pairwise_distances, stats_data.Cell_List[significant_ontime_cells], stats_data.unique_odors)


#8A.5 Convert Averaged Responses to PD Dataframe
# Rows are indexed by the significant cell indexes; STEP 9A selects rows from this table with .loc
trial_averaged_responses_matrix = pd.DataFrame(trial_averaged_responses_matrix, index=significant_ontime_cells, columns=stats_data.unique_odors) # Attach odor names to columns for cross-experiment alignment

### STEP 8B: Gather cell-cell physical distances

In [None]:
#8B.1: Get centroids for significant cells
# Positional selection: significant_ontime_cells holds row indexes from STEP 8A
Centroids = GoodCellProps[['CentroidX', 'CentroidY']]
Centroids = Centroids.iloc[significant_ontime_cells, :]

# Pairs the centroids with the cell pairwise distances computed in STEP 8A.3
unique_distance_v_correlation = DewanStats.cell_v_correlation(Centroids, cell_pairwise_distances) 

### STEP 8C: Plot distance v correlation

In [None]:
# Plots the distance v. correlation data gathered in STEP 8B
# NOTE(review): DewanPlotting is not imported in the visible import cell — confirm
DewanPlotting.plot_distance_v_correlation(unique_distance_v_correlation) #TODO: Save Figures Properly

### STEP 8D: Save Combined Distance v. Correlation Data

In [None]:
# Save the trial-averaged responses (STEP 8A.5) and the distance v. correlation data (STEP 8B)
# NOTE(review): the folder is given as a list of path parts here, whereas earlier cells pass a path taken
# from project_folder — confirm save_data_to_disk accepts both forms
folder = ['ImagingAnalysis', 'CombinedData']

DewanIOhandler.save_data_to_disk(
    trial_averaged_responses_matrix, 'trial_averaged_responses_matrix', file_header, folder)

DewanIOhandler.save_data_to_disk(
    unique_distance_v_correlation, 'unique_distance_v_correlation', file_header, folder)

## STEP 9: Lifetime and Population Sparseness
### STEP 9A: Remove MO and Buzzer Cells, Get Significant Cells, Gather & Average Trial Data

In [None]:
# NOTE(review): cells_lifetime_sparseness and sparseness_data are not referenced by any later cell visible in
# this notebook — confirm they are still needed
cells_lifetime_sparseness = []

sparseness_data = DewanDataStore.AUROCdataStore(Combined_Baseline_Shift_Data, CellList, OdorData, FV_Data, file_header, FinalValveOnIndex, UnixTimeArray, baseline_duration, response_duration, False)

# Stricter threshold than STEP 8A (> 1 instead of > 0)
# NOTE(review): presumably values above 1 encode a positive response — confirm against the AUROC significance coding
significant_ontime_cells = np.unique(np.nonzero(onTimeAUROCSignificanceTable > 1)[0])  # Get indexes of only the cells that had >= 1 significant positive response
# Work on a copy: clamp negative averaged responses to zero, then keep only the rows for the cells selected above
zeroed_trial_averaged_responses_matrix = trial_averaged_responses_matrix.copy()
zeroed_trial_averaged_responses_matrix[zeroed_trial_averaged_responses_matrix < 0] = 0
zeroed_trial_averaged_responses_matrix = zeroed_trial_averaged_responses_matrix.loc[significant_ontime_cells]

### STEP 9B: Calculate Lifetime and Population Sparseness

In [None]:
# Both calculations use the zeroed response matrix and the cell selection from STEP 9A;
# stats_data (cell list and unique odors) comes from STEP 8A
lifetime_sparseness_DF = DewanStats.lifetime_sparseness(zeroed_trial_averaged_responses_matrix, significant_ontime_cells, stats_data.Cell_List)
population_sparseness_DF = DewanStats.population_sparseness(zeroed_trial_averaged_responses_matrix, significant_ontime_cells, stats_data.unique_odors)

### STEP 9C: Save Sparseness Data

In [None]:
# The folder is kept both as a list of parts (passed to save_data_to_disk) and as a Path (for the Excel files).
# folder_path is relative, so the Excel files are written relative to the current working directory.
folder = ['ImagingAnalysis', 'Statistics']
folder_path = Path(*folder)

lifetime_sparseness_excel_path = folder_path.joinpath(f'{file_header}lifetime_sparseness.xlsx')
population_sparseness_excel_path = folder_path.joinpath(f'{file_header}population_sparseness.xlsx')

# Missing values are written to the spreadsheets as the text 'NaN'
lifetime_sparseness_DF.to_excel(lifetime_sparseness_excel_path, na_rep='NaN')
population_sparseness_DF.to_excel(population_sparseness_excel_path, na_rep='NaN')
DewanIOhandler.save_data_to_disk(lifetime_sparseness_DF, 'lifetime_sparseness_DF', file_header, folder)
DewanIOhandler.save_data_to_disk(population_sparseness_DF, 'population_sparseness_DF', file_header, folder)