# Dewan Lab EPM Analysis
## 0: Run once to create all needed directories at beginning of a project

In [18]:
from dewan_calcium.helpers import DewanIOhandler
# Set up the 'EPM' project framework. Per this cell's heading, this only needs to be
# run once, at the beginning of a project, to create the needed directories.
DewanIOhandler.create_project_framework('EPM')

## STEP 1: Always Execute! Load Libraries and User Settings
### STEP 1A: Import Libraries

In [None]:
%load_ext autoreload
%autoreload 2

In [2]:
import cv2
import numpy as np
import pandas as pd
from pathlib import Path

from dewan_calcium import DewanDeconv, DewanManualCuration
from dewan_calcium.helpers import DewanEPM, DewanIOhandler, DewanJSON

### STEP 1B: User Configurables

In [23]:
# Session identifiers; replace both placeholders before running the notebook
animal = 'ANIMAL_GOES_HERE'
date = 'DATE_GOES_HERE'

# TODO: Get these configurables from the JSON file using the JSON Module

# Acquisition settings as read off the Inscopix box
LED_power = 1
GAIN = 2.2
FOCUS = 250

# Length of the experiment, in minutes
EXPERIMENT_TIME = 10

# '<animal>-<date>-' prefix that is handed to the save/load helpers further down
fileHeader = f'{animal}-{date}-'

In [4]:
# Root folder of the dataset used while testing this notebook.
# A raw string keeps every backslash of the Windows path literal. The previous literal
# mixed escaped backslashes with a bare '\V', an unrecognized escape sequence that
# Python reports with a DeprecationWarning/SyntaxWarning. The resulting path is unchanged.
test_path = Path(r'R:\2_Inscopix\1_DTT\3_EPM\VGLUT-20')

# Build the 'EPM' project framework, this time passing test_path
# (presumably the root the folders are created under -- confirm in DewanIOhandler)
DewanIOhandler.create_project_framework('EPM', test_path)

## 2A: Import and pre-process the raw data

#### Copy DLC output .h5 file and labeled video -> EPM_Analysis\RawData

## CAUTION WARNING HEY LOOK HERE WE ARE USING VGAT12 DLC DATA WITH VGLUT20 NEURAL DATA FOR TESTING PURPOSES


In [14]:
# New way that we should be grabbing the data

# STEP 2A.1: LOAD DLC DATA
# Needs the DLC tracking output (.h5) and the DLC-labeled video, both in EPM_Analysis/DLC_Data.
dlc_path_stem = Path('EPM_Analysis', 'DLC_Data')
dlc_path = test_path.joinpath(dlc_path_stem)

# Take the first match for each pattern (None when nothing matches).
# The video is usually an mp4 file, but matching on the name is more flexible.
labeled_video_path = next(dlc_path.glob('*DLC*labeled*'), None)
points_path = next(dlc_path.glob('*.h5'), None)

# Fail with a message that names the missing file instead of a bare IndexError
if labeled_video_path is None:
    raise FileNotFoundError(f"No file matching '*DLC*labeled*' was found in {dlc_path}")
if points_path is None:
    raise FileNotFoundError(f"No file matching '*.h5' was found in {dlc_path}")

tracked_points = pd.read_hdf(points_path)  # Load tracked points
labeled_video = cv2.VideoCapture(str(labeled_video_path))  # Load Video

# cv2.VideoCapture does not raise when the file cannot be opened, so check explicitly;
# otherwise the FPS below would silently come back as 0
if not labeled_video.isOpened():
    raise IOError(f'Could not open the labeled video: {labeled_video_path}')

VIDEO_FPS = labeled_video.get(cv2.CAP_PROP_FPS)

In [24]:
# STEP 2A.2: LOAD INSCOPIX DATA

# Rename this to inscopix_path when done testing and delete the line below
inscopix_path_stem = Path('InscopixProcessing', 'DataAnalysis')
inscopix_path = test_path / inscopix_path_stem


def _first_match(pattern):
    """Return the first entry of inscopix_path whose name matches the glob pattern."""
    return list(inscopix_path.glob(pattern))[0]


# Every pattern is assumed to match exactly one file
cell_trace_path = _first_match('*TRACES.csv')
GPIO_path = _first_match('*GPIO.csv')
cell_props_path = _first_match('*props.csv')
contours_path = _first_match('*CONTOURS.json')
# Anything whose name includes HD and ends in MAX_PROJ.tiff
max_projection_path = _first_match('*HD*MAX_PROJ.tiff')

# Read the three CSV files; the GPIO file is read without a header row
cell_trace_data = pd.read_csv(cell_trace_path, engine='pyarrow')
GPIO_data = pd.read_csv(GPIO_path, header=None, engine='pyarrow')
all_cell_props = pd.read_csv(cell_props_path, header=0, engine='pyarrow')

# Cell keys and their outline coordinates come from the contours JSON
cell_keys, cell_outlines = DewanJSON.get_outline_coordinates(contours_path)

In [None]:
# STEP 2A.2: PREPROCESSING

# STEP 2A.2.1: The first row only holds the Inscopix default 'undecided' labels, so discard it.
cell_trace_data = cell_trace_data.drop(index=0)

# STEP 2A.2.2: Force every value to be a number (anything unparseable is coerced to NaN)
cell_trace_data = cell_trace_data.apply(pd.to_numeric, errors='coerce')
# The first column holds the times: round them to 2 decimal places and make them the index,
# so that the remaining columns are all dF/F values
time_column = cell_trace_data.columns[0]
cell_trace_data[time_column] = cell_trace_data[time_column].round(2)
cell_trace_data = cell_trace_data.set_index(time_column)

# STEP 2A.2.3: Strip the spaces out of the column names
cell_trace_data.columns = [name.replace(' ', '') for name in cell_trace_data.columns]

# STEP 2A.2.4: REMOVE ALL MULTI-COMPONENT CELLS
num_components = all_cell_props['NumComponents']
# One number per cell. Example Cell Numbers: 0, 1, 2, 3, 4
cell_list = np.arange(len(num_components))
# Positions of the cells made of exactly one component. Example One-Component Indexes: 0, 1, 4
one_piece_cells = np.where(num_components == 1)[0]
# Keep only the one-piece cells in every per-cell container
cell_list = cell_list[one_piece_cells]  # Example Filtered Cell Numbers: 0, 1, 4
cell_keys = cell_keys[one_piece_cells]  # Example Filtered Cell Keys: C00, C01, C04
all_cell_props = all_cell_props.iloc[one_piece_cells]

# STEP 2A.2.5: PARSE GPIO DATA
# The second column names the channel; remove the stray spaces in it first
GPIO_data.iloc[:, 1] = GPIO_data.iloc[:, 1].str.replace(' ', '')
gpio_channel = GPIO_data.iloc[:, 1]
GPIO1 = np.array(gpio_channel == "GPIO-1")  # Sniff Sensor Data Truth Table
GPIO2 = np.array(gpio_channel == "GPIO-2")  # FV Actuation Data Truth Table
# Copy of the rows that belong to the FV channel only
FV_data = np.array(GPIO_data.iloc[GPIO2, :])

# STEP 2A.2.6: Make all numeric values floats and remove nullbytes


def remove_null_bytes(item):
    """Return the part of item that comes before the first null byte."""
    # For some reason the data will occasionally contain a very long string of null bytes;
    # splitting on the null byte discards it together with everything after it
    return item.split('\x00')[0]


# Clean columns 0 and 2 item by item, casting the cleaned values to floats as they are collected
for column in (0, 2):
    FV_data[:, column] = np.fromiter((remove_null_bytes(value) for value in FV_data[:, column]), 'float')

In [61]:
# STEP 2A.3: PREPROCESSING DLC Data

# Replace the column labels with something sensible: the x, y and p columns
# for the mouse, followed by the same three for the LED
cols = [f'{target}_{field}' for target in ('mouse', 'led') for field in ('x', 'y', 'p')]
tracked_points.columns = cols

### STEP 2B: Manual Curation

In [28]:
# STEP 2B.1: Build the labeled maximum projection image (cell outlines and labels drawn on top)
MaxProjectionImage = DewanManualCuration.generate_max_projection(
    max_projection_path,
    all_cell_props,
    cell_keys,
    cell_outlines,
    return_raw_image=False,
)
# Full signature, for reference:
# generate_max_projection(ImagePath, AllCellProps, CellKeys, CellOutlines, return_raw_image, is_downsampled=False, downsample_factor=4, brightness=1.5, contrast=1.5, font_size=24, text_color='red', outline_color='yellow', outline_width=2):
# The optional configuration values are set by default, change as desired
# Note: Set save_image=True to output a max projection with all cells detected by CNMFE regardless if they are good cells or not

# STEP 2B.2: Launch the ManualCuration GUI and collect the cells that were marked as good
good_cells = DewanManualCuration.manual_curation_gui(cell_list, cell_trace_data, MaxProjectionImage)
if good_cells is None:
    print('Error, no good cells selected!')

### STEP 2C: Apply Manual Curation Results and Additional Preprocessing

In [30]:
# STEP 2C.1: Filter all data by the GoodCells identified in ManualCuration
good_cell_props = all_cell_props.iloc[good_cells, :]
# reset_index() returns a new DataFrame instead of modifying the caller, so the result has
# to be assigned back or the index is never reset. drop=True discards the old index rather
# than inserting it as an extra column. Index now runs 0 -> len(good_cell_props) - 1.
good_cell_props = good_cell_props.reset_index(drop=True)
good_cell_list = cell_list[good_cells]
good_cell_keys = cell_keys[good_cells]
# NOTE(review): good_cells is used above as positions into the per-cell containers, while
# cell_trace_data columns are selected with the same positions here. If the two are not
# in the same order/length (e.g. after multi-component cells are removed from the
# per-cell containers only), the selected traces will not line up -- confirm.
good_cell_trace_data = cell_trace_data.iloc[:, good_cells]


# STEP 2C.2: OUTPUT MAX PROJECTION IMAGE WITH CONTOURS OF GOOD CELLS
image = DewanManualCuration.generate_max_projection(max_projection_path, good_cell_props, good_cell_keys, cell_outlines,
                                                    return_raw_image=True)
# generate_max_projection(ImagePath, AllCellProps, CellKeys, CellOutlines, return_raw_image, brightness=1.5, contrast=1.5, font_size=24, text_color='red', outline_color='yellow', outline_width=2):
# Optional configuration values that are set by default, change as desired


### STEP 2D: Pickle and Save all preprocessed data

In [62]:
# Save the preprocessed data in case it is needed later: the good-cell traces, GPIO data,
# FV data, good-cell properties, good-cell list, tracked points and the labeled max projection.
# Once these have been saved, this does not need to be re-run unless the data itself is changed.

folder_stem = ['EPM_Analysis', 'PreProcessedData']
# The save helpers are handed the destination as a tuple of path components
folder = test_path.joinpath(*folder_stem).parts

DewanManualCuration.save_image(image, folder)

# (object, name, flag) for each item, listed in the order it is written.
# NOTE(review): the flag is passed as True for the DataFrames and False for the arrays --
# confirm its exact meaning in DewanIOhandler.save_data_to_disk
_items_to_save = (
    (good_cell_trace_data, 'good_cell_trace_data', True),
    (GPIO_data, 'GPIO_data', True),
    (FV_data, 'FV_data', False),
    (good_cell_props, 'good_cell_props', True),
    (good_cell_list, 'good_cell_list', False),
    (tracked_points, 'tracked_points', True),
)
for _payload, _label, _flag in _items_to_save:
    DewanIOhandler.save_data_to_disk(_payload, _label, fileHeader, folder, _flag)

ANIMAL_GOES_HERE-DATE_GOES_HERE-good_cell_trace_data has been saved!
ANIMAL_GOES_HERE-DATE_GOES_HERE-GPIO_data has been saved!
ANIMAL_GOES_HERE-DATE_GOES_HERE-FV_data has been saved!
ANIMAL_GOES_HERE-DATE_GOES_HERE-good_cell_props has been saved!
ANIMAL_GOES_HERE-DATE_GOES_HERE-good_cell_list has been saved!
ANIMAL_GOES_HERE-DATE_GOES_HERE-tracked_points has been saved!


In [25]:
# Opens the saved pickle files.  If the files have already been saved, code can be re-run
# starting from this point

folder_stem = ['EPM_Analysis', 'PreProcessedData']
folder = test_path.joinpath(*folder_stem)
folder = folder.parts  # The load helper is handed the folder as a tuple of path components

# The last argument mirrors the flag each item is saved with: True for the DataFrames,
# False for the arrays
# NOTE(review): flag semantics live in DewanIOhandler -- presumably it selects the
# DataFrame code path; confirm
good_cell_trace_data = DewanIOhandler.load_data_from_disk('good_cell_trace_data', fileHeader, folder, True)
GPIO_data = DewanIOhandler.load_data_from_disk('GPIO_data', fileHeader, folder, True)
FV_data = DewanIOhandler.load_data_from_disk('FV_data', fileHeader, folder, False)
good_cell_props = DewanIOhandler.load_data_from_disk('good_cell_props', fileHeader, folder, True)
good_cell_list = DewanIOhandler.load_data_from_disk('good_cell_list', fileHeader, folder, False)
# tracked_points is a DataFrame and is saved with the flag set to True, so it is loaded
# the same way (it was previously loaded with False, unlike every other DataFrame here)
tracked_points = DewanIOhandler.load_data_from_disk('tracked_points', fileHeader, folder, True)


ANIMAL_GOES_HERE-DATE_GOES_HERE-good_cell_trace_data has loaded successfully!
ANIMAL_GOES_HERE-DATE_GOES_HERE-GPIO_data has loaded successfully!
ANIMAL_GOES_HERE-DATE_GOES_HERE-FV_data has loaded successfully!
ANIMAL_GOES_HERE-DATE_GOES_HERE-good_cell_props has loaded successfully!
ANIMAL_GOES_HERE-DATE_GOES_HERE-good_cell_list has loaded successfully!
ANIMAL_GOES_HERE-DATE_GOES_HERE-tracked_points has loaded successfully!


In [None]:
# There may be an instance where the model erroneously identified the LED for very short time periods
# find_led_start bins the possible LED on times (anywhere led_p > 0.98)
# We then find the bin with the largest size, which means it has the most frames where the LED is identified
# This is most likely the period where the experimenter turned on the LED
led_bins = np.array(DewanEPM.find_led_start(tracked_points))
if led_bins.size == 0:
    # Without this check the column indexing below fails with an unhelpful IndexError
    raise ValueError('No LED-on period was found in tracked_points')

# Bin size is column 1 minus column 0 (presumably end and start row of each bin -- confirm
# against DewanEPM.find_led_start)
true_led_bin = np.argmax(np.subtract(led_bins[:, 1], led_bins[:, 0]))

led_on = led_bins[true_led_bin][0]  # First row where the LED is 'on'

# Delete all data before the LED is 'on' and renumber the rows from 0.
# The slice is open-ended on purpose: the earlier 'led_on:-1' also dropped the final tracked frame.
# Chaining reset_index avoids an in-place modification of a slice of tracked_points.
good_points = tracked_points.iloc[led_on:].reset_index(drop=True)

# Get X, Y coordinates, cast to int, and combine them into tuples
head_x = good_points['mouse_x'].astype(int)
head_y = good_points['mouse_y'].astype(int)
coordinates = list(zip(head_x, head_y))

In [None]:

# Seek to the first LED frame and read it to use as the background image
_ = labeled_video.set(cv2.CAP_PROP_POS_FRAMES, led_on)
frame_was_read, frame = labeled_video.read()
if not frame_was_read:
    # read() signals failure through its first return value rather than raising;
    # stop here instead of passing an empty frame on to cv2.circle
    raise RuntimeError(f'Could not read frame {led_on} from the labeled video')

# Mark every tracked head position with a single red pixel (colour is given in BGR order)
for pair in coordinates:
    cv2.circle(frame, pair, 0, (0, 0, 255), -1)

# _ = cv2.imwrite(str(image_path), frame) # Save image