## Distance moved analysis 

#### Experimental design : 6 minutes acclimation then 18 minutes of light/dark cycling (3 x 2 x 3 min)

#### The purpose of the script is to create a single 5dpf movement dataset with all timepoints in the second light cycle using QC'd data from step 1.

#### Standardize all location data relative to center of well and forward fill (pad) missing data

by: Dr. Adrian Green, *Fall 2021*

In [1]:
# import libraries

import os
import sys
import traceback

import numpy as np
import openpyxl
import pandas as pd
import xlrd

from Green_scripts_v1 import save_obj, load_obj

# Optional display settings for debugging, left disabled.
# np.set_printoptions(threshold=sys.maxsize)
# pd.set_option('display.max_colwidth', -1)
# Project root directory; the well-to-arena and plate-to-chemical key tables
# are read from "<basedirname>/Raw_Data_Files/" in the next cell.
basedirname = '/home2/ajgreen4/ZF_Projects/HTS_Behavior_Project'

In [2]:
# output location
dataPath = "/home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/"

# Noldus raw data file location
rawDataPath = "/home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/QC_Data_Files/OSU_all/"

# Collect the pickled data files found in rawDataPath.
# os.listdir returns names in arbitrary order, so the list is sorted to make
# the processing order (and therefore the output row order) reproducible.
newlist = os.listdir(rawDataPath)
mylist = sorted(names for names in newlist if names.endswith(".pkl"))
print("Number of raw data files: ",len(mylist))

# Read in well to arena key
waKey = pd.read_csv(basedirname + "/Raw_Data_Files/well-arena.csv")
[waNumRows, waNumCols] = waKey.shape

# Read in plate to chemical to well key
pcwKey = pd.read_csv(basedirname + "/Raw_Data_Files/plateTable.csv")
[pcwNumRows, pcwNumCols] = pcwKey.shape

# Number of histogram bins per axis used when locating the center of a well
wellSize = 54

Number of raw data files:  52


### Read in one EthoVision raw export file at a time, then standardize, pad, and filter it to the second light cycle.

The second light cycle spans trial times from 720 s (inclusive) up to, but not including, 1080 s.

In [3]:
%%time
# Make sure the output folder exists.  os.makedirs(..., exist_ok=True) replaces
# the earlier os.chdir probe, which changed the notebook's working directory
# whenever the folder was already there.
os.makedirs(dataPath, exist_ok=True)

# Column order of the combined output table
outputColumns = ['PlateID', 'ChemID', 'Well', 'Conc',
                 'Trial time', 'X center', 'Y center',
                 'Distance moved', 'Velocity', 'ClassID']
# Base name (no extension) of the checkpoint written after every file
outputName = "5dpf_2nd_cycle_movement-etho_data_Step2_standardize_analyze_output"
# Second light cycle: cycleStart <= 'Trial time' < cycleEnd
cycleStart = 720
cycleEnd = 1080

# Initialize output variable
all_movement = pd.DataFrame([], columns=outputColumns)

# read in data files
for file in mylist:
    fileStem = os.path.splitext(file)[0]
    noldusDataFile = load_obj(rawDataPath, fileStem)
    # Frames built from this file only.  They are merged into all_movement once
    # the whole file has been processed, so a file that fails part-way leaves
    # all_movement identical to the last checkpoint written to disk.
    fileFrames = []

    try:
        for sheet in noldusDataFile.values():
            locData = pd.DataFrame(sheet)
            # cell (0, 1) of each sheet holds the number of header lines
            headerLines = int(locData.loc[0, 1])
            numRows = locData.shape[0]

            # Determine center of well using all x,y data ('-' marks a missing sample)
            xCoords = locData.iloc[headerLines:numRows, [2]].to_numpy()
            yCoords = locData.iloc[headerLines:numRows, [3]].to_numpy()
            xCoords = xCoords[xCoords != '-']
            yCoords = yCoords[yCoords != '-']

            # Find midpoint and normalize: only the outer bin edges are used,
            # i.e. the midpoint of the observed x and y ranges
            _, xedges, yedges = np.histogram2d(xCoords, yCoords,
                                               bins=(wellSize, wellSize), density=False)
            xmid = np.abs(xedges[0] + (xedges[-1] - xedges[0]) / 2)
            ymid = np.abs(yedges[0] + (yedges[-1] - yedges[0]) / 2)

            # remove header lines; copy so the edits below act on an independent
            # frame instead of a slice of locData
            locSubset = locData.loc[headerLines:numRows, :].copy()
            # add column names (stored two rows above the first data row)
            locSubset.columns = locData.loc[headerLines - 2, :]
            # delete unneeded columns
            del locSubset['Recording time']
            # drop everything after the second light cycle; earlier rows are kept
            # for now so that forward filling can draw on samples before 720
            locSubset = locSubset.loc[locSubset['Trial time'] < cycleEnd].copy()
            # set missing distance and velocity data to 0
            locSubset[locSubset.loc[:, ['Distance moved', 'Velocity']] == '-'] = 0
            # set the remaining missing data (x,y) to nan
            locSubset[locSubset == '-'] = np.nan
            # forward fill missing x,y data
            locSubset = locSubset.ffill()
            # standardize x,y data relative to center of the well
            locSubset.loc[:, ['X center', 'Y center']] = locSubset.loc[:, ['X center', 'Y center']].abs()
            locSubset.loc[:, 'X center'] = locSubset.loc[:, 'X center'] - xmid
            locSubset.loc[:, 'Y center'] = locSubset.loc[:, 'Y center'] - ymid
            # select second light cycle
            inCycle = ((locSubset['Trial time'] >= cycleStart) &
                       (locSubset['Trial time'] < cycleEnd))
            locSubset = locSubset.loc[inCycle]

            # retrieve info about chemical exposure from header lines
            plateID = ""
            chemID = ""
            well = ""
            typeID = ""
            # plate info can be found in either the file name or in the video name:
            # try the first five characters of the file name, then the last five,
            # then the five characters before the video file's extension
            try:
                plateID = int(fileStem[0:5])
            except ValueError:
                try:
                    plateID = int(fileStem[-5:])
                    if len(str(plateID)) != 5:
                        raise ValueError("plate ID in file name is not 5 digits")
                except ValueError:
                    aviName = locData.loc[(locData[0] == 'Video file')].iloc[0, 1]
                    end = len(aviName) - 4
                    plateID = int(aviName[end - 5:end])
            arenaName = int(locData.loc[(locData[0] == 'Arena name')].iloc[0, 1])
            well = waKey.loc[(waKey['arena'] == arenaName)].iloc[0, 0]

            try:
                # single lookup of the plate/well row in the chemical key
                keyRows = pcwKey.loc[(pcwKey['PlateID'] == plateID) & (pcwKey['Well'] == well)]
                chemID = keyRows.iloc[0, 1]
                conc = keyRows.iloc[0, 3]
                # assign class
                if conc == 0:
                    typeID = "Control"
                    classID = 0
                else:
                    typeID = 'Treated'
                    classID = 2
                # create new DataFrame with larvae ID variables and sort the columns
                zf_movement = locSubset.assign(PlateID=plateID,
                                               ChemID=chemID,
                                               Well=well,
                                               Conc=conc,
                                               ClassID=classID)
                zf_movement = zf_movement[outputColumns]
                # queue this larva for the combined output
                fileFrames.append(zf_movement)
            except Exception:
                # a well that cannot be matched is reported and skipped;
                # the remaining wells of the file are still processed
                print("Error in Plate:", plateID, " Chem:", chemID, " Well:", well)
                traceback.print_exc()

        # add this file's larvae to the single output table in one concat
        # (DataFrame.append was removed in pandas 2 and copied the whole table
        # for every well); empty frames are left out of the concat
        nonEmpty = [frame for frame in [all_movement, *fileFrames] if not frame.empty]
        if nonEmpty:
            all_movement = pd.concat(nonEmpty, ignore_index=True)
        # save current data to disk
        save_obj(all_movement, dataPath, outputName)
        print('File : ', file, ' - complete and witten to disk.')
    except Exception:
        # any other failure stops the run at this file
        print('Error in : ', file)
        traceback.print_exc()
        break

File saved:  /home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/ 5dpf_2nd_cycle_movement-etho_data_Step2_standardize_analyze_output .pkl
File :  Raw data-21532.pkl  - complete and witten to disk.
File saved:  /home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/ 5dpf_2nd_cycle_movement-etho_data_Step2_standardize_analyze_output .pkl
File :  Raw data-21534.pkl  - complete and witten to disk.
File saved:  /home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/ 5dpf_2nd_cycle_movement-etho_data_Step2_standardize_analyze_output .pkl
File :  Raw data-21542.pkl  - complete and witten to disk.
File saved:  /home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/ 5dpf_2nd_cycle_movement-etho_data_Step2_standardize_analyze_output .pkl
File :  Raw data-21543.pkl  - complete and witten to disk.
File saved:  /home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/ 5dpf_2nd_cycle_movement-etho_data_Step2_standardize_analyze_output .pkl
File :  Raw data-21546.pkl  - complete and witten

In [4]:
# Write the combined movement table to CSV in the output folder (row index omitted)
csvOutputPath = dataPath + "5dpf_2nd_cycle_movement-etho_data_Step2_standardize_analyze_output.csv"
all_movement.to_csv(csvOutputPath, index=False)