## Thresholding data to remove outlier frames

#### The purpose of the script is to use data from step 3 to:
- Create 3D numpy data tables
- Split movement data and experimental details

by: Dr. Adrian Green, *Fall 2021*

In [1]:
# import libraries
import numpy as np
import pandas as pd
import sys
import os
import math

from Green_scripts_v1 import zf_behavior_KStest

### Load data

In [2]:
# Project root; every input path below is built from it.
basedirname = '/home2/ajgreen4/ZF_Projects/HTS_Behavior_Project'
data_path = basedirname + '/results/'

# Movement table (the file name indicates it is the cleaned step 3 threshold output).
movement_csv = data_path + "5dpf_2nd_cycle_movement-etho_data_Step3_threshold_output_cleaned.csv"
all_movement = pd.read_csv(movement_csv)

# Rescale 'Distance moved' by 1000 to convert mm to µm.
all_movement['Distance moved'] = all_movement['Distance moved'].mul(1000)

# Plate table from the raw data folder (not the movement data loaded above).
plate_table_csv = basedirname + "/Raw_Data_Files/plateTable.csv"
pcwKey = pd.read_csv(plate_table_csv)

### Create data tables for autoencoder training

In [3]:
# Count larvae by counting the rows recorded at Trial time == 720.
# NOTE(review): this assumes every larva has exactly one row at that trial
# time - confirm against the step 3 output.
#
# Rows are counted with boolean masks rather than `.count()[0]`: `.count()`
# returns the number of non-null values per column, so taking its first entry
# undercounts whenever the first column contains NaN, and positional `[0]` on a
# label-indexed Series is deprecated in recent pandas.
at_count_frame = all_movement['Trial time'] == 720
is_wt = all_movement['ClassID'] == 0  # ClassID 0 is reported as WT below
is_tx = all_movement['ClassID'] == 2  # ClassID 2 is reported as TX below

larvae = int(at_count_frame.sum())
wt_larvae = int((at_count_frame & is_wt).sum())
tx_larvae = int((at_count_frame & is_tx).sum())

print('Total number of larvae: ', larvae)
print('Total number of WT larvae: ', wt_larvae)
print('Total number of TX larvae: ', tx_larvae)

Total number of larvae:  3425
Total number of WT larvae:  706
Total number of TX larvae:  2719


In [4]:
# Number of distinct ChemID values (NaN, if present, is counted as a value).
n_chemicals = all_movement['ChemID'].nunique(dropna=False)
print('Number of Chemicals: ', n_chemicals, '\n')

# Size of whatever zf_behavior_KStest returns for the full table; the meaning
# of the second argument (3) is defined in Green_scripts_v1, not shown here.
treatments = zf_behavior_KStest(all_movement, 3)
print('All treatments (Chemicals + Doses): ', len(treatments), '\n')

Number of Chemicals:  30 

All treatments (Chemicals + Doses):  103 



In [5]:
%%time
test = all_movement.copy()
test = test.fillna(method='pad')

# count larvae in dataset to create zero array
larvae = 0
for plate in test['PlateID'].unique():
    for chem in test.loc[(test['PlateID'] == plate)].loc[:,'ChemID'].unique():
        for well in test.loc[(test['PlateID'] == plate) & 
                                 (test['ChemID'] == chem)].loc[:,'Well'].unique():
            larvae += 1
arr_size = test.loc[(test['PlateID'] == plate) & (test['ChemID'] == chem) & 
                        (test['Well'] == well)].loc[:,'Trial time':'Velocity'].shape
X = np.zeros([larvae, arr_size[0], arr_size[1]])
y = np.zeros(larvae)
z = pd.DataFrame([], columns=['PlateID', 'ChemID', 'Well', 'Conc']) 

# fill the array with individual zf movement data (n, 9751, 6)
larvae = 0
for plate in test['PlateID'].unique():
    for chem in test.loc[(test['PlateID'] == plate)].loc[:,'ChemID'].unique():
        for conc in test.loc[(test['PlateID'] == plate) & (test['ChemID'] == chem)].loc[:,'Conc'].unique():
            for well in test.loc[(test['PlateID'] == plate) & (test['ChemID'] == chem) & 
                                 (test['Conc'] == conc)].loc[:,'Well'].unique():
                zf_movement = test.loc[(test['PlateID'] == plate) & (test['ChemID'] == chem) & 
                                       (test['Well'] == well) & 
                                       (test['Conc'] == conc)].loc[:,'Trial time':'ClassID']
                y[larvae] = zf_movement['ClassID'].unique()
                X[larvae] = zf_movement.drop(['ClassID'], axis=1).to_numpy()
                z = z.append(test.loc[(test['PlateID'] == plate) & (test['ChemID'] == chem) & 
                                            (test['Well'] == well) & 
                                            (test['Conc'] == conc)].head(1).loc[:,'PlateID':'Conc'])
                larvae += 1

CPU times: user 3h 10min 10s, sys: 24 s, total: 3h 10min 34s
Wall time: 3h 3s


In [6]:
# Save processed data for training: X and y as .npy files, z as CSV.
x_out = '/home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/5dpf_2nd_cycle_movement_X_data.npy'
y_out = '/home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/5dpf_2nd_cycle_movement_y_data.npy'
z_out = '/home2/ajgreen4/ZF_Projects/HTS_Behavior_Project/results/5dpf_2nd_cycle_movement_z_data.csv'

for target, array in ((x_out, X), (y_out, y)):
    np.save(target, array)

# The row index is not written to the CSV.
z.to_csv(z_out, index=False)