# Set up
Edit this section before running the notebook. Set `data_path` to the folder that contains the OpenBARR data. This folder needs to be formatted beforehand - take a look at the **README** or see `/sample_data/original_data` for an example. Set `output_path` to the folder where you want the graphs, and the data used to make them, to be saved.

Here, you will also specify the experimental conditions in `conditions` and the days in `days`. These tell the program how to navigate the files and batch-process them.


In [1]:
# connect to google drive (Colab only) - the paths below live under this mount point
from google.colab import drive
drive.mount('/content/drive')

# import packages we need for analysis and graphing
import os
import numpy as np
import pandas as pd

# specify where the data is - this should be changed
# data_path   : folder holding the formatted OpenBARR files; the sample data is
#               laid out as <data_path>/<condition>/<day>/<file>.txt
# output_path : folder for the generated output
data_path = '/content/drive/MyDrive/0 Revamp/sample_data/original_data'
output_path = '/content/drive/MyDrive/0 Revamp/sample_data/output'

# specify experimental conditions - this should be changed
# NOTE(review): these presumably have to match the sub-folder names under
# data_path exactly (e.g. '3EtOH/D1') - confirm against the batch-processing code
conditions = ['3EtOH', '25EtOH', '50EtOH', '75EtOH']
days = ['D1', 'D2', 'D3']

Mounted at /content/drive


# Preprocess the data
We format the OpenBARR data to be more understandable and remove the first second of data in case of initial mistracking.

In [10]:
def read_raw(file_path):
  """Load a raw OpenBARR recording into a DataFrame.

  The file is parsed as tab-separated text without a header row; the six
  columns are labelled, in order, ``time``, ``x``, ``y``, ``in``, ``entry``
  and ``exit``.

  Parameters
  ----------
  file_path : str
    Location of the OpenBARR file to read.

  Returns
  -------
  pandas.DataFrame
    One row per line of the file, with the column labels listed above.
  """
  # the raw files carry no header, so the labels are supplied here
  column_names = ['time', 'x', 'y', 'in', 'entry', 'exit']
  raw = pd.read_csv(file_path, sep='\t', header=None, names=column_names)
  return raw

def adjust_y(df, fallback_border=235):
  """Adjust the y-coordinates of an OpenBARR dataframe in place.

  Re-expresses ``df['y']`` relative to the border between ROSA and RONSA, so
  that y >= 0 indicates how far the animal is in ROSA while y < 0 indicates
  how far it is in RONSA. This adjustment makes the data more intuitive and
  easier to analyze.

  The border is taken as the largest raw y-coordinate among the rows flagged
  with ``entry == 1``. If there are no such rows, ``fallback_border`` is used
  instead.

  Parameters
  ----------
  df : pandas.DataFrame
    A DataFrame containing the data from the OpenBARR file. It must have the
    columns ``entry`` and ``y``. The ``y`` column is overwritten.
  fallback_border : int or float, optional
    Raw y-coordinate to use as the border when the fly never entered ROSA.
    Defaults to 235, an upper-bound estimate for the original setup - see
    notes.

  Returns
  -------
  None
    The DataFrame is modified in place.

  Notes
  -----
  This works under the assumption, in short, that the OpenBARR is set up in the
  exact same way. Common problems could be flipped ROSA/RONSA causing RONSA to
  be >= 0 and ROSA < 0, and camera height differences could change the
  arbitrary coordinates, affecting the adjustment for a fly never entering
  ROSA. In the latter case pass a ``fallback_border`` that suits the setup.
  """
  entry_y = df.loc[df['entry'] == 1, 'y']  # raw y-coords on rows flagged as an entry
  # no entry rows means the fly was never seen entering ROSA -> use the estimate
  border = fallback_border if entry_y.empty else entry_y.max()
  df['y'] = border - df['y']  # the adjustment

def preprocess(file_path):
  """Preprocess an OpenBARR file.

  Reads an OpenBARR file, removes the first second of data, and adjusts the
  y-coordinates relative to the border of ROSA and RONSA. The border is
  computed after the first second has been removed, so entries recorded
  during that second do not contribute to it.

  Parameters
  ----------
  file_path : str
    The path to the OpenBARR file.

  Returns
  -------
  pandas.DataFrame
    A DataFrame containing the preprocessed data from the OpenBARR file. The
    row index of the raw file is kept, so it does not start at 0.
  """
  df = read_raw(file_path)
  # Take an explicit copy of the filtered rows: adjust_y assigns to df['y'],
  # and doing that on a slice of another DataFrame raises
  # SettingWithCopyWarning and is not guaranteed to behave consistently.
  df = df[df['time'] >= 1].copy()
  adjust_y(df)  # modifies df in place
  return df

In [11]:
# sanity check: preprocess a single sample file (condition 3EtOH, day D1);
# the bare call on the last line makes the notebook display the resulting DataFrame
test_path = os.path.join(data_path, '3EtOH/D1/JH-20-1FE3_D1.txt')
preprocess(test_path)

Unnamed: 0,time,x,y,in,entry,exit
29,1.021689,30,-172,0,0,0
30,1.053995,30,-170,0,0,0
31,1.087044,29,-170,0,0,0
32,1.121439,29,-172,0,0,0
33,1.162935,29,-173,0,0,0
...,...,...,...,...,...,...
26847,899.865315,41,-140,0,0,0
26848,899.897654,42,-137,0,0,0
26849,899.929413,42,-136,0,0,0
26850,899.961259,41,-134,0,0,0
