In [1]:
import os
import numpy as np
import pandas as pd

import trompy as tp

# the next two lines will expand the printed tables
# pd.options.display.max_rows= 40000
# pd.options.display.max_columns= 40000

In [2]:
# Folder holding the raw FED CSV files. Built with os.path.join so the
# separator matches the host OS (on Windows the value is still "..\\data").
DATA_FOLDER = os.path.join("..", "data")

def tweak_fed(df):
    """Return a trimmed, renamed copy of a raw FED table.

    The column ``"MM:DD:YYYY hh:mm:ss"`` is renamed to ``t`` and parsed with
    ``pd.to_datetime``; ``"InterPelletInterval"`` is renamed to ``ipi``.
    Only the columns ``t``, ``Event``, ``FR``, ``Device_Number``, ``ipi`` and
    ``Poke_Time`` are kept, in that order. The input frame is not modified.
    """
    new_names = {"MM:DD:YYYY hh:mm:ss": "t",
                 "InterPelletInterval": "ipi"}
    kept_columns = ["t", "Event", "FR", "Device_Number", "ipi", "Poke_Time"]

    renamed = df.rename(columns=new_names)
    parsed = renamed.assign(t=pd.to_datetime(renamed.t))
    return parsed.loc[:, kept_columns]

# Example recordings from two FED files; each is read raw into `df` and then
# tidied with tweak_fed (`df` ends up holding the second raw table).
fed001_path = os.path.join(DATA_FOLDER, "FED001_041723_00.CSV")
fed002_path = os.path.join(DATA_FOLDER, "FED002_041723_00.CSV")

df = pd.read_csv(fed001_path)
df1 = tweak_fed(df)
df = pd.read_csv(fed002_path)
df2 = tweak_fed(df)

In [3]:
# Text preview of the first three rows of each tidied example table.
print(df1.head(3), df2.head(3), sep="\n")

                    t   Event  FR  Device_Number  ipi  Poke_Time
0 2023-04-17 11:18:10   Right   1              1  NaN      12.78
1 2023-04-17 11:19:11    Left   1              1  NaN       0.25
2 2023-04-17 11:21:57  Pellet   1              1  NaN        NaN
                    t  Event  FR  Device_Number  ipi  Poke_Time
0 2023-04-17 11:18:20  Right   1              2  NaN      12.59
1 2023-04-17 11:25:38  Right   1              2  NaN       0.00
2 2023-04-17 11:26:38   Left   1              2  NaN       0.25


In [4]:
def sync_feds(df1, df2, min_poke_time=10):
    """Shift the timestamps of ``df2`` onto the clock of ``df1``.

    In each frame, the first row whose ``Poke_Time`` exceeds
    ``min_poke_time`` is taken as that device's synchronisation poke. The
    difference between the two pokes' ``t`` values (``df2`` minus ``df1``)
    is subtracted from every ``t`` in ``df2``.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames with a datetime column ``t`` and a numeric column
        ``Poke_Time``.
    min_poke_time : float, default 10
        A poke counts as a synchronisation poke when its ``Poke_Time`` is
        strictly greater than this value (same units as ``Poke_Time``).

    Returns
    -------
    tuple
        ``(df1, df2_shifted, offset)``: ``df1`` is returned as passed in,
        ``df2_shifted`` is a copy of ``df2`` with adjusted ``t``, and
        ``offset`` is the ``pandas.Timedelta`` that was subtracted.

    Raises
    ------
    IndexError
        If either frame has no row with ``Poke_Time > min_poke_time``.
    """
    sync_t1 = df1.loc[df1.Poke_Time > min_poke_time, "t"]
    sync_t2 = df2.loc[df2.Poke_Time > min_poke_time, "t"]

    # Compare the first long poke of each frame by POSITION. Subtracting the
    # two Series directly aligns them on row labels, which gives NaT whenever
    # the long pokes sit on different rows in the two files and would then
    # turn every timestamp of df2 into NaT.
    offset = pd.Timedelta(sync_t2.iloc[0] - sync_t1.iloc[0])

    df2 = df2.assign(t=df2.t.sub(offset))

    return df1, df2, offset

In [None]:
# Path to the experiment metafile, one directory above the notebook. Built
# with os.path.join so the separator matches the host OS.
METAFILE = os.path.join("..", "FEDXD_METAFILE.xls")
# Read the "METAFILE_SE" sheet of the metafile via trompy. `rows` is iterated
# further down, one entry per FED file; `header` is not used in this notebook.
# NOTE(review): presumably `header` holds the sheet's column names — confirm
# against trompy's metafilereader.
rows, header = tp.metafilereader(METAFILE, sheetname="METAFILE_SE")

def prep4tidy(row):
    """Load the FED file named in one metafile row and tag it with metadata.

    ``row`` must unpack into exactly eight values, in this order: mouseid,
    sex, batch, fedfile, choice_session, fed_pellet, mode, diet. The file
    ``fedfile`` is read from ``DATA_FOLDER`` and passed through
    ``tweak_fed``; the other seven values are added as constant columns,
    with ``batch`` converted to ``int``.
    """
    mouseid, sex, batch, fedfile, choice_session, fed_pellet, mode, diet = row

    fed_path = os.path.join(DATA_FOLDER, fedfile)
    tidy = tweak_fed(pd.read_csv(fed_path))

    # Dict order fixes the order in which the new columns are appended.
    metadata = {
        "mouseid": mouseid,
        "diet": diet,
        "sex": sex,
        "mode": mode,
        "choice_session": choice_session,
        "batch": int(batch),
        "fed_pellet": fed_pellet,
    }
    return tidy.assign(**metadata)

# One tidy frame per metafile row, stacked into a single long table. Each
# frame keeps its own row labels because concat is called without ignore_index.
list_of_dfs = [prep4tidy(row) for row in rows]

feddata = pd.concat(list_of_dfs)

File is excel file. Making csv metafile first


In [None]:
# For every mouse and choice session, split the rows by FED device, align the
# second device's clock to the first with sync_feds, and collect the results.
mice = feddata.mouseid.unique()

offsets = []
failed_keys = []
synced_dfs = []

for mouse in mice:
    for session in ("ONE", "TWO", "THREE", "FOUR"):
        temp_df = feddata.query("mouseid == @mouse & choice_session == @session")
        devices = temp_df.Device_Number.unique()

        try:
            # Indexing `devices` raises IndexError when fewer than two devices
            # are present; an IndexError from sync_feds is handled the same way.
            fed1 = temp_df.query("Device_Number == @devices[0]")
            fed2 = temp_df.query("Device_Number == @devices[1]")
            df1, df2, offset = sync_feds(fed1, fed2)
        except IndexError:
            print(f"Cannot sync for {mouse} in choice session {session}")
            failed_keys.append((mouse, session))
        else:
            synced_dfs.extend([df1, df2])
            offsets.append(offset.total_seconds())

feddata_synced = pd.concat(synced_dfs)

# Offsets that came back as NaN are reported as 0.
offsets = [i if not np.isnan(i) else 0 for i in offsets]
print(failed_keys, offsets, sep="\n")

In [None]:
# Kernel-density plot of the per-session clock offsets (in seconds) between
# the paired FEDs.
# Author's observation: FEDs are normally only out of sync by 40 seconds or less.
# TODO: inspect the NaN offsets to work out what is happening there (note that
# NaN values in `offsets` have already been replaced by 0 before this plot).
import seaborn as sns
sns.kdeplot(offsets)

In [None]:
# Save the synced table as a feather file. reset_index() first moves the row
# labels into an ordinary column, since to_feather requires a default index.
# The path is built with os.path.join so it resolves to ../processeddata/ on
# any OS instead of a Windows-only backslash string.
feddata_synced.reset_index().to_feather(
    os.path.join("..", "processeddata", "feddata_synced.feather")
)