In [1]:
import pandas as pd, numpy as np,xarray as xr
from pathlib import Path
import re, yaml, copy

In [2]:
# Common stem shared by the three input files of one recording session.
base = Path("/home/julienb/Documents/database_scripts/database_scripts_test/poly_dat_files/Rats/Luisa/Rat101_0729_opto_01")
# Append the extension to the full name: `with_suffix` would *replace* whatever
# follows the last dot of the stem (e.g. "Rat1.5_x" -> "Rat1.dat").
dat_path = base.with_name(base.name + ".dat")
task_path = base.with_name(base.name + ".xls")
info_path = base.with_name(base.name + ".yaml")
exists = {"dat_path":dat_path.exists(), "task_path":task_path.exists(), "info_path": info_path.exists()}
missing = [name for name, found in exists.items() if not found]
if missing:
    display(exists)
    # FileNotFoundError is still an Exception, and the message names the culprits.
    raise FileNotFoundError(f"Missing some input files... ({', '.join(missing)})")

In [3]:
# Load the raw event log: tab-separated integers, 13 header lines skipped.
event_df = pd.read_csv(dat_path, sep="\t", names=['time (ms)', 'family', 'nbre', '_P', '_V', '_L', '_R', '_T', '_W', '_X', '_Y', '_Z'], skiprows=13, dtype=int)
# Replace the millisecond column by a leading "t" column expressed in seconds.
event_df.insert(0, "t", event_df.pop("time (ms)")/1000)
# Many events share the same millisecond. A stable sort keeps their file order,
# so `poly_evnum` (the row rank after sorting) is deterministic and follows the log.
event_df = event_df.sort_values("t", kind="stable").reset_index(drop=True).reset_index(names="poly_evnum")
# Preview only: printing the whole frame floods the notebook with thousands of rows.
event_df.head(20)

      poly_evnum         t  family  nbre  _P  _V  _L  _R   _T   _W   _X   _Y  _Z
0              0     0.006      10     1   1   1   0   0    1    0    0    0   0
1              1     0.023      10     1   1   1   0   0   36    0    0    0   0
2              2     0.023       6    22   1   1   1   0    2    1    1    1   0
3              3     0.118      10     1   1   1   0   0   38    0    0    0   0
4              4     0.128       1     2   0   0   0   0    0    0    0    0   0
5              5     0.128       1     3   0   0   0   0    0    0    0    0   0
6              6     0.128       1     2   0   0   0   0    0    0    0    0   0
7              7     0.128       1     4   0   0   0   0    0    0    0    0   0
8              8     0.128       1     4   0   0   0   0    0    0    0    0   0
9              9     0.128      10     1   1   1   0   0    3    0    0    0   0
10            10     0.128      10     1   1   1   0   0    2    0    0    0   0
11            11     0.128  

In [4]:
# Read the task table (tab-separated, column names on row 11) and give its
# first column the explicit name "state_num".
task_df = (
    pd.read_csv(task_path, sep="\t", header=11)
    .pipe(lambda table: table.rename(columns={table.columns[0]: "state_num"}))
)
display(task_df.columns)
task_df

Index(['state_num', 'T1', 'T2', 'T3', 'T4', 'LED1(1,2)', 'LED2(1,3)',
       'LED3(1,4)', 'L1(2,1)', 'L2(2,2)', 'LK1(5,1)', 'LK2(5,2)', 'RD(6,11)',
       'ASND(6,20)', 'PAD(6,22)', 'STR1(13,1)', 'STR2(13,2)', 'TTLP1(15,1)',
       'TTLP2(15,2)', 'TTLP3(15,3)', 'TTLP4(15,4)', 'TTLP5(15,5)',
       'TTLP6(15,6)', 'TTLP7(15,7)', 'TTLP8(15,8)', 'NEXT1', 'NEXT2', 'NEXT3',
       'CT1', 'CT2'],
      dtype='object')

Unnamed: 0,state_num,T1,T2,T3,T4,"LED1(1,2)","LED2(1,3)","LED3(1,4)","L1(2,1)","L2(2,2)",...,"TTLP4(15,4)","TTLP5(15,5)","TTLP6(15,6)","TTLP7(15,7)","TTLP8(15,8)",NEXT1,NEXT2,NEXT3,CT1,CT2
0,1,10,,,,,,,,,...,,,,,,!ti(36),,,,
1,2,_essais,,,,!on,!on,!on,,,...,,,,,,PAD_L=1,L1_L=1(28),L2_L=1(26),,
2,3,70,,,,!on,!on,!on,,,...,,,,,,PAD_L=0(2),!ti,,,
3,4,100,,,,,,on,,,...,,,,,,PAD_L=0(2),!ti,,,
4,5,400-1400,,,,,,on,,,...,,,,,,PAD_L=0(2),!ti,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,62,,,,,,,,,,...,,,,,,,,,,
62,63,,,,,,,,,,...,,,,,,,,,,
63,64,,,,,,,,,,...,,,,,,,,,,
64,65,,,,,,,,,,...,,,,,,,,,,


In [5]:
# Parse the YAML event description. The context manager closes the file handle,
# which the bare `info_path.open("r")` call left open.
with info_path.open("r") as info_file:
    info = yaml.safe_load(info_file)
info

[{'event_name': '{channel_name}',
  'filter': {'channel_name': ['LED(\\d+)', 'ASND', 'TTLP(\\d+)']},
  'state_value': '_V',
  'start_condition': '_V==1',
  'meta': {'count': '_L'}},
 {'event_name': '{channel_name}',
  'filter': {'channel_name': 'L(\\d+)'},
  'start_condition': '_V==1',
  'state_value': '_V'},
 {'event_name': '{channel_name}_lick',
  'filter': {'channel_name': 'LK(\\d+)'},
  'start_condition': '_V==1',
  'state_value': '_V'},
 {'event_name': '{channel_name}_reward',
  'filter': {'channel_name': 'LK(\\d+)'},
  'start_condition': '_P==1',
  'state_value': '_P'},
 {'event_name': 'PAD_V',
  'filter': {'channel_name': 'PAD'},
  'start_condition': '_V==0',
  'state_value': '_V'},
 {'event_name': 'PAD_P',
  'filter': {'channel_name': 'PAD'},
  'start_condition': '_P==0',
  'state_value': '_P'},
 {'event_name': 'poly_linenum_change',
  'filter': {'family': 10},
  'state_value': '_T'},
 {'event_name': 'file_read',
  'filter': {'channel_name': 'RD'},
  'meta': {'read_value': '_T'

In [7]:


# Parse task-table headers of the form "NAME(family,nbre)" into one row per
# channel; headers that do not follow that pattern are dropped.
channels = (
    pd.Series(task_df.columns)
    .str.extract(r'\s*(?P<channel_name>\w+)\s*\((?P<family>\d+)\s*,\s*(?P<nbre>\d+)\)\s*')
    .dropna(how="all")
    .astype({"family": int, "nbre": int})
)
channels

    

Unnamed: 0,channel_name,family,nbre
5,LED1,1,2
6,LED2,1,3
7,LED3,1,4
8,L1,2,1
9,L2,2,2
10,LK1,5,1
11,LK2,5,2
12,RD,6,11
13,ASND,6,20
14,PAD,6,22


In [8]:
# Attach the channel name to every raw event. The right join keeps all events,
# including those whose (family, nbre) pair has no named channel.
event_channels_df = (
    channels
    # A (family, nbre) pair declared twice in `channels` would silently
    # duplicate events, so require the left keys to be unique.
    .merge(event_df, on=["family", "nbre"], how="right", validate="one_to_many")
    # Stable sort: events sharing a timestamp keep their original relative order.
    .sort_values("t", kind="stable")
)
event_channels_df

Unnamed: 0,channel_name,family,nbre,poly_evnum,t,_P,_V,_L,_R,_T,_W,_X,_Y,_Z
0,,10,1,0,0.006,1,1,0,0,1,0,0,0,0
1,,10,1,1,0.023,1,1,0,0,36,0,0,0,0
2,PAD,6,22,2,0.023,1,1,1,0,2,1,1,1,0
3,,10,1,3,0.118,1,1,0,0,38,0,0,0,0
11,LED2,1,3,11,0.128,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5745,PAD,6,22,5745,1175.145,1,1,1,0,895,843,493,402,2
5746,LK1,5,1,5746,1175.902,1,1,2,1,62,824,0,763,61
5747,LK1,5,1,5747,1175.902,1,1,2,1,62,824,0,763,61
5748,LK1,5,1,5748,1176.052,0,1,2,0,62,825,0,763,62


In [14]:
# Scratch check: boolean mask, computed through DataFrame.eval, of the rows whose
# channel_name equals "LED2" (rows without a channel name come out False).
event_channels_df.eval('channel_name=="LED2"')

0       False
1       False
2       False
3       False
11       True
        ...  
5745    False
5746    False
5747    False
5748    False
5749    False
Name: channel_name, Length: 5750, dtype: bool

In [9]:
# Dispatch raw events to the items of the YAML description.
# For each item, a row is selected when it satisfies every key of item["filter"]
# (AND across keys); a key holding a list is satisfied by any of its values (OR).
# Result: handle_dict[event_name] = dict(info_item=<item>, ev_df=<selected rows>).
handle_dict = {}
for item in info:
    if isinstance(item["filter"], list):
        raise NotImplementedError("filter list not handled yet")
    # Masks are kept in local Series rather than in scratch columns of
    # event_channels_df, so an exception cannot leave the shared frame polluted.
    kept = pd.Series(True, index=event_channels_df.index)
    for col, val in item["filter"].items():
        accepted_values = val if isinstance(val, list) else [val]
        matched = pd.Series(False, index=event_channels_df.index)
        for v in accepted_values:
            if col == "__expr":
                # Free-form boolean expression evaluated against the event table.
                matched = matched | event_channels_df.eval(v)
            elif col in event_channels_df.columns:
                if col == "channel_name":
                    # Channel names are regular expressions; events without a
                    # channel name (NaN) never match.
                    matched = matched | event_channels_df["channel_name"].str.fullmatch(v, na=False)
                else:
                    matched = matched | (event_channels_df[col] == v)
        kept = kept & matched
    filtered = event_channels_df.loc[kept].copy()
    # Build names from row records: unlike `apply(axis=1)`, this also works when
    # the item selects no row at all.
    filtered["event_name"] = [item["event_name"].format(**record) for record in filtered.to_dict("records")]
    for name in filtered["event_name"].drop_duplicates():
        if name in handle_dict:
            raise ValueError(f'Each item must add different event_names... There is overlap for name {name}')
        handle_dict[name] = dict(info_item=item, ev_df=filtered.loc[filtered["event_name"] == name])

# Every raw event must have been claimed by at least one item.
captured_evnums = {evnum for handled in handle_dict.values() for evnum in handled["ev_df"]["poly_evnum"]}
unused_evs = set(event_channels_df["poly_evnum"]) - captured_evnums
if len(unused_evs) > 0:
    display(event_channels_df.loc[event_channels_df["poly_evnum"].isin(unused_evs)])
    raise RuntimeError("Some lines are not handled...")
list(handle_dict.keys())

['LED2',
 'LED3',
 'LED1',
 'TTLP1',
 'TTLP3',
 'TTLP2',
 'ASND',
 'TTLP5',
 'TTLP7',
 'TTLP6',
 'TTLP8',
 'TTLP4',
 'L1',
 'LK1_lick',
 'LK1_reward',
 'PAD_V',
 'PAD_P',
 'poly_linenum_change',
 'zone_change',
 'stop']

In [10]:
def state_grp_agg(d: pd.DataFrame) -> pd.Series:
    """Collapse one run of consecutive rows sharing a state value into one event.

    The event starts at the first row of the run and lasts until the row that
    follows the run's last row (`next_t`); the last run therefore gets NaN.
    """
    summary = dict(
        start=d["t"].iat[0],
        duration=d["next_t"].iat[-1] - d["t"].iat[0],
        # NOTE(review): meta is taken from the row *after* the run's first row
        # (unchanged from the original logic) -- confirm the offset is intended.
        meta=d["next_meta"].iat[0],
    )
    return pd.Series(d.iloc[0, :].to_dict() | summary)


# Turn every handled group of raw events into rows of
# (event_name, start, duration, state_value, meta).
event_frames = []
for ev_name, handled in handle_dict.items():
    # Stable sort: same-timestamp events keep their order, which decides how
    # state runs are delimited below.
    df: pd.DataFrame = handled["ev_df"].sort_values("t", kind="stable")
    info_item = handled["info_item"]
    if info_item.get("discarded"):
        continue
    if "meta" in info_item:
        # One dict per row, each key evaluated as an expression on the event table.
        r = pd.DataFrame(index=df.index)
        for k, v in info_item["meta"].items():
            r[k] = df.eval(v)
        df["meta"] = r.apply(lambda row: row.to_dict(), axis=1)
    else:
        df["meta"] = None
    if "state_value" in info_item:
        df["state_value"] = df.eval(info_item["state_value"])
        # A new group starts every time the state value changes.
        df["state_grp"] = (df["state_value"] != df["state_value"].shift(1)).cumsum()
        df["next_t"] = df["t"].shift(-1)
        df["next_meta"] = df["meta"].shift(-1)
        df = df.groupby("state_grp").apply(state_grp_agg, include_groups=False)
    else:
        # Instantaneous events: no duration, no state. The meta computed above is
        # kept (it used to be overwritten with None here, discarding it).
        df["start"] = df["t"]
        df["duration"] = np.nan
        df["state_value"] = np.nan
    if "start_condition" in info_item:
        df = df.loc[df.eval(info_item["start_condition"])]
    event_frames.append(df[["event_name", "start", "duration", "state_value", "meta"]])

# NOTE: the result keeps its historical name `all`, which shadows the builtin
# for the rest of the session.
all = pd.concat(event_frames).sort_values("start", kind="stable")
display(all)

Unnamed: 0,event_name,start,duration,state_value,meta
1,poly_linenum_change,0.006,0.017,1.0,
2,poly_linenum_change,0.023,0.095,36.0,
3,poly_linenum_change,0.118,0.010,38.0,
4,poly_linenum_change,0.128,0.000,2.0,
5,poly_linenum_change,0.128,0.070,3.0,
...,...,...,...,...,...
982,PAD_P,1174.729,0.028,0.0,
802,PAD_V,1174.922,0.018,0.0,
984,PAD_P,1175.115,0.030,0.0,
223,LK1_lick,1175.902,,1.0,


In [11]:

# One row per distinct (channel_name, family, nbre) combination, each mapped to
# the same constant pair of columns ev1=1 / ev2=2.
event_items_df = (
    event_channels_df
    .loc[:, ["channel_name", "family", "nbre"]]
    .drop_duplicates()
    .apply(lambda _channel: pd.Series({"ev1": 1, "ev2": 2}), axis=1)
)
event_items_df


Unnamed: 0,ev1,ev2
0,1,2
2,1,2
11,1,2
8,1,2
6,1,2
23,1,2
22,1,2
21,1,2
20,1,2
30,1,2
