In [4]:
from pathlib import Path
import pandas as pd
from functools import reduce
import numpy as np
from datetime import datetime
import secrets

In [5]:
# Data lives in a "data" folder that sits next to the directory the notebook
# is executed from (i.e. one level above the current working directory).
data_dir = Path.cwd().parent / "data"

In [6]:
# Group labels; each one is interpolated into the raw file names
# ("FC_<group>_<behaviour>.csv").
group_names = [
    "A",
    "B",
    "C",
    "D",
]

# Behaviours; there is one raw CSV per (group, behaviour) pair, and each
# behaviour becomes one value column of the tidy table.
behaviours = [
    "drink", "eat", "eathand",
    "groom", "hang", "rear",
    "rest", "sniff", "walk",
]

In [7]:
def load_data(data_dir, group_name, behaviour):
    """Read the raw CSV for one group/behaviour pair.

    Parameters
    ----------
    data_dir : pathlib.Path
        Directory that contains the ``raw`` sub-folder.
    group_name : str
        Group label used in the file name.
    behaviour : str
        Behaviour label used in the file name.

    Returns
    -------
    pandas.DataFrame
        Contents of ``raw/FC_<group_name>_<behaviour>.csv`` with the column
        pandas labelled ``"Unnamed: 0"`` renamed to ``"mouse_id"``.
    """
    csv_path = data_dir / "raw" / f"FC_{group_name}_{behaviour}.csv"
    table = pd.read_csv(csv_path)
    # pandas assigns "Unnamed: 0" to a first column whose header cell is blank.
    return table.rename(columns={"Unnamed: 0": "mouse_id"})

def tidy_data(df, group_name, behaviour, year=2018):
    """Reshape one wide behaviour table into long format with a timestamp.

    Every column other than ``mouse_id`` is treated as a time slot. Its header
    is parsed with three patterns: the two characters before ``M`` (month),
    before ``D`` (day) and before ``h`` (hour).

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``mouse_id`` column and one column per time slot.
    group_name : str
        Value stored in the ``group`` column of every output row.
    behaviour : str
        Name given to the value column produced by the melt.
    year : int, default 2018
        Calendar year used to build ``dt`` (the headers carry no year).

    Returns
    -------
    pandas.DataFrame
        Columns ``mouse_id``, ``<behaviour>``, ``month``, ``day``, ``hour``,
        ``group`` and ``dt``. ``month``/``day``/``hour`` keep the two-character
        strings extracted from the header; ``dt`` is the assembled timestamp.
        Cells equal to ``"No video"`` become NaN.

    Notes
    -----
    A header that lacks one of the three markers yields a missing component,
    which cannot be converted to an integer and therefore raises.
    """
    return (
        df
        .replace("No video", np.nan)
        .melt(id_vars="mouse_id", var_name="time", value_name=behaviour)
        .assign(
            # expand=False -> a Series (not a one-column DataFrame) per component.
            month=lambda x: x["time"].str.extract("(.{2})M", expand=False),
            day=lambda x: x["time"].str.extract("(.{2})D", expand=False),
            hour=lambda x: x["time"].str.extract("(.{2})h", expand=False),
            group=group_name,
            # Assemble all timestamps in one vectorised call instead of
            # constructing a datetime per row.
            dt=lambda x: pd.to_datetime(
                pd.DataFrame(
                    {
                        "year": year,
                        "month": x["month"].astype(int),
                        "day": x["day"].astype(int),
                        "hour": x["hour"].astype(int),
                    },
                    index=x.index,
                )
            ),
        )
        .drop("time", axis=1)
    )

def get_mid_mapper(mids):
    """Build a lookup from each original mouse id to a fresh random token.

    Parameters
    ----------
    mids : iterable
        Original (hashable) mouse ids.

    Returns
    -------
    dict
        ``{original_id: token}`` where each token comes from
        ``secrets.token_hex(15)`` (15 random bytes, i.e. 30 hex characters).
        Tokens are drawn anew on every call, so two calls give different
        mappings.
    """
    mapping = {}
    for original in mids:
        mapping[original] = secrets.token_hex(15)
    return mapping

In [12]:
# Assemble the full table: for every group, load and tidy each behaviour file,
# join the behaviours side by side, anonymise the mouse ids, then stack groups.

# Columns shared by every tidied behaviour frame. Listed in the order they
# appear in those frames so the merge keys (and any key-based ordering of the
# result) match what pandas would infer from the common columns.
merge_keys = ["mouse_id", "month", "day", "hour", "group", "dt"]

frames = []   # one anonymised long-format frame per group
mappers = []  # the original-id -> token dict used for each group, same order
for group in group_names:
    gframes = []
    for behaviour in behaviours:
        behaviour_df = load_data(data_dir, group, behaviour)
        gframes.append(tidy_data(behaviour_df, group, behaviour))

    # Outer join keeps a time slot even if only some behaviour files have it.
    group_df = reduce(
        lambda left, right: pd.merge(left, right, how="outer", on=merge_keys),
        gframes,
    )

    # Tokens are generated per group, so ids are only consistent within a group.
    mapper = get_mid_mapper(group_df.mouse_id.unique())
    frames.append(group_df.assign(mouse_id=lambda x: x.mouse_id.map(mapper)))
    mappers.append(mapper)

df = (
    # ignore_index avoids repeated row labels across the stacked groups.
    pd.concat(frames, ignore_index=True)
    [["mouse_id", "group", "month", "day", "hour", *behaviours, "dt"]]
)

In [13]:
# Add time-since-first-observation columns and fix the final column order.
# The cell can be re-executed: it only needs "mouse_id", "dt" and the behaviour
# columns, all of which survive the column selection below.

# Earliest timestamp recorded for each mouse, broadcast back to every row.
first_seen = df.groupby("mouse_id")["dt"].transform("min")
elapsed = df["dt"] - first_seen

df = (
    df
    .assign(
        # Whole days elapsed since the mouse's first observation.
        experimental_day=elapsed.dt.days,
        # Full elapsed time as a timedelta. The column name is misspelled
        # ("sinse"); it is kept as-is because it is part of the output schema.
        time_sinse_start=elapsed,
    )
    # Selecting columns also discards the month/day/hour helper columns.
    [["mouse_id", "group", "dt", "experimental_day", "time_sinse_start", *behaviours]]
)

In [14]:
# Persist the tidy table next to the raw data, without the row index.
df.to_csv(path_or_buf=data_dir / "tidy.csv", index=False)