In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from matplotlib import pyplot as plt

import torch
import torchvision
from preprocessing import preprocess, base_bp

# Silence pandas' chained-assignment (SettingWithCopy) warning: the processing
# loop below assigns columns on a frame obtained by column selection.
pd.options.mode.chained_assignment = None

# Dataset root, relative to the notebook's working directory.
PATH = '../../Datasets/MABe-mouse-behavior-detection/'
# Per-video metadata; later cells read lab_id, video_id, behaviors_labeled,
# body_parts_tracked, frames_per_second and pix_per_cm_approx from it.
df = pd.read_csv(PATH+'train.csv')

In [2]:
def process_labels(X):
    """Collect the distinct agent/target names mentioned in an annotation list.

    X is either a list of comma-separated entries or the string form of such
    a list. For every entry the first two comma-separated fields are taken,
    single quotes are removed, and the sorted unique values are returned as a
    numpy array. Anything that is not a str or list yields an empty list.
    """
    # NOTE(review): eval() executes whatever the CSV cell contains; if the
    # file is not fully trusted, ast.literal_eval would be the safer parser.
    entries = eval(X) if isinstance(X, str) else X
    if not isinstance(entries, list):
        return []

    names = []
    for entry in entries:
        fields = entry.split(',')
        names.extend(fields[pos].replace("'", "") for pos in (0, 1))
    return np.unique(names)

def process_labels2(X):
    """Return the behaviour name (last comma-separated field) of every entry.

    X is either a list of comma-separated entries or the string form of such
    a list. Single quotes are removed from each name; order and duplicates
    are preserved. Anything that is not a str or list yields an empty list.
    """
    # NOTE(review): eval() executes whatever the CSV cell contains; if the
    # file is not fully trusted, ast.literal_eval would be the safer parser.
    parsed = eval(X) if isinstance(X, str) else X
    if isinstance(parsed, list):
        return [item.rsplit(',', 1)[-1].replace("'", "") for item in parsed]
    return []

def make_labels(x):
    """One-hot encode label `x` against the module-level LABELS list.

    Returns a float64 vector of length len(LABELS) with a single 1 at the
    position of `x`. Raises ValueError (from list.index) if `x` is unknown.
    """
    hot = LABELS.index(x)
    return (np.arange(len(LABELS)) == hot).astype(float)


def get_behaviors_labeled(x):
    """Map each (agent, target) pair to a multi-hot vector of its labelled behaviours.

    x is the string form of a list of 'agent,target,behaviour' entries.
    The "mouse" prefix is removed from agent and target, so keys look like
    ('1', '2') or ('1', 'self'). Each value is a torch vector of length
    len(LABELS) with a 1 at every labelled behaviour; the last slot is always
    set to 1 (this is the 'none' slot only when LABELS ends with 'none' at
    call time). Non-string input yields an empty dict.

    Raises ValueError if an entry does not have exactly three fields or names
    a behaviour that is not in LABELS.
    """
    if not isinstance(x, str):
        return {}

    def _unquote(token):
        # Strip stray single/double quotes. This is applied to all three
        # fields so that the keys agree with process_labels, which unquotes
        # the mouse fields too.
        return token.replace('"', '').replace("'", '')

    # NOTE(review): eval() executes whatever the CSV cell contains; if the
    # file is not fully trusted, ast.literal_eval would be the safer parser.
    behaviors_by_pair = {}
    for entry in eval(x):
        m1, m2, b = (_unquote(field) for field in entry.split(','))
        pair = (m1.replace("mouse", ''), m2.replace("mouse", ''))
        behaviors_by_pair.setdefault(pair, []).append(b)

    labeled = {}
    for pair, behaviors in behaviors_by_pair.items():
        vec = torch.zeros((len(LABELS)))
        for name in behaviors:
            vec[LABELS.index(name)] = 1
        vec[-1] = 1
        labeled[pair] = vec
    return labeled
    

# Derive helper columns from the raw annotation strings.
# NOTE: this cell overwrites df['behaviors_labeled'] with dicts, so it is not
# re-runnable on the same frame: process_labels / process_labels2 return []
# for dict input. Reload df before running it again.
df['solo'] = df.behaviors_labeled.apply(process_labels)
df['label'] = df.behaviors_labeled.apply(process_labels2)

# Sorted list of every behaviour name that occurs in the training metadata.
LABELS = np.unique(df['label'].explode().dropna()).tolist()
print(len(LABELS))

# Append the 'none' class *before* building the availability vectors:
# get_behaviors_labeled sizes its vectors with len(LABELS) and always sets the
# last slot, so 'none' has to be that last slot for the vectors to line up
# with what make_labels produces.
LABELS = LABELS + ['none']
df['behaviors_labeled'] = df.behaviors_labeled.apply(get_behaviors_labeled)

# Per lab: does any annotation entry name 'self' as agent or target?
LAB_SOLO = {}

for lab in np.unique(df.lab_id):
    lab_rows = df[df.lab_id==lab]
    SOLO = np.unique(lab_rows['solo'].explode().dropna()).tolist()
    LAB_SOLO[lab] = ('self' in SOLO)

37


In [3]:
# Display the per-lab flag: True when 'self' appears among that lab's
# agent/target names.
LAB_SOLO

{'AdaptableSnail': True,
 'BoisterousParrot': False,
 'CRIM13': True,
 'CalMS21_supplemental': False,
 'CalMS21_task1': True,
 'CalMS21_task2': False,
 'CautiousGiraffe': False,
 'DeliriousFly': False,
 'ElegantMink': True,
 'GroovyShrew': True,
 'InvincibleJellyfish': True,
 'JovialSwallow': False,
 'LyricalHare': True,
 'MABe22_keypoints': False,
 'MABe22_movies': False,
 'NiftyGoldfinch': True,
 'PleasantMeerkat': False,
 'ReflectiveManatee': False,
 'SparklingTapir': False,
 'TranquilPanther': True,
 'UppityFerret': True}

In [4]:
# Count, over all videos whose lab_id does not contain 'MABe', how many videos
# track each body part.
# NOTE(review): eval() executes whatever the CSV cell contains; if the file is
# not fully trusted, ast.literal_eval would be the safer parser.
BP = {}
tracked_lists = df.loc[~df.lab_id.str.contains('MABe'), 'body_parts_tracked']
for raw_parts in tracked_lists:
    for part in eval(raw_parts):
        BP[part] = BP.get(part, 0) + 1

In [5]:
# Display the body-part -> number-of-videos counts built in the previous cell.
BP

{'body_center': 188,
 'ear_left': 863,
 'ear_right': 863,
 'headpiece_bottombackleft': 7,
 'headpiece_bottombackright': 7,
 'headpiece_bottomfrontleft': 7,
 'headpiece_bottomfrontright': 7,
 'headpiece_topbackleft': 7,
 'headpiece_topbackright': 7,
 'headpiece_topfrontleft': 7,
 'headpiece_topfrontright': 7,
 'lateral_left': 169,
 'lateral_right': 169,
 'neck': 651,
 'nose': 846,
 'tail_base': 863,
 'tail_midpoint': 17,
 'tail_tip': 104,
 'hip_left': 655,
 'hip_right': 655,
 'head': 17,
 'spine_1': 21,
 'spine_2': 21,
 'tail_middle_1': 21,
 'tail_middle_2': 21}

In [6]:
# Resume support: load the log of already processed (video, mouse pair) rows.
# Rows without a mouse_1 value are dropped; the processing loop writes such
# rows for videos where no mice were found, so those videos are not treated
# as done and will be attempted again.
try:
    data = (
        pd.read_csv(PATH+'train_data.csv')
        .dropna(subset=['mouse_1'])
        # The processing loop appends with data.loc[len(data)] = ...; that only
        # appends when the index is 0..n-1. Without the reset, the gaps left by
        # dropna let len(data) collide with an existing label and overwrite it.
        .reset_index(drop=True)
    )
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable log yet: start a fresh one. Other errors (e.g. a malformed
    # file) are allowed to surface instead of silently discarding progress.
    data = pd.DataFrame(columns=['lab_id', 'video_id', 'mouse_1', 'mouse_2', 'ok', 'arena_w', 'arena_h'])
# Keys of the form '<lab_id>_<video_id>' for every video that already has rows.
already_done = np.unique(data['lab_id']+'_'+data['video_id'].astype(str))

In [None]:
# Convert every not-yet-processed video into per-pair keypoint/label arrays
# saved under train_processed/, and log each saved pair in `data`
# (written back to train_data.csv after every video).
for idx in tqdm(df.index[:]):
    # Look the identifiers up once; this also avoids re-using the f-string's
    # own quote character inside it, which is a SyntaxError before Python 3.12.
    lab_id = df.loc[idx, 'lab_id']
    video_id = df.loc[idx, 'video_id']

    # Skip videos already present in the log and every MABe22 lab.
    if f'{lab_id}_{video_id}' in already_done or 'MABe22' in lab_id:
        continue

    filepath = f'{lab_id}/{video_id}'
    track = pd.read_parquet(PATH+f'train_tracking/{filepath}.parquet').drop_duplicates(subset=['mouse_id', 'bodypart', 'video_frame'])
    try:
        annot = pd.read_parquet(PATH+f'train_annotation/{filepath}.parquet')
    except Exception:
        # Best effort: a video whose annotation file cannot be read is skipped.
        # `Exception` (not a bare except) so Ctrl-C still stops the loop.
        continue

    fps = df.loc[idx, 'frames_per_second']
    ppc = df.loc[idx, 'pix_per_cm_approx']
    avail_lbl = df.loc[idx, 'behaviors_labeled']

    # Bounding box of all raw tracked coordinates: its minimum corner is the
    # origin used below, its extent is logged as the arena size.
    min_coord = np.array([track.x.min(), track.y.min()])
    arena_w,arena_h = np.array([track.x.max(), track.y.max()])-min_coord

    # preprocess() is project code; here it is relied on to return the
    # tracking frame with one '<bodypart> - <mouse>' column per keypoint,
    # the annotations, and the list of mice — TODO confirm against preprocessing.py.
    track, annot, Mice = preprocess(track, annot)

    for m1 in Mice:
        for m2 in Mice:
            # Self pairs (m1 == m2) are only considered for labs with 'self' annotations.
            if not (m1!=m2 or LAB_SOLO[lab_id]):
                continue
            # Keep only pairs that are actually annotated for this video.
            if not((((str(m1),str(m2)) in avail_lbl) and m1!=m2) or (((str(m1),'self') in avail_lbl) and m1==m2)):
                continue

            out_annot = annot[(annot.mouse_id==m1)&(annot.target_id==m2)].reset_index(drop=True)

            cols1 = list(base_bp + ' - ' + str(m1))

            if m1!=m2:
                cols2 = list(base_bp + ' - ' + str(m2))
                cols = cols1+cols2
            else:
                cols2 = cols1
                cols = cols1
            # Explicit copy: the columns are rewritten below and must not be a
            # view/slice of `track`.
            out = track[cols].copy()

            # Shift valid keypoints to the bounding-box origin; anything that is
            # not an ndarray, or contains NaN, becomes (0, 0).
            for c in out.columns:
                out[c] = out[c].apply(lambda x: x-min_coord if (isinstance(x, np.ndarray) and pd.isna([x]).sum()==0) else np.array([0., 0.]))

            if len(out_annot)>0:
                # Assumes out_annot has one row per index label of `out` — TODO confirm.
                out['action'] = out_annot.loc[out.index].action.values
            else: out['action']='none'

            # Skip sequences shorter than 128 rows.
            if len(out)<128: continue

            X = torch.tensor(np.stack([np.stack(x) for x in out.drop(columns=['action']).values]))
            y = torch.tensor(np.stack(out['action'].apply(make_labels).values).astype(int))

            # Rescale the time axis by 30/fps.
            resize_x = torchvision.transforms.Resize((int(len(X)*(30./fps)), 2))
            resize_y = torchvision.transforms.Resize((int(len(X)*(30./fps)), y.size(1)))

            # A self pair duplicates its keypoints to keep the two-mouse layout.
            if m1==m2: X = torch.cat([X,X], dim=1)

            # 14 keypoint slots are resized one at a time and converted from
            # pixels to cm (presumably 7 base body parts x 2 mice — TODO confirm).
            x = []
            for i in range(14):
                x.append(resize_x(X[:,i].unsqueeze(0))[0].unsqueeze(1) / ppc)

            X = torch.cat(x, dim=1)
            y = resize_y(y.unsqueeze(0))[0].bool().numpy()
            X = X.reshape((len(X),14,2)).float().numpy()

            ch = 0
            np.save(PATH+f'train_processed/{lab_id}_{video_id}_{m1}-{m2}_{ch}.npy', X, allow_pickle=False)
            np.save(PATH+f'train_processed/{lab_id}_{video_id}_{m1}-{m2}_{ch}_labels.npy', y, allow_pickle=False)
            data.loc[len(data)] = [lab_id,video_id,m1,m2,True, arena_w/ppc, arena_h/ppc]

    # Videos without any mice are logged as not ok.
    if len(Mice)==0:
        data.loc[len(data)] = [lab_id,video_id,None,None,False, arena_w/ppc, arena_h/ppc]
    # Persist the log after every video so an interrupted run can resume.
    data.to_csv(PATH+'train_data.csv', index=False)
                

  0%|                                                       | 1/8789 [00:11<27:41:55, 11.35s/it]

In [None]:
# Keep one row per (lab_id, video_id) and sum its `ok` flag, i.e. count the
# videos whose retained (first) log row is marked ok.
data.drop_duplicates(subset=['lab_id', 'video_id']).ok.sum()