# CS 159 Project

### 1. Imports

In [None]:
import glob
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

import torch 

# Seed NumPy's and PyTorch's global random number generators with the same
# fixed value, NumPy first and then PyTorch.
SEED = 0
for seed_rng in (np.random.seed, torch.manual_seed):
    seed_rng(SEED)

print("All packages imported!")

### 2. Data

Sequences are 1 minute long (1800 frames at 30Hz) in the mouse dataset. The `sequence_id` is a random hash to anonymize experiment details.


- `user_train.npy` - Training set for the task, with the following schema:

```
{
    "sequences" : {
        "<sequence_id>" : {
            "keypoints" : a ndarray of shape (4500, 11, 24, 2)
        }
    }
}
```

- `submission_data.npy` - Test set for the task, with the following schema:

```
{
    "<sequence_id>" : {
        "keypoints" : a ndarray of shape (4500, 11, 24, 2)
    }
}
```
- `sample_submission.npy` - Template for a sample submission for this task, with the following schema:

```
{
    "frame_number_map": 
        {"<sequence_id-1>": (start_frame_index, end_frame_index),
        "<sequence_id-2>": (start_frame_index, end_frame_index),
        ...
        "<sequence_id-n>": (start_frame_index, end_frame_index),
        }
    "embeddings" : [
            [0.321, 0.234, 0.186, 0.857, 0.482, 0.185],
            [0.184, 0.583, 0.475, 0.485, 0.275, 0.958],
            ...
        ]
}
```

In `sample_submission`, each key in the `frame_number_map` dictionary is the unique sequence id of a video in the test set. The value for each key is the start and end index used to slice the `embeddings` numpy array to get the corresponding embeddings. The `embeddings` array is a 2D `ndarray` of floats of size `total_frames` by `X`, where `X` is the dimension of your learned embedding (6 in the above example; the maximum permitted embedding dimension is **128**), representing the embedded value of each frame in the sequence. `total_frames` is the sum of the frame counts of all the sequences; the array should be the concatenation of the embeddings of all the clips.

#### Download

In [None]:
# set to false to download data
DATA_DOWNLOADED = True

mabe_fp = os.path.join('data', 'mabe')

if not DATA_DOWNLOADED:
    # credentials
    !pip install -U aicrowd-cli
    %load_ext aicrowd.magic
    %aicrowd login
    
    # dirs
    if not os.path.exists('data'):
        os.mkdir('data')
    if not os.path.exists(mabe_fp):
        os.mkdir(mabe_fp)

    aicrowd_challenge_name = "mabe-2022-mouse-triplets"
    
    # run only if you do not have the data
    #%aicrowd ds dl -c {aicrowd_challenge_name} -o mabe_fp # Download all files (optional)
    %aicrowd ds dl -c {aicrowd_challenge_name} -o {mabe_fp} *submission_data* # download only the submission keypoint data
    %aicrowd ds dl -c {aicrowd_challenge_name} -o {mabe_fp} *user_train* # download data with the public task labels provided

#### Importing

In [None]:
# Paths of the two keypoint files inside the MABe data directory.
sub_clip_fp, user_train_fp = (
    os.path.join(mabe_fp, fname)
    for fname in ('submission_data.npy', 'user_train.npy')
)

# Load each file and unwrap the single stored object with .item().
# NOTE(review): allow_pickle=True unpickles the file contents on load, which
# can execute arbitrary code -- only load files from a trusted source.
sub_clip_dict, user_train_dict = (
    np.load(fp, allow_pickle=True).item()
    for fp in (sub_clip_fp, user_train_fp)
)

#### Processing