In [None]:
# for Windows

pip install -r requirements.txt

In [None]:
# for Mac

pip install -r requirements_mac.txt

In [None]:
import sys
import os

# Project root: the parent of the directory the notebook is running in.
work_dir = os.path.abspath(os.path.join('..'))

# Make <work_dir>/src importable (presumably where `lstm_train` and
# `visualization` live -- confirm against the repository layout).
# The membership test must look at the directory that is actually appended;
# otherwise every re-run of this cell adds a duplicate entry to sys.path.
src_dir = os.path.join(work_dir, "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

print(work_dir)

In [None]:
# import packages

import glob
import itertools
import re
import umap
import hdbscan
import numpy as np
import pandas as pd

from lstm_train import train
from sklearn.preprocessing import StandardScaler
from visualization import hdbscan_figure, umap_figure, LSTM_hdbscan_figure, LSTM_umap_figure

from IPython import display
from IPython.display import Image, Video

In [None]:
# Setting keypoints

keypoint_names = ['mouth', 'head', 'dorsal_front', 'dorsal_center', 'dorsal_back', 'caudal_fin']

# Keypoints that the feature (column) names below are built from.
kp_mouth, kp_head = keypoint_names[0], keypoint_names[1]
kp_dorsal_center, kp_dorsal_back = keypoint_names[3], keypoint_names[4]

# The two keypoint pairs used by the alignment / angle features.
head_pair = f'{kp_head}-{kp_mouth}'
dorsal_pair = f'{kp_dorsal_back}-{kp_dorsal_center}'

# Column names of the seven key characters: one distance, two alignments and
# four angles (each pair measured to the head, then to the dorsal_back).
key_characters = [f'distance_{kp_head}-{kp_head}']
key_characters += [f'alignment_{pair}' for pair in (head_pair, dorsal_pair)]
key_characters += [f'angles_{pair} to {target}'
                   for target in (kp_head, kp_dorsal_back)
                   for pair in (head_pair, dorsal_pair)]

key_characters

In [None]:
## key_character explanation
## Show the explanatory figure stored under <work_dir>/data.

key_characters_png = work_dir + '/data/key_characters.png'
display.display(Image(key_characters_png, embed=True))

In [None]:
# import sampled frames with key characters
## key characters were already calculated from raw tracking data
sampled_csv = work_dir + '/data/sampled_umap_cluster.csv'
sampled_df = pd.read_csv(sampled_csv)

## UMAP-HDBSCAN clustering ids (umap_neighbor == 15) of the sampled frames
train_label = sampled_df['cluster_un15'].to_numpy()

## key characters of the sampled frames, one row per frame
sampled_features = sampled_df[key_characters].to_numpy()

## standardize the train data; `scaler` is reused later to standardize
## the key characters of each behavior bout
scaler = StandardScaler()
scaler.fit(sampled_features)
train_data = scaler.transform(sampled_features)

In [None]:
# create umap model
## Fit on the standardized key characters recomputed from `sampled_df` rather
## than on `train_data`: this cell rebinds `train_data` to the embedding below,
## so fitting on `train_data` would, when the cell is re-run, train UMAP on its
## own output and break `umap_train.transform` on the key characters of the
## behavior bouts later on.

umap_input = scaler.transform(sampled_df[key_characters].to_numpy())
umap_train = umap.UMAP(n_neighbors=15, random_state=0).fit(umap_input)

## from here on `train_data` holds the UMAP embedding of the sampled frames
train_data = umap_train.embedding_

In [None]:
# visualize umap embedding of key characters of frames with HDBSCAN clustering results
## HDBSCAN clustering is only for visualization, not used for further steps
## At this point `train_data` is the UMAP embedding (`umap_train.embedding_`)
## and `train_label` is the `cluster_un15` column of the sampled frames.

## UMAP embedding without clustering results
umap_figure(train_data)

## UMAP embedding with HDBSCAN clustering results
hdbscan_figure(train_data, train_label)

In [None]:
## examples of pose clusters
## (in the figure, the red arrow represents the head of each fish)

example_pose_png = work_dir + '/data/example_pose_cluster.png'
display.display(Image(example_pose_png, embed=True))

In [None]:
# import time stamps for pre-classified behaviors
## example_motion_class.csv includes time stamps of lateral display and bite behavior

time_stamp_df = pd.read_csv(work_dir + '/data/example_motion_class.csv')

## tracking files of the individual trials
trial_path = work_dir + '/data/multi_*.csv'

## glob returns matches in arbitrary, platform-dependent order; sort them so
## that `trial_ls[0]` and the order in which trials are processed are the
## same on every machine and every run.
trial_ls = sorted(glob.glob(os.path.normpath(trial_path)))

## fail here with a clear message instead of an IndexError further down
if not trial_ls:
    raise FileNotFoundError(f"no trial files match {os.path.normpath(trial_path)}")

In [None]:
# first ten rows of the manually annotated time stamps
time_stamp_df.head(10)

In [None]:
# first ten rows of the first trial file, to show what the tracking data looks like
first_trial_df = pd.read_csv(trial_ls[0])
trial_df_sample = first_trial_df.head(10)

trial_df_sample

In [None]:
# create input data for training LSTM autoencoder

## silences pandas' chained-assignment warning for the rest of the session
pd.options.mode.chained_assignment = None

## raw videos were divided into three files due to the camera setting;
## each file has 63660 frames
FRAMES_PER_PART = 63660

## one entry per annotated bout: (row label in time_stamp_df, class, umap embedding)
bout_records = []
for trial in trial_ls:
    ## import feature and time_stamp dataframes
    trial_df = pd.read_csv(trial)
    ## file name without directories and without its last 12 characters
    ## (presumably a fixed suffix of the tracking files -- TODO confirm),
    ## so that it matches the `file` column of time_stamp_df
    trial_name = re.split(r'\\|/', trial)[-1][:-12]
    print(f"import data from {trial_name}")

    file_time_stamp = time_stamp_df[time_stamp_df['file']==trial_name]

    for index, row in file_time_stamp.iterrows():

        ## calculate start & end frames of each behavior
        frame_offset = (int(row['part'])-1) * FRAMES_PER_PART
        bout_start = row['start'] + frame_offset
        bout_end = row['end'] + frame_offset

        ## transform key characters of each behavior into umap embedding
        in_bout = (trial_df['time_stamp'] >= bout_start) & (trial_df['time_stamp'] <= bout_end)
        bout_feature = trial_df.loc[in_bout, key_characters].to_numpy()
        bout_feature = scaler.transform(bout_feature)
        bout_umap = umap_train.transform(bout_feature)

        bout_records.append((index, row['class'], bout_umap))

## The cluster labels derived from these sequences are later attached to
## time_stamp_df by position, so the sequences must follow the row order of
## time_stamp_df, not the order in which the trial files happened to be read.
bout_records.sort(key=lambda record: record[0])

## save true labels and umap sequences (same order as time_stamp_df rows)
true_labels = [bout_class for _, bout_class, _ in bout_records]
raw_behav_seq = [bout_umap for _, _, bout_umap in bout_records]

## longest sequence length, used for zero-padding
max_seq_len = max((seq.shape[0] for seq in raw_behav_seq), default=0)

In [None]:
## zero-padding to max_seq_len

def _center_pad(seq, target_len):
    """Zero-pad `seq` along its first axis to `target_len` rows.

    The padding is split between both ends; when the number of missing rows
    is odd, the extra row goes to the end.
    """
    seq = np.array(seq)
    missing = target_len - seq.shape[0]
    lead = missing // 2
    tail = missing - lead
    return np.pad(seq, ((lead, tail), (0, 0)))


input_seq = np.array([_center_pad(seq, max_seq_len) for seq in raw_behav_seq])

input_seq.shape

In [None]:
# train LSTM autoencoder
# Macs with M1 chips need special settings to use the TensorFlow libraries.
# For this course, we provide pretrained LSTM models for Mac users.
# Please choose the right cell for your machine.

In [None]:
## For Windows
## Trains the LSTM autoencoder from scratch on the zero-padded sequences in
## `input_seq`, using `train` imported from `lstm_train`.

## setting parameters
LEARNING_RATE = 0.0005
BATCH_SIZE = 16
EPOCHS = 100

## train LSTM autoencoder
## `train` returns three objects; only `Encoder` is used in this cell
Autoencoder, Encoder, Decoder = train(input_seq, LEARNING_RATE, BATCH_SIZE, EPOCHS)

## get latent_representation of input sequences
latent_representation = Encoder.predict(input_seq)

In [None]:
## for Mac users with M1 chips
## Uses the pretrained encoder shipped under <work_dir>/data instead of training.
from tensorflow import keras

## load pretrained lstm encoder (loaded for inference only: compile=False)
encoder_path = work_dir + "/data/lstm_model/lstm_encoder"
Encoder = keras.models.load_model(encoder_path, compile=False)

## get latent_representation of input sequences
latent_representation = Encoder.predict(input_seq)

In [None]:
# HDBSCAN clustering of latent representations
## For each UMAP n_neighbors setting: embed the latent representations,
## cluster the embedding with HDBSCAN, and keep the labels as one column.

umap_neighbor = [10, 15, 20]
cluster_columns = {}
for neighbor in umap_neighbor:
    print(f"Results with umap_neighbor = {neighbor}")

    ## UMAP with latent representation
    reducer = umap.UMAP(random_state=0, n_neighbors=neighbor)
    reducer.fit(latent_representation)
    second_embedding = reducer.transform(latent_representation)

    ### visualize umap embedding of LSTM latent representation
    LSTM_umap_figure(second_embedding)

    ## HDBSCAN clustering
    clusterer = hdbscan.HDBSCAN(min_cluster_size=3, min_samples=1).fit(second_embedding)
    cluster_labels = clusterer.labels_

    ## keep cluster_labels for this setting
    cluster_columns[f"cluster_un{neighbor}"] = cluster_labels

    ### visualize HDBSCAN clustering
    LSTM_hdbscan_figure(second_embedding, cluster_labels, true_labels)

## one column of predicted labels per umap_neighbor value
cluster_labels_df = pd.DataFrame(cluster_columns)

In [None]:
# create dataframe with predicted labels

## pd.concat(axis=1) aligns the two frames on their row index and silently
## fills missing rows with NaN, so a bout that was dropped or never matched to
## a trial file would shift or blank out labels without any error.
## Fail loudly instead.
if len(cluster_labels_df) != len(time_stamp_df):
    raise ValueError(
        f"{len(cluster_labels_df)} clustered bouts but {len(time_stamp_df)} "
        "rows in time_stamp_df; predicted labels cannot be matched to the annotations"
    )

fin_df = pd.concat([time_stamp_df, cluster_labels_df], axis=1)

fin_df

In [None]:
# compare the clustering with different umap_neighbor parameters

from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score

## For more information about umap_neighbor, read the doc below. 
## https://umap-learn.readthedocs.io/en/latest/parameters.html
## For more information about metrics below, read the doc below. 
## https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics

In [None]:
# Agreement between the manual classes and the predicted clusters, for each
# umap_neighbor setting that was clustered above.
true_label = fin_df["class"].to_numpy()
for neighbor in umap_neighbor:
    print(f"umap_neighbor: {neighbor}")
    pred_label = fin_df[f"cluster_un{neighbor}"]
    rand_score = adjusted_rand_score(true_label, pred_label)
    mutual_info = adjusted_mutual_info_score(true_label, pred_label)
    ## name the setting in each line so a score is not read as the
    ## umap_neighbor value itself
    print(f"adjusted_rand_score for umap_neighbor={neighbor}: {rand_score}")
    print(f"adjusted_mutual_info for umap_neighbor={neighbor}: {mutual_info}")

In [None]:
# load video clips for each cluster
## directory that holds the clip_<n>.mp4 files

video_loc = work_dir + '/data/videos'

In [None]:
## put the best umap_neighbor below.
best_umap_neighbor = 20

def _clip_path(clip_no):
    """Return the normalized path of the video clip with number `clip_no`."""
    clip_name = "clip_" + str(clip_no)
    return os.path.normpath(video_loc + f"/{clip_name}.mp4")


## group the annotated bouts by their predicted cluster
predicted_groups = fin_df.groupby(f"cluster_un{best_umap_neighbor}")

## predicted label -> paths of the clips that fell into that cluster
clip_dict = {
    pred_label: [_clip_path(clip_no) for clip_no in group["clip_no"].tolist()]
    for pred_label, group in predicted_groups
}

In [None]:
# Show every clip of every predicted group.
## Iterate over the labels that actually exist: HDBSCAN marks noise points with
## the label -1, so the keys are not guaranteed to be 0..len(clip_dict)-1, and
## indexing with range(len(clip_dict)) would skip the noise group and raise
## KeyError on the last index.
for key, clip_list in clip_dict.items():
    print(f"videos for predicted group {key}")
    for clip_id in clip_list:
        display.display(Video(clip_id, embed=True))

In [None]:
# Discussion

## 1. What are the major differences in annotations between manual classification and automated classification?
## 2. What are the drawbacks of manual and automated behavior classification?
## 3. Could we improve the automated behavior classification? If so, how?
## 4. Which do you prefer, manual or automated behavior classification, and why?