In [None]:
#hide
%load_ext autoreload
%autoreload 2

import os

# When the kernel starts inside a folder whose path ends in "nbs", move the
# working directory one level up.
if os.getcwd().endswith("nbs"):
    os.chdir(os.pardir)
# Bare expression: the cell displays the resulting working directory.
os.getcwd()

'/Users/diogoneves/Documents/Projects/metaphora/DataImporters'

# Review Workflow

> Code that allows inspecting and reviewing the dataset.

The reviews can result in annotations that are applied to the dataset.  

For more information on the annotation flow, see `Annotation`

In [None]:
#hide

from __future__ import annotations

from DataImporters.sources.core import *
from DataImporters.dataset import DatasetPaths
from DataImporters.annotation import *

from itertools import cycle
import os
import pandas as pd
import librosa as lr
import IPython.display as ipd
import panel as pn
pn.extension()

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
#hide

# Location and name of the dataset under review.
DATA_DIR = "data"
DATASET_NAME = "small_balanced"

# The annotation file lives at <DATA_DIR>/annotations/<DATASET_NAME>.csv.
annotation_path = os.path.join(DATA_DIR, "annotations", f"{DATASET_NAME}.csv")

PATHS = DatasetPaths(
    DATA_DIR,
    DATASET_NAME,
    annotation_path,
)

In [None]:
#hide

# Dataset metadata: one row per audio file.
metadata = pd.read_csv(PATHS.metadata_output_path)

# Resume from the stored annotations when the file exists, otherwise start
# with a fresh, empty set.
# NOTE(review): the cell that builds the views uses a different empty fallback
# (`Annotation(empty, empty.copy())`) - confirm which one is intended.
annotations = (
    load_annotations(PATHS.annotation_path)
    if os.path.exists(PATHS.annotation_path)
    else create_annotations()
)

## Review Plan

1. Look at one category at a time
1. Play sounds and show their labels
1. Change the labels if necessary (add row to annotation)

Let's start by looking at the available categories:

In [None]:
#collapse

# Group the metadata rows by category once; the next cell reuses this grouping.
by_category = metadata.groupby("category")
# Bare expression: the cell displays the distinct category names.
metadata["category"].unique()

array(['Zombie_noises', 'Ship_horn', 'Sword_hit', 'Glitch_effect',
       'Monster_roar', 'Laser', 'Bending', 'Robot_movement', 'Footsteps',
       'Wobble_effect', 'Wood_creak', 'Water', 'Wood', 'Zombie', 'Ghost',
       'Horn', 'Alien', 'Sword', 'Robot', 'Birds'], dtype=object)

In [None]:
#collapse

# Per category, the number of non-null values in each column.
by_category.count()

Unnamed: 0_level_0,filename,label,extra,source,version
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alien,19,19,0,19,19
Bending,4,4,0,4,4
Birds,9,9,0,9,9
Footsteps,33,33,0,33,33
Ghost,3,3,0,3,3
Glitch_effect,33,33,0,33,33
Horn,14,14,0,14,14
Laser,34,34,0,34,34
Monster_roar,20,20,0,20,20
Robot,6,6,0,6,6


# Annotate

To annotate, run the cells below. The flow is encoded in the cells themselves (including page movement).  
Once you're done or simply want a break, run the last couple of cells to extract the annotations and save.  

Next time you come back, the annotations will be reloaded and resumed.

In [None]:
#collapse

import param
from panel.viewable import Viewer

class MetadataRow(Viewer):
    """Panel view for reviewing and annotating one metadata row.

    The view shows an audio player for the row's file next to widgets for
    keeping or deleting the row, picking its category and editing its labels.
    All edits are applied to a private working copy; neither the original row
    nor the annotation passed in is modified.

    Keyword arguments:
        value: the original metadata row (``pd.Series``). Required.
        categories: options offered by the category selector. Required.
        labels: options offered by the labels multi-choice. Required.
        annotation: optional, previously stored annotation for this row
            (``pd.Series``). When given, the view starts from it instead of
            from ``value``.

    Note: the audio file is resolved against the notebook-level ``PATHS``.
    """

    # Original metadata row being reviewed.
    value = param.Series(default=None)
    # Options for the category selector.
    categories = param.List(default=None)
    # Options for the labels multi-choice.
    labels = param.List(default=None)
    # Previously stored annotation for this row, if any.
    annotation = param.Series(default=None)

    def __init__(self, **params):
        self._original = params["value"]
        # `annotation` is optional (its param default is None), so a missing
        # key must not raise.
        annotation = params.get("annotation")
        # Always work on a copy so that widget edits never leak into the
        # caller's Series.
        starting_row = self._original if annotation is None else annotation
        self._row = starting_row.copy()
        self._categories = params["categories"]
        self._label_options = params["labels"]

        self.sync_row()
        # Apply the enabled/disabled state that matches the initial "Keep?" value.
        self._on_keep_change()

        self._keep.param.watch(self._on_keep_change, "value")
        self._category.param.watch(self._on_category_change, "value")
        self._labels.param.watch(self._on_labels_change, "value")
        self._reset.on_click(self._on_reset)

        left = pn.Column(self._audio, self._extra, self._source)
        right = pn.Column(self._keep, self._category, self._labels)

        super().__init__(**params)
        self._layout = pn.Column(
            f"## {self._row['filename']}",
            pn.Row(left, right, self._reset)
        )

    @staticmethod
    def _split_labels(label) -> list:
        """Turn a comma-separated label string into a list.

        A missing label (anything that is not a string, e.g. NaN read from a
        CSV) or an empty string yields an empty list instead of raising or
        producing ``[""]``.
        """
        if not isinstance(label, str) or not label:
            return []
        return label.split(",")

    def _on_keep_change(self, *events):
        """React to the "Keep?" checkbox: lock the editors and update the row."""
        keep = self._keep.value
        self._category.disabled = not keep
        self._labels.disabled = not keep
        if keep:
            # Put the original version back on the working row.
            self._row["version"] = self._original["version"]
        else:
            self._row = delete_row(self._row)

    def _on_category_change(self, *events):
        """Store the selected category on the working row."""
        self._row["category"] = self._category.value

    def _on_labels_change(self, *events):
        """Store the selected labels on the working row, comma-separated."""
        self._row["label"] = ",".join(self._labels.value)

    def _on_reset(self, *events):
        """Discard every edit and show the original row again.

        The widgets are moved back to the original values first; their
        watchers fire while that happens and write to the working row, so the
        working row is replaced with a pristine copy last. That guarantees
        `changed()` is False afterwards whenever the original row is kept.
        """
        original = self._original
        self._keep.value = not is_deleted(original)
        self._category.value = original["category"]
        self._labels.value = self._split_labels(original["label"])
        self._row = original.copy()

    def sync_row(self):
        """Create the widgets from the current working row."""
        audio_path = os.path.join(PATHS.audio_output_path, self._row["filename"])
        self._audio = pn.pane.Audio(audio_path, name=self._row["filename"], loop=True)
        self._keep = pn.widgets.Checkbox(name="Keep?", value=not is_deleted(self._row))
        assert self._row["category"] in self._categories
        self._category = pn.widgets.Select(value=self._row["category"], options=self._categories, name="Category")
        label_values = self._split_labels(self._row["label"])
        self._labels = pn.widgets.MultiChoice(options=self._label_options, value=label_values, name="Labels")
        self._extra = pn.widgets.StaticText(name="Extra", value=self._row["extra"])
        self._source = pn.widgets.StaticText(name="Source", value=self._row["source"])
        self._reset = pn.widgets.Button(name="Reset", button_type="danger", width=30)

    def __panel__(self):
        """Return the layout Panel renders for this view."""
        return self._layout

    def keep(self) -> bool:
        """Return whether the "Keep?" checkbox is ticked."""
        return self._keep.value

    def row(self) -> pd.Series:
        """Return the working row, passed through `delete_row` when not kept."""
        return self._row if self.keep() else delete_row(self._row)

    def changed(self) -> bool:
        """Return True when the row is not kept or differs from the original."""
        return not (self.keep() and self._original.equals(self._row))

In [None]:
#collapse

def get_annotations(views: list[MetadataRow]) -> pd.DataFrame:
    """Build the annotations from every view that reports a change.

    Views whose `changed()` is False are skipped; for the others the result
    of `row()` is collected and handed to `create_annotations`.
    """
    changed_rows = []
    for view in views:
        if view.changed():
            changed_rows.append(view.row())
    return create_annotations(changed_rows)

In [None]:
#collapse

# Widget options: every category, and every individual label found in the
# comma-separated `label` column.
categories = metadata["category"].unique().tolist()
labels = (
    metadata["label"]
    .str.split(",")
    .explode()
    .unique()
    .tolist()
)

# Resume from the stored annotations, or start from an empty pair of frames.
if not os.path.exists(PATHS.annotation_path):
    empty = create_annotations()
    annotations = Annotation(empty, empty.copy())
else:
    annotations = load_annotations(PATHS.annotation_path)

# One view per metadata row, seeded with that file's annotation lookup result.
views = [
    MetadataRow(
        value=row,
        categories=categories,
        labels=labels,
        annotation=get_annotation_for(annotations, row["filename"]),
    )
    for _, row in metadata.iterrows()
]

### Navigation

Use the cell below to reset to a position.  
For example, `start = 2 * steps` will start at the 3rd page of annotations (`start = 0` is the 1st page).

In [None]:
# Number of rows shown per page.
steps = 3
# Index of the first row of the page to show next (page k starts at k * steps).
start = 0 * steps
# Ceiling division: a trailing partial page counts as exactly one extra page.
# (Adding the remainder itself over-counts, e.g. 8 rows / 3 per page gave 4.)
total_pages = (len(views) + steps - 1) // steps

### Main Annotation Flow

Keep running this cell to annotate and move to the next page (every time you run, it's a new page, to reset see cell above).  
The annotation changes are saved (in memory) as you go.  

In [None]:
# Take the views for the current page, then advance `start` so that running
# this cell again shows the following page.
page_number = start // steps + 1
page_end = start + steps
current = views[start:page_end]
page_info = pn.widgets.StaticText(name="Page", value=f"{page_number} of {total_pages}")
start = page_end
pn.Column(page_info, *current)

BokehModel(combine_events=True, render_bundle={'docs_json': {'0d4cb4d1-d510-4e05-9b39-643637be1d9e': {'defs': …

### Saving

The cell below previews the current state of the annotations (including any annotations done in previous sections).  
The cell after it saves the results; you can run it as many times as needed.

In [None]:
# Collect the rows of every view that reports a change (see `get_annotations`).
# NOTE: this rebinds `annotations`, which the views cell assigned earlier, to
# the frame built from the current views.
annotations = get_annotations(views)
# Bare expression: the cell displays the frame as a preview before saving.
annotations

Unnamed: 0,filename,category,label,extra,source,version


In [None]:
# CAREFUL: saving overwrites the annotation file. There is a known problem
# where, in some conditions, `annotations` ends up empty; writing it would wipe
# previously saved work. Never replace an existing file with an empty frame.
if annotations.empty and os.path.exists(PATHS.annotation_path):
    print(
        f"Skipped saving: annotations are empty and {PATHS.annotation_path} already exists. "
        "Check the preview above; delete the file manually to clear all annotations."
    )
else:
    # `to_csv` does not create missing folders, so make sure the target exists.
    annotation_dir = os.path.dirname(PATHS.annotation_path)
    if annotation_dir:
        os.makedirs(annotation_dir, exist_ok=True)
    annotations.to_csv(PATHS.annotation_path, index=False)