In [None]:
#hide
import os

# When the kernel is started from inside the `nbs` folder, move one level up
# so that the relative paths used in the rest of the notebook resolve.
launch_dir = os.getcwd()
if launch_dir.endswith('nbs'):
    os.chdir("..")
# Display the directory the notebook ends up working from.
os.getcwd()

'/home/diogoneves/Projects/metaphora/DataImporters'

# Review Workflow

> Code that allows inspecting and reviewing the dataset.

The reviews can result in annotations that are applied to the dataset.  

For more information on the annotation flow, see `Annotation`

In [None]:
#hide

from DataImporters.sources.core import *
from DataImporters.dataset import DatasetPaths
from DataImporters.annotation import *

from itertools import cycle
import os
import pandas as pd
import librosa as lr
import IPython.display as ipd
import panel as pn
pn.extension()

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Dataset under review and the CSV file its annotations are stored in.
DATA_DIR = "data"
DATASET_NAME = "small_balanced"
annotation_path = os.path.join(DATA_DIR, "annotations", f"{DATASET_NAME}.csv")
PATHS = DatasetPaths(DATA_DIR, DATASET_NAME, annotation_path)

In [None]:
metadata = pd.read_csv(PATHS.metadata_output_path)
# Reuse the annotation file when one exists, otherwise start from an empty set.
annotations = (
    load_annotations(PATHS.annotation_path)
    if os.path.exists(PATHS.annotation_path)
    else create_annotations()
)

## Review Plan

1. Look at one category at a time
1. Play sounds and show their labels
1. Change the labels if necessary (add a row to the annotations)

Let's start by looking at the available categories:

In [None]:
# Keep the grouping around for later cells, then list the distinct categories.
by_category = metadata.groupby(by="category")
metadata.loc[:, "category"].unique()

array(['Zombie_noises', 'Drone', 'Ghost', 'Ship_horn', 'Alien',
       'Sword_hit', 'Glitch_effect', 'Monster_roar', 'Laser', 'Bending',
       'Robot_movement', 'Footsteps', 'Silly_effect', 'Birds', 'Wind',
       'Wobble_effect', 'Wood_creak', 'Water', 'Wood'], dtype=object)

In [None]:
# Number of non-null entries in every column, per category.
by_category.count()

Unnamed: 0_level_0,filename,label,extra,source,version
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alien,12,12,0,12,12
Bending,7,7,0,7,7
Birds,12,12,0,12,12
Drone,27,27,0,27,27
Footsteps,29,29,0,29,29
Ghost,4,4,0,4,4
Glitch_effect,25,25,0,25,25
Laser,28,28,0,28,28
Monster_roar,24,24,0,24,24
Robot_movement,26,26,0,26,26


In [None]:
import param
from panel.viewable import Viewer

class MetadataRow(Viewer):
    """Interactive review widget for a single metadata row.

    Shows an audio player for the row's file next to controls that let the
    reviewer drop the row ("Keep?"), move it to another category, or remove
    labels. Edits are accumulated on a private copy of the row; the series
    passed in as ``value`` is kept untouched as the reference for ``changed``
    and for the "Reset" button.

    Parameters
    ----------
    value : pd.Series
        Metadata row to review. Its ``filename``, ``category``, ``label``
        (comma-separated string), ``extra`` and ``source`` entries are read.
    categories : list
        Categories offered in the selector; must contain the row's category.
    """

    value = param.Series(default=None)
    categories = param.List(default=None)

    def __init__(self, **params):
        def change_keep_handler(*args):
            # Labels are not editable on a row that is going to be dropped.
            self._labels.disabled = not self._keep.value

        def change_category_handler(*args):
            self._row["category"] = self._category.value

        def change_label_handler(*args):
            self._row["label"] = ",".join(self._labels.value)

        def reset_handler(*args):
            # Put the widgets back in their initial state too; resetting only
            # the row would leave the UI showing edits that no longer exist.
            self._keep.value = True
            self._category.value = self._original["category"]
            self._labels.value = list(self._initial_labels)
            # The watchers triggered above wrote into the old working copy,
            # so the pristine copy is installed last.
            self._row = self._original.copy()

        self._original = params["value"]
        self._row = self._original.copy()
        audio_path = os.path.join(PATHS.audio_output_path, self._row["filename"])
        self._audio = pn.pane.Audio(audio_path, name=self._row["filename"], loop=True)
        self._keep = pn.widgets.Checkbox(name="Keep?", value=True)
        self._categories = params["categories"]
        assert self._row["category"] in self._categories, (
            f"category {self._row['category']!r} is not one of the selectable categories"
        )
        self._category = pn.widgets.Select(value=self._row["category"], options=self._categories, name="Category")
        # A label cell that is empty in the CSV is read back as NaN (a float),
        # which has no `split`; such a row simply gets no label choices.
        raw_label = self._row["label"]
        self._initial_labels = raw_label.split(",") if isinstance(raw_label, str) else []
        # Separate list objects so the widget's options and value never alias.
        self._labels = pn.widgets.MultiChoice(
            options=list(self._initial_labels),
            value=list(self._initial_labels),
            name="Labels",
        )
        self._extra = pn.widgets.StaticText(name="Extra", value=self._row["extra"])
        self._source = pn.widgets.StaticText(name="Source", value=self._row["source"])
        self._reset = pn.widgets.Button(name="Reset", button_type="danger", width=30)

        self._keep.param.watch(change_keep_handler, "value")
        self._category.param.watch(change_category_handler, "value")
        self._labels.param.watch(change_label_handler, "value")
        self._reset.on_click(reset_handler)

        left = pn.Column(self._audio, self._extra, self._source)
        right = pn.Column(self._keep, self._category, self._labels)

        super().__init__(**params)
        self._layout = pn.Column(
            f"## {self._row['filename']}",
            pn.Row(left, right, self._reset)
        )

    def __panel__(self):
        """Return the layout Panel renders for this viewer."""
        return self._layout

    def keep(self) -> bool:
        """Return whether the "Keep?" checkbox is ticked."""
        return self._keep.value

    def row(self) -> pd.Series:
        """Return the edited row, or ``delete_row`` of it when it is not kept."""
        return self._row if self.keep() else delete_row(self._row)

    def changed(self) -> bool:
        """Return True when the row is dropped or differs from the original."""
        return not (self.keep() and self._original.equals(self._row))

In [None]:
def get_annotations(views: list[MetadataRow]) -> pd.DataFrame:
    """Build annotations from the rows of every view that reports a change."""
    changed_rows = []
    for view in views:
        if view.changed():
            changed_rows.append(view.row())
    return create_annotations(changed_rows)

In [None]:
# One review widget per metadata row, all sharing the same category choices.
categories = metadata["category"].unique().tolist()
views = [
    MetadataRow(value=record, categories=categories)
    for _index, record in metadata.iterrows()
]

In [None]:
# Paging state: index of the first view on the next page, and views per page.
start = 0
steps = 3
# Round up so that a trailing, partially filled page is counted as well
# (plain floor division would report one page too few).
total_pages = (len(views) + steps - 1) // steps

In [None]:
# Re-running this cell shows the next page, because `start` is advanced below.
page_number = start // steps + 1
page_info = pn.widgets.StaticText(name="Page", value=f"{page_number} of {total_pages}")
current = views[start:start + steps]
start += steps
pn.Column(page_info, *current)

BokehModel(combine_events=True, render_bundle={'docs_json': {'e6d071f1-a6b6-4107-ad44-e7613c201309': {'defs': …

In [None]:
# Merge this session's reviews into the annotations loaded earlier instead of
# replacing them; otherwise saving would discard every previously stored review.
# NOTE(review): assumes `load_annotations` returns a DataFrame with the same
# columns as `create_annotations` (including `filename`) - confirm.
session_annotations = get_annotations(views)
annotations = pd.concat([annotations, session_annotations], ignore_index=True)
if "filename" in annotations.columns:
    # One annotation per file: the most recent review wins. This also keeps
    # the cell idempotent when it is run more than once.
    annotations = annotations.drop_duplicates(subset="filename", keep="last").reset_index(drop=True)
annotations

Unnamed: 0,filename,category,label,extra,source,version
0,a2abde886f52e83e.wav,Drone,Horror,,custom_fsd,16


In [None]:
# Create the target folder on first use; `to_csv` does not create missing
# directories and would fail before any annotation file exists.
annotation_dir = os.path.dirname(PATHS.annotation_path)
if annotation_dir:
    os.makedirs(annotation_dir, exist_ok=True)
annotations.to_csv(PATHS.annotation_path, index=False)