In [1]:
import pandas as pd

Gives us more space to use horizontally:

In [2]:
from IPython.display import display, HTML
# Inject a CSS override that sets --jp-notebook-max-width to 75%, giving the
# cells (and the annotation UI further down) more horizontal room.
wide_notebook_style = HTML("<style>:root { --jp-notebook-max-width: 75% !important; }</style>")
display(wide_notebook_style)

In [3]:
from glob import glob
import pathlib

In [4]:
# glob() returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the order in which videos are presented for review reproducible across runs.
allVideos = sorted(glob("Videos/*/*.mp4"))

In [5]:
# Number of video files matched by the glob pattern.
len(allVideos)

156

In [6]:
new_annot_filename = "HD_EPIC_Sounds_annot.csv"

# Resume from the working annotation file when it exists; otherwise start from
# the original annotations. Only a *missing* file triggers the fallback: with a
# bare `except`, an unreadable or corrupt working file would silently be
# replaced by the original data and then overwritten by the next save.
try:
    new_annot_df = pd.read_csv(new_annot_filename)
except FileNotFoundError:
    new_annot_df = pd.read_csv("HD_EPIC_Sounds.csv")

tap_class = "tap water"
tap_class_id = 44   # 45th class added. Will mostly be very similar to water and pour, but only active for actual tap water sounds!


In [7]:
# Original annotations; every "water"/"pour" segment longer than 3 s
# (sample offsets divided by 48000) becomes a candidate for manual review.
annot_csv = pd.read_csv("HD_EPIC_Sounds.csv")

all_rec_ids = annot_csv.video_id.unique()

water_df = (
    annot_csv
    .loc[annot_csv["class"].isin(["water", "pour"])]
    .loc[lambda seg: (seg.stop_sample - seg.start_sample) / 48000 > 3]
)

In [8]:
from tqdm.auto import tqdm

In [9]:
import ipywidgets as widgets
import plotly.graph_objects as go
import pandas as pd
from IPython.display import display, HTML, Javascript, Video

# Ensure the required packages are installed
try:
    import plotly
    import ipywidgets
    import pandas
except ImportError:
    !pip install plotly ipywidgets pandas
    import plotly
    import ipywidgets
    import pandas


def annotate_single_video(video_path):
    """Interactively review the candidate annotations of one video.

    Shows the video next to a timeline of its rows in ``water_df`` and asks,
    for each row, what to do with it:

    * empty input   -> keep the segment, relabelled as ``tap_class``
    * ``d``         -> drop the segment
    * ``start,end`` -> add a tap-water segment with these bounds (seconds)

    Any other input is rejected and the question is asked again. Accepted
    segments are appended to the global ``new_annot_df``.

    Args:
        video_path: Path to the ``.mp4`` file; the file stem is matched
            against ``water_df.video_id``.

    Returns:
        None. Returns early (without showing any UI) when the video has no
        rows in ``water_df``.
    """
    import ast  # local import: only needed here, to parse the typed-in bounds

    sample_rate = 48000  # divisor turning sample offsets into seconds

    rec_id = pathlib.Path(video_path).stem

    # Candidate segments of this video, with start/end converted to seconds.
    video_df = water_df[water_df.video_id == rec_id].copy()
    video_df["Start"] = video_df.start_sample / sample_rate
    video_df["End"] = video_df.stop_sample / sample_rate

    if len(video_df) == 0:
        print("Video does not contain any water or pour annotations!")
        return

    # Video player, rendered as raw HTML inside an output widget.
    video_wid = widgets.Output()
    with video_wid:
        display(HTML(f"""
            <video id="video_player" width="400" controls>
                <source src="{video_path}" type="video/mp4">
                Your browser does not support the video tag.
            </video>
        """))

    slider = widgets.FloatSlider(
        min=0, max=max(video_df["End"]) + 5, step=0.005, description="Time:", continuous_update=True)

    out = widgets.Output()

    def update_video(change):
        """Seek the first <video> element on the page to ``change['new']`` seconds."""
        js = f"document.querySelector('video').currentTime = {change['new']};"
        with out:
            display(Javascript(js))
        out.clear_output()

    slider.observe(update_video, names="value")

    # Interactive timeline: one horizontal bar per candidate segment.
    fig = go.FigureWidget()

    def update_timeline():
        """Redraw all candidate segments of this video."""
        fig.data = []
        for i, row in video_df.iterrows():
            fig.add_trace(go.Scatter(
                x=[row["Start"], row["End"]],
                y=[row["class"], row["class"]],
                mode="lines+markers",
                line=dict(width=6),
                marker=dict(size=12),
                name=row["class"],
                customdata=[i],
            ))

    update_timeline()

    ui = widgets.VBox([widgets.HBox([video_wid, fig, out]), slider])
    display(ui)

    print(f"In this video, {rec_id}, there are", len(video_df), "annotations to check!")
    try:
        for _, line in tqdm(video_df.iterrows(), total=len(video_df)):
            fig.update_layout(xaxis=dict(range=[line.Start - 15, line.End + 15]))
            update_video({"new": line.Start})

            # Move the slider window around the segment. The bounds are set in
            # an order that never makes min exceed max in between, which the
            # slider widget rejects.
            new_min, new_max = line.Start - 60, line.End + 60
            if new_min > slider.max:
                slider.max = new_max
                slider.min = new_min
            else:
                slider.min = new_min
                slider.max = new_max
            slider.value = line.Start

            while True:
                r = input(f"change annotation ({int(line.End - line.Start)}s)? {line.Start},{line.End}:").strip()
                if r == "":
                    # Keep the segment as it is, relabelled as tap water.
                    # Use the label after the current maximum rather than
                    # len(): if the index has gaps, len() may equal an
                    # existing label and overwrite that row.
                    new_index = 0 if len(new_annot_df) == 0 else new_annot_df.index.max() + 1
                    new_annot_df.loc[new_index] = line
                    new_annot_df.loc[new_index, "class"] = tap_class
                    new_annot_df.loc[new_index, "class_id"] = tap_class_id
                    print("keeping annotation")
                    break
                if r == "d":
                    print("dropped")
                    break

                # Anything else must be "start,end" in seconds. literal_eval
                # only accepts Python literals, so a typo cannot crash the
                # session or execute code the way eval() could.
                try:
                    bounds = ast.literal_eval(r)
                except (ValueError, SyntaxError, TypeError):
                    bounds = None
                is_valid = (
                    isinstance(bounds, tuple)
                    and len(bounds) == 2
                    and all(isinstance(v, (int, float)) and not isinstance(v, bool) for v in bounds)
                    and bounds[0] < bounds[1]
                )
                if is_valid:
                    start, stop = bounds
                    add_additional_annotation(start, stop, line.participant_id, rec_id)
                    break
                print("wrong type entered, try again! (expected: start,end in seconds)")
    finally:
        # Always remove the widgets, also when the session is interrupted.
        ui.close()

In [10]:
def add_additional_annotation(start, stop, part_id, video_id):
    """Append a new ``tap_class`` annotation to the global ``new_annot_df``.

    Args:
        start: Segment start in seconds.
        stop: Segment end in seconds.
        part_id: Value for the ``participant_id`` column.
        video_id: Value for the ``video_id`` column.

    The row is written positionally, so ``new_annot_df`` must have exactly the
    eight columns participant_id, video_id, start_timestamp, stop_timestamp,
    start_sample, stop_sample, class, class_id in this order.
    """
    # round() before int(): the float product can land just below the intended
    # integer, and plain truncation would then be off by one sample.
    start_sample = int(round(start * 48000))
    stop_sample = int(round(stop * 48000))
    start_timestamp = get_timestamp(start)
    stop_timestamp = get_timestamp(stop)

    # Use the label after the current maximum rather than len(): once rows were
    # removed (e.g. by drop_duplicates()) without resetting the index, len()
    # can equal an existing label and the assignment would overwrite that row.
    # Assumes an integer index, as produced by pd.read_csv.
    next_label = 0 if len(new_annot_df) == 0 else new_annot_df.index.max() + 1
    new_annot_df.loc[next_label] = [part_id, video_id, start_timestamp, stop_timestamp, start_sample, stop_sample, tap_class, tap_class_id]

def get_timestamp(seconds):
    """Format a non-negative duration in seconds as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    hours, minutes and seconds, so a carry propagates correctly
    (59.9996 -> "00:01:00.000" instead of "00:00:60.000").

    Args:
        seconds: Duration in seconds (int or float). Negative values are not
            supported.

    Returns:
        str: Zero-padded timestamp; the hours field grows beyond two digits
        for durations of 100 hours or more.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"

In [11]:
# Show all rows that carry the new tap-water class id.
new_annot_df[new_annot_df.class_id==tap_class_id]

Unnamed: 0,participant_id,video_id,start_timestamp,stop_timestamp,start_sample,stop_sample,class,class_id
16460,P03,P03-20240217-131219,00:39:51.354,00:40:11.800,114784991,115766400,tap water,44
50968,P08,P08-20240614-085000,00:13:54.107,00:13:58.082,40037136,40227936,tap water,44
50969,P08,P08-20240614-085000,00:14:57.119,00:15:04.732,43061712,43427136,tap water,44
50970,P08,P08-20240614-085000,00:15:13.957,00:15:16.900,43869936,44011200,tap water,44
50971,P08,P08-20240614-085000,00:15:19.657,00:15:31.594,44143536,44716512,tap water,44
...,...,...,...,...,...,...,...,...
51679,P09,P09-20240624-165332,00:16:37.850,00:16:39.750,47896800,47988000,tap water,44
51680,P09,P09-20240624-165332,00:17:03.608,00:17:36.750,49133184,50724000,tap water,44
51681,P09,P09-20240624-165332,00:18:03.103,00:18:07.650,51988944,52207200,tap water,44
51682,P09,P09-20240624-165332,00:21:19.856,00:21:23.250,61433088,61596000,tap water,44


In [12]:
def get_done_count(done):
    """Count the ``water_df`` annotations that belong to finished videos.

    Args:
        done: Iterable of video file paths that have been fully reviewed.

    Returns:
        int: Number of ``water_df`` rows with a non-null ``participant_id``
        whose ``video_id`` equals the file stem of a path in ``done``.
    """
    # Derive the id from the file name itself instead of slicing a fixed
    # number of leading characters, which only works for one directory layout.
    done_ids = [pathlib.Path(video_file).stem for video_file in done]
    is_done = water_df.video_id.isin(done_ids)
    return int(water_df.loc[is_done, "participant_id"].count())

In [13]:
def remove_from_done(video_file):
    """Remove ``video_file`` from the persisted list of finished videos.

    The list is stored under the key ``"done"`` in the shelf
    ``shelves/done_vids``. Nothing is written when the file is not in the list.

    Args:
        video_file: Video path exactly as it was stored in the list.
    """
    # Imported here because the notebook only imports shelve in a later cell;
    # without this, calling the function before that cell ran raises NameError.
    import shelve

    with shelve.open("shelves/done_vids") as shf:
        done = shf.get("done", [])
        if video_file in done:
            done.remove(video_file)
            shf["done"] = done

In [14]:
import shelve

# The shelf remembers which videos are finished, so the review can be resumed.
# shelve cannot create missing parent directories, so make sure it exists.
pathlib.Path("shelves").mkdir(exist_ok=True)
with shelve.open("shelves/done_vids") as shf:
    done = shf.get("done", [])

# Resume from the working file; fall back to the original annotations only
# when the working file does not exist yet (any other read error should
# surface instead of silently discarding earlier work).
try:
    new_annot_df = pd.read_csv(new_annot_filename)
except FileNotFoundError:
    new_annot_df = pd.read_csv("HD_EPIC_Sounds.csv")

done_vid_list = new_annot_df[new_annot_df.class_id==tap_class_id].video_id.unique()

rest_videos = [vid_filename for vid_filename in allVideos if vid_filename not in done]

for i, video in enumerate(rest_videos):
    # reset_index keeps the labels contiguous after duplicates were dropped;
    # rows are appended by label, and gaps could make a new row overwrite an
    # existing one.
    new_annot_df = new_annot_df.drop_duplicates().reset_index(drop=True)
    new_annot_df.to_csv(new_annot_filename, index=False)
    print(len(rest_videos) - i, "videos left,", get_done_count(done), "of", len(water_df), "annotations checked")
    annotate_single_video(video)
    print("finished video", video)
    new_annot_df = new_annot_df.drop_duplicates().reset_index(drop=True)
    new_annot_df.to_csv(new_annot_filename, index=False)
    print(len(new_annot_df[new_annot_df.class_id==tap_class_id]), "annotations added so far")
    # Mark the video as finished only after its annotations were saved.
    with shelve.open("shelves/done_vids") as shf:
        done = shf.get("done", [])
        done.append(video)
        shf["done"] = done
if len(rest_videos) == 0:
    print("no videos left, everything is done!")

no videos left, everything is done!


In [15]:
# Independent copy of the current annotation table
# (presumably kept as an in-memory backup; TODO confirm and give it a descriptive name).
tmp_df = new_annot_df.copy()

Found errors:

- Whisking an egg labeled as "water"
- Pour annotated as water instead of pour
- Stirring a pot annotated as water, e.g. P08-20240618-171546; this happens multiple times, even when there is no "watery" sound
- overlapping water labels (one for general water, one for tap water, e.g. P01-20240202-175627, 87s-93s is contained), "nested annotations"
- Moving food into a container labeled as water
- water in far background, e.g. P01-20240203-152323 3-14s
- single water drops in background labeled as water, but maybe not consistently (check: maybe because I dropped the labels, e.g. P02 first video)

- P01 water "in drain / pipes" annotated, not for P02 194141
- P02: P02-20240209-194141 - water annotations inconsistent while doing dishes
- Sizzling pan in background annotated as water, e.g. P02-20240210-113925, 490.982,530.482 or P03-20240217-192543, 985.078,990.185
- Duplicated annotations: e.g. P03-20240217-210958 - 19:20-19:55
- Plastic Bag handling sounds annotated as pour, P04-20240414-162750: 1:19 onward
- Steam cooker letting out steam annotated as water P04-20240414-165333 - technically correct but different form and sound
- plating (non-liquid) food from a pan labeled as water P04-20240414-175337
- In P06-20240510-100047, there is a "pour" annotation for someone frothing milk
- 
  

Found explanations for problems:

- P02:
  - very silent tap, sometimes running in the background far away
  - sometimes only a drizzle, almost inaudible, e.g. P02-20240211-17513 190-226
- P03:
  - P03-20240216-185832, 28:42 : extractor hood very loud in the background, similar noise to water, water barely audible
- P04:
  - P04-20240414-065311: Tap technically on, but only as a very slight drizzle - almost inaudible, no "noise"
- P06:
  - P06-20240510-104642: Loud noise in background (extractor hood)
  - 