Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; the AudioSet CSV used later in this
# notebook is read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Load balanced train segments from AudioSet

In [None]:
import pandas as pd
import subprocess
import os

# Path to the AudioSet balanced-train segments CSV on the mounted Google Drive.
# Adjust this if the file is stored elsewhere in your Drive.
audioset_csv_path = '/content/drive/MyDrive/db/balanced_train_segments.csv'

# Function to parse a line from the AudioSet CSV
def parse_audioset_line(line):
    """Parse one data row of an AudioSet segments CSV.

    A row has the form ``YTID, start_seconds, end_seconds, "label1,label2,..."``.
    Only the first three commas separate fields; any further commas belong to
    the label list.

    Args:
        line: One raw text line from the CSV (a trailing newline is allowed).

    Returns:
        A tuple ``(ytid, start_seconds, end_seconds, positive_labels)`` where
        the two times are floats and ``positive_labels`` is the comma-separated
        label string with surrounding whitespace and double quotes removed
        (an empty string if the row has no label field).

    Raises:
        ValueError: If the row has fewer than three fields or a time value is
            not a valid number.
    """
    # maxsplit=3 keeps the whole label list together as the fourth field.
    fields = line.strip().split(',', 3)
    if len(fields) < 3:
        raise ValueError(f"Malformed AudioSet row (expected at least 3 fields): {line!r}")

    ytid = fields[0].strip().strip('"')
    start_seconds = float(fields[1])
    end_seconds = float(fields[2])
    # Drop the padding space and the quotes wrapping the label list so the
    # stored value is just "/m/xxx,/m/yyy".
    positive_labels = fields[3].strip().strip('"') if len(fields) > 3 else ''
    return ytid, start_seconds, end_seconds, positive_labels

# Read and parse the AudioSet CSV
# The first three lines of the file are header lines (the third one names the
# data columns), so they are skipped rather than parsed.
HEADER_LINE_COUNT = 3

parsed_lines = []
with open(audioset_csv_path, 'r') as file:
    for line_number, line in enumerate(file):
        # Skip the header lines, and tolerate blank lines (e.g. a trailing
        # newline at the end of the file) instead of crashing in the parser.
        if line_number < HEADER_LINE_COUNT or not line.strip():
            continue
        parsed_lines.append(parse_audioset_line(line))

# Create a DataFrame from the parsed data
audioset_df = pd.DataFrame(parsed_lines, columns=['YTID', 'start_seconds', 'end_seconds', 'positive_labels'])

# audioset_df now holds one row per segment, including rows with multiple positive labels
audioset_df

Unnamed: 0,YTID,start_seconds,end_seconds,positive_labels
0,--PJHxphWEs,30.0,40.0,"""/m/09x0r,/t/dd00088"""
1,--ZhevVpy1s,50.0,60.0,"""/m/012xff"""
2,--aE2O5G5WE,0.0,10.0,"""/m/03fwl,/m/04rlf,/m/09x0r"""
3,--aO5cdqSAg,30.0,40.0,"""/t/dd00003,/t/dd00005"""
4,--aaILOrkII,200.0,210.0,"""/m/032s66,/m/073cg4"""
...,...,...,...,...
22155,zyqg4pYEioQ,20.0,30.0,"""/m/09x0r,/m/0llzx"""
22156,zz0ddNfz0h0,30.0,40.0,"""/m/012f08,/m/03cl9h,/m/07yv9,/m/0k4j"""
22157,zz8TGV83nkE,80.0,90.0,"""/m/012f08,/m/02mk9,/m/04_sv,/m/07yv9"""
22158,zzlK8KDqlr0,370.0,380.0,"""/m/01m2v,/m/07qc9xj,/m/09x0r,/t/dd00125"""


Trim the DataFrame to a small test subset<br>
<font color='red'>Issue 1: some videos in the dataset have been deleted or made private</font>

In [None]:
# Keep only the rows at positions 2 and 3 as a small trial subset
test_df = audioset_df.iloc[2:4]
test_df

Unnamed: 0,YTID,start_seconds,end_seconds,positive_labels
2,--aE2O5G5WE,0.0,10.0,"""/m/03fwl,/m/04rlf,/m/09x0r"""
3,--aO5cdqSAg,30.0,40.0,"""/t/dd00003,/t/dd00005"""


Install the packages needed to download the audio and trim it to the specified segments

In [None]:
!pip install -U yt-dlp
!apt-get install ffmpeg

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 32 not upgraded.


Download WAV files to the Colab environment

In [None]:
import os
from subprocess import call

# subprocess.run is used below; import it here so this cell does not depend on
# an import made in an earlier cell.
import subprocess

output_directory = 'downloaded_music'
os.makedirs(output_directory, exist_ok=True)

# Download each segment in test_df as a WAV file named <YTID>_<start>_<end>.wav.
# A failing download (e.g. a deleted or private video) is reported and the
# loop moves on to the next row.
for row in test_df.itertuples(index=False):
    ytid = row.YTID
    start_seconds = row.start_seconds
    end_seconds = row.end_seconds
    output_filename = os.path.join(output_directory, f"{ytid}_{start_seconds}_{end_seconds}.wav")

    # Command to download the audio segment using yt-dlp and ffmpeg.
    # It is passed as an argument list (no shell), so values taken from the
    # CSV are never interpreted by a shell.
    command = [
        'yt-dlp',
        '--extract-audio',
        '--audio-format', 'wav',
        '--postprocessor-args', f'-ss {start_seconds} -to {end_seconds}',
        '-o', output_filename,
        f'https://www.youtube.com/watch?v={ytid}',
    ]

    try:
        result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        print(result.stdout.decode())
    except subprocess.CalledProcessError as e:
        print(f"Error occurred: {e.stderr.decode()}")


[youtube] Extracting URL: https://www.youtube.com/watch?v=--aE2O5G5WE
[youtube] --aE2O5G5WE: Downloading webpage
[youtube] --aE2O5G5WE: Downloading ios player API JSON
[youtube] --aE2O5G5WE: Downloading android player API JSON
[youtube] --aE2O5G5WE: Downloading m3u8 information
[youtube] --aE2O5G5WE: Downloading MPD manifest
[info] --aE2O5G5WE: Downloading 1 format(s): 251
[download] Destination: downloaded_music/--aE2O5G5WE_0.0_10.0.webm
[download]   0.2% of  400.17KiB at  545.14KiB/s ETA 00:00[download]   0.7% of  400.17KiB at  986.51KiB/s ETA 00:00[download]   1.7% of  400.17KiB at    1.82MiB/s ETA 00:00[download]   3.7% of  400.17KiB at    3.28MiB/s ETA 00:00[download]   7.7% of  400.17KiB at    2.35MiB/s ETA 00:00[download]  15.7% of  400.17KiB at    2.39MiB/s ETA 00:00[download]  31.7% of  400.17KiB at    3.21MiB/s ETA 00:00[download]  63.7% of  400.17KiB at    4.70MiB/s ETA 00:00[download] 100.0% of  400.17KiB at    6.30MiB/s ETA 00:00[download] 100% of  400.17KiB in 0

Play a sound clip

In [None]:
from IPython.display import Audio

# Replace this with the actual path to your downloaded WAV file.
# The file name follows the <YTID>_<start_seconds>_<end_seconds>.wav pattern
# used by the download step.
wav_file_path = '/content/downloaded_music/--aE2O5G5WE_0.0_10.0.wav'

# Left as the cell's last expression so the notebook displays the Audio object.
Audio(wav_file_path)


Download the sound clips to the local machine

In [None]:
import shutil
from google.colab import files

# Folder that holds the downloaded WAV clips
downloaded_audio_directory = '/content/downloaded_music'

# Where the ZIP archive is written (any location in the Colab workspace works)
zip_file_path = '/content/downloaded_music_wav.zip'

# make_archive appends the '.zip' suffix itself, so pass the path without it
archive_base_name = zip_file_path.replace('.zip', '')
shutil.make_archive(
    base_name=archive_base_name,
    format='zip',
    root_dir=downloaded_audio_directory,
)

# Send the finished archive to the browser as a download
files.download(zip_file_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>