In [1]:
import numpy as np
import pandas as pd

In [2]:
audio_metadata = pd.read_csv('/content/training_metadata.csv')
audio_metadata['labels'] = audio_metadata['labels'].apply(eval)
audio_metadata

Unnamed: 0,video_id,start_time_seconds,end_time_seconds,labels
0,wqoOX8K8DEU,30.0,40.0,"[396, 397]"
1,wqH6Sj_h948,120.0,130.0,"[0, 441, 443]"
2,wq1098my4zA,130.0,140.0,"[27, 137, 271]"
3,wqR7LHho-WE,10.0,20.0,"[0, 22, 25]"
4,wq6Me-UUbSc,360.0,370.0,[413]
...,...,...,...,...
21777,2w6tV5kDGWo,240.0,250.0,[178]
21778,2wZCoeq9Ppc,80.0,90.0,"[137, 138, 185, 195, 196, 198, 268]"
21779,2wajg-UP-Gs,0.0,10.0,[459]
21780,lZavPVn7O4Q,180.0,190.0,"[137, 258, 260, 273]"


In [3]:
!git clone https://github.com/audioset/ontology.git

Cloning into 'ontology'...
remote: Enumerating objects: 14, done.[K
remote: Total 14 (delta 0), reused 0 (delta 0), pack-reused 14[K
Receiving objects: 100% (14/14), 87.20 KiB | 3.63 MiB/s, done.
Resolving deltas: 100% (2/2), done.


In [4]:
label_index = pd.read_csv('/content/class_labels_indices.csv')
label_index

Unnamed: 0,index,mid,display_name
0,0,/m/09x0r,Speech
1,1,/m/05zppz,"Male speech, man speaking"
2,2,/m/02zsn,"Female speech, woman speaking"
3,3,/m/0ytgt,"Child speech, kid speaking"
4,4,/m/01h8n0,Conversation
...,...,...,...
522,522,/m/07p_0gm,Throbbing
523,523,/m/01jwx6,Vibration
524,524,/m/07c52,Television
525,525,/m/06bz3,Radio


In [9]:
# Build a dictionary to map mid to index
mid_to_index = {row['mid']: row['index'] for _, row in label_index.iterrows()}
# Build a dictionary to map index to class
index_to_class = {row['index']: row['display_name'] for _, row in label_index.iterrows()}

In [6]:
import json

# Load the ontology JSON data
ontology_path = "/content/ontology/ontology.json"
with open(ontology_path, 'r') as file:
    ontology_data = json.load(file)

# Build a dictionary to map child ID to parent ID
child_to_parent = {}
for item in ontology_data:
    for child_id in item["child_ids"]:
      child_index = mid_to_index.get(child_id, None)
      parent_index = mid_to_index.get(item["id"], None)
      if not parent_index or not child_index:
        continue
      child_to_parent[child_index] = parent_index

child_to_parent

{9: 8,
 10: 8,
 11: 8,
 12: 8,
 13: 8,
 17: 16,
 18: 16,
 19: 16,
 20: 16,
 21: 16,
 23: 22,
 24: 22,
 28: 138,
 29: 27,
 30: 254,
 32: 27,
 33: 27,
 34: 27,
 35: 27,
 36: 27,
 31: 30,
 42: 41,
 43: 41,
 44: 41,
 45: 41,
 46: 41,
 48: 47,
 62: 61,
 63: 61,
 65: 64,
 73: 72,
 86: 72,
 108: 72,
 74: 73,
 81: 73,
 75: 74,
 76: 74,
 77: 122,
 78: 74,
 79: 122,
 80: 74,
 82: 81,
 83: 81,
 84: 296,
 85: 81,
 87: 86,
 90: 86,
 93: 86,
 95: 86,
 97: 86,
 98: 86,
 88: 491,
 89: 87,
 91: 90,
 92: 200,
 94: 93,
 96: 97,
 99: 98,
 102: 98,
 104: 98,
 106: 98,
 100: 99,
 101: 99,
 103: 102,
 105: 104,
 107: 106,
 109: 108,
 111: 108,
 122: 108,
 123: 108,
 126: 108,
 132: 108,
 134: 108,
 136: 108,
 110: 109,
 112: 111,
 115: 111,
 117: 111,
 119: 111,
 121: 111,
 113: 112,
 114: 112,
 116: 115,
 118: 117,
 120: 119,
 124: 123,
 125: 123,
 127: 126,
 128: 126,
 129: 126,
 131: 126,
 130: 131,
 133: 132,
 135: 134,
 138: 137,
 139: 138,
 152: 138,
 161: 138,
 184: 138,
 185: 138,
 189: 138,
 195: 13

In [7]:
node_to_root = {}
# Function to find root label
def find_root(node):
  while node in child_to_parent:
    node = child_to_parent[node]
  return node

for k in child_to_parent:
  node_to_root[k] = find_root(k)

node_to_root

{9: 8,
 10: 8,
 11: 8,
 12: 8,
 13: 8,
 17: 16,
 18: 16,
 19: 16,
 20: 16,
 21: 16,
 23: 22,
 24: 22,
 28: 137,
 29: 27,
 30: 254,
 32: 27,
 33: 27,
 34: 27,
 35: 27,
 36: 27,
 31: 254,
 42: 41,
 43: 41,
 44: 41,
 45: 41,
 46: 41,
 48: 47,
 62: 61,
 63: 61,
 65: 64,
 73: 72,
 86: 72,
 108: 72,
 74: 72,
 81: 72,
 75: 72,
 76: 72,
 77: 72,
 78: 72,
 79: 72,
 80: 72,
 82: 72,
 83: 72,
 84: 288,
 85: 72,
 87: 72,
 90: 72,
 93: 72,
 95: 72,
 97: 72,
 98: 72,
 88: 491,
 89: 72,
 91: 72,
 92: 137,
 94: 72,
 96: 72,
 99: 72,
 102: 72,
 104: 72,
 106: 72,
 100: 72,
 101: 72,
 103: 72,
 105: 72,
 107: 72,
 109: 72,
 111: 72,
 122: 72,
 123: 72,
 126: 72,
 132: 72,
 134: 72,
 136: 72,
 110: 72,
 112: 72,
 115: 72,
 117: 72,
 119: 72,
 121: 72,
 113: 72,
 114: 72,
 116: 72,
 118: 72,
 120: 72,
 124: 72,
 125: 72,
 127: 72,
 128: 72,
 129: 72,
 131: 72,
 130: 72,
 133: 72,
 135: 72,
 138: 137,
 139: 137,
 152: 137,
 161: 137,
 184: 137,
 185: 137,
 189: 137,
 195: 137,
 199: 137,
 200: 137,
 208: 1

In [11]:
for i, row in audio_metadata.iterrows():
  labels = row['labels']
  new_labels = set()
  for label in labels:
    new_labels.add(node_to_root.get(label, label))
  class_labels = [index_to_class[new_label] for new_label in new_labels]
  audio_metadata.at[i, 'labels'] = list(class_labels)

In [12]:
audio_metadata

Unnamed: 0,video_id,start_time_seconds,end_time_seconds,labels
0,wqoOX8K8DEU,30.0,40.0,[Alarm]
1,wqH6Sj_h948,120.0,130.0,"[Speech, Glass]"
2,wq1098my4zA,130.0,140.0,"[Music, Singing, Lullaby]"
3,wqR7LHho-WE,10.0,20.0,"[Speech, Wail, moan, Crying, sobbing]"
4,wq6Me-UUbSc,360.0,370.0,[Mechanisms]
...,...,...,...,...
21777,2w6tV5kDGWo,240.0,250.0,[Music]
21778,2wZCoeq9Ppc,80.0,90.0,"[Music, Theme music]"
21779,2wajg-UP-Gs,0.0,10.0,[Arrow]
21780,lZavPVn7O4Q,180.0,190.0,"[Music, Christian music, Music of Asia, Christ..."


In [13]:
from google.colab import files
# save updated metadata
audio_metadata.to_csv('/content/training_metadata_rootclass.csv', index=False)
files.download('/content/training_metadata_rootclass.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>