**You can use
any packages that come as a default option with Anaconda. If you use an
artificial neural network, the preference is PyTorch, but you are free to use
other packages. I need to be able to run your implementation on my machine
or online (e.g., Google Colab). So, be sure to get approval from me for any
special packages!**

In [42]:
# Mount Google Drive at /content/drive; the paths used in the following cells
# all live below /content/drive/MyDrive.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [43]:
import os

# Change this path to your shared location. Make sure to add a shortcut of the shared
# folder "Group Project" at the path "/content/drive/MyDrive/Group Project".
SHARED_PATH = "/content/drive/MyDrive/Group Project"

# Project folder inside the shared location. It is derived from SHARED_PATH so
# the working directory follows the constant above when it is edited.
PROJECT_DIR = os.path.join(SHARED_PATH, "DCASE23_Task2_Vehicle_Sound_Event_Detection")

# Ensure the directory exists
os.makedirs(SHARED_PATH, exist_ok=True)

print(f"Shared path is ready at: {SHARED_PATH}")

# Relative paths used later (e.g. the extraction directory) resolve against the
# working directory, so switch into the project folder. os.chdir raises
# FileNotFoundError when the folder is missing, instead of silently carrying on
# from the wrong directory.
os.chdir(PROJECT_DIR)
print(os.getcwd())

Shared path is ready at: /content/drive/MyDrive/Group Project
/content/drive/.shortcut-targets-by-id/1WIqvp-iVEF7lgpodL82Np2HHkI81rUym/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection


In [57]:
import os
from pathlib import Path

# ---------------------------
# Paths
# ---------------------------
# Every dataset artefact sits in the same Drive folder, so each path is built
# from that folder plus a file name.
DATA_DIR = "/content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/data/dcase23_dataset/data"
EMBEDDINGS_STEM = "MIT_ast-finetuned-audioset-10-10-0.4593-embeddings"

# Audio archives
train_tar_path = f"{DATA_DIR}/dev_train.tar.gz"
test_tar_path = f"{DATA_DIR}/dev_test.tar.gz"

# Tabular files
metadata_csv_path = f"{DATA_DIR}/dev_metadata.csv"
scores_csv_path = f"{DATA_DIR}/scores.csv"

# Embedding archives
train_embeddings_path = f"{DATA_DIR}/{EMBEDDINGS_STEM}_dev_train.npz"
test_embeddings_path = f"{DATA_DIR}/{EMBEDDINGS_STEM}_dev_test.npz"

# Destination for the extracted WAV files (relative to the working directory)
extract_dir = "extracted_dataset"
os.makedirs(extract_dir, exist_ok=True)

In [50]:
import pandas as pd
import numpy as np

# Load CSV files
metadata_df = pd.read_csv(metadata_csv_path)
scores_df = pd.read_csv(scores_csv_path)

# Load embeddings
# np.load on an .npz file returns an NpzFile that keeps the archive open; the
# with-blocks close it as soon as the stored dict has been materialised.
# SECURITY: allow_pickle=True unpickles the stored object, which can execute
# arbitrary code -- only load embedding files that come from a trusted source.
# NOTE(review): an identical loading cell appears again further down (In [58]).
with np.load(train_embeddings_path, allow_pickle=True) as train_npz:
    train_embeddings = dict(train_npz["arr_0"].item())
with np.load(test_embeddings_path, allow_pickle=True) as test_npz:
    test_embeddings = dict(test_npz["arr_0"].item())

/content/drive/.shortcut-targets-by-id/1WIqvp-iVEF7lgpodL82Np2HHkI81rUym/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection


In [70]:
# Preview the first rows of the metadata table.
metadata_df.head()

Unnamed: 0,d1p,d1v,path,class,domain,section,label,d2p,d2v,d3p,d3v
0,pat,0,train/valve_section_00_source_train_normal_000...,valve,source,0,normal,,,,
1,pat,1,train/valve_section_00_source_train_normal_000...,valve,source,0,normal,,,,
2,pat,1,train/valve_section_00_source_train_normal_000...,valve,source,0,normal,,,,
3,pat,1,train/valve_section_00_source_train_normal_000...,valve,source,0,normal,,,,
4,pat,1,train/valve_section_00_source_train_normal_000...,valve,source,0,normal,,,,


In [54]:
import tarfile

def extract_tar(tar_path, extract_to):
    """Extract a gzip-compressed tar archive into a directory.

    Args:
        tar_path (str): Path to the ``.tar.gz`` archive.
        extract_to (str): Directory the archive members are written to.

    Returns:
        None. A confirmation line is printed after extraction.

    Members are extracted with tarfile's ``"data"`` filter when the running
    Python provides it. The filter refuses members that would be written
    outside ``extract_to`` and avoids the warning that newer Python versions
    emit for an unfiltered ``extractall``.
    """
    with tarfile.open(tar_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=extract_to, filter="data")
        else:
            # Older interpreters have no extraction filters; keep the plain call.
            tar.extractall(path=extract_to)
    print(f"Extracted {tar_path} -> {extract_to}")

# Unpack each archive into its own sub-folder of extract_dir (train first, then test)
for split_name, archive_path in (("train", train_tar_path), ("test", test_tar_path)):
    extract_tar(archive_path, os.path.join(extract_dir, split_name))

  tar.extractall(path=extract_to)


Extracted /content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/data/dcase23_dataset/data/dev_test.tar.gz -> extracted_dataset/test


In [58]:
import pandas as pd
import numpy as np

# Load CSV files
metadata_df = pd.read_csv(metadata_csv_path)
scores_df = pd.read_csv(scores_csv_path)

# Load embeddings
# np.load on an .npz file returns an NpzFile that keeps the archive open; the
# with-blocks close it as soon as the stored dict has been materialised.
# SECURITY: allow_pickle=True unpickles the stored object, which can execute
# arbitrary code -- only load embedding files that come from a trusted source.
with np.load(train_embeddings_path, allow_pickle=True) as train_npz:
    train_embeddings = dict(train_npz["arr_0"].item())
with np.load(test_embeddings_path, allow_pickle=True) as test_npz:
    test_embeddings = dict(test_npz["arr_0"].item())

In [95]:
def create_master_df(split, embeddings_dict):
    """Build one table for a split by joining metadata, scores and embeddings.

    Reads the notebook-level ``metadata_df``, ``scores_df`` and ``extract_dir``.

    Args:
        split (str): ``"train"`` selects metadata rows whose ``path`` contains
            ``source_train`` or ``target_train``; any other value selects rows
            containing ``source_test`` or ``target_test``. The value is also
            used to locate the audio files under ``extract_dir/<split>/<split>``.
        embeddings_dict (dict): Maps an embedding name to a mapping from
            metadata ``path`` to that file's embedding.

    Returns:
        pd.DataFrame: One row per selected metadata entry whose audio file
        exists on disk. Each row carries the metadata columns, an
        ``audio_path`` column, the columns of the matching ``scores_df`` row
        (when there is one) and one list-valued column per embedding name
        (when an embedding is found for the path).
    """
    split_dir = Path(extract_dir) / split / split
    if split == "train":
        pattern = 'source_train|target_train'
    else:
        pattern = 'source_test|target_test'
    df_split = metadata_df[metadata_df['path'].str.contains(pattern, case=False, na=False)]

    # Index the score rows by path once instead of scanning scores_df for every
    # metadata row. If a path occurs several times, the first row is used.
    scores_by_path = {}
    for record in scores_df.to_dict("records"):
        scores_by_path.setdefault(record["path"], record)

    dataset = []
    for _, row in df_split.iterrows():
        # Audio files are looked up by file name only, inside the split folder.
        file_path = split_dir / Path(row['path']).name
        if not file_path.exists():
            continue

        entry = row.to_dict()
        entry["audio_path"] = str(file_path)

        # Add LOF scores
        score_record = scores_by_path.get(row["path"])
        if score_record is not None:
            entry.update(score_record)

        # Add embeddings (stored as plain lists)
        for emb_name, emb_data in embeddings_dict.items():
            emb = emb_data.get(row["path"])
            if emb is not None:
                entry[emb_name] = np.array(emb).tolist()

        dataset.append(entry)

    return pd.DataFrame(dataset)

In [96]:
# Column name under which the embeddings are stored in both master tables
EMBEDDING_NAME = "ast-finetuned-audioset-10-10-0.4593"

df_train = create_master_df("train", {EMBEDDING_NAME: train_embeddings})
df_test = create_master_df("test", {EMBEDDING_NAME: test_embeddings})

In [97]:
# Summarise the columns, non-null counts and dtypes of the test master table.
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1400 entries, 0 to 1399
Data columns (total 17 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   d1p                                  1400 non-null   object 
 1   d1v                                  1400 non-null   object 
 2   path                                 1400 non-null   object 
 3   class                                1400 non-null   object 
 4   domain                               1400 non-null   object 
 5   section                              1400 non-null   int64  
 6   label                                1400 non-null   object 
 7   d2p                                  1000 non-null   object 
 8   d2v                                  1000 non-null   object 
 9   d3p                                  400 non-null    object 
 10  d3v                                  400 non-null    float64
 11  audio_path                    

In [98]:
# Write both master tables as CSV files inside extract_dir, without the row index
for csv_name, master_df in (("train_master.csv", df_train), ("test_master.csv", df_test)):
    master_df.to_csv(os.path.join(extract_dir, csv_name), index=False)

In [102]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

def append_audio_arrays(df, base_dir, path_col="path", extension=".wav"):
    """
    Read every audio file referenced by a DataFrame and attach the waveforms.

    Each value of ``df[path_col]`` is joined onto ``base_dir``; ``extension`` is
    appended when the resulting path does not already end with it. Files are
    read with ``librosa.load(..., sr=None, mono=True)``. A file that cannot be
    loaded is reported on stdout and recorded as an empty array with a sample
    rate of ``None``, so the output keeps one entry per input row.

    Args:
        df (pd.DataFrame): Table holding the relative audio paths.
        base_dir (str): Directory the relative paths are resolved against.
        path_col (str): Name of the column with the relative paths.
        extension (str): Suffix every audio path must end with (default: '.wav').

    Returns:
        pd.DataFrame: A copy of ``df`` with 'audio_array' and 'sample_rate'
        columns added; ``df`` itself is left untouched.
    """
    loaded = []  # one (waveform, rate) pair per row, in row order

    for relative in tqdm(df[path_col], desc="Loading audio files"):
        # Resolve against the base directory and enforce the extension
        candidate = os.path.join(base_dir, relative)
        full_path = candidate if candidate.endswith(extension) else candidate + extension

        try:
            # sr=None keeps the file's own sampling rate
            waveform, rate = librosa.load(full_path, sr=None, mono=True)
        except Exception as e:
            print(f"⚠️ Error loading {full_path}: {e}")
            waveform, rate = np.array([]), None

        loaded.append((waveform, rate))

    result = df.copy()
    result["audio_array"] = [waveform for waveform, _ in loaded]
    result["sample_rate"] = [rate for _, rate in loaded]
    return result

In [103]:
# df_train = pd.read_csv("/content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/extracted_dataset/train_master.csv")
# df_test = pd.read_csv("/content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/extracted_dataset/test_master.csv")

# Root folder of the extracted audio; the trailing slash lets the split name be appended directly
base_audio_dir = "/content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/extracted_dataset/"
train_audio_dir = base_audio_dir + "train"
test_audio_dir = base_audio_dir + "test"

# Attach the waveforms and sampling rates to each master table
df_train_audio = append_audio_arrays(df_train, train_audio_dir, path_col="path")
df_test_audio = append_audio_arrays(df_test, test_audio_dir, path_col="path")

Loading audio files: 100%|██████████| 7000/7000 [01:47<00:00, 64.92it/s]
Loading audio files: 100%|██████████| 1400/1400 [00:18<00:00, 77.37it/s]


In [109]:
train_pickle_path = "/content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/extracted_dataset/train_with_audio.pkl"
test_pickle_path = "/content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/extracted_dataset/test_with_audio.pkl"

# Save as pickle (faster to load than CSV): train table first, then test
for audio_df, pickle_path in (
    (df_train_audio, train_pickle_path),
    (df_test_audio, test_pickle_path),
):
    audio_df.to_pickle(pickle_path)

print(f"Saved train data with audio arrays at: {train_pickle_path}")
print(f"Saved test data with audio arrays at: {test_pickle_path}")

# To resume from the saved files instead of reloading the audio:
# df_train_audio = pd.read_pickle(train_pickle_path)
# df_test_audio = pd.read_pickle(test_pickle_path)

Saved train data with audio arrays at: /content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/extracted_dataset/train_with_audio.pkl
Saved test data with audio arrays at: /content/drive/MyDrive/Group Project/DCASE23_Task2_Vehicle_Sound_Event_Detection/extracted_dataset/test_with_audio.pkl
