In [None]:
from google.colab import drive

# Mount Google Drive so the project folder is reachable under /content/drive.
drive.mount('/content/drive')

# Use the absolute project path: the relative form only resolves while the
# working directory is still /content, so re-running this cell would fail.
%cd /content/drive/MyDrive/Colab Notebooks/dcase2023_task2_baseline_ae


# %pip installs into the environment of the running kernel.
%pip install -r requirements.txt
%pip install librosa
%pip install soundfile


print("Opened drive successfully.")

In [None]:
# Delete artefacts left by earlier runs: the DCASE2023T2 summary folder, the
# baseline_MAHALA and baseline_MSE result folders, and the processed ToyCar
# test/train data.
# NOTE(review): presumably this forces the later cells to regenerate them from
# the raw data — confirm against the pipeline scripts.
# `rm -rf` is irreversible and these paths are on the mounted Google Drive.
!rm -rf "/content/drive/MyDrive/Colab Notebooks/dcase2023_task2_baseline_ae/results/dev_data/baseline/summarize/DCASE2023T2"
!rm -rf "/content/drive/MyDrive/Colab Notebooks/dcase2023_task2_baseline_ae/results/dev_data/baseline_MAHALA"
!rm -rf "/content/drive/MyDrive/Colab Notebooks/dcase2023_task2_baseline_ae/results/dev_data/baseline_MSE"
!rm -rf "/content/drive/MyDrive/Colab Notebooks/dcase2023_task2_baseline_ae/data/dcase2023t2/dev_data/processed/ToyCar/test"
!rm -rf "/content/drive/MyDrive/Colab Notebooks/dcase2023_task2_baseline_ae/data/dcase2023t2/dev_data/processed/ToyCar/train"

In [None]:
import os
import torch
import soundfile as sf
from concurrent.futures import ThreadPoolExecutor, as_completed

def load_audio(audio_path):
    """Read an audio file with ``soundfile``.

    Args:
        audio_path: Path of the file to read.

    Returns:
        A ``(samples, sample_rate)`` tuple as produced by ``sf.read``.
    """
    samples, sample_rate = sf.read(audio_path)
    return samples, sample_rate

def pitch_shift(y, sr, pitch_semitones):
    """Shift the pitch of a 1-D signal by linear-interpolation resampling.

    The signal is resampled to ``len(y) / 2 ** (pitch_semitones / 12)`` samples.
    Played back at the unchanged sample rate, a shorter signal sounds higher,
    so positive ``pitch_semitones`` raise the pitch and negative values lower
    it. Because this is plain resampling, the duration changes by the same
    factor.

    Args:
        y: 1-D array of audio samples.
        sr: Sample rate of ``y``. Unused; kept for interface compatibility.
        pitch_semitones: Number of semitones to shift by (may be negative).

    Returns:
        1-D NumPy array holding the resampled signal.
    """
    pitch_factor = 2 ** (pitch_semitones / 12.0)
    # Raising the pitch by `pitch_factor` means fitting the same waveform into
    # fewer samples, hence the division. Never request fewer than one sample.
    new_length = max(1, int(y.shape[0] / pitch_factor))
    # Prefer the GPU but fall back to the CPU when CUDA is not available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # interpolate() expects a (batch, channels, length) tensor.
    y_tensor = torch.tensor(y, device=device).unsqueeze(0).unsqueeze(0)
    y_pitch_shifted = torch.nn.functional.interpolate(
        y_tensor, size=new_length, mode='linear', align_corners=False
    )
    # Index rather than squeeze() so a one-sample result stays 1-D.
    return y_pitch_shifted[0, 0].cpu().numpy()

def time_stretch(y, sr, rate):
    """Change the length of a 1-D signal by linear-interpolation resampling.

    The output has ``len(y) / rate`` samples, so ``rate < 1`` lengthens the
    signal and ``rate > 1`` shortens it. Because this is plain resampling, the
    pitch heard at the unchanged sample rate scales together with the speed.

    Args:
        y: 1-D array of audio samples.
        sr: Sample rate of ``y``. Unused; kept for interface compatibility.
        rate: Speed factor; must be greater than zero.

    Returns:
        1-D NumPy array holding the resampled signal.

    Raises:
        ValueError: If ``rate`` is not positive.
    """
    if rate <= 0:
        raise ValueError(f"rate must be positive, got {rate}")
    # Prefer the GPU but fall back to the CPU when CUDA is not available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # interpolate() expects a (batch, channels, length) tensor.
    y_tensor = torch.tensor(y, device=device).unsqueeze(0).unsqueeze(0)
    # Never request fewer than one sample, which interpolate() rejects.
    new_length = max(1, int(y_tensor.shape[-1] / rate))
    y_stretched = torch.nn.functional.interpolate(
        y_tensor, size=new_length, mode='linear', align_corners=False
    )
    # Index rather than squeeze() so a one-sample result stays 1-D.
    return y_stretched[0, 0].cpu().numpy()

def process_audio_file(audio_path, pitch_semitones, stretch_rate, output_directory):
    """Write augmented copies of one audio file into ``output_directory``.

    A ``<stem>_pitch_shifted<ext>`` file is written when ``pitch_semitones`` is
    not None, and a ``<stem>_time_stretched<ext>`` file when ``stretch_rate``
    is not None. Both are saved with the sample rate of the source file.

    Args:
        audio_path: Path of the source audio file.
        pitch_semitones: Semitone shift passed to ``pitch_shift``, or None to
            skip the pitch-shifted copy.
        stretch_rate: Rate passed to ``time_stretch``, or None to skip the
            time-stretched copy.
        output_directory: Directory that receives the augmented files.

    Returns:
        The string ``"Processed <file name>"`` for the source file.
    """
    samples, sample_rate = load_audio(audio_path)
    source_name = os.path.basename(audio_path)
    stem, extension = os.path.splitext(source_name)

    def save(target_name, data):
        # Store `data` under `target_name`, keeping the source sample rate.
        sf.write(os.path.join(output_directory, target_name), data, sample_rate)

    if pitch_semitones is not None:
        shifted = pitch_shift(samples, sample_rate, pitch_semitones)
        save(f"{stem}_pitch_shifted{extension}", shifted)

    if stretch_rate is not None:
        stretched = time_stretch(samples, sample_rate, stretch_rate)
        save(f"{stem}_time_stretched{extension}", stretched)

    return f"Processed {source_name}"

def main():
    """Augment every raw ToyCar training clip with pitch and speed variants.

    For each ``.wav`` file in the training directory, one task per entry of
    ``pitch_semitones_list`` and one per entry of ``stretch_rates`` is
    submitted to a thread pool. The augmented files are written into the same
    directory, and the status string of each task is printed as it completes.
    """
    audio_directory = "/content/drive/MyDrive/Colab Notebooks/dcase2023_task2_baseline_ae/data/dcase2023t2/dev_data/raw/ToyCar/train"
    output_directory = audio_directory

    # NOTE(review): process_audio_file names its outputs without the parameter
    # value, so more than one entry per list would overwrite the same file.
    pitch_semitones_list = [2]
    stretch_rates = [0.8]

    # Outputs land next to the inputs, so files that already carry a suffix
    # added by process_audio_file are skipped; otherwise every re-run would
    # augment the previous run's output again. Sorting keeps the order stable.
    augmented_suffixes = ("_pitch_shifted", "_time_stretched")
    audio_files = sorted(
        os.path.join(audio_directory, name)
        for name in os.listdir(audio_directory)
        if name.endswith(".wav")
        and not os.path.splitext(name)[0].endswith(augmented_suffixes)
    )

    batch_size = 10
    with ThreadPoolExecutor(max_workers=batch_size) as executor:
        futures = []
        for audio_file in audio_files:
            for pitch_semitones in pitch_semitones_list:
                futures.append(executor.submit(process_audio_file, audio_file, pitch_semitones, None, output_directory))
            for stretch_rate in stretch_rates:
                futures.append(executor.submit(process_audio_file, audio_file, None, stretch_rate, output_directory))

        # result() re-raises any exception raised inside a worker.
        for future in as_completed(futures):
            print(future.result())

# Entry point: run the augmentation when this code executes as the top-level
# program (which includes running it as a notebook cell).
if __name__ == "__main__":
    main()


In [None]:
# Run the training script from the current working directory.
# NOTE(review): `-d` presumably selects the development dataset — confirm in the script.
!bash 01_train_2023t2.sh -d

In [None]:
# Run the first test script (02a) with the same `-d` flag as the training cell.
# NOTE(review): how 02a differs from 02b is not visible here — confirm in the scripts.
!bash 02a_test_2023t2.sh -d

In [None]:
# Run the second test script (02b) with the same `-d` flag as the training cell.
# NOTE(review): how 02b differs from 02a is not visible here — confirm in the scripts.
!bash 02b_test_2023t2.sh -d

In [None]:
# Run the result-summary script, passing DCASE2023T2 as its first argument and
# the same `-d` flag as the earlier cells.
# NOTE(review): presumably this produces the summary folder deleted by the cleanup cell — confirm.
!bash 03_summarize_results.sh DCASE2023T2 -d