In [63]:
import torch
import torchaudio
import soundfile as sf
import matplotlib.pyplot as plt
import librosa
import os
from audio_dataset import audio_segment
import numpy as np
import random
import pandas as pd
from tqdm import tqdm
from collections import Counter

working_dir = os.path.join(os.getcwd().split('Text2BGAudio')[0],'Text2BGAudio')
os.chdir(working_dir)
print(f"{working_dir=}")

working_dir='c:\\Users\\amitmils\\Documents\\Repo\\Text2BGAudio'


In [64]:
path_all_data = r"_Data\Music\Music Data New"
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1

data = list()
for label in os.listdir(path_all_data):
    if not os.path.isdir(os.path.join(path_all_data, label)):
        continue
    label_folder = os.path.join(path_all_data, label)
    audio_files = [os.path.join(label_folder,f) for f in os.listdir(label_folder) if f.endswith('.wav')]
    num_audios = len(audio_files)

    num_for_train = int(train_ratio* num_audios)
    num_for_val = int(val_ratio * num_audios)

    random.shuffle(audio_files)
    train_audios = audio_files[:num_for_train]
    val_audios = audio_files[num_for_train:num_for_train+num_for_val]
    test_audios = audio_files[num_for_train+num_for_val:]

    for audio_List,dataset_name in zip([train_audios,val_audios,test_audios],['train','val','test']):
        for audio in audio_List:
            data.append({"file_name" : os.path.basename(audio).split('.')[0], "label": label.lower(), 'dataset' : dataset_name, "file_path" : audio})

df = pd.DataFrame(data)
df.to_csv(os.path.join(path_all_data,'train_val_test_split.csv'), index=False)
counts = df.groupby(['dataset', 'label']).size().reset_index(name='count')
print(counts)

   dataset     label  count
0     test     anger     60
1     test      fear     52
2     test       joy     95
3     test      love     84
4     test   sadness     65
5     test  surprise     42
6    train     anger    468
7    train      fear    407
8    train       joy    755
9    train      love    668
10   train   sadness    513
11   train  surprise    330
12     val     anger     58
13     val      fear     50
14     val       joy     94
15     val      love     83
16     val   sadness     64
17     val  surprise     41


### Music Dataset

In [65]:
data_set = {"train" : [], "val" : [], "test" : []}
for _,row in tqdm(df.iterrows(),desc="Loading audio files",total=len(df), leave=True):
   waveform, sr = librosa.load(row.file_path)
   data_set[row.dataset].append((waveform.astype(np.float32),sr,row.label,row.file_path))

len_str = "_".join([f"{name}{len(data_set[dataset])}" for name,dataset in zip(['tr','val','te'],data_set)])
torch.save(data_set, os.path.join(path_all_data, f"music_dataset_fixed_{os.path.basename(path_all_data)}_{len_str}.pt"))

Loading audio files: 100%|██████████| 3929/3929 [00:18<00:00, 214.80it/s]


In [25]:
data_set = []
for label in os.listdir(music_data_raw_folder):
    curr_size = len(data_set)
    title = f"Processing label: {label}"
    print("#" * len(title))
    print(title)
    label_folder_path = os.path.join(music_data_raw_folder, label)
    for audio_file in os.listdir(label_folder_path):
        audio_file_path = os.path.join(label_folder_path, audio_file)
        waveform, orig_sample_rate = librosa.load(audio_file_path)
        waveform = torch.tensor(waveform)
        if SR != orig_sample_rate:
            waveform = torchaudio.transforms.Resample(orig_sample_rate, SR)(waveform)
        segment_length = 10 * SR
        batches = waveform.split(segment_length)
        for batch_seg in batches:
            if len(batch_seg) < segment_length:
                continue
            data_set.append((batch_seg.numpy().astype(np.float32),SR,label,audio_file_path))
    print(f"Created {len(data_set) - curr_size} segments")
    print("#" * len(title)  + "\n\n")

torch.save(data_set, os.path.join(save_folder, f"music_dataset_{os.path.basename(music_data_raw_folder)}_size{len(data_set)}.pt"))

#######################
Processing label: Angry
Created 10 segments
#######################


#####################
Processing label: Joy
Created 2 segments
#####################


######################
Processing label: Love
Created 10 segments
######################


#####################
Processing label: Sad
Created 3 segments
#####################


#######################
Processing label: Scary
Created 4 segments
#######################


##########################
Processing label: Surprise
Created 10 segments
##########################




In [14]:
data_set = []
output_folder = r"_Data/Music/train_segments"
for label in os.listdir(music_data_raw_folder):
    num_segments = 0
    title = f"Processing label: {label}"
    print("#" * len(title))
    print(title)
    label_folder_path = os.path.join(music_data_raw_folder, label)
    output_label_folder = os.path.join(output_folder, label)
    for audio_file in os.listdir(label_folder_path):
        audio_file_path = os.path.join(label_folder_path, audio_file)
        waveform, orig_sample_rate = librosa.load(audio_file_path)
        waveform = torch.tensor(waveform)
        if SR != orig_sample_rate:
            waveform = torchaudio.transforms.Resample(orig_sample_rate, SR)(waveform)
        segment_length = 10 * SR
        batches = waveform.split(segment_length)

        for i,batch_seg in enumerate(batches):
            if len(batch_seg) < segment_length:
                continue
            segment_name = os.path.join(output_label_folder,f"{audio_file.split('.')[0]}_{i}.wav")
            os.makedirs(output_label_folder, exist_ok=True)
            sf.write(segment_name, batch_seg.numpy().astype(np.float32), SR)
        num_segments+=i+1
    print(f"Created {num_segments} segments")
    print("#" * len(title)  + "\n\n")


#######################
Processing label: Angry
Created 1131 segments
#######################


#####################
Processing label: Joy
Created 1662 segments
#####################


######################
Processing label: Love
Created 1939 segments
######################


#####################
Processing label: Sad
Created 1233 segments
#####################


#######################
Processing label: Scary
Created 1231 segments
#######################


##########################
Processing label: Surprise
Created 360 segments
##########################




In [16]:
import pandas as pd
import os
import random


In [23]:
data_tsv = list()
for root, dirs, files in os.walk(r"C:\Users\amitmils\Documents\Repo\Text2BGAudio\_Data\Music\train_segments"):
    for file in files:
        data_tsv.append(
            {
                "name": file.split(".")[0],
                "dataset": "MoodAudio",
                "caption": os.path.basename(root).lower(),
                "audio_path": os.path.join("/content/drive/MyDrive/Colab Notebooks/AmitM/Make-An-Audio/data",root.split('Music\\')[-1], file).replace('\\','/'),
            }
        )
random.shuffle(data_tsv)
df = pd.DataFrame(data_tsv)
df.to_csv(r"C:\Users\amitmils\Documents\Repo\Text2BGAudio\_Data\Music\train_segments.tsv", sep="\t", index=False)