# This file shows how we set up the dataset
This process was run locally, since loading thousands of audio tracks onto Google Drive wasn't feasible. We set up the dataset locally and then loaded the resulting images directly for use on Colab.

In this file the dataset is built from an initial sample of 1000 tracks for each class. \\
The final project uses 3000 tracks for each class.


The result of this process is the image dataset. \\
This file saves the data inside "./dataset". \\
The final dataset is inside "./6k_samples_dataset"

In [None]:
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive so the project folder is reachable from this runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/Homeworks FDS/Progetto

/content/drive/.shortcut-targets-by-id/1Vzw8m8Ha_VscaXjjIlDjZrQtp1m45M3N/Homeworks FDS/Progetto


In [None]:
%ls

[0m[01;34maam-cropped[0m/  [01;34mdataset[0m/  dataset_setup.ipynb  [01;34mfma_small_cropped[0m/


In [None]:
%matplotlib inline

import os
import random
import shutil
import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display

plt.rcParams['figure.figsize'] = (17, 5)

## Save 1000 natural tracks

From the full dataset inside the "./fma_small" folder, containing 8000 tracks, we randomly selected 1000 of them (3000 for the final dataset) and put them inside "./fma_small_cropped".

In [None]:
# Randomly pick `num_files` natural tracks from the source folder tree and copy
# them into a freshly created target folder.
source_dir = './fma_small'
target_dir = './fma_small_cropped'
num_files = 1000

# Always start from an empty target folder so tracks copied by a previous run
# never mix with the new sample.
if os.path.exists(target_dir):
    shutil.rmtree(target_dir)
os.makedirs(target_dir)

# Collect every regular file found one level below source_dir
# (source_dir/<subfolder>/<track>).
candidate_tracks = []
for subfolder in os.listdir(source_dir):
    sub_path = os.path.join(source_dir, subfolder)
    if not os.path.isdir(sub_path):
        continue
    for track in os.listdir(sub_path):
        track_path = os.path.join(sub_path, track)
        # shutil.copy cannot copy a directory, so keep regular files only.
        if os.path.isfile(track_path):
            candidate_tracks.append(track_path)

# random.sample raises ValueError when asked for more items than exist, so cap
# the request at the number of tracks actually found and report the shortfall.
if len(candidate_tracks) < num_files:
    print(f"only {len(candidate_tracks)} tracks found in {source_dir}, requested {num_files}")
selected_tracks = random.sample(candidate_tracks, min(num_files, len(candidate_tracks)))

for track in selected_tracks:
    shutil.copy(track, target_dir)

print(f"selected {len(selected_tracks)} files and copied them to {target_dir}")

## Crop the artificial tracks at random 30s segments

From the full artificial dataset inside the "./0001-1000-audio-mixes" folder, containing 1000 tracks (3000 for the final dataset), we selected a random 30-second segment from each track and saved it into "./aam-cropped".

In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
from pydub import AudioSegment

# Cut one random 30-second excerpt out of every artificial track and store it
# as a WAV file in a freshly created target folder.
source_dir = './0001-1000-audio-mixes'
target_dir = './aam-cropped'

# Always start from an empty target folder.
if os.path.exists(target_dir):
    shutil.rmtree(target_dir)
os.makedirs(target_dir)

segment_duration = 30 * 1000  # 30 seconds; pydub lengths and slices are in milliseconds

for track in os.listdir(source_dir):
    track_path = os.path.join(source_dir, track)
    # The audio is exported as WAV, so the output file gets a matching ".wav"
    # extension instead of inheriting the source file's extension.
    output_path = os.path.join(target_dir, os.path.splitext(track)[0] + '.wav')
    try:
        audio = AudioSegment.from_file(track_path, format='flac')

        # Tracks longer than the segment are cut at a random offset; shorter
        # (or equally long) tracks are exported whole.
        if len(audio) > segment_duration:
            start = random.randint(0, len(audio) - segment_duration)
            audio = audio[start:start + segment_duration]

        audio.export(output_path, format='wav')
    except Exception as e:
        # Best effort: report the failing track and continue with the rest.
        print(f"error processing {track_path}: {e}")

print(f"processed {len(os.listdir(target_dir))} files and copied them to {target_dir}")

## Dataset creation

### Process each track, generate the spectrogram and save the images into "./dataset" (6k_samples_dataset for the final dataset).

In [None]:
def save_spectrogram_image(filename, save_dir, label):
    """Render the log-mel spectrogram of an audio file and save it as a PNG.

    Parameters
    ----------
    filename : str
        Path of the audio file. It is loaded as mono with ``sr=None``
        (no resampling).
    save_dir : str
        Directory that receives the image. It is not created here, so it
        must already exist.
    label
        Class label, used as the prefix of the image file name.

    Returns
    -------
    str
        Path of the written image: ``<save_dir>/<label>_<base name>.png``.
    """
    x, sr = librosa.load(filename, sr=None, mono=True)

    stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))

    # stft**2 is a power spectrogram, so the mel spectrogram is in power units
    # and has to be converted with power_to_db (10*log10). amplitude_to_db
    # (20*log10) would double every dB value and halve the dynamic range kept
    # by the default top_db clipping.
    mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
    log_mel = librosa.power_to_db(mel)

    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(log_mel, sr=sr, x_axis='time', y_axis='mel', hop_length=512)
        # Axes, ticks and padding are removed so the PNG contains only the spectrogram.
        plt.axis('off')

        base_name = os.path.splitext(os.path.basename(filename))[0]
        save_path = os.path.join(save_dir, f"{label}_{base_name}.png")

        plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when rendering or saving fails; this function
        # is called once per track and open figures would otherwise pile up.
        plt.close(fig)

    return save_path

In [None]:
def process_dataset(audio_dir, save_dir, label):
    """Generate one spectrogram image for every audio file in a folder.

    Parameters
    ----------
    audio_dir : str
        Folder whose regular files are processed; nested folders are skipped.
    save_dir : str
        Folder that receives the images. It is created (with any missing
        parents) when it does not exist.
    label
        Class label forwarded to ``save_spectrogram_image``.

    A track that cannot be processed is reported and skipped so that a single
    bad file does not abort the whole batch.
    """
    os.makedirs(save_dir, exist_ok=True)

    for track in os.listdir(audio_dir):
        track_path = os.path.join(audio_dir, track)
        # Only regular files can be decoded as audio; skip sub-directories.
        if not os.path.isfile(track_path):
            continue
        try:
            save_spectrogram_image(track_path, save_dir, label)
        except Exception as e:
            # Same best-effort reporting as the cropping step.
            print(f"error processing {track_path}: {e}")

    print(f"processed {len(os.listdir(save_dir))} files and saved them to {save_dir}")

In [None]:
# Build both halves of the image dataset: natural tracks get label 1,
# artificial tracks get label 0.
for class_audio_dir, class_save_dir, class_label in (
    ('./fma_small_cropped', './dataset/natural', 1),
    ('./aam-cropped', './dataset/artificial', 0),
):
    process_dataset(class_audio_dir, class_save_dir, label=class_label)