# In [None]:
# ----
# Prepare training data from Metadata file
#----

import pandas as pd

# Load the UrbanSound8K metadata table into a DataFrame
metadata_file_path = 'UrbanSound8K/metadata/UrbanSound8K.csv'
df = pd.read_csv(metadata_file_path)
df.head()


# Build each clip's path as '/fold<fold>/<slice_file_name>' by concatenating
# the 'fold' and 'slice_file_name' columns
df['relative_file_path'] = ('/fold' + df['fold'].astype(str)).str.cat(
    df['slice_file_name'].astype(str), sep='/'
)

# Keep only the columns used downstream: the file path and the class label
df = df.loc[:, ['relative_file_path', 'classID']]
df.head()


#### Read audio from a file
#### Convert to two channels
#### Standardize sampling rate
#### Resize to the same length
#### Data Augmentation : time shift
#### Mel Spectrogram
#### Data Augmentation : time and frequency masking

import math, random
import torch
import torchaudio
from torchaudio import transforms
from IPython.display import Audio

class AudioUtil():
    # ----
    # Read an audio file and hand back its signal together with its sample rate.
    # ----
    @staticmethod
    def open(audio_file):
        """Load ``audio_file`` via ``torchaudio.load``.

        Args:
            audio_file: Location of the audio file, passed straight through
                to ``torchaudio.load``.

        Returns:
            A ``(signal, sample_rate)`` tuple holding the two values produced
            by ``torchaudio.load``.
        """
        waveform, sample_rate = torchaudio.load(audio_file)
        return waveform, sample_rate
    
    # ----
    # Convert the given audio to the desired number of channels.
    # ----
    @staticmethod
    def rechannel(aud, new_channel):
        sig, sr = aud

        if (sig.shape[0] == new_channel):
            # Nothing to do
            return aud
        
        if (new_channel == 1):
            # Convert from stereo to mono by selecting only the first channel
            resig = sig[:1, :]
        else:
            # Convert from mono to stereo by duplicating the first channel
            resig = torch.cat([sig,sig])
        
        return ((resig, sr))
    
    # ----
    # Since resammple applies to a single channel, we resammple one channel at a time
    # ----
    @staticmethod
    def resample(aud, newsr):
        sig, sr = aud

        if (sr == newsr):
            # Nothing to do
            return aud
        
        num_channels = sig.shape[0]
        # Resample first channel
        resig = torchaudio.transforms.Resample(sr, newsr)(sig[:1,:])
        if (num_channels > 1):
            # Resample the second channel and merge both channels
            retwo = torchaudio.transforms.Resample(sr, newsr)(sig[1:,:])
            resig = torch.cat([resig, retwo])

        return ((resig, newsr))