# classifying music styles

In [68]:
import os
import pandas as pd
import torch, torchaudio
from pydub import AudioSegment

## preparations

Download the GTZAN dataset first: http://marsyas.info/downloads/datasets.html#

Save the dataset in the project directory (<code>path_root</code>), so that the <code>genres_original</code> folder sits directly inside it.

In [69]:
# paths
path_root = '/home/christian/Documents/sound_classifier' #change root directory here
path_orgw = os.path.join(path_root, 'genres_original')
path_chnk = os.path.join(path_root, 'audio_chunks')
path_spec = os.path.join(path_root, 'spectrograms')

# dictionary of all genres with tracks
# only sub-directories count as genres (stray files would break the listing
# below); sorting keeps the genre order, and therefore the integer labels
# derived from it later, identical between runs and machines
genres = sorted(
    entry for entry in os.listdir(path_orgw)
    if os.path.isdir(os.path.join(path_orgw, entry))
)
tracks = {x: sorted(os.listdir(os.path.join(path_orgw, x))) for x in genres}

# make folder structure
# folders are created under path_root (not the current working directory);
# makedirs also creates the parent folder and exist_ok lets the cell be re-run
for g in genres:
    os.makedirs(os.path.join(path_chnk, g), exist_ok=True)
    os.makedirs(os.path.join(path_spec, g), exist_ok=True)

## split into chunks of 3 seconds

In [70]:
# size of the audio snippets in ms
chunk_size = 3000

# cut every track into consecutive chunk_size pieces and write each piece to
# audio_chunks/<genre>/<genre>_<track index>_<chunk index>.wav
for genre in genres:
    for counttrack, file in enumerate(tracks[genre]):
        try:
            infile = os.path.join(path_orgw, genre, file)
            audio = AudioSegment.from_wav(infile)

            # number of complete chunks; a shorter remainder at the end of
            # the track is dropped
            chunk_nmbr = len(audio) // chunk_size

            for countfile in range(chunk_nmbr):
                start = countfile * chunk_size
                outfile = '{}_{}_{}.wav'.format(genre, counttrack, countfile)
                outfile = os.path.join(path_chnk, genre, outfile)
                # AudioSegment slices are indexed in milliseconds
                audio[start:start + chunk_size].export(outfile, format='wav')
        except Exception as exc:
            # best effort: report the track that failed (with the reason) and
            # carry on; KeyboardInterrupt is deliberately not caught so the
            # loop can still be stopped
            print('error: ' + str(file) + ' (' + repr(exc) + ')')

## create spectrograms

In [71]:
# spectrogram transform shared by all chunks
# (per the torchaudio docs, power=None returns the complex spectrum)
trans_spec = torchaudio.transforms.Spectrogram(n_fft=515, power=None, return_complex=True)

# convert every audio chunk into a tensor saved as
# spectrograms/<genre>/<chunk name without its last 4 characters>.pt
for genre in genres:
    chunk_dir = os.path.join(path_chnk, genre)
    spec_dir = os.path.join(path_spec, genre)

    for chunk_name in os.listdir(chunk_dir):
        # the sample rate returned by load() is not used
        waveform, _sample_rate = torchaudio.load(os.path.join(chunk_dir, chunk_name))
        target = os.path.join(spec_dir, chunk_name[:-4] + '.pt')
        torch.save(trans_spec(waveform), target)

## create dataset .csv file for dataloader

In [79]:
from sklearn.model_selection import train_test_split

# one row per spectrogram file: ['spectrograms/<genre>/<file>', genre index],
# where the genre index is the position of the genre in `genres`
rows = []
for label, genre_name in enumerate(genres):
    spec_dir = os.path.join(path_spec, genre_name)
    rows.extend(
        ['spectrograms/' + genre_name + '/' + spec_file, label]
        for spec_file in os.listdir(spec_dir)
    )

dx = pd.DataFrame(rows, columns=['file', 'category'])

# hold out 20% of the rows for testing; no random_state is given, so the
# split is different on every run
dx_train, dx_test = train_test_split(dx, test_size=.2)

# write both splits next to the data, without the DataFrame index column
for frame, csv_name in ((dx_train, 'dataset_files_train.csv'),
                        (dx_test, 'dataset_files_test.csv')):
    frame.to_csv(os.path.join(path_root, csv_name), index=None)