# classifying music styles

In [31]:
import os
import numpy as np
import pandas as pd
import torch, torchaudio
from pydub import AudioSegment

## preparations

Download the GTZAN dataset before running the cells below: http://marsyas.info/downloads/datasets.html#

In [32]:
# path to local project folder
root_path = '/home/christian/Documents/sound_classifier'

# name of the root wav folder (the one that contains one subfolder per genre)
wavfoldrs = 'genres_original'

# Genre names, one per subfolder of the wav root.
# os.listdir returns entries in arbitrary order and also returns plain files,
# so keep directories only and sort them: the position of a genre in this list
# is used further down as its numeric category in the dataset csv.
_wav_root = os.path.join(root_path, wavfoldrs)
genres = sorted(x for x in os.listdir(_wav_root) if os.path.isdir(os.path.join(_wav_root, x)))

# dictionary mapping every genre to the file names of its tracks
# (sorted so that the track counter used in the chunk file names is reproducible)
tracks = {x: sorted(os.listdir(os.path.join(_wav_root, x))) for x in genres}

# size of the audio snippets in ms
chunk_size = 3000

## split into chunks of 3 seconds

In [33]:
# File names of the tracks that could not be split into chunks
error = []

# exist_ok lets this cell be re-run without raising FileExistsError.
# NOTE: chunks of an earlier run are overwritten but never deleted, so remove
# the 'audio_chunks' folder by hand before re-running with another chunk_size.
chunk_root = os.path.join(root_path, 'audio_chunks')
os.makedirs(chunk_root, exist_ok=True)

for genre in genres:
    genre_dir = os.path.join(chunk_root, genre)
    os.makedirs(genre_dir, exist_ok=True)
    for counttrack, file in enumerate(tracks[genre]):
        try:
            audio = AudioSegment.from_wav(os.path.join(root_path, wavfoldrs, genre, file))
            # track length in whole milliseconds
            audio_length = round(audio.duration_seconds * 1000)

            # number of complete chunks; a shorter remainder at the end is dropped
            chunk_nmbr = int(audio_length // chunk_size)

            for countfile in range(chunk_nmbr):
                start = countfile * chunk_size
                outfile = os.path.join(genre_dir, f'{genre}_{counttrack}_{countfile}.wav')
                # AudioSegment slices are indexed in milliseconds
                audio[start:start + chunk_size].export(outfile, format='wav')
        except Exception as exc:
            # Best effort: remember the failing track and carry on with the next one.
            # `Exception` (instead of a bare except) keeps Ctrl-C / kernel interrupts working.
            error.append(file)
            print(f'could not split {genre}/{file}: {exc!r}')

## create spectrograms

These are plain spectrograms. We can switch to mel spectrograms later by changing the torchaudio.transforms class.

In [34]:
# Spectrogram transform applied to every audio chunk (n_fft=800, power=None)
trans_spec = torchaudio.transforms.Spectrogram(n_fft=800, power=None, return_complex=True)

# exist_ok lets this cell be re-run without raising FileExistsError;
# spectrograms that already exist are simply overwritten.
spec_root = os.path.join(root_path, 'spectrograms')
os.makedirs(spec_root, exist_ok=True)

for genre in genres:
    # output folder for this genre
    path = os.path.join(spec_root, genre)
    os.makedirs(path, exist_ok=True)

    chunk_dir = os.path.join(root_path, 'audio_chunks', genre)
    chunks = os.listdir(chunk_dir)

    for chunk in chunks:
        path_to_file = os.path.join(chunk_dir, chunk)
        waveform, sr_input = torchaudio.load(path_to_file)
        spec = trans_spec(waveform)
        # same base name as the audio chunk, stored as a serialized tensor (.pt)
        outfile = os.path.join(path, os.path.splitext(chunk)[0] + '.pt')
        torch.save(spec, outfile)

## create dataset .csv file for dataloader

In [35]:
# Rows of the dataset table: [relative file path, numeric category].
# The category is the position of the genre in `genres`.
# NOTE(review): the file names are taken from 'audio_chunks' (so they end in
# '.wav') but are prefixed with 'spectrograms/', where the files are saved with
# a '.pt' suffix -- confirm that the dataloader swaps the extension.
dx = []

for genre_no, genre in enumerate(genres):
    chunk_dir = os.path.join(root_path, 'audio_chunks', genre)
    dx.extend(['spectrograms/' + genre + '/' + name, genre_no] for name in os.listdir(chunk_dir))

# write the table without the index column
csv_path = os.path.join(root_path, 'dataset_files.csv')
dataset = pd.DataFrame(dx, columns=['file', 'category'])
dataset.to_csv(csv_path, index=None)