In [19]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from pydub import AudioSegment, effects  
import librosa
import librosa.display
%matplotlib inline

Die CSV der GTZAN Collection wird über pandas geladen.

In [20]:
df30sec = pd.read_csv('../../gtzan/features_30_sec.csv')
df30sec.head(5)

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


Die relevanten Informationen werden in einen anderen Dataframe überführt.

In [21]:
df30sec_cut = pd.concat([df30sec['filename'], df30sec['label']], axis=1, keys=['filename','label'])
df30sec_cut.head(5)

Unnamed: 0,filename,label
0,blues.00000.wav,blues
1,blues.00001.wav,blues
2,blues.00002.wav,blues
3,blues.00003.wav,blues
4,blues.00004.wav,blues


Für Tests wird vorrübergehend eine Song ID hinzugefügt.

In [22]:
df30sec_cut['songid'] = np.arange(df30sec_cut.shape[0])
df30sec_cut = df30sec_cut[['songid', 'filename', 'label']]
df30sec_cut.to_csv('../../gtzan/labels.csv', index = False)
df30sec_cut.head(5)

Unnamed: 0,songid,filename,label
0,0,blues.00000.wav,blues
1,1,blues.00001.wav,blues
2,2,blues.00002.wav,blues
3,3,blues.00003.wav,blues
4,4,blues.00004.wav,blues


Ersellung der One-Hot-Kodierung für jedes Excerpt.

In [23]:
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
df_Y = df30sec_cut
for genre in genres:
    df_Y[genre] = 0
    
for index, row in df_Y.iterrows():
    df_Y.iloc[index, df_Y.columns.get_loc(row['label'])] = 1
    
df_Y.head(5)

Unnamed: 0,songid,filename,label,blues,classical,country,disco,hiphop,jazz,metal,pop,reggae,rock
0,0,blues.00000.wav,blues,1,0,0,0,0,0,0,0,0,0
1,1,blues.00001.wav,blues,1,0,0,0,0,0,0,0,0,0
2,2,blues.00002.wav,blues,1,0,0,0,0,0,0,0,0,0
3,3,blues.00003.wav,blues,1,0,0,0,0,0,0,0,0,0
4,4,blues.00004.wav,blues,1,0,0,0,0,0,0,0,0,0


In [24]:
# Remove label column 
df_Y = df_Y.drop(columns=['label']) 
df_Y.head(5)

y = df_Y.values[:,2:]
print(y,y.shape)
np.save('one_hot_gtzan.npy', y)

[[1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]] (1000, 10)


In [13]:
#filenames
filenames = df_Y.values[:,1:-10]
print(filenames,filenames.shape)

[['blues.00000.wav' 'blues']
 ['blues.00001.wav' 'blues']
 ['blues.00002.wav' 'blues']
 ...
 ['rock.00097.wav' 'rock']
 ['rock.00098.wav' 'rock']
 ['rock.00099.wav' 'rock']] (1000, 2)


In [17]:
# mel-spectrogram settings
sr_value = 12000
hop_length = 256
n_mels = 96
n_fft = 512

# returns the filepaths of the excerpts
def get_filespaths(filenames_df, path):
    filepath_list = []
    for file in filenames_df:
        filepath = path + file[1] + '/' + file[0]
        filepath_list.append(filepath)
    return filepath_list


# calculates and returns the log-mel-spectrogram
def extract_mel_spec(filepath_list, save_filename):
    log_mel_specs = []
    for filepath in filepath_list:
        y, sr = librosa.load(filepath, sr=sr_value, mono=True)
        #y = librosa.resample(y, sr, sr_value)
        print(filepath) 
        '''norm='slaney' is already default-value in librosa.filters.mel.
        ref=1.0 is already default-value in librosa power_to_db.'''
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr_value, n_fft=n_fft, n_mels=n_mels, hop_length=hop_length) #hop_length=hop_length n_fft=n_fft
        log_mel_spec = librosa.power_to_db(mel_spec, ref=1.0) # ref=1.0 is default
        log_mel_specs.append(log_mel_spec)
    print('all extracted')
    np.save(save_filename, log_mel_specs)
    print('specs saved')
    return log_mel_specs
 

# checks if all excerpts have the given length 
def list_durations(filepath_list, print_else_than=0): 
    index = 0
    for filepath in filepath_list:
        #load audio file
        f, sr = librosa.load(filepath, sr=None, mono=True)
        duration = f.shape[-1] / sr
        all_wanted_length = bool(True)
        if print_else_than == 0:
            print(index)
            print(duration)
        else:
            if duration < print_else_than or duration > print_else_than:
                all_wanted_length = bool(False)
                print(index)
                print(duration)
        index = index + 1
    if all_wanted_length:
        print("All songs have the wanted length.")
        

# crops the excerpts
def cut_and_export_wav(filenames_df, source_path, destination_path, startSec, endSec):
    '''
    Cuts the audio linked to given start and end in seconds.
    filenames_df is a dataframe with the audio-filenames and genre-directory. example: ['blues.00000.wav' 'blues']
    distination_path is the savingfolder.
    Cutted audio will be saved as >>filename<< + '.cut' wav.
    '''
    startMin = 0
    startSec = startSec
    endMin = 0
    endSec = endSec
    # Time to miliseconds
    startTime = startMin*60*1000+startSec*1000
    endTime = endMin*60*1000+endSec*1000    
    for file in filenames_df:
        filename = str(file[0])
        folder = file[1]
        # Opening file and extracting segment
        song = AudioSegment.from_wav(source_path + folder + '/' + filename)
        extract = song[startTime:endTime]
        # Saving
        extract.export(destination_path + folder + '/' + filename, format="wav") # gtzan/audio_cut/
        print('exported:')
        print(destination_path + folder + '/' + filename)

The Excerpts are croped to 29 seconds with cut_and_export_wav().

In [14]:
source = 'gtzan/audio/'
destination = 'gtzan/audio-cut/'
cut_and_export_wav(filenames, source, destination, 0, 29)

exported:
gtzan/audio-cut/blues/blues.00000.wav
exported:
gtzan/audio-cut/blues/blues.00001.wav
exported:
gtzan/audio-cut/blues/blues.00002.wav
exported:
gtzan/audio-cut/blues/blues.00003.wav
exported:
gtzan/audio-cut/blues/blues.00004.wav
exported:
gtzan/audio-cut/blues/blues.00005.wav
exported:
gtzan/audio-cut/blues/blues.00006.wav
exported:
gtzan/audio-cut/blues/blues.00007.wav
exported:
gtzan/audio-cut/blues/blues.00008.wav
exported:
gtzan/audio-cut/blues/blues.00009.wav
exported:
gtzan/audio-cut/blues/blues.00010.wav
exported:
gtzan/audio-cut/blues/blues.00011.wav
exported:
gtzan/audio-cut/blues/blues.00012.wav
exported:
gtzan/audio-cut/blues/blues.00013.wav
exported:
gtzan/audio-cut/blues/blues.00014.wav
exported:
gtzan/audio-cut/blues/blues.00015.wav
exported:
gtzan/audio-cut/blues/blues.00016.wav
exported:
gtzan/audio-cut/blues/blues.00017.wav
exported:
gtzan/audio-cut/blues/blues.00018.wav
exported:
gtzan/audio-cut/blues/blues.00019.wav
exported:
gtzan/audio-cut/blues/blues.00

exported:
gtzan/audio-cut/classical/classical.00076.wav
exported:
gtzan/audio-cut/classical/classical.00077.wav
exported:
gtzan/audio-cut/classical/classical.00078.wav
exported:
gtzan/audio-cut/classical/classical.00079.wav
exported:
gtzan/audio-cut/classical/classical.00080.wav
exported:
gtzan/audio-cut/classical/classical.00081.wav
exported:
gtzan/audio-cut/classical/classical.00082.wav
exported:
gtzan/audio-cut/classical/classical.00083.wav
exported:
gtzan/audio-cut/classical/classical.00084.wav
exported:
gtzan/audio-cut/classical/classical.00085.wav
exported:
gtzan/audio-cut/classical/classical.00086.wav
exported:
gtzan/audio-cut/classical/classical.00087.wav
exported:
gtzan/audio-cut/classical/classical.00088.wav
exported:
gtzan/audio-cut/classical/classical.00089.wav
exported:
gtzan/audio-cut/classical/classical.00090.wav
exported:
gtzan/audio-cut/classical/classical.00091.wav
exported:
gtzan/audio-cut/classical/classical.00092.wav
exported:
gtzan/audio-cut/classical/classical.00

exported:
gtzan/audio-cut/disco/disco.00046.wav
exported:
gtzan/audio-cut/disco/disco.00047.wav
exported:
gtzan/audio-cut/disco/disco.00048.wav
exported:
gtzan/audio-cut/disco/disco.00049.wav
exported:
gtzan/audio-cut/disco/disco.00050.wav
exported:
gtzan/audio-cut/disco/disco.00051.wav
exported:
gtzan/audio-cut/disco/disco.00052.wav
exported:
gtzan/audio-cut/disco/disco.00053.wav
exported:
gtzan/audio-cut/disco/disco.00054.wav
exported:
gtzan/audio-cut/disco/disco.00055.wav
exported:
gtzan/audio-cut/disco/disco.00056.wav
exported:
gtzan/audio-cut/disco/disco.00057.wav
exported:
gtzan/audio-cut/disco/disco.00058.wav
exported:
gtzan/audio-cut/disco/disco.00059.wav
exported:
gtzan/audio-cut/disco/disco.00060.wav
exported:
gtzan/audio-cut/disco/disco.00061.wav
exported:
gtzan/audio-cut/disco/disco.00062.wav
exported:
gtzan/audio-cut/disco/disco.00063.wav
exported:
gtzan/audio-cut/disco/disco.00064.wav
exported:
gtzan/audio-cut/disco/disco.00065.wav
exported:
gtzan/audio-cut/disco/disco.00

exported:
gtzan/audio-cut/jazz/jazz.00015.wav
exported:
gtzan/audio-cut/jazz/jazz.00016.wav
exported:
gtzan/audio-cut/jazz/jazz.00017.wav
exported:
gtzan/audio-cut/jazz/jazz.00018.wav
exported:
gtzan/audio-cut/jazz/jazz.00019.wav
exported:
gtzan/audio-cut/jazz/jazz.00020.wav
exported:
gtzan/audio-cut/jazz/jazz.00021.wav
exported:
gtzan/audio-cut/jazz/jazz.00022.wav
exported:
gtzan/audio-cut/jazz/jazz.00023.wav
exported:
gtzan/audio-cut/jazz/jazz.00024.wav
exported:
gtzan/audio-cut/jazz/jazz.00025.wav
exported:
gtzan/audio-cut/jazz/jazz.00026.wav
exported:
gtzan/audio-cut/jazz/jazz.00027.wav
exported:
gtzan/audio-cut/jazz/jazz.00028.wav
exported:
gtzan/audio-cut/jazz/jazz.00029.wav
exported:
gtzan/audio-cut/jazz/jazz.00030.wav
exported:
gtzan/audio-cut/jazz/jazz.00031.wav
exported:
gtzan/audio-cut/jazz/jazz.00032.wav
exported:
gtzan/audio-cut/jazz/jazz.00033.wav
exported:
gtzan/audio-cut/jazz/jazz.00034.wav
exported:
gtzan/audio-cut/jazz/jazz.00035.wav
exported:
gtzan/audio-cut/jazz/jaz

exported:
gtzan/audio-cut/metal/metal.00092.wav
exported:
gtzan/audio-cut/metal/metal.00093.wav
exported:
gtzan/audio-cut/metal/metal.00094.wav
exported:
gtzan/audio-cut/metal/metal.00095.wav
exported:
gtzan/audio-cut/metal/metal.00096.wav
exported:
gtzan/audio-cut/metal/metal.00097.wav
exported:
gtzan/audio-cut/metal/metal.00098.wav
exported:
gtzan/audio-cut/metal/metal.00099.wav
exported:
gtzan/audio-cut/pop/pop.00000.wav
exported:
gtzan/audio-cut/pop/pop.00001.wav
exported:
gtzan/audio-cut/pop/pop.00002.wav
exported:
gtzan/audio-cut/pop/pop.00003.wav
exported:
gtzan/audio-cut/pop/pop.00004.wav
exported:
gtzan/audio-cut/pop/pop.00005.wav
exported:
gtzan/audio-cut/pop/pop.00006.wav
exported:
gtzan/audio-cut/pop/pop.00007.wav
exported:
gtzan/audio-cut/pop/pop.00008.wav
exported:
gtzan/audio-cut/pop/pop.00009.wav
exported:
gtzan/audio-cut/pop/pop.00010.wav
exported:
gtzan/audio-cut/pop/pop.00011.wav
exported:
gtzan/audio-cut/pop/pop.00012.wav
exported:
gtzan/audio-cut/pop/pop.00013.wav


exported:
gtzan/audio-cut/reggae/reggae.00087.wav
exported:
gtzan/audio-cut/reggae/reggae.00088.wav
exported:
gtzan/audio-cut/reggae/reggae.00089.wav
exported:
gtzan/audio-cut/reggae/reggae.00090.wav
exported:
gtzan/audio-cut/reggae/reggae.00091.wav
exported:
gtzan/audio-cut/reggae/reggae.00092.wav
exported:
gtzan/audio-cut/reggae/reggae.00093.wav
exported:
gtzan/audio-cut/reggae/reggae.00094.wav
exported:
gtzan/audio-cut/reggae/reggae.00095.wav
exported:
gtzan/audio-cut/reggae/reggae.00096.wav
exported:
gtzan/audio-cut/reggae/reggae.00097.wav
exported:
gtzan/audio-cut/reggae/reggae.00098.wav
exported:
gtzan/audio-cut/reggae/reggae.00099.wav
exported:
gtzan/audio-cut/rock/rock.00000.wav
exported:
gtzan/audio-cut/rock/rock.00001.wav
exported:
gtzan/audio-cut/rock/rock.00002.wav
exported:
gtzan/audio-cut/rock/rock.00003.wav
exported:
gtzan/audio-cut/rock/rock.00004.wav
exported:
gtzan/audio-cut/rock/rock.00005.wav
exported:
gtzan/audio-cut/rock/rock.00006.wav
exported:
gtzan/audio-cut/ro

In [15]:
audiopath_cut = '../../gtzan/audio-cut/'
filepaths_cut = []
filepaths_cut = get_filespaths(filenames, audiopath_cut)
np.save('../filepaths/filepaths_audio-cut.npy', filepaths_cut)

Calculate all log-mel-spectrograms with extract_mel_spec(). The filefaphts_cut is a list and contains the path to all (cutted) excerpts. Spectrograms are saved unter the given filename (numpy object).

In [18]:
mels = extract_mel_spec(filepaths_cut, '../inputs/gtzan_29s_12khz_ref1_512_256_96.npy')

../../gtzan/audio-cut/blues/blues.00000.wav
../../gtzan/audio-cut/blues/blues.00001.wav
../../gtzan/audio-cut/blues/blues.00002.wav
../../gtzan/audio-cut/blues/blues.00003.wav
../../gtzan/audio-cut/blues/blues.00004.wav
../../gtzan/audio-cut/blues/blues.00005.wav
../../gtzan/audio-cut/blues/blues.00006.wav
../../gtzan/audio-cut/blues/blues.00007.wav
../../gtzan/audio-cut/blues/blues.00008.wav
../../gtzan/audio-cut/blues/blues.00009.wav
../../gtzan/audio-cut/blues/blues.00010.wav
../../gtzan/audio-cut/blues/blues.00011.wav
../../gtzan/audio-cut/blues/blues.00012.wav
../../gtzan/audio-cut/blues/blues.00013.wav
../../gtzan/audio-cut/blues/blues.00014.wav
../../gtzan/audio-cut/blues/blues.00015.wav
../../gtzan/audio-cut/blues/blues.00016.wav
../../gtzan/audio-cut/blues/blues.00017.wav
../../gtzan/audio-cut/blues/blues.00018.wav
../../gtzan/audio-cut/blues/blues.00019.wav
../../gtzan/audio-cut/blues/blues.00020.wav
../../gtzan/audio-cut/blues/blues.00021.wav
../../gtzan/audio-cut/blues/blue

../../gtzan/audio-cut/classical/classical.00073.wav
../../gtzan/audio-cut/classical/classical.00074.wav
../../gtzan/audio-cut/classical/classical.00075.wav
../../gtzan/audio-cut/classical/classical.00076.wav
../../gtzan/audio-cut/classical/classical.00077.wav
../../gtzan/audio-cut/classical/classical.00078.wav
../../gtzan/audio-cut/classical/classical.00079.wav
../../gtzan/audio-cut/classical/classical.00080.wav
../../gtzan/audio-cut/classical/classical.00081.wav
../../gtzan/audio-cut/classical/classical.00082.wav
../../gtzan/audio-cut/classical/classical.00083.wav
../../gtzan/audio-cut/classical/classical.00084.wav
../../gtzan/audio-cut/classical/classical.00085.wav
../../gtzan/audio-cut/classical/classical.00086.wav
../../gtzan/audio-cut/classical/classical.00087.wav
../../gtzan/audio-cut/classical/classical.00088.wav
../../gtzan/audio-cut/classical/classical.00089.wav
../../gtzan/audio-cut/classical/classical.00090.wav
../../gtzan/audio-cut/classical/classical.00091.wav
../../gtzan/

../../gtzan/audio-cut/disco/disco.00046.wav
../../gtzan/audio-cut/disco/disco.00047.wav
../../gtzan/audio-cut/disco/disco.00048.wav
../../gtzan/audio-cut/disco/disco.00049.wav
../../gtzan/audio-cut/disco/disco.00050.wav
../../gtzan/audio-cut/disco/disco.00051.wav
../../gtzan/audio-cut/disco/disco.00052.wav
../../gtzan/audio-cut/disco/disco.00053.wav
../../gtzan/audio-cut/disco/disco.00054.wav
../../gtzan/audio-cut/disco/disco.00055.wav
../../gtzan/audio-cut/disco/disco.00056.wav
../../gtzan/audio-cut/disco/disco.00057.wav
../../gtzan/audio-cut/disco/disco.00058.wav
../../gtzan/audio-cut/disco/disco.00059.wav
../../gtzan/audio-cut/disco/disco.00060.wav
../../gtzan/audio-cut/disco/disco.00061.wav
../../gtzan/audio-cut/disco/disco.00062.wav
../../gtzan/audio-cut/disco/disco.00063.wav
../../gtzan/audio-cut/disco/disco.00064.wav
../../gtzan/audio-cut/disco/disco.00065.wav
../../gtzan/audio-cut/disco/disco.00066.wav
../../gtzan/audio-cut/disco/disco.00067.wav
../../gtzan/audio-cut/disco/disc

../../gtzan/audio-cut/jazz/jazz.00029.wav
../../gtzan/audio-cut/jazz/jazz.00030.wav
../../gtzan/audio-cut/jazz/jazz.00031.wav
../../gtzan/audio-cut/jazz/jazz.00032.wav
../../gtzan/audio-cut/jazz/jazz.00033.wav
../../gtzan/audio-cut/jazz/jazz.00034.wav
../../gtzan/audio-cut/jazz/jazz.00035.wav
../../gtzan/audio-cut/jazz/jazz.00036.wav
../../gtzan/audio-cut/jazz/jazz.00037.wav
../../gtzan/audio-cut/jazz/jazz.00038.wav
../../gtzan/audio-cut/jazz/jazz.00039.wav
../../gtzan/audio-cut/jazz/jazz.00040.wav
../../gtzan/audio-cut/jazz/jazz.00041.wav
../../gtzan/audio-cut/jazz/jazz.00042.wav
../../gtzan/audio-cut/jazz/jazz.00043.wav
../../gtzan/audio-cut/jazz/jazz.00044.wav
../../gtzan/audio-cut/jazz/jazz.00045.wav
../../gtzan/audio-cut/jazz/jazz.00046.wav
../../gtzan/audio-cut/jazz/jazz.00047.wav
../../gtzan/audio-cut/jazz/jazz.00048.wav
../../gtzan/audio-cut/jazz/jazz.00049.wav
../../gtzan/audio-cut/jazz/jazz.00050.wav
../../gtzan/audio-cut/jazz/jazz.00051.wav
../../gtzan/audio-cut/jazz/jazz.00

../../gtzan/audio-cut/pop/pop.00021.wav
../../gtzan/audio-cut/pop/pop.00022.wav
../../gtzan/audio-cut/pop/pop.00023.wav
../../gtzan/audio-cut/pop/pop.00024.wav
../../gtzan/audio-cut/pop/pop.00025.wav
../../gtzan/audio-cut/pop/pop.00026.wav
../../gtzan/audio-cut/pop/pop.00027.wav
../../gtzan/audio-cut/pop/pop.00028.wav
../../gtzan/audio-cut/pop/pop.00029.wav
../../gtzan/audio-cut/pop/pop.00030.wav
../../gtzan/audio-cut/pop/pop.00031.wav
../../gtzan/audio-cut/pop/pop.00032.wav
../../gtzan/audio-cut/pop/pop.00033.wav
../../gtzan/audio-cut/pop/pop.00034.wav
../../gtzan/audio-cut/pop/pop.00035.wav
../../gtzan/audio-cut/pop/pop.00036.wav
../../gtzan/audio-cut/pop/pop.00037.wav
../../gtzan/audio-cut/pop/pop.00038.wav
../../gtzan/audio-cut/pop/pop.00039.wav
../../gtzan/audio-cut/pop/pop.00040.wav
../../gtzan/audio-cut/pop/pop.00041.wav
../../gtzan/audio-cut/pop/pop.00042.wav
../../gtzan/audio-cut/pop/pop.00043.wav
../../gtzan/audio-cut/pop/pop.00044.wav
../../gtzan/audio-cut/pop/pop.00045.wav


../../gtzan/audio-cut/rock/rock.00011.wav
../../gtzan/audio-cut/rock/rock.00012.wav
../../gtzan/audio-cut/rock/rock.00013.wav
../../gtzan/audio-cut/rock/rock.00014.wav
../../gtzan/audio-cut/rock/rock.00015.wav
../../gtzan/audio-cut/rock/rock.00016.wav
../../gtzan/audio-cut/rock/rock.00017.wav
../../gtzan/audio-cut/rock/rock.00018.wav
../../gtzan/audio-cut/rock/rock.00019.wav
../../gtzan/audio-cut/rock/rock.00020.wav
../../gtzan/audio-cut/rock/rock.00021.wav
../../gtzan/audio-cut/rock/rock.00022.wav
../../gtzan/audio-cut/rock/rock.00023.wav
../../gtzan/audio-cut/rock/rock.00024.wav
../../gtzan/audio-cut/rock/rock.00025.wav
../../gtzan/audio-cut/rock/rock.00026.wav
../../gtzan/audio-cut/rock/rock.00027.wav
../../gtzan/audio-cut/rock/rock.00028.wav
../../gtzan/audio-cut/rock/rock.00029.wav
../../gtzan/audio-cut/rock/rock.00030.wav
../../gtzan/audio-cut/rock/rock.00031.wav
../../gtzan/audio-cut/rock/rock.00032.wav
../../gtzan/audio-cut/rock/rock.00033.wav
../../gtzan/audio-cut/rock/rock.00

[array([[ -5.211255 ,  -8.881482 ,  -9.301212 , ...,  -8.59846  ,
         -15.800537 ,  -4.396285 ],
        [-11.120578 ,  -7.829511 ,  -8.52235  , ...,  -3.95694  ,
          -6.8388844,  -7.3025284],
        [-17.946602 , -13.247652 , -11.529088 , ...,  -3.747635 ,
          -7.9725847,  -5.5841312],
        ...,
        [-39.049934 , -40.370518 , -41.062843 , ..., -39.678577 ,
         -42.006706 , -32.226395 ],
        [-43.786682 , -44.311146 , -43.24292  , ..., -40.875854 ,
         -47.40652  , -32.492374 ],
        [-46.666225 , -56.663822 , -49.21058  , ..., -49.396587 ,
         -55.281647 , -33.92875  ]], dtype=float32),
 array([[-13.002632 , -11.886089 ,   0.3601417, ...,  -8.437267 ,
           5.6588097,  10.641075 ],
        [-16.74211  , -12.815641 ,   7.335582 , ...,  -1.952644 ,
           8.8895855,   6.675249 ],
        [-15.417154 , -16.954319 ,   9.287715 , ...,  -1.9567829,
           7.1324515,   0.8658931],
        ...,
        [-54.47494  , -60.832207 , -39.

In [16]:
# check if all excerpts have a length of 29 sekonds
list_durations(filepaths_cut, 29)

All songs have the wanted length.
