In [1]:
import librosa
import numpy as np
import pandas as pd
import os
from scipy import signal

In [2]:
# Base file name (no extension) of the saved model that is loaded further down.
model_name = "model"

In [3]:
# Build the feature table `test`: one row per audio file in ./music, holding the
# file name plus 28 numeric descriptors (tempo, beat count, six averaged
# spectral statistics and the per-coefficient MFCC means).
feature_names = ['tempo', 'beats', 'chroma_stft', 'rmse',
                 'spectral_centroid', 'spectral_bandwidth', 'rolloff',
                 'zero_crossing_rate'] + [f'mfcc{i}' for i in range(1, 21)]

music_dir = './music'
records = []
# Sorted so the row order does not depend on the file system's listing order.
for filename in sorted(os.listdir(music_dir)):
    songname = f'{music_dir}/{filename}'
    if not os.path.isfile(songname):
        # Sub-directories cannot be decoded as audio; skip them.
        continue

    # Only the first 30 seconds of each file are analysed, down-mixed to mono.
    y, sr = librosa.load(songname, mono=True, duration=30)

    # Tempo and beat positions both come from the onset-strength envelope.
    # (The previous separate librosa.feature.tempo call was overwritten
    # immediately by this one, so it has been removed.)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # Some librosa releases return the tempo as a one-element array rather
    # than a scalar; normalise both forms to a plain float.
    tempo = float(np.atleast_1d(tempo)[0])

    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    # Keep each row as a list of real values instead of a space-joined string
    # that is split again: a file name containing a space used to shift every
    # column and make the row assignment fail.
    row = [
        filename,
        tempo,
        float(beats.shape[0]),
        float(np.mean(chroma_stft)),
        float(np.mean(rmse)),
        float(np.mean(spec_cent)),
        float(np.mean(spec_bw)),
        float(np.mean(rolloff)),
        float(np.mean(zcr)),
    ]
    # One mean per MFCC coefficient (one row of `mfcc` per coefficient).
    row.extend(float(np.mean(e)) for e in mfcc)
    records.append(row)

# Build the frame once rather than growing it row by row; a row whose width
# does not match the 29 declared columns raises here.
test = pd.DataFrame(records, columns=['name'] + feature_names)

# Every column except 'name' is numeric; the explicit cast also gives an empty
# table (no files found) the right dtypes.
test = test.astype({column: 'float64' for column in feature_names})

In [4]:
import tensorflow as tf

In [5]:
# Display the extracted per-file feature table.
test

Unnamed: 0,name,tempo,beats,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
0,berlin.wav,95.703125,47.0,0.388258,0.152671,2609.502486,2667.328136,5814.63984,0.106817,-63.743458,...,-5.728826,2.037902,-2.214674,0.676991,-7.188995,-1.026533,-2.011793,5.966482,1.23631,3.133298
1,debu.wav,117.453835,46.0,0.296668,0.014093,941.479497,1343.324192,1727.189556,0.044004,-527.645386,...,2.305381,5.097184,4.808884,4.342441,4.101294,5.725268,7.969269,7.520432,5.266461,4.808077
2,mosh.wav,129.199219,63.0,0.428104,0.090143,2070.221209,2066.403091,4029.283969,0.11829,-160.491364,...,-2.95556,2.238296,-3.043408,4.909586,0.559986,1.023464,-4.343736,3.109872,-3.141147,1.520382
3,test.wav,129.199219,64.0,0.324389,0.228825,1725.551816,2273.978975,3826.926859,0.05706,-86.372803,...,2.01215,3.300573,-0.988298,4.484867,0.792854,4.111753,0.510068,5.315296,0.034456,1.717046
4,test2.wav,129.199219,64.0,0.324389,0.228825,1725.551816,2273.978975,3826.926859,0.05706,-86.372803,...,2.01215,3.300573,-0.988298,4.484867,0.792854,4.111753,0.510068,5.315296,0.034456,1.717046
5,vival.wav,151.999081,78.0,0.248827,0.014843,2145.908366,2137.13888,4109.57515,0.111811,-357.361176,...,-1.30025,7.062743,4.911943,5.11607,0.805621,6.653475,1.500841,4.673848,1.591335,7.203427


In [6]:
from sklearn.preprocessing import MinMaxScaler

# Numeric feature columns only; 'name' identifies the file and is not scaled.
feature_frame = test.drop('name', axis=1)

# NOTE(review): the scaler is fitted on the very files being classified, so each
# song's scaled values depend on which other files happen to be in the batch.
# Presumably the model was trained on features scaled by a scaler fitted on the
# training set; if so, that fitted scaler should be persisted and only
# `.transform` applied here. TODO confirm against the training code.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_df = pd.DataFrame(
    scaler.fit_transform(feature_frame),
    # Take the labels from the frame that was scaled instead of repeating the
    # 28 column names by hand, so the two can never drift apart.
    columns=feature_frame.columns,
    index=feature_frame.index,
)

In [7]:
# Display the min-max scaled features that are passed to the model.
scaled_df

Unnamed: 0,tempo,beats,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
0,0.0,0.03125,0.77774,0.645352,1.0,1.0,1.0,0.845551,1.0,0.0,...,0.0,0.0,0.104173,0.0,0.0,0.0,0.189389,0.647675,0.520654,0.283812
1,0.386364,0.0,0.266854,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,1.0,0.608832,0.987045,0.825723,1.0,0.87914,1.0,1.0,1.0,0.578509
2,0.595,0.53125,1.0,0.35416,0.676694,0.54613,0.56321,1.0,0.791448,0.333738,...,0.345182,0.039881,0.0,0.953485,0.68634,0.266926,0.0,0.0,0.0,0.0
3,0.595,0.5625,0.421484,1.0,0.470061,0.702909,0.513703,0.17575,0.95122,0.437865,...,0.963502,0.251286,0.25833,0.857808,0.706966,0.669047,0.394201,0.500033,0.377706,0.034605
4,0.595,0.5625,0.421484,1.0,0.470061,0.702909,0.513703,0.17575,0.95122,0.437865,...,0.963502,0.251286,0.25833,0.857808,0.706966,0.669047,0.394201,0.500033,0.377706,0.034605
5,1.0,1.0,0.0,0.003492,0.72207,0.599556,0.582854,0.912776,0.367069,0.073549,...,0.551215,1.0,1.0,1.0,0.708097,1.0,0.474667,0.354598,0.562881,1.0


In [10]:
# Restore the saved Keras model from '<model_name>.h5' (path is relative to the
# current working directory).
model_path = f'{model_name}.h5'
model = tf.keras.models.load_model(model_path)

# Score every row of scaled_df, then show the index of the largest score in
# each row, i.e. the predicted class id per file.
predicted = model.predict(scaled_df)
np.argmax(predicted, axis=1)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 330ms/step


array([0, 1, 0, 0, 0, 0], dtype=int64)

In [11]:
# Alternative ten-genre label table, currently disabled:
# mapper = {0:'blues', 1:'classical', 2:'country', 3:'disco', 4:'hiphop', 5:'jazz', 6:'metal', 7:'pop', 8:'reggae', 9:'rock'}

# Class id -> genre label, with ids assigned by position in the list.
mapper = dict(enumerate(['pop', 'classical']))

# Translate the highest-scoring class id of every prediction row into its label.
class_ids = np.argmax(predicted, axis=1)
mapped = []
for class_id in class_ids:
    mapped.append(mapper[class_id])
print(mapped)

['pop', 'classical', 'pop', 'pop', 'pop', 'pop']
