In [8]:
import librosa
import matplotlib.pyplot as plt
import librosa.display
import os
import pandas as pandas
from opt_einsum.backends import tensorflow
from tensorflow import keras
import tensorflow as tf

def load(file, emotion): 
    y , sr = librosa.load(file, mono=True, duration = 10)

    return extract(file, emotion, y, sr)

def extract(file, emotion, y, sr):    
    out = {
        'filename': file,
        'emotion': emotion,
        'spectral_centroid': numpy.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),
        'spectral_bandwidth': numpy.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)),
        'spectral_rolloff': numpy.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)),
        'zero_crossing_rate': numpy.mean(librosa.feature.zero_crossing_rate(y=y)),
        'rms': numpy.mean(librosa.feature.rms(y=y)),
        'flux': numpy.mean(librosa.onset.onset_strength(y=y, sr=sr)),
        'tempo': numpy.mean(librosa.beat.tempo(onset_envelope=librosa.onset.onset_strength(y=y, sr=sr), sr=sr))
    }
    
    counter = 1
    for mfcc in (librosa.feature.mfcc(y=y, sr=sr)):
        out[('mfcc%s' % counter)] = numpy.mean(mfcc)
        counter = counter + 1
            
    counter = 1
    for chroma_stft in (librosa.feature.chroma_stft(y=y, sr=sr)):
        out[('chroma_stft%s' % counter)] = numpy.mean(chroma_stft)
        counter = counter + 1
        
    counter = 1
    for chroma_cens in (librosa.feature.chroma_cens(y=y, sr=sr)):
        out[('chroma_cens%s' % counter)] = numpy.mean(chroma_cens)
        counter = counter + 1
        
    counter = 1
    for contrast in (librosa.feature.spectral_contrast(y=y, sr=sr)):
        out[('contrast%s' % counter)] = numpy.mean(contrast)
        counter = counter + 1
        
    return out

def loadR(file): 
    y , sr = librosa.load(file, mono=True, duration = 10)

    return extractR(y, sr)

def extractR(y, sr):
    out = {
        'spectral_centroid': numpy.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),
        'spectral_bandwidth': numpy.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)),
        'spectral_rolloff': numpy.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)),
        'zero_crossing_rate': numpy.mean(librosa.feature.zero_crossing_rate(y=y)),
        'rms': numpy.mean(librosa.feature.rms(y=y)),
        'flux': numpy.mean(librosa.onset.onset_strength(y=y, sr=sr)),
        'tempo': numpy.mean(librosa.beat.tempo(onset_envelope=librosa.onset.onset_strength(y=y, sr=sr), sr=sr))
    }
    
    counter = 1
    for mfcc in (librosa.feature.mfcc(y=y, sr=sr)):
        out[('mfcc%s' % counter)] = numpy.mean(mfcc)
        counter = counter + 1
            
    counter = 1
    for chroma_stft in (librosa.feature.chroma_stft(y=y, sr=sr)):
        out[('chroma_stft%s' % counter)] = numpy.mean(chroma_stft)
        counter = counter + 1
        
    counter = 1
    for chroma_cens in (librosa.feature.chroma_cens(y=y, sr=sr)):
        out[('chroma_cens%s' % counter)] = numpy.mean(chroma_cens)
        counter = counter + 1
        
    counter = 1
    for contrast in (librosa.feature.spectral_contrast(y=y, sr=sr)):
        out[('contrast%s' % counter)] = numpy.mean(contrast)
        counter = counter + 1
        
    return out


loaded = True
datafile = './data.json'

import json
import numpy as np
class NumpyEncoder(json.JSONEncoder):
    """ Special json encoder for numpy types """
    def default(self, obj):
        if isinstance(obj, (np.int_, np.intc, np.intp, np.int8,
            np.int16, np.int32, np.int64, np.uint8,
            np.uint16, np.uint32, np.uint64)):
            return int(obj)
        elif isinstance(obj, (np.float_, np.float16, np.float32, 
            np.float64)):
            return float(obj)
        elif isinstance(obj,(np.ndarray,)): #### This is the fix
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)
        
if not loaded:
    tracks = [];
    basedir = './emotions'
    
    for emotion in os.listdir(basedir):   
        child = (basedir + '/%s') % (emotion)
        
        for audio in os.listdir(child):
            print(len(tracks))
            #if len(tracks) < 15:
            tracks.append(load(child + '/' + audio, emotion))
    
    with open(datafile, 'w') as file_out:
        json.dump(tracks , file_out, cls=NumpyEncoder)
else:
    data = pandas.read_json(datafile)
    #data = data.head(450)
    data = data.drop(['filename'],axis=1)
    
    # Preprocessing
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
    from sklearn.utils import shuffle
    data = shuffle(data)
    
    import numpy
    genre_list = data.iloc[:, 0].replace({'Angry' : 'Sad', 'Relaxed': 'Happy'})
    print(genre_list)
    encoder = LabelEncoder()
    y = encoder.fit_transform(genre_list)
    
    scaler = StandardScaler()
    X = scaler.fit_transform(numpy.array(data.iloc[:, 1:], dtype = float))
     
    from keras import models
    from keras import layers
    
    tf.keras.backend.clear_session()
    
    model = models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(X.shape[1],)),
        layers.Dense(2, activation='softmax')
    ]);
    
    model.compile(optimizer=keras.optimizers.Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
    
    history = model.fit(X,
                    y,
                    batch_size=900,
                    epochs=10 , verbose=True)
    results = model.evaluate(X, y, batch_size=32)
    print('test loss, test acc:', results)

   ## results = model.evaluate(X_test, y_test)  
    
    #print('Training results:')
   # print(results)
   # print('-----')
    
    
    #testData = pandas.DataFrame([loadR('./test.mp3')])
    files = ["fast car.mp3", "sultans of swing.mp3", "relaxed.mp3", "happy.mp3", "ph happy.mp3"]
    
    
    
    def do(file):
        rawRestData = loadR('./%s' % file)
        #print(rawRestData)
        
        testData = pandas.DataFrame.from_dict(rawRestData, orient='index')
        scaler = StandardScaler()
        NEWDATA = testData.T
                
        DATA_NO_EMOTION = data.iloc[:, 1:]
        DATA_NO_EMOTION = DATA_NO_EMOTION.append(NEWDATA)
       
        XNEW = numpy.array(DATA_NO_EMOTION, dtype=float)
        
        XNEW = scaler.fit_transform(XNEW)
        
        #print("XNEW, FIRST")
        #print(XNEW[0])
        #print("XNEW, LAST")
        #print(XNEW[-1])

        XT = numpy.array([XNEW[-1]])#scaler.transform(numpy.array(trans))
        #print("X:")
        #print(X_test[:1])
        #print('----------')
        #print(X_test[:1]) 
        predictions = model.predict(XT)
       
        #moods = ["Angry", "Happy", "Relaxed", "Sad"]
        
        moods = ["Sad", "Happy"]
        def toMood(int):
            return moods[int]
            
        def mood(prediction):
            return toMood(numpy.argmax(prediction))
        
        for prediction in predictions:
            print("Got mood: %s with accuracy %s" % (mood(prediction), prediction))
    
    for file in files:
        print("Song: %s" % file)
        do(file)

839      Sad
293    Happy
115      Sad
25       Sad
344    Happy
       ...  
592    Happy
228    Happy
526    Happy
838      Sad
657    Happy
Name: emotion, Length: 900, dtype: object
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
test loss, test acc: [0.6051529150538975, 0.6433333158493042]
Song: fast car.mp3
Got mood: Sad with accuracy [0.7785811  0.22141889]
Song: sultans of swing.mp3
Got mood: Sad with accuracy [0.74598384 0.25401607]
Song: relaxed.mp3
Got mood: Happy with accuracy [0.26606715 0.7339329 ]
Song: happy.mp3
Got mood: Sad with accuracy [0.5713638  0.42863616]
Song: ph happy.mp3
Got mood: Sad with accuracy [0.6939863 0.3060137]


  hop_length=hop_length))
  hop_length=hop_length))
  hop_length=hop_length))
  hop_length=hop_length))
  hop_length=hop_length))
