In [2]:
import json
import numpy as np
from  sklearn.model_selection import train_test_split
import tensorflow.keras as keras

DATASET_PATH="data_dog_cat.json"
def load_data(dataset_path):
    with open(dataset_path, "r") as fp:
        data=json.load(fp)
        
    # convert a list into numpy arrays  
    inputs=np.array(data["mfcc"])
    targets=np.array(data["labels"])
    return inputs, targets

if __name__== "__main__":
    inputs,targets=load_data(DATASET_PATH)
    inputs_train,inputs_test,targets_train,targets_test=train_test_split(inputs,targets,test_size=0.3)
    
    model=keras.Sequential([
        keras.layers.Flatten(input_shape=(inputs.shape[1],inputs.shape[2])),
        #1st hidden layer
        keras.layers.Dense(512,activation="relu"),
        #2nd hidden layer
        keras.layers.Dense(256,activation="relu"),
        #3rd hidden layer
        keras.layers.Dense(64,activation="relu"),
        #output layer
        keras.layers.Dense(3,activation="softmax")
        
        
    ])
    
    #compile
    optimizer=keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer,
                 loss="sparse_categorical_crossentropy",metrics=["accuracy"])
    model.summary()
    
    model.fit(inputs_train,targets_train,
              validation_data=(inputs_test,targets_test),epochs=100,batch_size=128)
    
    

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 65)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 512)               33792     
_________________________________________________________________
dense_5 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_6 (Dense)              (None, 64)                16448     
_________________________________________________________________
dense_7 (Dense)              (None, 3)                 195       
Total params: 181,763
Trainable params: 181,763
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 

Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [3]:
model.save("Dog_Cat_Sound_Prediction_ANN_Model")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: Dog_Cat_Sound_Prediction_ANN_Model\assets


In [14]:
import os
import librosa
import math
import json
#DATASET_PATH="genre_dataset_reduced"
DATASET_PATH="testcase2"
JSON_PATH="datatest2.json"
SAMPLE_RATE=22050
DURATION=1
SAMPLES_PER_TRACK=SAMPLE_RATE*DURATION
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048,hop_length=512,num_segments=5):
    #dictionary
    data={"mapping":[],
         "mfcc":[],
         "labels":[]
         }
    num_samples_per_segment=int(SAMPLES_PER_TRACK / num_segments)
    expected_num_mfcc_vectors_per_segment=math.ceil(num_samples_per_segment/hop_length) #round off to higher number
    #loop through the genres
    for i, (dirpath,dirnames, filenames) in enumerate(os.walk(dataset_path)):
        #ensure we are at root level
        if dirpath is not dataset_path:
            #save the semantic label
            
            #dirpath_compopents=dirpath.split("/")
            dirpath_components=os.path.split("/") 
            #genre/blues=>["genre","blues"]
            semantic_label=dirpath_components[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing {}".format(semantic_label))
            #process files for a specific genre
            for f in filenames:
                #loading the audio file
                file_path=os.path.join(dirpath,f)
                signal,sr=librosa.load(file_path, sr=SAMPLE_RATE)
                #process segments extracting mfcc and storing the data
                for s in range(num_segments):
                    start_sample=num_samples_per_segment*s
                    finish_sample=start_sample+num_samples_per_segment
                    
                    mfcc=librosa.feature.mfcc(signal[start_sample:finish_sample],
                                             sr=sr,
                                             n_fft=n_fft,
                                             n_mfcc=n_mfcc,
                                             hop_length=hop_length)
                    mfcc=mfcc.T
                    #store mfcc for segment if it has the expected length
                    if len(mfcc)==expected_num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc.tolist())
                        data["labels"].append(i-1)
                        print("{}, segment:{}".format(file_path,s+1))
    with open(json_path,"w") as fp:
        json.dump(data,fp,indent=4)
        
if __name__=="__main__":
    save_mfcc(DATASET_PATH,JSON_PATH,num_segments=10)             


Processing 
testcase2\dog\dog_barking_92.wav, segment:1
testcase2\dog\dog_barking_92.wav, segment:2
testcase2\dog\dog_barking_92.wav, segment:3
testcase2\dog\dog_barking_92.wav, segment:4
testcase2\dog\dog_barking_92.wav, segment:5
testcase2\dog\dog_barking_92.wav, segment:6
testcase2\dog\dog_barking_92.wav, segment:7
testcase2\dog\dog_barking_92.wav, segment:8
testcase2\dog\dog_barking_92.wav, segment:9
testcase2\dog\dog_barking_92.wav, segment:10


In [15]:
import json
import numpy as np
from  sklearn.model_selection import train_test_split
import tensorflow.keras as keras

DATASET_PATH="datatest2.json"
def load_data(dataset_path):
    with open(dataset_path, "r") as fp:
        data=json.load(fp)
        
    # convert a list into numpy arrays  
    inputs=np.array(data["mfcc"])
    targets=np.array(data["labels"])
    return inputs, targets

if __name__== "__main__":
    inputs3,targets3=load_data(DATASET_PATH)
    #inputs_train,inputs_test,targets_train,targets_test=train_test_split(inputs,targets,test_size=0)
   

In [11]:
inputs2

array([[[-1.89237595e+02,  8.46444702e+01, -5.16652451e+01,
          6.52460194e+00,  1.77207413e+01,  1.13418140e+01,
         -3.55195427e+00,  1.57988987e+01,  1.88736572e+01,
          2.30899658e+01, -3.54947901e+00,  6.54856014e+00,
         -1.08661060e+01],
        [-1.75578156e+02,  8.65371552e+01, -5.43128166e+01,
          4.49637413e+00,  1.37100143e+01,  1.00806751e+01,
         -1.06029618e+00,  1.27890663e+01,  6.18375874e+00,
          2.30690575e+01,  6.14277124e+00,  1.44394436e+01,
         -8.09738064e+00],
        [-1.48806641e+02,  6.62508698e+01, -5.44973907e+01,
          8.34444141e+00,  4.37361908e+00,  2.11313801e+01,
         -4.45383310e+00,  4.42159414e+00, -6.01513433e+00,
          2.14216118e+01,  1.80483036e+01,  2.29022369e+01,
         -3.90705633e+00],
        [-8.59435349e+01,  3.88542557e+01, -6.84862671e+01,
          1.23071985e+01,  4.84583378e+00,  2.64675941e+01,
         -1.47675819e+01,  7.36727667e+00, -3.58147502e+00,
          2.3904176

In [7]:
model.predict(inputs)

array([[1.78335213e-20, 9.99895573e-01, 1.04403902e-04],
       [2.95313280e-13, 9.99998927e-01, 1.09385485e-06],
       [4.07564408e-16, 1.00000000e+00, 2.18942131e-09],
       [2.49651060e-15, 9.99999881e-01, 7.08583059e-08],
       [2.45365533e-11, 8.40907574e-01, 1.59092471e-01],
       [1.08489206e-13, 9.99883175e-01, 1.16831186e-04],
       [5.70826210e-13, 9.99977827e-01, 2.22303206e-05],
       [9.18411024e-17, 1.00000000e+00, 9.54987556e-09],
       [7.67214783e-17, 1.00000000e+00, 3.97246042e-10],
       [1.04679184e-16, 1.00000000e+00, 8.05642011e-12]], dtype=float32)

In [16]:
ynew = model.predict(inputs3)
# show the inputs and predicted outputs
for i in range(len(inputs2)):
	print("X=%s, Predicted=%s" % (inputs2[i], ynew[i]))

X=[[-189.2375946    84.64447021  -51.66524506    6.52460194   17.72074127
    11.34181404   -3.55195427   15.7988987    18.87365723   23.08996582
    -3.54947901    6.54856014  -10.86610603]
 [-175.57815552   86.53715515  -54.31281662    4.49637413   13.71001434
    10.08067513   -1.06029618   12.78906631    6.18375874   23.06905746
     6.14277124   14.43944359   -8.09738064]
 [-148.80664062   66.25086975  -54.49739075    8.34444141    4.37361908
    21.13138008   -4.4538331     4.42159414   -6.01513433   21.42161179
    18.0483036    22.90223694   -3.90705633]
 [ -85.94353485   38.85425568  -68.48626709   12.30719852    4.84583378
    26.46759415  -14.76758194    7.36727667   -3.58147502   23.90417671
    20.23849869   26.90174866  -15.95324612]
 [ -35.70110321   32.70062637  -81.17460632    5.00966644   14.87989426
    30.48334312  -17.34090233    9.96431923    2.09447289   19.10254097
    10.66889763   23.5088768   -24.56945992]], Predicted=[0.00000000e+00 1.47142655e-05 9.99985337

Cat

In [17]:
import os
import librosa
import math
import json
#DATASET_PATH="genre_dataset_reduced"
DATASET_PATH="testcase"
JSON_PATH="datatest.json"
SAMPLE_RATE=22050
DURATION=1
SAMPLES_PER_TRACK=SAMPLE_RATE*DURATION
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048,hop_length=512,num_segments=5):
    #dictionary
    data={"mapping":[],
         "mfcc":[],
         "labels":[]
         }
    num_samples_per_segment=int(SAMPLES_PER_TRACK / num_segments)
    expected_num_mfcc_vectors_per_segment=math.ceil(num_samples_per_segment/hop_length) #round off to higher number
    #loop through the genres
    for i, (dirpath,dirnames, filenames) in enumerate(os.walk(dataset_path)):
        #ensure we are at root level
        if dirpath is not dataset_path:
            #save the semantic label
            
            #dirpath_compopents=dirpath.split("/")
            dirpath_components=os.path.split("/") 
            #genre/blues=>["genre","blues"]
            semantic_label=dirpath_components[-1]
            data["mapping"].append(semantic_label)
            print("\nProcessing {}".format(semantic_label))
            #process files for a specific genre
            for f in filenames:
                #loading the audio file
                file_path=os.path.join(dirpath,f)
                signal,sr=librosa.load(file_path, sr=SAMPLE_RATE)
                #process segments extracting mfcc and storing the data
                for s in range(num_segments):
                    start_sample=num_samples_per_segment*s
                    finish_sample=start_sample+num_samples_per_segment
                    
                    mfcc=librosa.feature.mfcc(signal[start_sample:finish_sample],
                                             sr=sr,
                                             n_fft=n_fft,
                                             n_mfcc=n_mfcc,
                                             hop_length=hop_length)
                    mfcc=mfcc.T
                    #store mfcc for segment if it has the expected length
                    if len(mfcc)==expected_num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc.tolist())
                        data["labels"].append(i-1)
                        print("{}, segment:{}".format(file_path,s+1))
    with open(json_path,"w") as fp:
        json.dump(data,fp,indent=4)
        
if __name__=="__main__":
    save_mfcc(DATASET_PATH,JSON_PATH,num_segments=10)             


Processing 
testcase\cat\katzen_viele.wav, segment:1
testcase\cat\katzen_viele.wav, segment:2
testcase\cat\katzen_viele.wav, segment:3
testcase\cat\katzen_viele.wav, segment:4
testcase\cat\katzen_viele.wav, segment:5
testcase\cat\katzen_viele.wav, segment:6
testcase\cat\katzen_viele.wav, segment:7
testcase\cat\katzen_viele.wav, segment:8
testcase\cat\katzen_viele.wav, segment:9
testcase\cat\katzen_viele.wav, segment:10


In [18]:
import json
import numpy as np
from  sklearn.model_selection import train_test_split
import tensorflow.keras as keras

DATASET_PATH="datatest.json"
def load_data(dataset_path):
    with open(dataset_path, "r") as fp:
        data=json.load(fp)
        
    # convert a list into numpy arrays  
    inputs=np.array(data["mfcc"])
    targets=np.array(data["labels"])
    return inputs, targets

if __name__== "__main__":
    inputs3,targets3=load_data(DATASET_PATH)
    #inputs_train,inputs_test,targets_train,targets_test=train_test_split(inputs,targets,test_size=0)
   

The prediction label 3 is highest

In [19]:
ynew = model.predict(inputs3)
# show the inputs and predicted outputs
for i in range(len(inputs2)):
	print("X=%s, Predicted=%s" % (inputs2[i], ynew[i]))

X=[[-189.2375946    84.64447021  -51.66524506    6.52460194   17.72074127
    11.34181404   -3.55195427   15.7988987    18.87365723   23.08996582
    -3.54947901    6.54856014  -10.86610603]
 [-175.57815552   86.53715515  -54.31281662    4.49637413   13.71001434
    10.08067513   -1.06029618   12.78906631    6.18375874   23.06905746
     6.14277124   14.43944359   -8.09738064]
 [-148.80664062   66.25086975  -54.49739075    8.34444141    4.37361908
    21.13138008   -4.4538331     4.42159414   -6.01513433   21.42161179
    18.0483036    22.90223694   -3.90705633]
 [ -85.94353485   38.85425568  -68.48626709   12.30719852    4.84583378
    26.46759415  -14.76758194    7.36727667   -3.58147502   23.90417671
    20.23849869   26.90174866  -15.95324612]
 [ -35.70110321   32.70062637  -81.17460632    5.00966644   14.87989426
    30.48334312  -17.34090233    9.96431923    2.09447289   19.10254097
    10.66889763   23.5088768   -24.56945992]], Predicted=[1.7833521e-20 9.9989557e-01 1.0440390e-0