In [1]:
import numpy as np
import librosa.display, os
import matplotlib.pyplot as plt
%matplotlib inline

def create_spectrogram(audio_file, image_file):
    """Render the log-scaled mel spectrogram of an audio file to an image.

    Parameters
    ----------
    audio_file : str
        Path of the audio file handed to ``librosa.load``.
    image_file : str
        Destination path handed to ``Figure.savefig``.

    The axes are stretched to fill the whole figure (no margins), and the
    figure is always closed before returning, even if rendering or saving
    raises.
    """
    # Decode first so a file that fails to load never allocates a figure.
    y, sr = librosa.load(audio_file)
    ms = librosa.feature.melspectrogram(y=y, sr=sr)
    log_ms = librosa.power_to_db(ms, ref=np.max)  # dB relative to the peak power

    fig = plt.figure()
    try:
        ax = fig.add_subplot(1, 1, 1)
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        # Draw on this figure's axes explicitly rather than on pyplot's
        # implicit "current axes".
        librosa.display.specshow(log_ms, sr=sr, ax=ax)
        fig.savefig(image_file)
    finally:
        # Called once per clip in a long loop: never leave figures open.
        plt.close(fig)
    
def create_pngs_from_wavs(input_path, output_path):
    """Write one spectrogram PNG into ``output_path`` per audio file in ``input_path``.

    Parameters
    ----------
    input_path : str
        Directory to scan (not recursive). ``.txt`` files and entries that
        are not regular files are skipped.
    output_path : str
        Directory that receives ``<stem>.png`` for every processed file;
        created if it does not exist.
    """
    os.makedirs(output_path, exist_ok=True)

    for file_name in os.listdir(input_path):
        stem, extension = os.path.splitext(file_name)
        input_file = os.path.join(input_path, file_name)
        # Skip text files by extension, and anything that is not a plain file.
        if extension.lower() == '.txt' or not os.path.isfile(input_file):
            continue
        # Build the name from the stem so the output always ends in .png,
        # whatever the audio extension (or its letter case) was.
        output_file = os.path.join(output_path, stem + '.png')
        create_spectrogram(input_file, output_file)

In [2]:
# Render the training spectrograms: one source folder and one output folder
# per instrument code.
for instrument_dir in ('cel', 'cla', 'flu', 'gac', 'gel', 'org', 'pia', 'sax', 'tru', 'vio', 'voi'):
    create_pngs_from_wavs(
        'Dataset/IRMAS_Training_Data/{}'.format(instrument_dir),
        'Spectrograms/{}'.format(instrument_dir),
    )

In [2]:
# from keras.preprocessing import image
import keras.utils as image
import os
import matplotlib.pyplot as plt
from random import randint

# Instrument class codes; a code's position in this list is its label index.
instruments = ["cel", "cla", "flu", "gac", "gel", "org", "pia", "sax", "tru", "vio", "voi"]

# Derived from the list so the class count cannot drift from the codes above.
NUM_INSTRUMENTS = len(instruments)

# Reverse lookup: class code -> label index.
inst_dict = {code: idx for idx, code in enumerate(instruments)}

def load_images_from_path(path, x_array, y_array, index, label = None, path_label = None):
    """Load every image in ``path`` into ``x_array`` and its label row into ``y_array``.

    Rows are written in place starting at ``index``, one per directory entry.

    Parameters
    ----------
    path : str
        Directory whose entries are all loaded as images.
    x_array : array
        Destination for pixel data; each image is resized to 224x224 and
        stored in the next free row.
    y_array : array
        Destination for label rows of length ``NUM_INSTRUMENTS``.
    index : int
        First row to write.
    label : int, optional
        Class index set to 1 in every label row. Used only when
        ``path_label`` is not given.
    path_label : str, optional
        Directory holding one ``<image stem>.txt`` file per image, listing
        one instrument code per line. When given, ``label`` is ignored and a
        row may have several entries set to 1.

    Returns
    -------
    int
        The next free row, i.e. ``index`` plus the number of images loaded.
    """
    fixed_row = None
    if not path_label:
        fixed_row = [0] * NUM_INSTRUMENTS
        fixed_row[label] = 1

    # os.listdir returns entries in arbitrary order; sort so the row order is
    # reproducible from run to run.
    for file in sorted(os.listdir(path)):
        # load_img expects target_size as (height, width).
        x_array[index] = image.img_to_array(image.load_img(os.path.join(path, file), target_size=(224, 224)))

        if path_label:
            row = [0] * NUM_INSTRUMENTS
            # Strip the extension whatever its length to find the label file.
            stem = os.path.splitext(file)[0]
            with open("{}/{}.txt".format(path_label, stem), "r") as f:
                for line in f:
                    inst = line.strip()
                    # Ignore blank lines instead of failing the code lookup.
                    if inst:
                        row[inst_dict[inst]] = 1
        else:
            row = fixed_row

        y_array[index] = row
        index += 1

    return index

def show_images(images):
    """Display up to the first eight images side by side, without axis ticks.

    Each image is divided by 255 before being passed to ``imshow``.
    """
    fig, axes = plt.subplots(1, 8, figsize=(20, 20), subplot_kw={'xticks': [], 'yticks': []})

    # zip stops at the shorter input, so passing fewer than eight images no
    # longer raises IndexError; unused panels are simply left empty.
    for ax, img in zip(axes.flat, images):
        ax.imshow(img / 255)
        

In [3]:
# Count the training spectrograms up front so both arrays are allocated once.
len_train = sum(len(os.listdir('Spectrograms/{}'.format(inst))) for inst in instruments)
print(len_train)

# Keras' img_to_array returns float32 by default, so float32 buffers hold the
# same values in half the memory of NumPy's default float64.
x = np.empty(shape = (len_train, 224, 224, 3), dtype = np.float32)
y = np.empty(shape = (len_train, NUM_INSTRUMENTS), dtype = np.float32)

index = 0
for i, inst in enumerate(instruments):
    # The position of the code in `instruments` is the class index.
    index = load_images_from_path(path = 'Spectrograms/{}'.format(inst), label = i, x_array = x, y_array = y, index = index)

# np.empty leaves rows uninitialised, so make sure every row was written.
assert index == len_train, "loaded {} images, expected {}".format(index, len_train)

print(len(x))
print(len(y))

7210
7210
7210


In [None]:
# Render the validation clips into Spectrograms/TEST; the .txt files that
# live in the same folder are skipped by create_pngs_from_wavs.
create_pngs_from_wavs(
    input_path='Dataset/IRMAS_Validation_Data',
    output_path='Spectrograms/TEST',
)

In [None]:
# Spot-check every 100th training image and its label row.
for sampled in (x, y):
    print(sampled[::100])

In [4]:
# List the test directory once and size both arrays from the same count.
n_test = len(os.listdir('Spectrograms/TEST'))

# Keras' img_to_array returns float32 by default, so float32 buffers hold the
# same values in half the memory of NumPy's default float64.
x_test = np.empty(shape = (n_test, 224, 224, 3), dtype = np.float32)
y_test = np.empty(shape = (n_test, NUM_INSTRUMENTS), dtype = np.float32)

# Labels come from the per-clip .txt files next to the validation audio.
# The call stays the last expression so the cell displays the loaded count.
load_images_from_path(path = 'Spectrograms/TEST', path_label = 'Dataset/IRMAS_Validation_Data', x_array = x_test, y_array = y_test, index = 0)

2874

In [5]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Flatten, Dense

# CNN classifier: four conv + max-pool stages, then a dense head with one
# output unit per instrument class.
# NOTE(review): softmax with categorical_crossentropy assumes exactly one
# class per sample, but label rows built from a label directory can mark
# several instruments at once. Consider sigmoid + binary_crossentropy if
# multi-label prediction is intended; confirm before changing.
# NOTE(review): the arrays hold raw pixel values (show_images divides by 255
# only for display); input scaling may be worth adding. Confirm.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(1024, activation='relu'),
    # Output width follows the class list instead of a hard-coded 11.
    Dense(NUM_INSTRUMENTS, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 128)     36992     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 54, 54, 128)      0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 128)       147584    
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 26, 26, 128)      0

In [8]:
print(len(x), len(y), len(x_test), len(y_test))

# `workers` / `use_multiprocessing` only apply to generator or Sequence input;
# with in-memory arrays they have no effect, and newer Keras releases no
# longer accept them, so they are dropped here.
hist = model.fit(
    x, y,
    validation_data=(x_test, y_test),
    batch_size=10,
    epochs=20,
    shuffle=True,
)

7210 7210 2874 2874
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [11]:
# Persist the trained network under the 'Model' directory.
model.save(filepath='Model')



INFO:tensorflow:Assets written to: Model\assets


INFO:tensorflow:Assets written to: Model\assets


In [14]:
from keras.models import load_model
# Read the saved network back from the 'Model' directory.
new_model = load_model(filepath='Model')

In [22]:
# Compare the network's class scores with the stored label row for a
# hand-picked test sample.
sample_ids = [432]
for sample_idx in sample_ids:
    # predict expects a batch, so wrap the single image in a leading axis.
    single_batch = x_test[sample_idx][np.newaxis, ...]
    print(model.predict(single_batch), y_test[sample_idx])

[[2.6086919e-02 1.4914972e-02 6.3624345e-03 4.0933765e-03 6.0240128e-03
  7.3785953e-02 2.1311190e-04 3.0365810e-03 1.3265050e-03 1.9803082e-03
  8.6217576e-01]] [0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0.]
