In [None]:
import ast

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras_tuner import RandomSearch, HyperParameters, BayesianOptimization
from sklearn.metrics import ConfusionMatrixDisplay, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import models, layers

In [None]:
# Load the metadata tables for the training and test sets.
train_csv = pd.read_csv('training_data.csv')
test_csv = pd.read_csv('test_data.csv')

labels = train_csv['instrument'].unique()

# Class index = position of the label in order of first appearance in the training CSV.
label_encoder = {label: index for index, label in enumerate(labels)}

# The test CSV misspells 'Sound_Guitar' as 'Sound_Guiatr'. Alias the typo to the
# real guitar class rather than assuming guitar always gets index 0.
label_encoder['Sound_Guiatr'] = label_encoder['Sound_Guitar']

print(labels)
train_csv['instrument'].value_counts()

In [None]:
# Sanity check: the metadata loaded and a stored mel spectrogram still renders.
# A bare expression in the middle of a cell is not rendered, so display it explicitly.
display(train_csv.head())

# The spectrogram is stored flattened in a text file; restore the (128, 44) layout.
flat_mel_spec = np.loadtxt('./train_mel_spec/00.txt')
mel_spec = flat_mel_spec.reshape(128, 44)

plt.figure(figsize=(14, 5))
librosa.display.specshow(mel_spec, sr=22050, x_axis='time', y_axis='mel')
plt.title('pls work')
plt.colorbar()

In [None]:
# Preparing training data.

# One frame per instrument class; each variable name matches the class it holds.
guitar_df = train_csv[train_csv['instrument'] == 'Sound_Guitar']
drum_df = train_csv[train_csv['instrument'] == 'Sound_Drum']
violin_df = train_csv[train_csv['instrument'] == 'Sound_Violin']
piano_df = train_csv[train_csv['instrument'] == 'Sound_Piano']

# Take at most 750 examples from each class for initial training.
SAMPLES_PER_CLASS = 750
train_mel_spec_refs = []
labels_combind_train_validation = []
for class_df in (guitar_df, drum_df, violin_df, piano_df):
    train_mel_spec_refs.extend(class_df['mel spec ref'].tolist()[:SAMPLES_PER_CLASS])
    labels_combind_train_validation.extend(class_df['instrument'].tolist()[:SAMPLES_PER_CLASS])

# Load each referenced flattened mel spectrogram and restore its (128, 44) shape.
data_combind_train_validation = [
    np.loadtxt(f'./train_mel_spec/{ref}').reshape((128, 44))
    for ref in train_mel_spec_refs
]

# Replace the string labels with their integer class indices (0-3 for four classes).
labels_combind_train_validation = [
    label_encoder[name] for name in labels_combind_train_validation
]
    

In [None]:
# Visual check that loading and label encoding worked: draw the first training
# example and use its encoded label as the title.
first_spec = data_combind_train_validation[0]
first_label = labels_combind_train_validation[0]

plt.figure(figsize=(14, 5))
librosa.display.specshow(first_spec, sr=22050, x_axis='time', y_axis='mel')
plt.title(first_label)
plt.colorbar()

In [None]:
# Number of test rows per instrument label.
test_instruments = test_csv['instrument']
test_instruments.value_counts()

In [None]:
# The test CSV misspells the guitar label as 'Sound_Guiatr'. Match both spellings
# so guitar rows are still picked up if the file is corrected later.
guitar_df = test_csv[test_csv['instrument'].isin(['Sound_Guiatr', 'Sound_Guitar'])]
drum_df = test_csv[test_csv['instrument'] == 'Sound_Drum']
violin_df = test_csv[test_csv['instrument'] == 'Sound_Violin']
piano_df = test_csv[test_csv['instrument'] == 'Sound_Piano']

# Collect every test example, class by class (no per-class cap for the test set).
test_mel_spec_refs = []
labels_test = []
for class_df in (guitar_df, drum_df, violin_df, piano_df):
    test_mel_spec_refs.extend(class_df['mel spec ref'].tolist())
    labels_test.extend(class_df['instrument'].tolist())

# Load each referenced flattened mel spectrogram and restore its (128, 44) shape.
data_test = [
    np.loadtxt(f'./test_mel_spec/{ref}').reshape((128, 44))
    for ref in test_mel_spec_refs
]

# Replace the string labels with their integer class indices (0-3 for four classes).
labels_test = [label_encoder[name] for name in labels_test]
    

In [None]:
def report(history, y_pred, y_true, display_labels=None):
    """Plot the training history and score predictions against the true labels.

    Parameters
    ----------
    history
        Object exposing ``epoch`` and ``history['accuracy' / 'val_accuracy']``;
        in this notebook it is the value returned by ``model.fit``.
    y_pred : array-like, shape (n_samples, n_classes)
        Per-class scores; the predicted class is the argmax of each row.
    y_true : array-like
        Either integer class indices with shape (n_samples,) or per-class rows
        (e.g. one-hot) with shape (n_samples, n_classes).
    display_labels : sequence, optional
        Tick labels for the confusion matrix, forwarded to
        ``ConfusionMatrixDisplay.from_predictions``. Defaults to the class indices.

    Returns
    -------
    None
        The function only draws the plots and prints the accuracy.
    """
    # Plot each curve explicitly so the legend, not colour order, identifies it.
    plt.plot(history.epoch, history.history["accuracy"], label='training')
    plt.plot(history.epoch, history.history['val_accuracy'], label='validation')
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.suptitle('Training (blue) and validation (orange) History')
    plt.legend()
    plt.show()

    predicted = np.argmax(y_pred, axis=1)

    # The notebook passes integer-encoded labels (1-D); argmax over axis 1 would
    # raise on those, so only reduce when the labels are given as per-class rows.
    actual = np.asarray(y_true)
    if actual.ndim > 1:
        actual = np.argmax(actual, axis=1)

    print(f'Test Accuracy {accuracy_score(actual, predicted) * 100}%')
    ConfusionMatrixDisplay.from_predictions(actual, predicted, display_labels=display_labels)

# Hold out 12.5% of the examples for validation, stratified so every class keeps
# the same proportion in both parts. The fixed seed makes the split identical on
# every run, so results from different runs are measured on the same validation set.
x_train, x_val, y_train, y_val = train_test_split(
    data_combind_train_validation,
    labels_combind_train_validation,
    test_size=0.125,
    shuffle=True,
    stratify=labels_combind_train_validation,
    random_state=42,
)

def build_model(hp: HyperParameters):
    """Build and compile the CNN for one hyperparameter trial.

    The network is three 5x5 convolutions (the first two each followed by 2x2
    max-pooling and 20% dropout), then two dense layers with dropout between
    them, and a 4-way softmax output. The width of every convolutional and
    dense layer is a tunable integer in {32, 64, 96, 128}.

    Parameters
    ----------
    hp : HyperParameters
        Hyperparameter container that supplies the tunable layer widths.

    Returns
    -------
    The compiled Sequential model (Adam optimizer, sparse categorical
    cross-entropy on probabilities, accuracy metric).
    """
    def tunable_width(name):
        # Every tunable layer shares the same search range.
        return hp.Int(name, min_value=32, max_value=128, step=32)

    kernel = (5, 5)
    pool = (2, 2)

    with tf.device('/CPU:0'):
        network = models.Sequential([
            layers.Conv2D(tunable_width('conv_1_units'), kernel, activation='relu',
                          input_shape=(128, 44, 1)),
            layers.MaxPooling2D(pool),
            layers.Dropout(0.2),
            layers.Conv2D(tunable_width('conv_2_units'), kernel, activation='relu'),
            layers.MaxPooling2D(pool),
            layers.Dropout(0.2),
            layers.Conv2D(tunable_width('conv_3_units'), kernel, activation='relu'),
            layers.Flatten(),
            layers.Dense(tunable_width('dense_1_units'), activation='relu'),
            layers.Dropout(0.2),
            layers.Dense(tunable_width('dense_2_units'), activation='relu'),
            layers.Dense(4, activation='softmax'),
        ])
        # Labels are integer class indices and the output is already softmax-ed,
        # hence the sparse loss with from_logits=False.
        network.compile(
            optimizer='adam',
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
            metrics=['accuracy'],
        )
    return network

# Bayesian optimisation over the layer widths declared in build_model:
# 4 trials, each trained twice, ranked by validation accuracy.
tuner_settings = dict(
    objective='val_accuracy',
    max_trials=4,
    executions_per_trial=2,
    directory='bayesian_optimization',
    project_name='audio_cnn',
)
tuner = BayesianOptimization(build_model, **tuner_settings)

tuner.search_space_summary()

# Run the hyperparameter search pinned to the CPU, 10 epochs per execution,
# scoring every trial on the held-out validation split.
with tf.device('/CPU:0'):
    search_features, search_targets = np.array(x_train), np.array(y_train)
    search_validation = (np.array(x_val), np.array(y_val))
    tuner.search(search_features, search_targets,
                 epochs=10, validation_data=search_validation)

tuner.results_summary()

# Pick the single best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build a fresh model from those hyperparameters and train it on the CPU with the
# same data and epoch budget that the search used.
with tf.device('/CPU:0'):
    model = tuner.hypermodel.build(best_hps)
    fit_features, fit_targets = np.array(x_train), np.array(y_train)
    fit_validation = (np.array(x_val), np.array(y_val))
    history = model.fit(fit_features, fit_targets,
                        epochs=10, validation_data=fit_validation)

# Score the retrained model on the test set and show the history / confusion matrix.
test_inputs = np.array(data_test)
y_pred = model.predict(test_inputs)
report(history, y_pred, labels_test)

# Export the trained model in the 'tf' save format.
# NOTE(review): the trailing '1/' looks like a serving-style version directory - confirm.
model.save('./models/instrument_model/1/', save_format='tf')