# Guitar Image Classification

### This notebook is part of the AiLab School AI course. It contains the code to train a model that classifies guitar images, as part of the final project of the course.

The dataset used in this project is derived from the publication *Fuzzy Edge-Detection as a Preprocessing Layer in Deep Neural Networks for Guitar Classification* and contains images of 6 different guitar types:
<li>Acoustic</li>
<li>Double Cut</li>
<li>Single Cut</li>
<li>S-Style</li>
<li>T-Style</li>
<li>Ukulele</li>
The dataset was created from a web search and contains 900 images of each guitar type. The images were resized to 224x224 pixels.

In [None]:
# Standard library imports
import datetime
import os

# Third-party imports
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Tensorflow imports for CNN model
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Project-local helpers
from utils import ProcessingUtils

In [None]:
# check if gpus are available to tensorflow
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
print("Num CPUs Available: ", len(tf.config.experimental.list_physical_devices('CPU')))

print('='*90)
# check gpus specs with nvidia-smi
!nvidia-smi

In [None]:
# Dataset location and label metadata
DATA_PATH = 'Y://Datasets//CGD//CGD_no_split//'
# Maps the integer label to a human-readable class name (used for plots)
CLASES = {0: 'Acoustic', 1: 'Double_cut', 2: 'Single_cut', 3: 'S_style', 4: 'T_style', 5: 'Ukulele'}
# The loader receives the class identifiers as strings: '0' ... '5'
# (presumably the per-class folder names under DATA_PATH -- confirm in utils)
class_idx = [str(label_id) for label_id in range(6)]

# Alternative: load at 224x224 instead of 150x150
# X,y = ProcessingUtils.load_dataset(path=DATA_PATH, classes=class_idx, img_size=(224, 224), shuffle=True, seed=1313, verbose=True)
X, y = ProcessingUtils.load_dataset(
    path=DATA_PATH,
    classes=class_idx,
    img_size=(150, 150),
    shuffle=True,
    seed=1313,
    verbose=True,
)

### Data analysis

In [None]:
# Preview the loaded data with n=5, using CLASES to translate labels into names
# (presumably displays example images per class -- confirm in utils.ProcessingUtils)
ProcessingUtils.sample_dataset(X, y, CLASES, n=5)

#### Observations
As we can see, the dataset contains noise (images that contain items besides guitars) and images in which the guitar is not centered. This can be a problem for model training.
The dataset is balanced, contains 900 images of each guitar type.
The guitar styles (categories) share similarities with one another: for example, the acoustic guitar is similar to the ukulele, and the double cut is similar to the single cut, T-Style and S-Style. This may present a problem for model training.


#### Data Preprocessing

In [None]:
from utils import ProcessingUtils

In [None]:
# Normalize the dataset
# NOTE(review): X is overwritten with its normalized version, so re-running this
# cell passes already-normalized data through normalize_data again. Re-run the
# loading cell first if this cell has to be executed a second time.
# The exact scaling is defined in utils.ProcessingUtils.normalize_data.
X = ProcessingUtils.normalize_data(X)

In [None]:
# Split the dataset into 70% train / 20% test / 10% validation with a fixed seed.
# Note the return order: the test arrays come before the validation arrays.
X_train, X_test, X_val, y_train, y_test, y_val = ProcessingUtils.split_data(X, y, train_size=0.7, test_size=0.2, val_size=0.1, seed=1313)


In [None]:
# Print the shape of every split, data first and labels second
shape_report = (
    ('Train data shape:', X_train),
    ('Train labels shape:', y_train),
    ('Test data shape:', X_test),
    ('Test labels shape:', y_test),
    ('Val data shape:', X_val),
    ('Val labels shape:', y_val),
)
for caption, array in shape_report:
    print(caption, array.shape)

## Model creation

In [None]:
# Create convolution blocks
class ConvBlock(tf.keras.layers.Layer):
    """Convolution block: Conv2D -> MaxPooling2D -> BatchNormalization.

    Args:
        filters: Number of filters of the convolution.
        kernel_size: Kernel size of the convolution.
        strides: Strides of the convolution.
        padding: Padding mode of the convolution.
        pool_size: Window size of the max-pooling layer.
        pool_strides: Strides of the max-pooling layer.
        pool_padding: Padding mode of the max-pooling layer.
        activation: Activation applied by the convolution.
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``trainable``,
            ``dtype``). They must be accepted here because ``from_config``
            passes them back when a saved model is rebuilt.
    """

    def __init__(self, filters, kernel_size, strides, padding, pool_size, pool_strides, pool_padding, activation, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can serialize them.
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.padding = padding
        self.pool_size = pool_size
        self.pool_strides = pool_strides
        self.pool_padding = pool_padding
        self.activation = activation

        self.conv = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, activation=activation)
        self.pool = MaxPooling2D(pool_size=pool_size, strides=pool_strides, padding=pool_padding)
        self.batch_norm = BatchNormalization()

    def call(self, inputs, training=None):
        """Apply convolution, pooling and batch normalization to ``inputs``.

        ``training`` is forwarded to BatchNormalization so that it uses batch
        statistics while training and moving averages at inference time.
        """
        x = self.conv(inputs)
        x = self.pool(x)
        x = self.batch_norm(x, training=training)
        return x

    def get_config(self):
        """Return the base layer config extended with every constructor argument."""
        config = super().get_config()
        config.update({
            'filters': self.filters,
            'kernel_size': self.kernel_size,
            'strides': self.strides,
            'padding': self.padding,
            'pool_size': self.pool_size,
            'pool_strides': self.pool_strides,
            'pool_padding': self.pool_padding,
            # Stored as passed; a string identifier such as 'relu' is expected.
            'activation': self.activation,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the block from a dictionary produced by ``get_config``."""
        return cls(**config)
    
    
    
# Create the model
# Four convolution blocks (64, 64, 128, 128 filters) that share every other
# setting, followed by dropout and a dense classification head.
model = Sequential([
    *[
        ConvBlock(filters=n_filters, kernel_size=(3, 3), strides=(1, 1), padding='same',
                  pool_size=(2, 2), pool_strides=(2, 2), pool_padding='valid', activation='relu')
        for n_filters in (64, 64, 128, 128)
    ],
    Dropout(0.75),
    Flatten(),
    Dense(512, activation='relu'),
    # One unit per class. Softmax (not sigmoid) because each image has exactly
    # one label and sparse_categorical_crossentropy expects a probability
    # distribution over the classes.
    Dense(len(CLASES), activation='softmax')
])

In [None]:
# Configure training: Adam optimizer, integer-label cross-entropy, accuracy metric
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Optional: TensorBoard logging. Uncomment the two lines below and add
# `tensorboard_callback` to the `callbacks` list of model.fit.
# log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Early stopping: stop once val_loss has not improved for 15 epochs and
# restore the weights of the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=2, mode='min', restore_best_weights=True)

# Train the model
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val),
    verbose=2,
    callbacks=[early_stopping],
)

In [None]:
# Score the trained model on the held-out test split
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('Test accuracy:', test_acc)

# Confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Predicted class = index of the highest score in each output row
y_pred = np.argmax(model.predict(X_test), axis=1)

# Divide every row by its total so each row sums to 1
cm = confusion_matrix(y_test, y_pred)
cm = cm.astype('float') / cm.sum(axis=1, keepdims=True)

sns.heatmap(
    cm,
    annot=True,
    cmap='Blues',
    xticklabels=CLASES.values(),
    yticklabels=CLASES.values(),
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()


In [None]:
# Plot the accuracy and loss curves
# Accuracy and loss live on different scales, so each gets its own axes with
# its own y-label instead of sharing one axis labelled "Accuracy".
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 8))

ax_acc.plot(history.history['accuracy'], label='Training Accuracy')
ax_acc.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Accuracy')
ax_acc.legend()

ax_loss.plot(history.history['loss'], label='Training Loss')
ax_loss.plot(history.history['val_loss'], label='Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Loss')
ax_loss.legend()

fig.suptitle('Accuracy and Loss Curves')
plt.show()

In [None]:
# Save the model
# Saving fails when the target directory is missing, so create it first.
os.makedirs('./models', exist_ok=True)
model.save('./models/base_model.h5')

In [None]:
# Reset the Keras session, then drop the large objects that are no longer needed.
# The train/test/val splits are kept for the hyperparameter search.
tf.keras.backend.clear_session()
del model, X, y, history

### Hyperparameter tuning
Utilize hyperopt to find the best hyperparameters for the model.

In [None]:
# hyperopt and mlflow imports
import mlflow
import mlflow.tensorflow
import mlflow.keras
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from hyperopt.pyll import scope
from hyperopt.pyll.stochastic import sample
from hyperopt import space_eval
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# All runs of the search are grouped under this MLflow experiment
mlflow.set_experiment("Guitar Classification")

# Search space: the four convolution blocks draw from the same filter options
space = {
    conv_key: hp.choice(conv_key, [32, 64, 128, 256])
    for conv_key in ('conv1_filters', 'conv2_filters', 'conv3_filters', 'conv4_filters')
}
# Remaining hyperparameters: regularization, head size and training settings
space.update({
    'dropout': hp.uniform('dropout', 0.1, 0.9),
    'dense': hp.choice('dense', [256, 512, 1024]),
    'batch_size': hp.choice('batch_size', [32, 64, 128]),
    'epochs': hp.choice('epochs', [50, 100, 150]),
    'learning_rate': hp.choice('learning_rate', [0.0001, 0.001, 0.01, 0.1]),
})

# define the objective function: hyperopt minimizes the validation loss, while
# test-set loss, accuracy, f1, precision and recall are logged to MLflow
def _build_model(params):
    """Build and compile the CNN described by the hyperparameter dict ``params``."""
    model = Sequential([
        *[
            ConvBlock(filters=params[conv_key], kernel_size=(3, 3), strides=(1, 1), padding='same',
                      pool_size=(2, 2), pool_strides=(2, 2), pool_padding='valid', activation='relu')
            for conv_key in ('conv1_filters', 'conv2_filters', 'conv3_filters', 'conv4_filters')
        ],
        Dropout(params['dropout']),
        Flatten(),
        Dense(params['dense'], activation='relu'),
        # Softmax: single-label multi-class output, as expected by
        # sparse_categorical_crossentropy.
        Dense(len(CLASES), activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=params['learning_rate']),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


def objective(space):
    """Train one model with the sampled hyperparameters and report its loss.

    Args:
        space: Dict of concrete hyperparameter values sampled by hyperopt
            (conv1..conv4 filters, dropout, dense, batch_size, epochs,
            learning_rate).

    Returns:
        Dict with ``loss`` (validation loss, the value hyperopt minimizes) and
        ``status``. The trained model is not returned: it is logged to MLflow,
        and keeping every model inside ``Trials`` would hold all of them in
        memory for the whole search.
    """
    with mlflow.start_run():
        # Hyperparameters are run inputs, so they are logged as params.
        mlflow.log_params(space)

        model = _build_model(space)
        # A fresh callback per run keeps the objective self-contained.
        stopper = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=2, mode='min', restore_best_weights=True)
        # Keep `model` bound to the Keras model: fit() returns a History object.
        model.fit(
            X_train,
            y_train,
            epochs=space['epochs'],
            batch_size=space['batch_size'],
            validation_data=(X_val, y_val),
            verbose=0,
            callbacks=[stopper],
        )

        # Model selection uses the validation split so the test split stays
        # untouched by the search.
        val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
        mlflow.log_metric('val_loss', val_loss)
        mlflow.log_metric('val_accuracy', val_accuracy)

        # Test metrics are logged for reporting only.
        loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
        mlflow.log_metric('loss', loss)
        mlflow.log_metric('accuracy', accuracy)

        y_pred = np.argmax(model.predict(X_test, verbose=0), axis=1)
        mlflow.log_metric('f1', f1_score(y_test, y_pred, average='weighted'))
        mlflow.log_metric('precision', precision_score(y_test, y_pred, average='weighted'))
        mlflow.log_metric('recall', recall_score(y_test, y_pred, average='weighted'))

        mlflow.keras.log_model(model, 'model')
        return {'loss': val_loss, 'status': STATUS_OK}
    
# Trials collects the result of every evaluation made by fmin
trials = Trials()

# Run the TPE search for 500 evaluations of the objective
best = fmin(objective, space, algo=tpe.suggest, max_evals=500, trials=trials)

# Translate fmin's result back into concrete hyperparameter values
best_params = space_eval(space, best)

# Reference table of the values explored by the search. It mirrors `space`;
# nothing in this notebook section reads it when building the model.
hyper_params_dict = {
    'conv1_filters': [32, 64, 128, 256],
    'conv2_filters': [32, 64, 128, 256],
    'conv3_filters': [32, 64, 128, 256],
    'conv4_filters': [32, 64, 128, 256],
    # Continuous interval (low, high), matching hp.uniform('dropout', 0.1, 0.9).
    # range() only accepts integers, so range(0.1, 0.9) raised a TypeError.
    'dropout': (0.1, 0.9),
    'dense': [256, 512, 1024],
    'batch_size': [32, 64, 128],
    'epochs': [50, 100, 150],
    'learning_rate': [0.0001, 0.001, 0.01, 0.1]
}


# create the best model from the hyperparameters selected by the search
best_model = Sequential([
    *[
        ConvBlock(filters=best_params[conv_key], kernel_size=(3, 3), strides=(1, 1), padding='same',
                  pool_size=(2, 2), pool_strides=(2, 2), pool_padding='valid', activation='relu')
        for conv_key in ('conv1_filters', 'conv2_filters', 'conv3_filters', 'conv4_filters')
    ],
    Dropout(best_params['dropout']),
    Flatten(),
    Dense(best_params['dense'], activation='relu'),
    # Softmax: single-label multi-class output, as expected by
    # sparse_categorical_crossentropy.
    Dense(len(CLASES), activation='softmax')
])

# compile the best model
best_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# train the best model with the tuned epochs and batch size
history = best_model.fit(
    X_train,
    y_train,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
    validation_data=(X_val, y_val),
    verbose=2,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=2, mode='min', restore_best_weights=True)],
)

# evaluate the best model
loss, accuracy = best_model.evaluate(X_test, y_test)
y_pred = best_model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
f1 = f1_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
print('Test loss:', loss)
print('Test accuracy:', accuracy)
print('Weighted F1:', f1)
print('Weighted precision:', precision)
print('Weighted recall:', recall)

# seaborn confusion matrix, each row divided by its total
cm = confusion_matrix(y_test, y_pred)
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(10, 10))
# Use the class names (not the numeric keys) as tick labels
labels = list(CLASES.values())
sns.heatmap(cm, annot=True, square=True, cmap='Blues', xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# save the best model with timestamp
# `datetime` is imported as a module, so the class is datetime.datetime
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
os.makedirs('./models', exist_ok=True)
best_model.save(f'./models/{timestamp}_best_model.h5')


