In [2]:
import pandas as pd
import os
import librosa
import pickle
import librosa
import numpy as np

from sklearn.metrics import f1_score,precision_score,recall_score, accuracy_score, roc_curve, auc
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

from datetime import datetime



In [3]:
def read_serialized_file(file_path):
  """Load and return the object pickled at ``file_path``.

  Args:
    file_path: Path of the pickle file to read; it is opened as given,
      without any directory prefix being added.

  Returns:
    Whatever object ``pickle.load`` reconstructs from the file.

  Raises:
    OSError: If the file cannot be opened.
    pickle.UnpicklingError: If the content is not a valid pickle (other
      exception types are possible for malformed data).
  """
  # NOTE(security): unpickling can execute arbitrary code; only use this on
  # files produced by this notebook or another trusted source.
  # The context manager closes the handle even when pickle.load raises.
  with open(file_path, 'rb') as infile:
    return pickle.load(infile)

In [4]:
def generate_mel_spec(audio):
  """Return the dB-scaled mel spectrogram of ``audio``.

  The power mel spectrogram is computed with 128 mel bands, a 2048-sample
  FFT window and a hop of 512 samples at a sample rate of 22050, and is then
  converted with ``librosa.power_to_db``.
  """
  power_spectrogram = librosa.feature.melspectrogram(
      y=audio,
      sr=22050,
      n_mels=128,
      n_fft=2048,
      hop_length=512,
  )
  return librosa.power_to_db(power_spectrogram)

def generate_mfcc(audio):
  """Return 128 MFCCs per frame for ``audio``.

  Uses a 2048-sample FFT window and a hop of 512 samples. The sample rate is
  passed explicitly as 22050 so this extractor states the same rate as the
  mel-spectrogram and chroma extractors instead of depending on the library
  default.
  """
  mfcc = librosa.feature.mfcc(y=audio, sr=22050, n_mfcc=128, n_fft=2048, hop_length=512)
  return mfcc

def generate_chroma(audio):
  """Return the STFT chromagram of ``audio`` with 128 chroma bins.

  Computed at a sample rate of 22050 with a 2048-sample FFT window and a hop
  of 512 samples.
  """
  return librosa.feature.chroma_stft(
      y=audio,
      sr=22050,
      n_chroma=128,
      n_fft=2048,
      hop_length=512,
  )

In [5]:
def _stack_feature_channels(audio):
  """Stack the mel, MFCC and chroma features of ``audio`` along a new last axis."""
  return np.stack(
      (generate_mel_spec(audio), generate_mfcc(audio), generate_chroma(audio)),
      axis=2,
  )


def create_augmented_features_imbalanced_audio(
    audio_path="/kaggle/input/975-audio-serialized/label_enc_audio_975_norm.pkl",
    minority_labels=(3, 6),
    n_steps=1,
):
  """Extract stacked features for every clip and augment selected classes.

  Each record read from ``audio_path`` is unpacked as
  ``(raw_audio, label, file)``. For every record one
  ``[features, label, file]`` entry is produced, where ``features`` stacks the
  mel spectrogram, MFCC and chroma arrays on axis 2. Records whose label is in
  ``minority_labels`` additionally get a second entry computed from a
  pitch-shifted copy of the audio.

  Args:
    audio_path: Pickle file holding the ``(raw_audio, label, file)`` records.
    minority_labels: Labels that receive one extra pitch-shifted sample.
    n_steps: Pitch shift applied to the augmented copy, forwarded to
      ``librosa.effects.pitch_shift``.

  Returns:
    List of ``[features, label, file]`` entries. An augmented entry reuses the
    ``file`` value of the clip it was derived from.
  """
  labelled_features = []

  audio_array = read_serialized_file(audio_path)
  for raw_audio, label, file in audio_array:
    labelled_features.append([_stack_feature_channels(raw_audio), label, file])

    if label in minority_labels:
      # Oversample the selected classes with a pitch-shifted variant.
      y_shifted = librosa.effects.pitch_shift(raw_audio, sr=22050, n_steps=n_steps)
      labelled_features.append([_stack_feature_channels(y_shifted), label, file])

  print(len(labelled_features))

  return labelled_features

In [6]:
def get_all_accuracies(model, X_test, y_test):
    """Print and return per-class accuracy plus a class-balanced average.

    Each class present in ``y_test`` is weighted by
    ``smallest_class_count / class_count``; the weighted accuracy is the
    weight-normalised sum of the per-class accuracies.

    Args:
        model: Object whose ``predict(X_test)`` returns one row of class
            scores per sample; the predicted label is the argmax of each row.
        X_test: Inputs forwarded unchanged to ``model.predict``.
        y_test: True labels (array or list), one per row of ``X_test``.

    Returns:
        ``[weighted_accuracy, acc_first_class, acc_second_class, ...]`` with
        the per-class entries ordered by sorted class label.

    Raises:
        ValueError: If ``y_test`` is empty.
    """
    # Accept plain lists as well as arrays; the element-wise comparisons below
    # need an ndarray.
    y_true = np.asarray(y_test)
    class_labels, class_counts = np.unique(y_true, return_counts=True)
    class_labels = class_labels.tolist()
    class_counts = class_counts.tolist()

    # Key the weights by the actual class label (not by position) so that label
    # sets which are not exactly 0..k-1 are weighted correctly.
    min_value = min(class_counts)
    test_class_weights = {
        label: min_value / count
        for label, count in zip(class_labels, class_counts)
    }
    total_weights = sum(test_class_weights.values())

    y_pred_labels = np.argmax(model.predict(X_test), axis=1)

    classwise_accuracy = {}
    for class_label in class_labels:
        in_class = y_true == class_label
        # Fraction of this class's samples that were predicted as this class.
        classwise_accuracy[class_label] = float(
            np.mean(y_pred_labels[in_class] == class_label)
        )

    class_wise_acc_array = []
    total_acc = 0
    for class_label, accuracy in classwise_accuracy.items():
        print(f"Class {class_label}: Accuracy = {accuracy}")
        class_wise_acc_array.append(accuracy)
        total_acc += accuracy*test_class_weights[class_label]

    average_accuracy = total_acc/total_weights
    print("Weighted Accuracy = ", average_accuracy)

    return [average_accuracy] + class_wise_acc_array

In [7]:
def get_all_metrics(model, X_test, y_test, labels=None):
    """Print and return precision, recall and F1, per class and weighted.

    Args:
        model: Object exposing ``evaluate(X, y)`` and ``predict(X)``; the
            predicted label is the argmax over axis 1 of ``predict``'s output.
        X_test: Test inputs; converted with ``np.array`` before use.
        y_test: True labels; converted with ``np.array`` before use.
        labels: Class labels for which the per-class scores are reported, in
            this order. Defaults to ``[0, 1, ..., 8]``.

    Returns:
        A flat list:
        ``[weighted_precision, weighted_recall, weighted_f1]`` followed by the
        per-class precision values, then per-class recall, then per-class F1.
    """
    if labels is None:
        labels = [0, 1, 2, 3, 4, 5, 6, 7, 8]

    X_test = np.array(X_test)
    actual = np.array(y_test)

    # The returned loss/accuracy were never used here; the call is retained so
    # that whatever the model reports during evaluation is unchanged.
    model.evaluate(X_test, actual)

    predicted = np.argmax(model.predict(X_test), axis=1)

    f1 = f1_score(actual, predicted, labels=labels, average=None)
    precision = precision_score(actual, predicted, labels=labels, average=None)
    recall = recall_score(actual, predicted, labels=labels, average=None)

    average_f1 = f1_score(actual, predicted, average="weighted")
    average_precision = precision_score(actual, predicted, average="weighted")
    average_recall = recall_score(actual, predicted, average="weighted")

    print('Classwise metrics')
    print("f1 \n",f1)
    print("precision \n",precision)
    print("recall \n",recall)

    print('\nOverall metrics')
    print("f1 \n",average_f1)
    print("precision \n",average_precision)
    print("recall \n",average_recall)

    return [average_precision,average_recall,average_f1] + precision.tolist() + recall.tolist() + f1.tolist()
    

In [8]:
# Extract the stacked mel / MFCC / chroma features for every clip; labels 3 and
# 6 also receive a pitch-shifted copy (see the function definition).
stacked_labeled_features = create_augmented_features_imbalanced_audio()

  return pitch_tuning(
  return pitch_tuning(


1080


In [9]:
# Persist the extracted feature records so they can be reloaded from disk
# without recomputing the spectrograms.
stacked_features_path = '/kaggle/working/stacked_specs_975_norm.pkl'
with open(stacked_features_path, "wb") as file:
    pickle.dump(stacked_labeled_features, file)

In [10]:
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten,  BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam, SGD

from keras import callbacks
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# Default figure size applied to every matplotlib figure created afterwards.
mpl.rcParams['figure.figsize'] = (12, 10)
# Colour cycle of the active style; plot_metrics draws the training curve with
# its first entry.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

def plot_metrics(history, max_epochs=80, metrics=('loss', 'accuracy')):
  """Plot training and validation curves for the requested metrics.

  One subplot is drawn per metric on the current figure, two per row.

  Args:
    history: Object with an ``epoch`` sequence and a ``history`` mapping that
      holds, for every metric ``m``, the series ``m`` and ``'val_' + m``.
    max_epochs: Only the first ``max_epochs`` entries of each series are drawn.
    metrics: Names of the metrics to plot. ``'loss'`` gets a y-axis starting at
      0, ``'auc'`` is shown on [0.8, 1], and any other metric on [0, 1.05].
  """
  # Keep the original two-row grid for up to four metrics; add rows beyond that.
  n_rows = max(2, -(-len(metrics) // 2))
  for n, metric in enumerate(metrics):
    name = metric.replace("_"," ").capitalize()
    plt.subplot(n_rows, 2, n+1)
    epochs = history.epoch[:max_epochs]
    plt.plot(epochs, history.history[metric][:max_epochs], color=colors[0], label='Train')
    plt.plot(epochs, history.history['val_'+metric][:max_epochs],
             color='orange', label='Val')
    plt.xlabel('Epoch')
    plt.ylabel(name)
    if metric == 'loss':
      # Anchor the loss axis at zero and keep the automatic upper limit.
      plt.ylim([0, plt.ylim()[1]])
    elif metric == 'auc':
      plt.ylim([0.8,1])
    else:
      ylim_max = 1.05  # Slight headroom above 1.0
      plt.ylim([0, ylim_max])
      y_ticks = np.arange(0, ylim_max, 0.2)  # Ticks every 0.2
      plt.yticks(y_ticks)
    plt.grid(linestyle='--', linewidth=0.5, color='gray')
    plt.legend()

In [12]:
# Reload the pickled feature records and tabulate them, one row per sample:
# the stacked feature array, its class label and the source file identifier.
stacked_spects = read_serialized_file("/kaggle/working/stacked_specs_975_norm.pkl")
feature_data = pd.DataFrame(stacked_spects,columns=['feature','class','file'])

In [13]:
# Model inputs: all per-sample feature arrays combined into one array.
X = np.array(feature_data['feature'].tolist())
# Class label of each sample.
y = np.array(feature_data['class'].tolist())
# Source file identifier of each sample.
# NOTE(review): `f` is not referenced by the later cells visible here.
f = np.array(feature_data['file'].tolist())

In [14]:
# Stratified 80/20 split: 80% for training, the remaining 20% held out.
# NOTE(review): pitch-shifted samples carry the same `file` identifier as the
# clip they were derived from, and this split is not grouped by file, so a clip
# and its augmented copy can fall into different splits. Confirm this is
# acceptable.
X_train, X_, y_train, y_ = train_test_split(X,y,test_size=0.2,random_state=42, stratify=y)

# Halve the held-out part (stratified) into test and validation sets.
X_test, X_validate, y_test, y_validate = train_test_split(X_,y_,test_size=0.5,random_state=42, stratify=y_)

In [15]:
# Per-sample input shape for the network, taken from the training data rather
# than hard-coded; for the features in this notebook it is (128, 264, 3).
input_shape = X_train.shape[1:]

In [16]:
# Shapes of the train / test / validation inputs, in that order.
print(X_train.shape)
print(X_test.shape)
print(X_validate.shape)

(864, 128, 264, 3)
(108, 128, 264, 3)
(108, 128, 264, 3)


In [17]:
# Number of training samples per class, ordered by sorted class label.
_, per_class_counts = np.unique(y_train, return_counts=True)
train_values = per_class_counts.tolist()
print(train_values)

[89, 84, 131, 85, 92, 91, 83, 128, 81]


In [18]:
# Per-class training counts, computed from y_train instead of being copied by
# hand from a printed output, so they cannot drift from the actual split.
train_labels, train_counts = np.unique(y_train, return_counts=True)
disease_stats = train_counts.tolist()

# Inverse-frequency weights: the rarest class gets 1.0, larger classes less.
min_value = min(disease_stats)
weights = [min_value / value for value in disease_stats]

# Key the weights by the actual class label rather than by list position.
class_weights = dict(zip(train_labels.tolist(), weights))
sample_weight=np.array([class_weights[label] for label in y_train])

In [19]:
def create_model():
    """Build and compile the CNN classifier.

    Layer order: a 32-filter convolution with batch normalisation and pooling;
    five convolution + pooling stages with 32, 64, 64, 64 and 64 filters; two
    further convolutions with 128 and 256 filters; global average pooling; and
    a 9-unit softmax output. All convolutions use 3x3 kernels, 'same' padding
    and ReLU. The first layer takes its input shape from the notebook-level
    ``input_shape``.

    Returns:
        The model compiled with Adam (learning rate 1e-4), sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    conv_options = dict(kernel_size=(3, 3), padding='same', activation='relu')

    model_holdout = Sequential()

    # Stem: the only stage that uses batch normalisation.
    model_holdout.add(Conv2D(32, input_shape=input_shape, **conv_options))
    model_holdout.add(BatchNormalization())
    model_holdout.add(MaxPooling2D((2, 2)))

    # Convolution + 2x2 max-pooling stages.
    for n_filters in (32, 64, 64, 64, 64):
        model_holdout.add(Conv2D(n_filters, **conv_options))
        model_holdout.add(MaxPooling2D((2, 2)))

    # Final convolutions without pooling.
    for n_filters in (128, 256):
        model_holdout.add(Conv2D(n_filters, **conv_options))

    model_holdout.add(GlobalAveragePooling2D())
    model_holdout.add(Dense(9, activation='softmax'))

    model_holdout.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model_holdout

In [20]:
model_holdout = create_model()
# Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
earlystopping = callbacks.EarlyStopping(monitor ="val_loss", mode ="min", patience = 10, restore_best_weights = True, verbose=1)

# Train on the holdout training split with per-sample class-balancing weights; the validation split drives early stopping.
baseline_history = model_holdout.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_validate, y_validate), callbacks =[earlystopping], sample_weight=sample_weight)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 45: early stopping


In [21]:
# Print the layer-by-layer architecture of the trained holdout model.
model_holdout.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 264, 32)      896       
                                                                 
 batch_normalization (Batch  (None, 128, 264, 32)      128       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 64, 132, 32)       0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 64, 132, 32)       9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 32, 66, 32)        0         
 g2D)                                                            
                                                        

In [22]:
# Result of model.evaluate on the held-out test split.
print("Evaluation ",model_holdout.evaluate(x=X_test,y=y_test))
print()
# Per-class accuracy and the class-balanced weighted accuracy.
accuracies = get_all_accuracies(model_holdout, X_test, y_test)
# Per-class and weighted precision / recall / F1.
# NOTE: get_all_metrics calls model.evaluate and model.predict again internally.
all_metrics = get_all_metrics(model_holdout, X_test, y_test)

Evaluation  [0.6751536726951599, 0.8425925970077515]

Class 0: Accuracy = 0.45454545454545453
Class 1: Accuracy = 1.0
Class 2: Accuracy = 1.0
Class 3: Accuracy = 0.9
Class 4: Accuracy = 0.36363636363636365
Class 5: Accuracy = 1.0
Class 6: Accuracy = 1.0
Class 7: Accuracy = 0.9375
Class 8: Accuracy = 0.8
Weighted Accuracy =  0.8178245690296607
Classwise metrics
f1 
 [0.55555556 0.88       0.94444444 0.9        0.53333333 0.81481481
 1.         0.85714286 0.88888889]
precision 
 [0.71428571 0.78571429 0.89473684 0.9        1.         0.6875
 1.         0.78947368 1.        ]
recall 
 [0.45454545 1.         1.         0.9        0.36363636 1.
 1.         0.9375     0.8       ]

Overall metrics
f1 
 0.8266617675876935
precision 
 0.8602278265107212
recall 
 0.8425925925925926


In [None]:
# NOTE(review): this cell has no execution count and repeats the earlier
# per-class training-count cell verbatim; consider removing it.
train_values = [np.count_nonzero(y_train == i) for i in np.unique(y_train)]
print(train_values)

In [42]:
n_splits = 5

# Stratified folds, shuffled with a fixed seed so the fold membership is repeatable.
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Per-fold results, appended in fold order.
trained_models = []
all_accuracies = []
f1_scores = []
precisions = []
recalls = []

# NOTE(review): the loop rebinds the notebook-level X_train / y_train /
# sample_weight / class_weights, so the holdout split defined earlier is no
# longer available under those names after this cell has run.
# NOTE(review): folds are not grouped by source file, so a clip and its
# pitch-shifted copy can end up on opposite sides of a fold.
for fold_number, (train_index, val_index) in enumerate(kf.split(X, y), start=1):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Class-balancing sample weights computed from THIS fold's training labels
    # (the smallest class gets weight 1.0), keyed by the actual class label.
    fold_labels, fold_counts = np.unique(y_train, return_counts=True)
    train_values = fold_counts.tolist()
    min_value = min(train_values)
    weights = [min_value / value for value in train_values]
    class_weights = dict(zip(fold_labels.tolist(), weights))
    sample_weight=np.array([class_weights[label] for label in y_train])

    # Create and compile a new model for each fold
    model = create_model()

    # Train for a fixed 100 epochs; no validation data or early stopping is used here.
    model.fit(X_train, y_train, epochs=100, batch_size=32, sample_weight=sample_weight)  # Adjust epochs and batch size as needed

    # Evaluate the model on the fold's held-out data
    actual = y_val
    y_pred = model.predict(X_val)
    predicted = np.argmax(y_pred, axis=1)
    accuracy = accuracy_score(actual, predicted)

    f1 = f1_score(actual, predicted, average="weighted")
    precision = precision_score(actual, predicted, average="weighted")
    recall = recall_score(actual, predicted, average="weighted")

    all_accuracies.append(accuracy)
    trained_models.append(model)
    f1_scores.append(f1)
    precisions.append(precision)
    recalls.append(recall)

    print("fold",fold_number,"executed")

# average_accuracy = np.mean(all_accuracies)
# average_f1_score = np.mean(f1_scores)
# average_precision = np.mean(precisions)
# average_recall = np.mean(recalls)
# print(f'Average Accuracy: {average_accuracy}')
# print(f'Average F1 score: {average_f1_score}')
# print(f'Average Precision: {average_precision}')
# print(f'Average Recall: {average_recall}')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [43]:
# Mean of each score list collected during cross-validation.
average_accuracy, average_f1_score, average_precision, average_recall = (
    np.mean(fold_scores)
    for fold_scores in (all_accuracies, f1_scores, precisions, recalls)
)

print(f'Average Accuracy: {average_accuracy}')
print(f'Average F1 score: {average_f1_score}')
print(f'Average Precision: {average_precision}')
print(f'Average Recall: {average_recall}')

Average Accuracy: 0.8666666666666668
Average F1 score: 0.8619928089471502
Average Precision: 0.8778750658347552
Average Recall: 0.8666666666666668
