In [4]:
import numpy as np
import librosa
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow import keras
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, LeakyReLU
from keras.utils import to_categorical
from scipy.stats import loguniform, randint
import os 

# Function to extract features from an audio file
def extract_features(file_name):
    audio, sample_rate = librosa.load(file_name)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs = np.mean(mfccs.T, axis=0)
    return mfccs

# Load and process your dataset
# X should be an array of feature vectors and y should be the corresponding labels
X = []  # Feature vectors
y = []  # Labels

# Example: Assuming you have a list of audio files and their corresponding labels
audio_files = []  # Replace with your actual file names
labels = []  # Replace with your actual labels

path=r'C:\Users\praha\Desktop\DIRECTED_STUDIES_PROJECT\archive\Data\genres_original'
for folder in os.listdir(path):
    folder_path = os.path.join(path,folder)
    for files in os.listdir(folder_path):
        file_path = os.path.join(folder_path,files)
        audio_files.append(file_path.replace('\\','/'))
        labels.append(folder)



In [12]:
l = LabelEncoder()
labels_encoded = l.fit_transform(labels)

for file,label in zip(audio_files,labels_encoded):
    try:
        features = extract_features(file)
        X.append(features)
        y.append(label)
    except:
        pass

X = np.array(X)
y = np.array(y)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape data for CNN
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1, 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1, 1)

# Convert labels to categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Function to create model
def create_model(learning_rate=0.01, dropout_rate=0.3, filters=32, kernel_size=3, pool_size=2):
    model = Sequential()
    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(X_train.shape[1], 1, 1), padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
    model.add(Flatten())
    model.add(Dense(128))
    model.add(LeakyReLU(0.01))
    model.add(Dense(64))
    model.add(LeakyReLU(0.01))
    model.add(Dense(y_train.shape[1], activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

model = create_model()

# Define the parameter grid
param_dist_cnn = {
    'learning_rate': loguniform(1e-4, 1e-1),
    'dropout_rate': loguniform(0.2, 0.5),
    'filters': randint(16, 64),
    'kernel_size': randint(2, 5),
    'pool_size': randint(2, 3),
    'epochs': randint(50, 200),
    'batch_size': randint(10, 100)
}

model.fit(X_train, y_train,epochs=50,validation_data=[X_test,y_test])

  audio, sample_rate = librosa.load(file_name)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.1592 - loss: 3.3446 - val_accuracy: 0.3150 - val_loss: 1.9820
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3817 - loss: 1.7433 - val_accuracy: 0.4000 - val_loss: 1.6544
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4453 - loss: 1.4921 - val_accuracy: 0.4700 - val_loss: 1.4655
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5227 - loss: 1.3429 - val_accuracy: 0.4400 - val_loss: 1.8172
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5658 - loss: 1.3386 - val_accuracy: 0.5800 - val_loss: 1.2800
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6158 - loss: 1.1308 - val_accuracy: 0.5050 - val_loss: 1.3792
Epoch 7/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1d229c9c8b0>

In [13]:
# Get the best model
y_pred_cnn = model.predict(X_test)
y_pred = np.argmax(y_pred_cnn,axis=1)
y_test_ = np.argmax(y_test,axis=1)

print(classification_report(y_pred,y_test_))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
              precision    recall  f1-score   support

           0       0.76      0.64      0.70        25
           1       0.92      0.65      0.76        17
           2       0.67      0.59      0.63        27
           3       0.45      0.62      0.53        16
           4       0.80      0.60      0.69        20
           5       0.74      0.83      0.78        24
           6       0.89      0.84      0.86        19
           7       0.84      0.70      0.76        23
           8       0.64      0.74      0.68        19
           9       0.35      0.70      0.47        10

    accuracy                           0.69       200
   macro avg       0.71      0.69      0.69       200
weighted avg       0.73      0.69      0.70       200



In [14]:
model.save(r'C:\Users\praha\Desktop\DIRECTED_STUDIES_PROJECT\MODEL\model.h5')



In [25]:
import pickle 
with open(r'C:\Users\praha\Desktop\DIRECTED_STUDIES_PROJECT\MODEL\le.pkl',"wb") as f:
    pickle.dump(l,f)