# 1 - Music Classification - Raw Audio only

## 1. Do the necessary imports

In [1]:
import os
import librosa

## 2. Setup the important variables

In [2]:
DATASET_PATH = "Data/genres_original"

music_files, genres = [], []
# genres = sorted(os.listdir(DATASET_PATH))
genres = ["metal", "reggae"]
x, y = [], []

for genre in genres:
    genre_path = os.path.join(DATASET_PATH, genre)
    if os.path.isdir(genre_path):
        for file in os.listdir(genre_path):
            if file.endswith(".wav"):
                file_path = os.path.join(genre_path, file)
                try:
                    # Load audio file
                    signal, sr = librosa.load(file_path, sr=22050)
                    x.append(signal)
                    y.append(genres.index(genre))  # Store the genre's index instead of its name
                except Exception as e:
                    print(file_path)
                    print(e)
                    continue
    
len(x[0]), y[0]

(661504, 0)

## 3 - Preprocessing the Data

In [3]:
import numpy as np
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences

# Find the maximum length of the audio signals
max_length = max([len(signal) for signal in x])

max_length = 22050 * 3

# Pad the sequences to make sure all audio signals have the same length
x_padded = pad_sequences(x, maxlen=max_length, dtype='float32', padding='post')

# Normalize the audio signals
x_padded = x_padded / np.max(np.abs(x_padded), axis=1, keepdims=True)

# Convert the genre labels to one-hot encoding
y_encoded = to_categorical(y, num_classes=len(genres))

# Convert x_padded to numpy array
x_padded = np.array(x_padded)

# Check the shapes of your data
print(f"Shape of x_padded: {x_padded.shape}")
print(f"Shape of y_encoded: {y_encoded.shape}")

Shape of x_padded: (200, 66150)
Shape of y_encoded: (200, 2)


## 4 - Split the Dataset

In [4]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x_padded, y_encoded, test_size=0.2, random_state=42)

print(f"Shape of x_train: {x_train.shape}")
print(f"Shape of x_test: {x_test.shape}")
print(f"Shape of y_train: {y_train.shape}")
print(f"Shape of y_test: {y_test.shape}")

len(x_train[0])

Shape of x_train: (160, 66150)
Shape of x_test: (40, 66150)
Shape of y_train: (160, 2)
Shape of y_test: (40, 2)


66150

## 5 - sklearn for testing the data

In [5]:
from sklearn.svm import SVC

x_train_flattened = x_train.reshape(x_train.shape[0], -1)
x_test_flattened = x_test.reshape(x_test.shape[0], -1)
y_train_flattened = np.argmax(y_train, axis=1)
y_test_flattened = np.argmax(y_test, axis=1)

model = SVC()
model.fit(x_train_flattened, y_train_flattened)

In [6]:
y_pred = model.predict(x_test_flattened)

In [7]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test_flattened, y_pred)

accuracy

0.725

In [8]:
from keras.models import Sequential
from keras import layers
from keras.optimizers import Adam

# Define the model architecture
model = Sequential([
    # Input layer: since the data is flattened, the input shape will be the number of features
    layers.Dense(512, activation='relu', input_shape=(x_train_flattened.shape[1],)),
    layers.Dropout(0.5),  # Add dropout for regularization

    # Hidden layers
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),  # Dropout to reduce overfitting

    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),  # Dropout again

    # Output layer: number of genres
    layers.Dense(len(genres), activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Summarize the model architecture
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
model.fit(x_train_flattened, y_train_flattened, validation_data=(x_test_flattened, y_test_flattened), epochs=25, batch_size=32)

Epoch 1/25
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 236ms/step - accuracy: 0.4646 - loss: 0.9162 - val_accuracy: 0.4250 - val_loss: 0.7488
Epoch 2/25
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 191ms/step - accuracy: 0.5378 - loss: 1.8169 - val_accuracy: 0.4750 - val_loss: 0.7533
Epoch 3/25
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 200ms/step - accuracy: 0.7240 - loss: 1.7139 - val_accuracy: 0.4000 - val_loss: 0.7906
Epoch 4/25
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 198ms/step - accuracy: 0.7720 - loss: 1.6237 - val_accuracy: 0.4750 - val_loss: 0.8389
Epoch 5/25
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 188ms/step - accuracy: 0.7889 - loss: 1.7469 - val_accuracy: 0.5500 - val_loss: 0.8850
Epoch 6/25
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 183ms/step - accuracy: 0.8133 - loss: 1.3630 - val_accuracy: 0.5500 - val_loss: 0.8995
Epoch 7/25
[1m5/5[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x348473f10>

In [10]:
model.evaluate(x_test_flattened, y_test_flattened)

y_pred = model.predict(x_test_flattened)
y_pred = np.argmax(y_pred, axis=1)

accuracy = accuracy_score(y_test_flattened, y_pred)

accuracy

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.5437 - loss: 1.8970
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


0.55