<a href="https://colab.research.google.com/github/Franelas5/Music-Gerre-Classification/blob/main/Music_Genre_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Set Up Kaggle API
from google.colab import files
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization, Dropout, Dense, Input
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
import random

# Upload kaggle.json for API access
files.upload()

# Configure Kaggle API
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download GTZAN dataset from Kaggle
!kaggle datasets download -d 'achgls/gtzan-music-genre' --unzip

# Step 2: Find Audio Directory
audio_fpath = ""
for root, dirs, files in os.walk("/content"):
    if any(file.endswith(".wav") for file in files):
        audio_fpath = root
        break

if not audio_fpath:
    raise FileNotFoundError("No directory containing .wav files was found.")

print("Audio files directory:", audio_fpath)

# Step 3: Generate Spectrograms
spectrogram_dir = "/content/spectrograms"
os.makedirs(spectrogram_dir, exist_ok=True)

def save_spectrogram(y, sr, save_path):
    plt.figure(figsize=(2, 2))  # Adjust size if needed
    X = librosa.stft(y)
    Xdb = librosa.amplitude_to_db(abs(X))
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
    plt.axis('off')  # Clean plot
    plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    plt.close()

# Iterate through audio files, creating spectrograms by genre
for genre_folder in os.listdir(audio_fpath):
    genre_path = os.path.join(audio_fpath, genre_folder)
    if os.path.isdir(genre_path):
        genre_spectrogram_dir = os.path.join(spectrogram_dir, genre_folder)
        os.makedirs(genre_spectrogram_dir, exist_ok=True)

        for audio_file in os.listdir(genre_path):
            if audio_file.endswith(".wav"):
                file_path = os.path.join(genre_path, audio_file)
                y, sr = librosa.load(file_path, sr=44100)
                save_path = os.path.join(genre_spectrogram_dir, f"{audio_file.split('.')[0]}.png")
                save_spectrogram(y, sr, save_path)

print("Spectrograms generated and saved by genre.")

# Step 4: Train the CNN Model
IMG_SIZE = 224
datagen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_data = datagen.flow_from_directory(
    spectrogram_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical',
    subset='training'
)

val_datagen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255
)

val_data = val_datagen.flow_from_directory(
    spectrogram_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical',
    subset='validation'
)

def create_model(input_shape, num_classes):
    base_model = ResNet50V2(include_top=False, weights='imagenet', input_shape=input_shape)
    base_model.trainable = False  # Freeze base model weights
    inputs = Input(shape=input_shape)
    x = base_model(inputs)
    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs, outputs)
    return model

model = create_model((IMG_SIZE, IMG_SIZE, 3), train_data.num_classes)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

callbacks = [
    ModelCheckpoint('best_model.keras', monitor='val_accuracy', mode='max', save_best_only=True, verbose=1),
    ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=5, min_lr=1e-6, verbose=1)
]

history = model.fit(train_data, validation_data=val_data, epochs=40, callbacks=callbacks)

# Step 5: Predict the Genre of New Audio Files
def predict_genre(file_path, model, train_data):
    y, sr = librosa.load(file_path, sr=44100)
    temp_path = "/content/temp_spectrogram.png"
    save_spectrogram(y, sr, temp_path)

    img = tf.keras.preprocessing.image.load_img(temp_path, target_size=(IMG_SIZE, IMG_SIZE))
    img_array = tf.keras.preprocessing.image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    predictions = model.predict(img_array)
    class_indices = train_data.class_indices
    class_indices = {v: k for k, v in class_indices.items()}
    predicted_genre = class_indices[np.argmax(predictions)]
    return predicted_genre

# Test predictions on random files
audio_files = [os.path.join(root, file) for root, _, files in os.walk(audio_fpath) for file in files if file.endswith(".wav")]

if audio_files:
    for _ in range(40):
        test_file_path = random.choice(audio_files)
        print(f"\nTest file: {test_file_path}")
        print("Predicted Genre:", predict_genre(test_file_path, model, train_data))
else:
    print("No audio files found.")







Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/achgls/gtzan-music-genre
License(s): GPL-2.0
Downloading gtzan-music-genre.zip to /content
 99% 1.57G/1.58G [00:19<00:00, 45.7MB/s]
100% 1.58G/1.58G [00:19<00:00, 88.7MB/s]
Audio files directory: /content/audio_data/pop
Spectrograms generated and saved by genre.
Found 800 images belonging to 10 classes.
Found 200 images belonging to 10 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94668760/94668760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/40


  self._warn_if_super_not_called()


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.1796 - loss: 2.8212 
Epoch 1: val_accuracy improved from -inf to 0.29500, saving model to best_model.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 13s/step - accuracy: 0.1856 - loss: 2.7969 - val_accuracy: 0.2950 - val_loss: 1.9744 - learning_rate: 0.0010
Epoch 2/40
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.4394 - loss: 1.7830 
Epoch 2: val_accuracy improved from 0.29500 to 0.33000, saving model to best_model.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 14s/step - accuracy: 0.4411 - loss: 1.7767 - val_accuracy: 0.3300 - val_loss: 1.8522 - learning_rate: 0.0010
Epoch 3/40
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.4634 - loss: 1.5732 
Epoch 3: val_accuracy improved from 0.33000 to 0.36000, saving model to best_model.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━