<a href="https://colab.research.google.com/github/Franelas5/Music-Gerre-Classification/blob/main/Music_Genre_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Set Up Kaggle API
from google.colab import files
files.upload()

# Move kaggle.json to the proper directory and set permissions
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the GTZAN dataset from Kaggle
!kaggle datasets download -d 'achgls/gtzan-music-genre' --unzip

# Step 2: Dynamically find the audio directory
import os

# Recursively search for the directory containing .wav files
audio_fpath = ""
for root, dirs, files in os.walk("/content"):
    if any(file.endswith(".wav") for file in files):
        audio_fpath = root
        break

if not audio_fpath:
    raise FileNotFoundError("No directory containing .wav files was found.")

print("Audio files directory:", audio_fpath)

# Step 3: Create spectrograms and save them by genre
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np

spectrogram_dir = "/content/spectrograms"
os.makedirs(spectrogram_dir, exist_ok=True)

# Iterate through audio files, creating and saving spectrograms
for genre_folder in os.listdir(audio_fpath):
    genre_path = os.path.join(audio_fpath, genre_folder)
    if os.path.isdir(genre_path):  # Only process directories
        genre_spectrogram_dir = os.path.join(spectrogram_dir, genre_folder)
        os.makedirs(genre_spectrogram_dir, exist_ok=True)

        for audio_file in os.listdir(genre_path):
            if audio_file.endswith(".wav"):
                file_path = os.path.join(genre_path, audio_file)
                y, sr = librosa.load(file_path, sr=44100)

                # Generate spectrogram
                plt.figure(figsize=(2, 2))  # Small size for efficient storage
                X = librosa.stft(y)
                Xdb = librosa.amplitude_to_db(abs(X))
                librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
                plt.axis('off')  # No axis for clean image

                # Save the spectrogram image
                save_path = os.path.join(genre_spectrogram_dir, f"{audio_file.split('.')[0]}.png")
                plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
                plt.close()

print("Spectrograms generated and saved by genre.")

# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, GlobalAveragePooling2D,
                                     BatchNormalization, Dropout, Dense, Input)
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Enhanced data augmentation for better generalization
datagen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Increased image size for better feature detection
IMG_SIZE = 224

# Training and validation generators
train_data = datagen.flow_from_directory(
    spectrogram_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical',
    subset='training'
)

val_datagen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1./255
)

val_data = val_datagen.flow_from_directory(
    spectrogram_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=64,
    class_mode='categorical',
    subset='validation'
)

def create_model(input_shape, num_classes):
    # Use ResNet50V2 as base model
    base_model = ResNet50V2(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape
    )

    # Freeze the base model layers
    base_model.trainable = False

    # Create new model on top
    inputs = Input(shape=input_shape)
    x = base_model(inputs)
    x = GlobalAveragePooling2D()(x)
    x = BatchNormalization()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    return model

# Create model
model = create_model((IMG_SIZE, IMG_SIZE, 3), train_data.num_classes)

# Simplified optimizer setup with fixed initial learning rate
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks for better training
callbacks = [
    ModelCheckpoint(
        'best_model.keras',
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_accuracy',
        factor=0.2,
        patience=5,
        min_lr=1e-6,
        verbose=1
    )
]

# Train with 30 epochs
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=40,
    callbacks=callbacks
)

# Evaluate the model
loss, accuracy = model.evaluate(val_data)
print(f"Final Validation Accuracy: {accuracy * 100:.2f}%")

# Save the final model
model.save("genre_classification_model_improved.keras")

# Prediction function
def predict_genre(img_path):
    img = tf.keras.preprocessing.image.load_img(
        img_path,
        target_size=(IMG_SIZE, IMG_SIZE)
    )
    img_array = tf.keras.preprocessing.image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    predictions = model.predict(img_array)
    predicted_genre = train_data.class_indices
    predicted_genre = {v: k for k, v in predicted_genre.items()}
    return predicted_genre[np.argmax(predictions)]







Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/achgls/gtzan-music-genre
License(s): GPL-2.0
Downloading gtzan-music-genre.zip to /content
 99% 1.56G/1.58G [00:16<00:00, 87.4MB/s]
100% 1.58G/1.58G [00:16<00:00, 102MB/s] 
Audio files directory: /content/audio_data/country
Spectrograms generated and saved by genre.
Found 800 images belonging to 10 classes.
Found 200 images belonging to 10 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94668760/94668760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/60


  self._warn_if_super_not_called()


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.1712 - loss: 2.7206
Epoch 1: val_accuracy improved from -inf to 0.28500, saving model to best_model.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 13s/step - accuracy: 0.1774 - loss: 2.6967 - val_accuracy: 0.2850 - val_loss: 1.9572 - learning_rate: 0.0010
Epoch 2/60
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.4180 - loss: 1.8887 
Epoch 2: val_accuracy did not improve from 0.28500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 13s/step - accuracy: 0.4197 - loss: 1.8802 - val_accuracy: 0.2850 - val_loss: 1.9026 - learning_rate: 0.0010
Epoch 3/60
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.4972 - loss: 1.5777 
Epoch 3: val_accuracy did not improve from 0.28500
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 13s/step - accuracy: 0.4971 - loss: 1.5763 - val_ac