<a href="https://colab.research.google.com/github/Dvitee/internshipTask/blob/main/mgc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Music Genre Classification using CNN

In [1]:
!pip install librosa --quiet
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from google.colab import drive


In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')
# Root folder of the audio dataset on Drive; extract_features() treats every
# sub-directory of this path as one genre and uses its name as the label.
dataset_path = '/content/drive/MyDrive/Data/genres_original'

Mounted at /content/drive


In [3]:
def extract_features(dataset_path, max_pad_len=174):
    """Build fixed-size MFCC matrices and genre labels for every clip in a dataset.

    ``dataset_path`` is expected to contain one sub-directory per genre; the
    sub-directory name becomes the label of every file inside it. Entries
    directly under ``dataset_path`` that are not directories are ignored.

    Directory entries are visited in sorted order. ``os.listdir`` returns names
    in arbitrary order, which would otherwise let the sample order (and any
    seeded train/test split built on top of it) change between runs.

    Args:
        dataset_path: Root directory holding the per-genre sub-directories.
        max_pad_len: Number of MFCC frames (columns) kept per clip. Shorter
            clips are zero-padded on the right, longer ones are truncated.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` stacks one
        ``(40, max_pad_len)`` MFCC matrix per successfully processed file and
        ``y`` holds the matching genre names. Files that raise while being
        loaded or processed are skipped with a printed warning.
    """
    X, y = [], []
    for genre in sorted(os.listdir(dataset_path)):
        genre_dir = os.path.join(dataset_path, genre)
        if not os.path.isdir(genre_dir):
            continue
        print(f"Processing: {genre}")
        for file in sorted(os.listdir(genre_dir)):
            file_path = os.path.join(genre_dir, file)
            try:
                # Only the first 30 seconds of each clip are decoded.
                audio, sr = librosa.load(file_path, duration=30)
                mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
                # Force every matrix to exactly max_pad_len columns so the
                # results can be stacked into one array: truncate, then pad.
                mfcc = mfcc[:, :max_pad_len]
                pad_width = max_pad_len - mfcc.shape[1]
                if pad_width > 0:
                    mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
                X.append(mfcc)
                y.append(genre)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole run.
                # The exception type is printed because some decoder errors
                # carry an empty message, which made the warning uninformative.
                print(f"⚠️ Skipped {file_path}: {type(e).__name__}: {e}")
    return np.array(X), np.array(y)

# Run feature extraction over the whole dataset (decodes every audio file).
X, y = extract_features(dataset_path)

# Fail fast when nothing was extracted or the arrays are misaligned; otherwise
# the problem only surfaces later as an obscure error during encoding/training.
assert len(X) > 0, f"No audio features extracted from {dataset_path!r}"
assert len(X) == len(y), "Feature/label count mismatch"

print("✅ Features shape:", X.shape)
print("✅ Labels shape:", y.shape)


Processing: blues
Processing: classical
Processing: country
Processing: disco
Processing: hiphop
Processing: jazz


  audio, sr = librosa.load(file_path, duration=30)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


⚠️ Skipped /content/drive/MyDrive/Data/genres_original/jazz/jazz.00054.wav: 
Processing: metal
Processing: pop
Processing: reggae
Processing: rock
✅ Features shape: (999, 40, 174)
✅ Labels shape: (999,)


In [4]:
# Add a trailing channel axis so each MFCC matrix is a single-channel "image"
# for Conv2D. Guarded on ndim so re-running this cell does not append a second
# channel axis to an X that was already expanded.
if X.ndim == 3:
    X = X[..., np.newaxis]

# Integer-encode the genre names, then one-hot them for categorical_crossentropy.
# `le` is kept because prediction later maps class indices back to genre names.
le = LabelEncoder()
y_encoded = to_categorical(le.fit_transform(y))

# Stratify on the genre names so the train and test splits keep the same class
# proportions; random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y
)


In [6]:
# Size the output layer from the one-hot targets instead of hard-coding 10, so
# the network still matches the labels if the dataset has a different number of
# genres (for example when a genre folder is missing or fails to load).
num_genres = y_train.shape[1]

# Two Conv2D/MaxPooling2D/Dropout stages followed by a dense classifier head.
# The input shape is taken from the training tensor that is actually fed to fit().
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D((2,2)),
    Dropout(0.3),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Dropout(0.3),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_genres, activation='softmax')  # one unit per genre
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Keep the History object so the per-epoch loss/accuracy can be inspected or
# plotted afterwards instead of only being readable from the progress log.
# NOTE(review): the test split doubles as validation data here, so val_accuracy
# is not an independent score if it is ever used for tuning -- confirm intent.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))
model.save("music_genre_model.keras")

Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 381ms/step - accuracy: 0.1455 - loss: 20.3223 - val_accuracy: 0.0750 - val_loss: 2.3025
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 298ms/step - accuracy: 0.1471 - loss: 2.2782 - val_accuracy: 0.0900 - val_loss: 2.3026
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 283ms/step - accuracy: 0.1848 - loss: 2.2254 - val_accuracy: 0.1550 - val_loss: 2.2312
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 343ms/step - accuracy: 0.2089 - loss: 2.1934 - val_accuracy: 0.1250 - val_loss: 2.2819
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 377ms/step - accuracy: 0.2568 - loss: 2.1486 - val_accuracy: 0.1550 - val_loss: 2.2459
Epoch 6/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 293ms/step - accuracy: 0.2472 - loss: 2.1149 - val_accuracy: 0.2550 - val_loss: 2.1139
Epoch 7/20
[1m25/25[0

In [7]:
from google.colab import files

def extract_mfcc(file_path, max_pad_len=174):
    """Return a fixed-width MFCC matrix for one audio file, or None on failure.

    Args:
        file_path: Path of the audio file to load (first 30 seconds only).
        max_pad_len: Number of MFCC frames (columns) in the result. Shorter
            clips are zero-padded on the right, longer ones are truncated.

    Returns:
        Array with 40 rows and ``max_pad_len`` columns, or ``None`` when
        loading or feature extraction raises (the error is printed).
    """
    try:
        signal, rate = librosa.load(file_path, duration=30)
        coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
        missing = max_pad_len - coeffs.shape[1]
        if missing > 0:
            # Too few frames: append zero columns up to the target width.
            return np.pad(coeffs, pad_width=((0, 0), (0, missing)), mode='constant')
        # Enough frames: keep only the leading max_pad_len columns.
        return coeffs[:, :max_pad_len]
    except Exception as err:
        print("❌ Error loading audio:", err)
        return None

def predict_genre(file_path, model, le):
    """Classify one audio file and report the winning genre with its score.

    Args:
        file_path: Path of the audio file to classify.
        model: Object whose ``predict`` accepts a batch of MFCC inputs and
            returns one row of class scores per input.
        le: Label encoder whose ``inverse_transform`` maps class indices back
            to genre names.

    Returns:
        ``(genre, confidence)`` for the highest-scoring class, or
        ``(None, 0.0)`` when the MFCC features could not be extracted.
    """
    features = extract_mfcc(file_path)
    if features is None:
        return None, 0.0

    # Wrap the 2-D MFCC matrix with a leading batch axis and a trailing channel axis.
    batch = features[np.newaxis, ..., np.newaxis]
    scores = model.predict(batch)
    best = np.argmax(scores)
    top_score = scores[0][best]
    label = le.inverse_transform([best])[0]
    return label, top_score

# Upload and predict
uploaded = files.upload()

# The upload result may be empty (e.g. the dialog was dismissed without picking
# a file); indexing the first key unconditionally would raise IndexError then.
file_path = next(iter(uploaded), None)
if file_path is None:
    genre, confidence = None, 0.0
else:
    genre, confidence = predict_genre(file_path, model, le)

if genre:
    print(f"🎶 Predicted Genre: {genre} ({confidence * 100:.2f}% confidence)")
else:
    print("❌ Prediction failed.")


Saving Selena Gomez - Feel Me - (SongsLover.com).mp3 to Selena Gomez - Feel Me - (SongsLover.com).mp3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
🎶 Predicted Genre: hiphop (100.00% confidence)


In [8]:
from google.colab import files

# extract_mfcc() and predict_genre() are defined once in an earlier cell of this
# notebook; reuse them here rather than re-declaring identical copies.

# Upload and predict
uploaded = files.upload()

# The upload result may be empty (e.g. the dialog was dismissed without picking
# a file); indexing the first key unconditionally would raise IndexError then.
file_path = next(iter(uploaded), None)
if file_path is None:
    genre, confidence = None, 0.0
else:
    genre, confidence = predict_genre(file_path, model, le)

if genre:
    print(f"🎶 Predicted Genre: {genre} ({confidence * 100:.2f}% confidence)")
else:
    print("❌ Prediction failed.")


Saving Lagdi hain thaai _ Kangana Ranaut _ Twirlwithjazz _ bridesmaids (320 kbps).mp3 to Lagdi hain thaai _ Kangana Ranaut _ Twirlwithjazz _ bridesmaids (320 kbps).mp3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
🎶 Predicted Genre: classical (87.08% confidence)


In [9]:
from google.colab import files

# extract_mfcc() and predict_genre() are defined once in an earlier cell of this
# notebook; reuse them here rather than re-declaring identical copies.

# Upload and predict
uploaded = files.upload()

# The upload result may be empty (e.g. the dialog was dismissed without picking
# a file); indexing the first key unconditionally would raise IndexError then.
file_path = next(iter(uploaded), None)
if file_path is None:
    genre, confidence = None, 0.0
else:
    genre, confidence = predict_genre(file_path, model, le)

if genre:
    print(f"🎶 Predicted Genre: {genre} ({confidence * 100:.2f}% confidence)")
else:
    print("❌ Prediction failed.")


Saving Sweetheart _ sangeet choreo _ wedding choreography #easydancesteps  #transition #bollywoodmusic (320 kbps).mp3 to Sweetheart _ sangeet choreo _ wedding choreography #easydancesteps  #transition #bollywoodmusic (320 kbps).mp3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
🎶 Predicted Genre: metal (37.02% confidence)


In [10]:
from google.colab import files

# extract_mfcc() and predict_genre() are defined once in an earlier cell of this
# notebook; reuse them here rather than re-declaring identical copies.

# Upload and predict
uploaded = files.upload()

# The upload result may be empty (e.g. the dialog was dismissed without picking
# a file); indexing the first key unconditionally would raise IndexError then.
file_path = next(iter(uploaded), None)
if file_path is None:
    genre, confidence = None, 0.0
else:
    genre, confidence = predict_genre(file_path, model, le)

if genre:
    print(f"🎶 Predicted Genre: {genre} ({confidence * 100:.2f}% confidence)")
else:
    print("❌ Prediction failed.")


Saving Dua Lipa - Levitating.mp3 to Dua Lipa - Levitating.mp3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
🎶 Predicted Genre: hiphop (99.86% confidence)
