In [1]:
import numpy as np
import librosa
from tensorflow.keras.models import load_model
import os

# Load the trained CNN model
model = load_model("genre_cnn_model.h5")

# Define the genre labels (same order used during training)
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

# Function to extract MFCC features from audio
def extract_mfcc(file_path, max_pad_len=174):
    y, sr = librosa.load(file_path, duration=30)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

    # Padding if MFCC shape is short
    if mfcc.shape[1] < max_pad_len:
        pad_width = max_pad_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfcc = mfcc[:, :max_pad_len]

    mfcc = np.mean(mfcc.T, axis=0)
    return mfcc.reshape(1, 40, 1)

# ------------------------------------------
# MAIN SCRIPT: Upload + Predict
# ------------------------------------------

file_path = input("📂 Enter path to your audio (.wav) file: ")

if not os.path.exists(file_path):
    print("❌ File not found. Please check the path.")
else:
    try:
        features = extract_mfcc(file_path)
        prediction = model.predict(features)
        predicted_index = np.argmax(prediction)
        predicted_genre = GENRES[predicted_index]

        print(f"\n🎵 Predicted Genre: {predicted_genre}")
    except Exception as e:
        print("❌ Error during prediction:", e)




FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'genre_cnn_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [2]:
import numpy as np
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# ✅ Load trained CNN model
model = load_model("genre_image_cnn_model.h5")

# ✅ Genre labels (same order as training)
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

# ✅ Image size used during training
IMAGE_SIZE = (128, 128)

# ✅ Function to predict image genre
def predict_genre_from_image(img_path):
    if not os.path.exists(img_path):
        print("❌ File not found:", img_path)
        return

    try:
        # Load and preprocess image
        img = load_img(img_path, target_size=IMAGE_SIZE)
        img_array = img_to_array(img) / 255.0  # Normalize
        img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

        # Predict
        prediction = model.predict(img_array)
        predicted_index = np.argmax(prediction)
        predicted_genre = GENRES[predicted_index]

        print(f"\n🖼️ File: {os.path.basename(img_path)}")
        print(f"🎵 Predicted Genre: {predicted_genre}")

    except Exception as e:
        print("❌ Error during prediction:", e)

# -------------------------------------
# ✅ Main script
# -------------------------------------
img_path = input("📂 Enter path to spectrogram image (.png or .jpg): ")
predict_genre_from_image(img_path)


FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'genre_image_cnn_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [4]:
import numpy as np
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# ✅ Update the path to your saved model
MODEL_PATH = r"C:\Users\J.TINCKY PRIYA\Downloads\genre_image_cnn_model.h5"

# ✅ Load the CNN model (with error handling)
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"❌ Model file not found at: {MODEL_PATH}")
model = load_model(MODEL_PATH)
print("✅ Model loaded successfully.")

# ✅ Genre labels used during training (must match training order)
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

# ✅ Image size used during training
IMAGE_SIZE = (128, 128)

# ✅ Function to predict genre from image
def predict_genre_from_image(img_path):
    if not os.path.exists("C:/Users/J.TINCKY PRIYA/Downloads\Data.zip"):
        print(f"❌ Image not found at: {"C:/Users/J.TINCKY PRIYA/Downloads/Data.zip"}")
        return

    try:
        # Load and preprocess image
        img = load_img(img_path, target_size=IMAGE_SIZE)
        img_array = img_to_array(img) / 255.0
        img_array = np.expand_dims(img_array, axis=0)

        # Predict
        prediction = model.predict(img_array)
        predicted_index = np.argmax(prediction)
        predicted_genre = GENRES[predicted_index]

        print(f"\n🖼️ File: {os.path.basename(img_path)}")
        print(f"🎵 Predicted Genre: {predicted_genre}")

    except Exception as e:
        print("❌ Error during prediction:", e)

# ✅ MAIN: Ask for image path and predict
img_path = input("📂 Enter full path to spectrogram image (.png or .jpg): ").strip()
predict_genre_from_image(img_path)


  if not os.path.exists("C:/Users/J.TINCKY PRIYA/Downloads\Data.zip"):
  if not os.path.exists("C:/Users/J.TINCKY PRIYA/Downloads\Data.zip"):


FileNotFoundError: ❌ Model file not found at: C:\Users\J.TINCKY PRIYA\Downloads\genre_image_cnn_model.h5

In [5]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Path to the extracted GTZAN dataset (update this if needed)
DATASET_PATH = "genres/"  # Example: "C:/Users/YourName/Downloads/genres/"

# Genre labels (these should match the folder names inside the dataset)
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

# Initialize lists to hold feature vectors and genre labels
features = []
labels = []

# Loop through each genre folder
print("⏳ Loading and processing audio files...")
for genre in GENRES:
    genre_path = os.path.join(DATASET_PATH, genre)
    for filename in os.listdir(genre_path):
        if filename.endswith(".wav"):
            file_path = os.path.join(genre_path, filename)
            try:
                # Load audio file (cut to 30 seconds)
                y, sr = librosa.load(file_path, duration=30)
                
                # Extract MFCCs (13 coefficients)
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
                
                # Take the mean across time for each MFCC
                mfcc_mean = np.mean(mfcc.T, axis=0)
                
                features.append(mfcc_mean)
                labels.append(genre)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

print("✅ Finished loading files.")
print("📊 Total files loaded:", len(features))

# Convert features and labels to numpy arrays
X = np.array(features)
y = np.array(labels)

# Encode the string labels into integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Display sample output
print("\n🎵 Encoded labels (first 10):", y_encoded[:10])
print("🎼 Genre mapping:")
for idx, genre in enumerate(label_encoder.classes_):
    print(f"{idx}: {genre}")


⏳ Loading and processing audio files...


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'genres/blues'

In [6]:
import zipfile
import os

# Set the zip file path and extraction folder
zip_file_path = r"C:\Users\J.TINCKY PRIYA\Downloads\Data (2).zip"  # Use raw string for Windows path
extract_to = "genres"  # Folder to extract into (relative to your current working directory)

# Make sure the path exists
if not os.path.exists(zip_file_path):
    raise FileNotFoundError(f"❌ ZIP file not found at: {zip_file_path}")

# Extract the ZIP file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print(f"✅ ZIP file extracted successfully to folder: {extract_to}")


✅ ZIP file extracted successfully to folder: genres


In [8]:
import os
print("📂 Extracted folder full path:", os.path.abspath("genres"))


📂 Extracted folder full path: c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres


In [9]:
import os
import librosa
import numpy as np
import json

# Set the path to your dataset
DATASET_PATH = r"C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres"
JSON_PATH = "data_mfcc.json"  # to save features (optional)

# Feature extraction settings
SAMPLE_RATE = 22050
DURATION = 30  # GTZAN clips are 30s
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
NUM_SEGMENTS = 10  # segmenting each clip to create more training samples

def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=10):
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }

    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

    # Loop through all genre folders
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):

        if dirpath != dataset_path:
            # Genre label
            label = os.path.basename(dirpath)
            data["mapping"].append(label)
            print(f"\nProcessing: {label}")

            # Process each audio file in the genre folder
            for f in filenames:
                file_path = os.path.join(dirpath, f)
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

                # Segment the file
                for s in range(num_segments):
                    start_sample = num_samples_per_segment * s
                    end_sample = start_sample + num_samples_per_segment

                    mfcc = librosa.feature.mfcc(
                        y=signal[start_sample:end_sample],
                        sr=sr,
                        n_mfcc=num_mfcc,
                        n_fft=n_fft,
                        hop_length=hop_length
                    )

                    mfcc = mfcc.T  # transpose to (time, coefficients)

                    if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc.tolist())
                        data["labels"].append(i-1)  # i-1 because index 0 is root
                        print(f"{file_path}, segment:{s+1}")

    # Save as JSON
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

    print("\nMFCC extraction complete. Saved to:", json_path)

# Run the function
import math
save_mfcc(DATASET_PATH, JSON_PATH, num_segments=10)



Processing: genres_original

Processing: blues
C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000.wav, segment:1
C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000.wav, segment:2
C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000.wav, segment:3
C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000.wav, segment:4
C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000.wav, segment:5
C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000.wav, segment:6
C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000.wav, segment:7
C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000

  signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


NoBackendError: 

In [10]:
import os
import librosa
import json

DATASET_PATH = r"C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres"
JSON_PATH = "data_mfcc.json"

SAMPLES_PER_TRACK = 22050 * 30  # 30 sec

def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=10):
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }

    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    expected_num_mfcc_vectors = math.ceil(num_samples_per_segment / hop_length)

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        if dirpath == dataset_path:
            continue

        genre_label = os.path.basename(dirpath)
        data["mapping"].append(genre_label)
        print(f"Processing: {genre_label}")

        for f in filenames:
            file_path = os.path.join(dirpath, f)

            try:
                signal, sr = librosa.load(file_path, sr=22050)
            except Exception as e:
                print(f"Error loading {file_path}: {e}")
                continue

            for s in range(num_segments):
                start = num_samples_per_segment * s
                end = start + num_samples_per_segment

                mfcc = librosa.feature.mfcc(y=signal[start:end],
                                            sr=sr,
                                            n_mfcc=num_mfcc,
                                            n_fft=n_fft,
                                            hop_length=hop_length)
                
                mfcc = mfcc.T

                if len(mfcc) == expected_num_mfcc_vectors:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(i - 1)

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

    print(f"MFCC extraction complete. Saved to: {json_path}")

# Required for math.ceil
import math

# Run it
save_mfcc(DATASET_PATH, JSON_PATH)



Processing: genres_original
Processing: blues
Processing: classical
Processing: country
Processing: disco
Processing: hiphop
Processing: jazz


  signal, sr = librosa.load(file_path, sr=22050)


Error loading C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\jazz\jazz.00054.wav: 
Processing: metal
Processing: pop
Processing: reggae
Processing: rock
Processing: images_original
Processing: blues
Error loading C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\images_original\blues\blues00000.png: 
Error loading C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\images_original\blues\blues00001.png: 
Error loading C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\images_original\blues\blues00002.png: 
Error loading C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\images_original\blues\blues00003.png: 
Error loading C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\images_original\blues\blues00004.png: 
Error loading C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\images_original\blues\blues00005.png: 
Error load

In [2]:
import os
import librosa
import json

DATASET_PATH = "C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original"
JSON_PATH = "data.json"
SAMPLES_PER_TRACK = 22050 * 30  # 30 seconds * sample rate

def save_mfcc(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048):
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        if dirpath == dataset_path:
            continue

        # Get genre label from folder name
        genre_label = os.path.basename(dirpath)
        print(f"Processing: {genre_label}")
        data["mapping"].append(genre_label)

        for f in filenames:
            if not f.lower().endswith(".wav"):
                continue  # Skip non-wav files

            file_path = os.path.join(dirpath, f)

            try:
                signal, sr = librosa.load(file_path, sr=22050)

                if len(signal) >= SAMPLES_PER_TRACK:
                    signal = signal[:SAMPLES_PER_TRACK]

                    mfcc = librosa.feature.mfcc(signal, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=n_mfcc)
                    mfcc = mfcc.T  # Transpose to (frames, coefficients)

                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(i - 1)

            except Exception as e:
                print(f"Error loading {file_path}: {e}")

    # Save data to JSON
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
    print("✅ MFCC extraction completed and saved to data.json")

# Call the function
save_mfcc(DATASET_PATH, JSON_PATH)


  DATASET_PATH = "C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original"


Processing: blues
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original\blues\blues.00000.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original\blues\blues.00001.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original\blues\blues.00002.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original\blues\blues.00003.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Micro

  signal, sr = librosa.load(file_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original\jazz\jazz.00057.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original\jazz\jazz.00058.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original\jazz\jazz.00059.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres_original\jazz\jazz.00060.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading C:/Users/J.TINCKY PRIYA/AppData/Local/Programs/Microsoft VS Code/genres\genres

In [3]:
import os
import librosa
import json

# Set your dataset path and output file
DATASET_PATH = r"C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original"
JSON_PATH = "data.json"
SAMPLES_PER_TRACK = 22050 * 30  # 30 seconds of audio

def extract_mfcc(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048):
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        if dirpath == dataset_path:
            continue

        genre_label = os.path.basename(dirpath)
        print(f"\nProcessing genre: {genre_label}")
        data["mapping"].append(genre_label)

        for f in filenames:
            if not f.lower().endswith(".wav"):
                continue

            file_path = os.path.join(dirpath, f)

            try:
                signal, sr = librosa.load(file_path, sr=22050)
                
                if len(signal) < SAMPLES_PER_TRACK:
                    print(f"Skipping {f} (less than 30 seconds)")
                    continue

                signal = signal[:SAMPLES_PER_TRACK]
                mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc,
                                            n_fft=n_fft, hop_length=hop_length)
                mfcc = mfcc.T

                data["mfcc"].append(mfcc.tolist())
                data["labels"].append(i - 1)
                print(f"✅ Processed: {f}")

            except Exception as e:
                print(f"❌ Error loading {f}: {e}")

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
    print("\n✅ MFCC extraction complete. Data saved to data.json")

# Run the function
extract_mfcc(DATASET_PATH, JSON_PATH)



Processing genre: blues
✅ Processed: blues.00000.wav
✅ Processed: blues.00001.wav
✅ Processed: blues.00002.wav
✅ Processed: blues.00003.wav
✅ Processed: blues.00004.wav
✅ Processed: blues.00005.wav
✅ Processed: blues.00006.wav
✅ Processed: blues.00007.wav
✅ Processed: blues.00008.wav
✅ Processed: blues.00009.wav
✅ Processed: blues.00010.wav
✅ Processed: blues.00011.wav
✅ Processed: blues.00012.wav
✅ Processed: blues.00013.wav
✅ Processed: blues.00014.wav
✅ Processed: blues.00015.wav
✅ Processed: blues.00016.wav
✅ Processed: blues.00017.wav
✅ Processed: blues.00018.wav
✅ Processed: blues.00019.wav
✅ Processed: blues.00020.wav
✅ Processed: blues.00021.wav
✅ Processed: blues.00022.wav
✅ Processed: blues.00023.wav
✅ Processed: blues.00024.wav
✅ Processed: blues.00025.wav
✅ Processed: blues.00026.wav
✅ Processed: blues.00027.wav
✅ Processed: blues.00028.wav
✅ Processed: blues.00029.wav
✅ Processed: blues.00030.wav
✅ Processed: blues.00031.wav
✅ Processed: blues.00032.wav
✅ Processed: blues

  signal, sr = librosa.load(file_path, sr=22050)


✅ Processed: jazz.00055.wav
✅ Processed: jazz.00056.wav
✅ Processed: jazz.00057.wav
✅ Processed: jazz.00058.wav
✅ Processed: jazz.00059.wav
✅ Processed: jazz.00060.wav
✅ Processed: jazz.00061.wav
✅ Processed: jazz.00062.wav
✅ Processed: jazz.00063.wav
✅ Processed: jazz.00064.wav
✅ Processed: jazz.00065.wav
✅ Processed: jazz.00066.wav
✅ Processed: jazz.00067.wav
✅ Processed: jazz.00068.wav
✅ Processed: jazz.00069.wav
✅ Processed: jazz.00070.wav
✅ Processed: jazz.00071.wav
✅ Processed: jazz.00072.wav
✅ Processed: jazz.00073.wav
✅ Processed: jazz.00074.wav
✅ Processed: jazz.00075.wav
✅ Processed: jazz.00076.wav
✅ Processed: jazz.00077.wav
✅ Processed: jazz.00078.wav
✅ Processed: jazz.00079.wav
✅ Processed: jazz.00080.wav
✅ Processed: jazz.00081.wav
✅ Processed: jazz.00082.wav
✅ Processed: jazz.00083.wav
✅ Processed: jazz.00084.wav
✅ Processed: jazz.00085.wav
✅ Processed: jazz.00086.wav
✅ Processed: jazz.00087.wav
✅ Processed: jazz.00088.wav
✅ Processed: jazz.00089.wav
✅ Processed: jazz.00

In [4]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, utils
import matplotlib.pyplot as plt

# Step 1: Load the extracted MFCC data from data.json
def load_data(data_path):
    with open(data_path, "r") as fp:
        data = json.load(fp)
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])
    return X, y, data["mapping"]

# Step 2: Load and preprocess data
X, y, mapping = load_data("data.json")
X = X[..., np.newaxis]  # Add channel dimension for CNN
y = utils.to_categorical(y)  # One-hot encode labels

# Step 3: Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Build CNN model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=X.shape[1:]),
    layers.MaxPooling2D((3, 3), strides=(2, 2)),
    layers.BatchNormalization(),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((3, 3), strides=(2, 2)),
    layers.BatchNormalization(),

    layers.Conv2D(128, (2, 2), activation='relu'),
    layers.MaxPooling2D((2, 2), strides=(2, 2)),
    layers.BatchNormalization(),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(len(mapping), activation='softmax')
])

# Step 5: Compile model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Step 6: Train the model
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=30,
                    batch_size=32)

# Step 7: Save the model
model.save("genre_cnn_model.h5")
print("✅ Model saved as genre_cnn_model.h5")

# Optional Step: Plot training history
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Val Accuracy")
plt.title("Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Val Loss")
plt.title("Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

plt.tight_layout()
plt.show()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30


ValueError: Exception encountered when calling Conv2D.call().

[1mNegative dimension size caused by subtracting 2 from 1 for '{{node sequential_1/conv2d_2_1/convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential_1/batch_normalization_1_2/batchnorm/add_1, sequential_1/conv2d_2_1/convolution/ReadVariableOp)' with input shapes: [?,320,1,64], [2,2,64,128].[0m

Arguments received by Conv2D.call():
  • inputs=tf.Tensor(shape=(None, 320, 1, 64), dtype=float32)

In [5]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

# Step 1: Extract MFCC features from audio files
DATASET_PATH = r"c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original"
SAMPLES_PER_TRACK = 22050 * 30  # 30 sec at 22050 Hz

X = []
y = []
genres = os.listdir(DATASET_PATH)

print("Processing:", DATASET_PATH)

for genre in genres:
    genre_path = os.path.join(DATASET_PATH, genre)
    if not os.path.isdir(genre_path):
        continue
    print("Processing:", genre)
    for filename in os.listdir(genre_path):
        if filename.endswith(".wav"):
            file_path = os.path.join(genre_path, filename)
            try:
                signal, sr = librosa.load(file_path, sr=22050)
                if len(signal) >= SAMPLES_PER_TRACK:
                    signal = signal[:SAMPLES_PER_TRACK]
                    mfcc = librosa.feature.mfcc(signal, sr=sr, n_mfcc=13)
                    mfcc = mfcc.T
                    X.append(mfcc)
                    y.append(genre)
            except Exception as e:
                print(f"Error loading {file_path}: {e}")

# Step 2: Pad/truncate MFCCs to same shape
import tensorflow as tf

max_len = max([mfcc.shape[0] for mfcc in X])
X_padded = []

for mfcc in X:
    if mfcc.shape[0] < max_len:
        pad_width = max_len - mfcc.shape[0]
        padded = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    else:
        padded = mfcc[:max_len, :]
    X_padded.append(padded)

X = np.array(X_padded)
X = X[..., np.newaxis]  # Add channel dimension

# Step 3: Encode labels
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

# Step 5: Build CNN Model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(X.shape[1], X.shape[2], 1)),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),

    # ✅ FIXED this kernel size to (1, 1)
    layers.Conv2D(128, (1, 1), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

# Step 6: Train the model
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=30,
                    batch_size=32)

# Step 7: Save the model
model.save("genre_cnn_model.h5")
print("Model saved as genre_cnn_model.h5")


Processing: c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original
Processing: blues
Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00000.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00001.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00002.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\blues\blues.00003.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 k

  signal, sr = librosa.load(file_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\jazz\jazz.00059.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\jazz\jazz.00060.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\jazz\jazz.00061.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original\jazz\jazz.00062.wav: mfcc() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given
Error loading c:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres

ValueError: max() iterable argument is empty

In [6]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Load the MFCC features and labels
X = np.load("X.npy")  # shape: (samples, 20, 44)
y = np.load("y.npy")  # shape: (samples, )

# Reshape input to 4D tensor (samples, height, width, channels)
X = X[..., np.newaxis]  # shape becomes (samples, 20, 44, 1)

# Convert labels to one-hot encoded
num_classes = len(np.unique(y))
y = to_categorical(y, num_classes)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the CNN model
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=X.shape[1:]))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.3))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.3))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

# Save the trained model
model.save("genre_cnn_model.h5")

print("✅ CNN model training completed and saved as 'genre_cnn_model.h5'")


FileNotFoundError: [Errno 2] No such file or directory: 'X.npy'

In [7]:
import os
import librosa
import numpy as np

DATASET_PATH = "genres"
SAMPLES_PER_TRACK = 22050 * 30  # 30 sec clips
n_mfcc = 20
hop_length = 512
n_fft = 2048
max_pad_len = 130  # Pad or trim all to same length

X = []
y = []
genres = sorted(os.listdir(DATASET_PATH))  # auto label from folders

for idx, genre in enumerate(genres):
    genre_path = os.path.join(DATASET_PATH, genre)
    if not os.path.isdir(genre_path):
        continue

    for filename in os.listdir(genre_path):
        if filename.endswith(".wav"):
            file_path = os.path.join(genre_path, filename)
            try:
                signal, sr = librosa.load(file_path, sr=22050)
                mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc,
                                            n_fft=n_fft, hop_length=hop_length)
                
                # Padding/trimming to fixed length
                if mfcc.shape[1] < max_pad_len:
                    pad_width = max_pad_len - mfcc.shape[1]
                    mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
                else:
                    mfcc = mfcc[:, :max_pad_len]

                X.append(mfcc)
                y.append(idx)
            except Exception as e:
                print(f"❌ Error processing {file_path}: {e}")

X = np.array(X)
y = np.array(y)

# Save to .npy files
np.save("X.npy", X)
np.save("y.npy", y)

print("✅ MFCC features extracted and saved as 'X.npy' and 'y.npy'")


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'genres'

In [8]:
# Step 1: Extract MFCC Features and Save
import os
import librosa
import numpy as np

DATASET_PATH = "C:/Users/J.TINCKY PRIYA/Downloads/gtzan"  # Change this to your dataset location
SAMPLES_PER_TRACK = 22050 * 30  # 30 seconds

def extract_features(dataset_path, n_mfcc=13, n_fft=2048, hop_length=512, segment_duration=3):
    X = []
    y = []
    genres = sorted(os.listdir(dataset_path))
    num_segments = int(30 / segment_duration)

    for genre_index, genre in enumerate(genres):
        genre_path = os.path.join(dataset_path, genre)
        for file_name in os.listdir(genre_path):
            file_path = os.path.join(genre_path, file_name)
            signal, sr = librosa.load(file_path, sr=22050)
            samples_per_segment = int(sr * segment_duration)
            for d in range(num_segments):
                start = samples_per_segment * d
                end = start + samples_per_segment
                mfcc = librosa.feature.mfcc(signal[start:end], sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
                if mfcc.shape[1] == int(np.ceil(samples_per_segment / hop_length)):
                    X.append(mfcc)
                    y.append(genre_index)

    X = np.array(X)
    y = np.array(y)
    np.save("X.npy", X)
    np.save("y.npy", y)
    print("Saved X.npy and y.npy")

extract_features(DATASET_PATH)


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:/Users/J.TINCKY PRIYA/Downloads/gtzan'

In [9]:
import os
import librosa
import json

# Set your dataset path and output file
DATASET_PATH = r"C:\Users\J.TINCKY PRIYA\AppData\Local\Programs\Microsoft VS Code\genres\genres_original"
JSON_PATH = "data.json"
SAMPLES_PER_TRACK = 22050 * 30  # 30 seconds of audio

def extract_mfcc(dataset_path, json_path, n_mfcc=13, hop_length=512, n_fft=2048):
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }

    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
        if dirpath == dataset_path:
            continue

        genre_label = os.path.basename(dirpath)
        print(f"\nProcessing genre: {genre_label}")
        data["mapping"].append(genre_label)

        for f in filenames:
            if not f.lower().endswith(".wav"):
                continue

            file_path = os.path.join(dirpath, f)

            try:
                signal, sr = librosa.load(file_path, sr=22050)
                
                if len(signal) < SAMPLES_PER_TRACK:
                    print(f"Skipping {f} (less than 30 seconds)")
                    continue

                signal = signal[:SAMPLES_PER_TRACK]
                mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc,
                                            n_fft=n_fft, hop_length=hop_length)
                mfcc = mfcc.T

                data["mfcc"].append(mfcc.tolist())
                data["labels"].append(i - 1)
                print(f"✅ Processed: {f}")

            except Exception as e:
                print(f"❌ Error loading {f}: {e}")

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
    print("\n✅ MFCC extraction complete. Data saved to data.json")

# Run the function
extract_mfcc(DATASET_PATH, JSON_PATH)



Processing genre: blues
✅ Processed: blues.00000.wav
✅ Processed: blues.00001.wav
✅ Processed: blues.00002.wav
✅ Processed: blues.00003.wav
✅ Processed: blues.00004.wav
✅ Processed: blues.00005.wav
✅ Processed: blues.00006.wav
✅ Processed: blues.00007.wav
✅ Processed: blues.00008.wav
✅ Processed: blues.00009.wav
✅ Processed: blues.00010.wav
✅ Processed: blues.00011.wav
✅ Processed: blues.00012.wav
✅ Processed: blues.00013.wav
✅ Processed: blues.00014.wav
✅ Processed: blues.00015.wav
✅ Processed: blues.00016.wav
✅ Processed: blues.00017.wav
✅ Processed: blues.00018.wav
✅ Processed: blues.00019.wav
✅ Processed: blues.00020.wav
✅ Processed: blues.00021.wav
✅ Processed: blues.00022.wav
✅ Processed: blues.00023.wav
✅ Processed: blues.00024.wav
✅ Processed: blues.00025.wav
✅ Processed: blues.00026.wav
✅ Processed: blues.00027.wav
✅ Processed: blues.00028.wav
✅ Processed: blues.00029.wav
✅ Processed: blues.00030.wav
✅ Processed: blues.00031.wav
✅ Processed: blues.00032.wav
✅ Processed: blues

  signal, sr = librosa.load(file_path, sr=22050)


✅ Processed: jazz.00058.wav
✅ Processed: jazz.00059.wav
✅ Processed: jazz.00060.wav
✅ Processed: jazz.00061.wav
✅ Processed: jazz.00062.wav
✅ Processed: jazz.00063.wav
✅ Processed: jazz.00064.wav
✅ Processed: jazz.00065.wav
✅ Processed: jazz.00066.wav
✅ Processed: jazz.00067.wav
✅ Processed: jazz.00068.wav
✅ Processed: jazz.00069.wav
✅ Processed: jazz.00070.wav
✅ Processed: jazz.00071.wav
✅ Processed: jazz.00072.wav
✅ Processed: jazz.00073.wav
✅ Processed: jazz.00074.wav
✅ Processed: jazz.00075.wav
✅ Processed: jazz.00076.wav
✅ Processed: jazz.00077.wav
✅ Processed: jazz.00078.wav
✅ Processed: jazz.00079.wav
✅ Processed: jazz.00080.wav
✅ Processed: jazz.00081.wav
✅ Processed: jazz.00082.wav
✅ Processed: jazz.00083.wav
✅ Processed: jazz.00084.wav
✅ Processed: jazz.00085.wav
✅ Processed: jazz.00086.wav
✅ Processed: jazz.00087.wav
✅ Processed: jazz.00088.wav
✅ Processed: jazz.00089.wav
✅ Processed: jazz.00090.wav
✅ Processed: jazz.00091.wav
✅ Processed: jazz.00092.wav
✅ Processed: jazz.00

In [10]:
# Step 2: Load and Train CNN Model
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Load MFCC features
X = np.load("X.npy")  # shape: (samples, 13, time)
y = np.load("y.npy")

# Reshape and normalize
X = X[..., np.newaxis]  # Add channel dimension
X = X / np.max(X)  # Normalize

# One-hot encode labels
num_classes = len(np.unique(y))
y = to_categorical(y, num_classes=num_classes)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build CNN
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=X.shape[1:]),
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=30, batch_size=32)

# Save
model.save("genre_cnn_model.h5")
print("Model saved as genre_cnn_model.h5")


FileNotFoundError: [Errno 2] No such file or directory: 'X.npy'

In [11]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical

# Step 1: Load data from JSON
def load_data(json_path):
    with open(json_path, "r") as fp:
        data = json.load(fp)

    X = np.array(data["mfcc"])
    y = np.array(data["labels"])
    return X, y

# Step 2: Prepare the dataset
def prepare_datasets(test_size, json_path):
    X, y = load_data(json_path)

    # Ensure inputs have shape (samples, time, mfcc) => expand dims for CNN input
    X = np.array(X)
    X = X[..., np.newaxis]  # Add channel dimension

    # One-hot encode labels
    y = to_categorical(y)

    # Split the dataset
    return train_test_split(X, y, test_size=test_size, random_state=42)

# Step 3: Build CNN model
def build_model(input_shape):
    model = Sequential()

    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((3, 3), strides=(2, 2)))
    model.add(Dropout(0.3))

    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((3, 3), strides=(2, 2)))
    model.add(Dropout(0.3))

    model.add(Conv2D(128, (2, 2), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(Dropout(0.3))

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='softmax'))  # 10 genres

    return model

# Step 4: Train and save the model
def train_model():
    JSON_PATH = "data.json"
    X_train, X_test, y_train, y_test = prepare_datasets(0.2, JSON_PATH)

    input_shape = (X_train.shape[1], X_train.shape[2], 1)
    model = build_model(input_shape)

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        batch_size=32,
                        epochs=30)

    model.save("genre_cnn_model.h5")
    print("✅ Model trained and saved as genre_cnn_model.h5")

# Run training
train_model()


Epoch 1/30


ValueError: Exception encountered when calling Conv2D.call().

[1mNegative dimension size caused by subtracting 2 from 1 for '{{node sequential_1_1/conv2d_5_1/convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential_1_1/dropout_2_1/stateless_dropout/SelectV2, sequential_1_1/conv2d_5_1/convolution/ReadVariableOp)' with input shapes: [?,320,1,64], [2,2,64,128].[0m

Arguments received by Conv2D.call():
  • inputs=tf.Tensor(shape=(None, 320, 1, 64), dtype=float32)

In [12]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Paths
JSON_PATH = "data.json"
MODEL_PATH = "genre_cnn_model.h5"

# Constants
FIXED_MFCC_LENGTH = 130  # Adjust based on most common length in your data
NUM_MFCC = 13
NUM_GENRES = 10

def load_data(json_path):
    with open(json_path, "r") as fp:
        data = json.load(fp)

    X = pad_sequences(data["mfcc"], maxlen=FIXED_MFCC_LENGTH, dtype='float32', padding='post', truncating='post')
    X = np.array(X)
    y = np.array(data["labels"])

    # Reshape to 4D (samples, height, width, channels)
    X = X[..., np.newaxis]  # Shape: (samples, time, mfcc, 1)

    # One-hot encode labels
    y = to_categorical(y, num_classes=NUM_GENRES)

    return train_test_split(X, y, test_size=0.2, random_state=42)

def build_model(input_shape):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),

        Conv2D(64, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),

        Conv2D(128, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),

        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(NUM_GENRES, activation='softmax')
    ])
    return model

def train_model():
    X_train, X_test, y_train, y_test = load_data(JSON_PATH)
    print(f"✅ Data loaded: {X_train.shape}")

    model = build_model(input_shape=X_train.shape[1:])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()

    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        batch_size=32,
                        epochs=30)

    model.save(MODEL_PATH)
    print(f"✅ Model trained and saved as {MODEL_PATH}")

# Run training
train_model()


✅ Data loaded: (792, 130, 13, 1)


Epoch 1/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 42ms/step - accuracy: 0.1755 - loss: 3.5828 - val_accuracy: 0.1111 - val_loss: 4.1799
Epoch 2/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.2904 - loss: 2.0205 - val_accuracy: 0.1212 - val_loss: 2.4995
Epoch 3/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.3194 - loss: 1.9166 - val_accuracy: 0.1313 - val_loss: 2.3086
Epoch 4/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.3662 - loss: 1.7484 - val_accuracy: 0.1667 - val_loss: 2.1837
Epoch 5/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.3977 - loss: 1.6478 - val_accuracy: 0.3030 - val_loss: 1.9109
Epoch 6/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - accuracy: 0.4192 - loss: 1.5671 - val_accuracy: 0.4192 - val_loss: 1.6887
Epoch 7/30
[1m25/25[0m [32m━━━━



✅ Model trained and saved as genre_cnn_model.h5


In [13]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Constants
JSON_PATH = "data.json"
FIXED_MFCC_LENGTH = 130   # Based on 30s audio and hop_length=512
NUM_MFCC = 13             # Number of MFCC features
NUM_GENRES = 10           # GTZAN has 10 genres

def load_data(json_path):
    with open(json_path, "r") as f:
        data = json.load(f)

    # Pad MFCCs to have uniform length
    X = pad_sequences(data["mfcc"], maxlen=FIXED_MFCC_LENGTH, dtype='float32', padding='post', truncating='post')
    X = np.array(X)
    y = np.array(data["labels"])

    # Reshape for CNN input: (samples, time, mfcc, 1)
    X = X[..., np.newaxis]

    # One-hot encode labels
    y = to_categorical(y, num_classes=NUM_GENRES)

    return train_test_split(X, y, test_size=0.2, random_state=42)

def build_model(input_shape):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),

        Conv2D(64, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),

        Conv2D(128, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.3),

        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(NUM_GENRES, activation='softmax')
    ])
    return model

def train_and_validate():
    # Load and split data
    X_train, X_test, y_train, y_test = load_data(JSON_PATH)
    print(f"✅ Data shapes:\nX_train: {X_train.shape}, X_test: {X_test.shape}")
    
    # Build and compile model
    model = build_model(X_train.shape[1:])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    
    # Train
    history = model.fit(X_train, y_train, 
                        validation_data=(X_test, y_test), 
                        epochs=30, 
                        batch_size=32)

    # Save model
    model.save("genre_cnn_model.h5")
    print("✅ Model saved as genre_cnn_model.h5")

    return history

# Execute training
train_and_validate()


✅ Data shapes:
X_train: (792, 130, 13, 1), X_test: (198, 130, 13, 1)
Epoch 1/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step - accuracy: 0.1995 - loss: 3.5985 - val_accuracy: 0.1970 - val_loss: 2.6290
Epoch 2/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.3119 - loss: 1.9354 - val_accuracy: 0.1515 - val_loss: 2.6642
Epoch 3/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.3624 - loss: 1.7917 - val_accuracy: 0.1616 - val_loss: 2.3018
Epoch 4/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.3876 - loss: 1.6921 - val_accuracy: 0.3182 - val_loss: 1.9532
Epoch 5/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.4381 - loss: 1.5334 - val_accuracy: 0.3384 - val_loss: 1.8113
Epoch 6/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.4722 - loss: 1.4353 - val_a



✅ Model saved as genre_cnn_model.h5


<keras.src.callbacks.history.History at 0x1cf753adba0>

In [None]:
import librosa
import numpy as np
from tensorflow.keras.models import load_model
import tkinter as tk
from tkinter import filedialog
import os

# Constants
MODEL_PATH = "genre_cnn_model.h5"
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']
SAMPLES_PER_TRACK = 22050 * 30  # 30 seconds
NUM_MFCC = 13
FIXED_MFCC_LENGTH = 130

# Load the trained model
model = load_model(MODEL_PATH)

def preprocess_audio(file_path):
    signal, sr = librosa.load(file_path, sr=22050)

    if len(signal) < SAMPLES_PER_TRACK:
        print("Audio too short! Padding to 30 seconds...")
        signal = np.pad(signal, (0, SAMPLES_PER_TRACK - len(signal)))
    else:
        signal = signal[:SAMPLES_PER_TRACK]

    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=NUM_MFCC,
                                n_fft=2048, hop_length=512)
    mfcc = mfcc.T

    # Pad/truncate to fixed length
    if mfcc.shape[0] < FIXED_MFCC_LENGTH:
        mfcc = np.pad(mfcc, ((0, FIXED_MFCC_LENGTH - mfcc.shape[0]), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:FIXED_MFCC_LENGTH]

    mfcc = mfcc[np.newaxis, ..., np.newaxis]  # shape: (1, 130, 13, 1)
    return mfcc

def predict_genre(file_path):
    mfcc = preprocess_audio(file_path)
    prediction = model.predict(mfcc)[0]
    predicted_index = np.argmax(prediction)
    genre = GENRES[predicted_index]
    confidence = prediction[predicted_index]

    print(f"🎧 Genre: {genre}")
    print(f"🔍 Confidence: {confidence * 100:.2f}%")

def upload_audio():
    root = tk.Tk()
    root.withdraw()  # Hide GUI window
    file_path = filedialog.askopenfilename(
        title="Select an audio file",
        filetypes=[("WAV files", "*.wav")]
    )

    if file_path:
        print(f"\n📂 Selected file: {os.path.basename(file_path)}")
        predict_genre(file_path)
    else:
        print("❌ No file selected.")

# Run the upload and classification
upload_audio()





📂 Selected file: reggae.00070.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
🎧 Genre: reggae
🔍 Confidence: 92.59%


In [15]:
import numpy as np
import librosa
from tensorflow.keras.models import load_model

# Load the trained CNN model
model = load_model("genre_cnn_model.h5")

# Genre labels (ensure order matches training)
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

def predict_genre_with_confidence(audio_path):
    # Constants
    SAMPLE_RATE = 22050
    DURATION = 30
    SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
    FIXED_MFCC_LENGTH = 130
    NUM_MFCC = 13

    # Load and preprocess audio
    signal, sr = librosa.load(audio_path, sr=SAMPLE_RATE)

    # Pad/trim signal to 30 seconds
    if len(signal) < SAMPLES_PER_TRACK:
        signal = np.pad(signal, (0, SAMPLES_PER_TRACK - len(signal)))
    else:
        signal = signal[:SAMPLES_PER_TRACK]

    # Extract MFCC
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=NUM_MFCC, n_fft=2048, hop_length=512)
    mfcc = mfcc.T

    # Pad/truncate MFCC to fixed length
    if mfcc.shape[0] < FIXED_MFCC_LENGTH:
        mfcc = np.pad(mfcc, ((0, FIXED_MFCC_LENGTH - mfcc.shape[0]), (0, 0)), mode='constant')
    else:
        mfcc = mfcc[:FIXED_MFCC_LENGTH]

    # Reshape for model
    mfcc = mfcc[np.newaxis, ..., np.newaxis]  # Shape: (1, 130, 13, 1)

    # Predict
    predictions = model.predict(mfcc)[0]
    predicted_index = np.argmax(predictions)
    predicted_genre = GENRES[predicted_index]
    confidence = predictions[predicted_index] * 100

    # Print results
    print(f"🎵 Predicted Genre: {predicted_genre}")
    print(f"🔍 Confidence Score: {confidence:.2f}%")

# Example usage:
# predict_genre_with_confidence("test_audio.wav")


