In [None]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix

# ----------------------------
# 1. Dataset path
# ----------------------------
# Fetch the GTZAN music-genre dataset through kagglehub; `path` holds the
# directory returned by the download call and is searched in the next step.
import kagglehub

path = kagglehub.dataset_download(
    "andradaolteanu/gtzan-dataset-music-genre-classification"
)
print(f"Base dataset path: {path}")

# ----------------------------
# 2. Auto-detect image folder
# ----------------------------
# Take the first directory (in os.walk order) whose name contains "image",
# compared case-insensitively -- e.g. "images_original". Falls back to None
# when the walk finds no such directory.
image_path = next(
    (
        os.path.join(parent, name)
        for parent, subdirs, _ in os.walk(path)
        for name in subdirs
        if "image" in name.lower()
    ),
    None,
)

if not image_path:
    raise FileNotFoundError("❌ No image folder found in dataset!")

print("✅ Found image folder:", image_path)
# Entries of the image folder; flow_from_directory below treats each
# sub-directory as one class.
print("Genres:", os.listdir(image_path))

# ----------------------------
# 3. Image Data Generators
# ----------------------------
# Every image is resized to this (height, width) when loaded.
img_size = (128, 128)

# A single generator config drives both subsets: `rescale` multiplies pixel
# values by 1/255 and `validation_split` reserves a 20% fraction of the
# images for the 'validation' subset.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Training batches keep the default shuffling.
train_gen = datagen.flow_from_directory(
    image_path,
    target_size=img_size,
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

# shuffle=False keeps validation batches in the same order as
# `val_gen.classes` / `val_gen.filenames`. The confusion-matrix step pairs
# model.predict(val_gen) with val_gen.classes, which is only valid when the
# iterator is not shuffled (flow_from_directory shuffles by default).
# Validation loss/accuracy during fit() do not depend on sample order.
val_gen = datagen.flow_from_directory(
    image_path,
    target_size=img_size,
    batch_size=32,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# ----------------------------
# 4. CNN Model
# ----------------------------
# Two Conv -> MaxPool -> Dropout stages followed by a dense classifier head
# with one softmax output per class reported by the training generator.
model = Sequential()

# Stage 1: 32 filters; this layer also declares the 3-channel input shape.
model.add(
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(*img_size, 3))
)
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Dropout(0.25))

# Stage 2: 64 filters.
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=2, strides=2))
model.add(Dropout(0.25))

# Classifier head.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(train_gen.num_classes, activation='softmax'))

# Categorical cross-entropy matches the one-hot labels produced by
# class_mode='categorical' in the generators.
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# ----------------------------
# 5. Train
# ----------------------------
# Fit on the training generator and evaluate on the validation generator;
# the returned History object records the per-epoch metrics plotted below.
num_epochs = 15
history = model.fit(
    x=train_gen,
    validation_data=val_gen,
    epochs=num_epochs,
)

# ----------------------------
# 6. Plot Accuracy & Loss
# ----------------------------
# One figure per metric, each showing the training curve and the matching
# "val_"-prefixed validation curve from the fit history.
# Each spec: (history key, train label, validation label, title, y-axis label)
curve_specs = (
    ('accuracy', 'Train Accuracy', 'Validation Accuracy', "Model Accuracy", "Accuracy"),
    ('loss', 'Train Loss', 'Validation Loss', "Model Loss", "Loss"),
)

for metric_key, train_label, val_label, figure_title, y_label in curve_specs:
    plt.figure(figsize=(8, 4))
    plt.plot(history.history[metric_key], label=train_label)
    plt.plot(history.history['val_' + metric_key], label=val_label)
    plt.title(figure_title)
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

# ----------------------------
# 7. Confusion Matrix
# ----------------------------
# Class names ordered by the integer index the generator assigned to them,
# so tick label i always names class i of the matrix.
class_names = sorted(train_gen.class_indices, key=train_gen.class_indices.get)

Y_pred = model.predict(val_gen)
Y_pred_classes = np.argmax(Y_pred, axis=1)
# NOTE: val_gen.classes is in the generator's unshuffled file order, so it
# lines up with Y_pred only when val_gen yields its batches unshuffled
# (i.e. it was created with shuffle=False).
Y_true = val_gen.classes

# Pin the label set so the matrix is always num_classes x num_classes and
# matches the tick labels, even if some class never occurs in Y_true or
# Y_pred_classes.
cm = confusion_matrix(
    Y_true,
    Y_pred_classes,
    labels=np.arange(len(class_names)),
)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()
