<a href="https://colab.research.google.com/github/SakshamSharma2006/Advance-Data-science/blob/main/LSTM_%2B_RNN(GTZAN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## Libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy and TensorFlow so repeated runs start from the same random state
np.random.seed(42)
tf.random.set_seed(42)

## Dataset - read the feature table from CSV
data = pd.read_csv('features_3_sec.csv')
genre_column = data['label']
print(f"Dataset shape: {data.shape}")
print(f"Genres: {genre_column.unique()}")
# Note: only the size of the smallest class is reported here
print(f"Samples per genre: {genre_column.value_counts().min()}")

## Build the feature matrix and the label vector
# Every column except these three is used as a model input
non_feature_columns = ('filename', 'length', 'label')
feature_columns = [col for col in data.columns if col not in non_feature_columns]
X = data[feature_columns].values
y = data['label'].values

print(f"Feature matrix shape: {X.shape}")
print(f"Features used: {len(feature_columns)}")

# Map each genre label to an integer class id; the encoder is kept so the
# ids can be translated back to names when reporting results
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
n_classes = len(label_encoder.classes_)
print(f"Number of classes: {n_classes}")

# Reshape features for RNN/LSTM
# Each sample becomes a sequence of X.shape[1] steps with one value per step.
# This array is only used for the shape report; the model inputs are built below.
X_reshaped = X.reshape(X.shape[0], X.shape[1], 1)
print(f"Reshaped for RNN: {X_reshaped.shape}")

# Split dataset BEFORE scaling so the held-out rows never contribute to the
# scaler's mean/variance (fitting on the full matrix leaks test statistics
# into the training features). Same random_state/stratify as before, so the
# row assignment is unchanged.
X_train_2d, X_test_2d, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Normalize features: fit on the training rows only, then apply the same
# transformation to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_2d)[..., np.newaxis]
X_test = scaler.transform(X_test_2d)[..., np.newaxis]

# Full dataset scaled with the training statistics; kept so that code which
# still refers to X_scaled continues to work
X_scaled = scaler.transform(X)[..., np.newaxis]

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

## Model Architecture - one SimpleRNN stage followed by two LSTM stages and a dense head
keras_layers = tf.keras.layers

model = tf.keras.Sequential([
    # Recurrent stage 1: SimpleRNN emitting the full sequence for the next stage
    keras_layers.SimpleRNN(128, return_sequences=True,
                           input_shape=X_train.shape[1:]),
    keras_layers.BatchNormalization(),
    keras_layers.Dropout(0.2),

    # Recurrent stage 2: LSTM, still emitting the full sequence
    keras_layers.LSTM(128, return_sequences=True),
    keras_layers.BatchNormalization(),
    keras_layers.Dropout(0.3),

    # Recurrent stage 3: LSTM that returns only its final output
    keras_layers.LSTM(64, return_sequences=False),
    keras_layers.BatchNormalization(),
    keras_layers.Dropout(0.3),

    # Dense head
    keras_layers.Dense(128, activation='relu'),
    keras_layers.BatchNormalization(),
    keras_layers.Dropout(0.4),

    keras_layers.Dense(64, activation='relu'),
    keras_layers.Dropout(0.3),

    # One softmax output per class
    keras_layers.Dense(n_classes, activation='softmax'),
])

## Compile Model
# Targets are the integer class ids produced by the label encoder, hence the
# sparse variant of categorical cross-entropy
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
compile_options = {
    'loss': 'sparse_categorical_crossentropy',
    'optimizer': optimizer,
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

print("\nModel Summary:")
model.summary()

## Training callbacks
# Stop once validation accuracy has not improved by at least 0.001 for 15
# epochs, restoring the best weights seen
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy', patience=15, restore_best_weights=True, min_delta=0.001
)

# Multiply the learning rate by 0.7 after 8 epochs without validation-loss
# improvement, never going below 1e-6
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.7, patience=8, min_lr=1e-6, verbose=1
)

# Write the model to disk whenever validation accuracy reaches a new best
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_genre_model.keras', save_best_only=True, monitor='val_accuracy', verbose=1
)

callbacks = [early_stopping, reduce_lr, checkpoint]

## Train Model
# Hold out 10% of the training data for validation. The callbacks (early
# stopping, LR reduction, checkpointing) select on the validation metrics, so
# validating on the test set would make the final test score optimistically
# biased. The test set stays untouched until evaluation.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42, stratify=y_train
)

print("\n🎵 Training Music Genre Classifier...")
history = model.fit(
    X_fit, y_fit,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1
)

## Evaluate Model on the held-out test split
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\n🎯 Final Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"📉 Final Test Loss: {test_loss:.4f}")

## Predictions and Classification Report
# The model outputs one probability per class; the predicted class is the
# index of the largest one
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

genre_names = label_encoder.classes_
report = classification_report(y_test, y_pred_classes, target_names=genre_names)
print("\n📊 Classification Report:")
print(report)

## Visualizations: loss curves, accuracy curves and confusion matrix in one row
plt.figure(figsize=(15, 5))

# Training History - both curve panels share the same layout, so they are
# described as data and drawn by one loop
# (panel index, train key, validation key, train label, validation label, title, y-axis label)
curve_panels = (
    (1, 'loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
    (2, 'accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Model Accuracy', 'Accuracy'),
)
for panel_index, train_key, val_key, train_label, val_label, panel_title, y_label in curve_panels:
    plt.subplot(1, 3, panel_index)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

# Confusion Matrix - rows are true classes, columns are predicted classes
plt.subplot(1, 3, 3)
cm = confusion_matrix(y_test, y_pred_classes)
genre_names = label_encoder.classes_
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=genre_names,
    yticklabels=genre_names,
    cmap='Blues',
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.xticks(rotation=45)
plt.yticks(rotation=0)

plt.tight_layout()
plt.show()

## Save Model
# Persist the in-memory model; this is a separate file from the
# 'best_genre_model.keras' checkpoint written during training
model.save(filepath='music_genre_rnn_lstm.keras')
print("\n✅ Model saved as 'music_genre_rnn_lstm.keras'")
