# Method 1: LSTM with Pre-extracted CSV Features

**Key Improvements:**
- Uses pre-extracted features from CSV files (no need to extract MFCC)
- Trains on 3-second segments (features_3_sec.csv)
- Tests on 30-second aggregated features (features_30_sec.csv)
- Faster training and more features available
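- Bidirectional LSTM that reads each feature vector as a sequence (one feature per step) in both directions; note that the sequence axis is the feature index, not time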

## 1. Imports

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Seed both random number generators used in this notebook so that repeated
# runs draw the same numbers.
tf.random.set_seed(42)
np.random.seed(42)

# Figure appearance. The palette is set after the style sheet on purpose:
# both touch the colour cycle, and the later call wins.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Report the runtime: TensorFlow version and whether any GPU device is visible.
gpu_devices = tf.config.list_physical_devices("GPU")
print(f'TensorFlow version: {tf.__version__}')
print(f'GPU Available: {len(gpu_devices) > 0}')

## 2. Load CSV Features

In [None]:
# Load datasets
# Both paths are relative, so they resolve against the working directory of
# the process running this cell.
df_3sec = pd.read_csv('../data/gtzan/features_3_sec.csv')
df_30sec = pd.read_csv('../data/gtzan/features_30_sec.csv')

# Sanity check of what was loaded: table sizes, the first ten column names of
# the 3-second table, and the distinct values found in its 'label' column.
print(f"3-second features shape: {df_3sec.shape}")
print(f"30-second features shape: {df_30sec.shape}")
print(f"\nColumn names (first 10): {list(df_3sec.columns[:10])}")
print(f"\nGenres in dataset: {df_3sec['label'].unique()}")

## 3. Data Preprocessing

In [None]:
# Drop columns that are not model inputs. errors='ignore' skips any listed
# name that a table does not contain, so both tables are handled the same way.
columns_to_drop = ['filename', 'length']
df_3sec_clean = df_3sec.drop(columns=columns_to_drop, errors='ignore')
df_30sec_clean = df_30sec.drop(columns=columns_to_drop, errors='ignore')

# The training table defines which columns are features and in what order.
feature_columns = [col for col in df_3sec_clean.columns if col != 'label']

# Separate features and labels for 3-second data (training)
X_train_val = df_3sec_clean[feature_columns].values
y_train_val = df_3sec_clean['label'].values

# Separate features and labels for 30-second data (testing).
# Columns are selected by name rather than by position, so the test matrix
# always has the same features in the same order as the training matrix even
# if the two CSVs order their columns differently. A feature missing from the
# 30-second table raises KeyError here instead of silently misaligning inputs.
X_test = df_30sec_clean[feature_columns].values
y_test = df_30sec_clean['label'].values

print(f"Training features shape: {X_train_val.shape}")
print(f"Training labels shape: {y_train_val.shape}")
print(f"Test features shape: {X_test.shape}")
print(f"Test labels shape: {y_test.shape}")

## 4. Encode Labels

In [None]:
# Learn the genre-name -> integer mapping from the training labels and reuse
# that same mapping for the test labels.
label_encoder = LabelEncoder()
y_train_val_encoded = label_encoder.fit_transform(y_train_val)
y_test_encoded = label_encoder.transform(y_test)

# classes_ holds the genre names, positioned by their encoded integer.
genre_names = label_encoder.classes_
num_classes = len(genre_names)

print(f"Number of classes: {num_classes}")
print(f"Genre names: {genre_names}")

# One-hot encode both label arrays with the same number of classes.
y_train_val_cat, y_test_cat = (
    to_categorical(encoded, num_classes)
    for encoded in (y_train_val_encoded, y_test_encoded)
)

print(f"\nOne-hot encoded shape: {y_train_val_cat.shape}")

## 5. Train/Validation Split

In [None]:
# Hold out 20% of the 3-second rows for validation. Stratifying on the integer
# labels keeps genre proportions the same in both parts, and the fixed
# random_state makes the split repeatable.
# NOTE(review): rows are split individually. If several 3-second rows are cut
# from the same track, segments of one track can land in both parts and the
# validation score would be optimistic - confirm, and consider a split grouped
# by track.
_split_options = {
    'stratify': y_train_val_encoded,
    'test_size': 0.2,
    'random_state': 42,
}
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val_cat, **_split_options
)

print(f"Train set: {X_train.shape}")
print(f"Validation set: {X_val.shape}")
print(f"Test set: {X_test.shape}")

## 6. Feature Scaling

In [None]:
# Fit the scaler on the training rows only, then apply those same statistics
# to every split so validation/test data never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# The LSTM expects (samples, timesteps, features). A trailing axis of size 1
# is appended, so every feature column becomes one step of the sequence and
# each step carries a single value.
X_train_lstm, X_val_lstm, X_test_lstm = (
    np.expand_dims(scaled, axis=-1)
    for scaled in (X_train_scaled, X_val_scaled, X_test_scaled)
)

print(f"\nReshaped for LSTM:")
print(f"Train shape: {X_train_lstm.shape}")
print(f"Val shape: {X_val_lstm.shape}")
print(f"Test shape: {X_test_lstm.shape}")

## 7. Build Bidirectional LSTM Model

In [None]:
def build_lstm_model(input_shape, num_classes, learning_rate=0.001):
    """Build and compile the Bidirectional LSTM classifier.

    Architecture, in order:
    - two Bidirectional LSTM layers (128 and 64 units) that return full
      sequences, each followed by batch normalization and dropout
    - one plain LSTM layer (32 units) that returns only its final output,
      followed by batch normalization and dropout
    - two ReLU dense layers (64 and 32 units), each followed by dropout
    - a softmax output layer with one unit per class

    Args:
        input_shape: Shape of a single sample without the batch axis, as
            ``(timesteps, features)``.
        num_classes: Number of units in the softmax output layer.
        learning_rate: Initial learning rate for the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model, named ``'bidirectional_lstm'``,
        using categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to
        # the first layer, which newer Keras versions warn against.
        tf.keras.Input(shape=input_shape),

        # First Bidirectional LSTM layer
        Bidirectional(LSTM(128, return_sequences=True)),
        BatchNormalization(),
        Dropout(0.3),

        # Second Bidirectional LSTM layer
        Bidirectional(LSTM(64, return_sequences=True)),
        BatchNormalization(),
        Dropout(0.3),

        # Third LSTM layer (not bidirectional to reduce parameters); it
        # collapses the sequence to a single vector for the dense head.
        LSTM(32),
        BatchNormalization(),
        Dropout(0.4),

        # Dense layers
        Dense(64, activation='relu'),
        Dropout(0.4),
        Dense(32, activation='relu'),
        Dropout(0.3),

        # Output layer
        Dense(num_classes, activation='softmax')
    ], name='bidirectional_lstm')

    # categorical_crossentropy expects one-hot encoded targets.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# A sample's shape is everything after the batch axis: (timesteps, features).
sample_shape = X_train_lstm.shape[1:]
model = build_lstm_model(sample_shape, num_classes)

# Print the layer-by-layer summary of the model.
model.summary()

## 8. Training

In [None]:
# Stop once the validation loss has not improved for 20 epochs and roll the
# weights back to the best epoch seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    restore_best_weights=True,
    patience=20,
    verbose=1,
)

# Halve the learning rate after 5 epochs without validation-loss improvement,
# never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=5,
    factor=0.5,
    min_lr=1e-6,
    verbose=1,
)

# Fit on the training split and score each epoch on the validation split.
fit_options = {
    'validation_data': (X_val_lstm, y_val),
    'epochs': 100,
    'batch_size': 64,
    'callbacks': [early_stop, reduce_lr],
    'verbose': 1,
}
history = model.fit(X_train_lstm, y_train, **fit_options)

## 9. Training History

In [None]:
# Two side-by-side panels: loss on the left, accuracy on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One row per panel:
# (train key, val key, train legend, val legend, y-axis label, panel title)
panels = [
    ('loss', 'val_loss', 'Train Loss', 'Val Loss',
     'Loss', 'Training & Validation Loss'),
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Val Accuracy',
     'Accuracy', 'Training & Validation Accuracy'),
]

for ax, panel in zip(axes, panels):
    train_key, val_key, train_legend, val_legend, y_label, panel_title = panel
    ax.plot(history.history[train_key], label=train_legend, linewidth=2)
    ax.plot(history.history[val_key], label=val_legend, linewidth=2)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(panel_title, fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Write the figure to disk before showing it.
plt.tight_layout()
plt.savefig('lstm_csv_training_history.png', dpi=300, bbox_inches='tight')
plt.show()

## 10. Evaluation on Test Set

In [None]:
# Score the model on the 30-second feature rows. The model was compiled with
# a single metric, so evaluate() yields the loss followed by the accuracy.
# NOTE(review): if the 30-second rows describe the same tracks the 3-second
# training rows were cut from, this is not a held-out score - confirm.
test_loss, test_acc = model.evaluate(X_test_lstm, y_test_cat, verbose=0)

print(f"\n{'='*60}")
print(f"Test Accuracy (30-second features): {test_acc*100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")
print(f"{'='*60}\n")

# Turn class probabilities and one-hot targets back into integer class ids.
y_pred = model.predict(X_test_lstm, verbose=0)
y_pred_labels = y_pred.argmax(axis=1)
y_true_labels = y_test_cat.argmax(axis=1)

# Per-class precision / recall / F1, labelled with the genre names.
report = classification_report(
    y_true_labels, y_pred_labels, target_names=genre_names
)
print("\nClassification Report:")
print(report)

## 11. Confusion Matrix

In [None]:
# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true_labels, y_pred_labels)

plt.figure(figsize=(12, 10))
# heatmap() draws on the current axes and returns them; the remaining
# decoration is applied through that axes object.
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=genre_names,
    yticklabels=genre_names,
    cbar_kws={'label': 'Count'},
)
heatmap_ax.set_xlabel('Predicted Genre', fontsize=12, fontweight='bold')
heatmap_ax.set_ylabel('True Genre', fontsize=12, fontweight='bold')
heatmap_ax.set_title('Bidirectional LSTM - Confusion Matrix', fontsize=14, fontweight='bold')

# Slant the genre names on the x-axis so they do not overlap; keep the
# y-axis names horizontal.
plt.setp(heatmap_ax.get_xticklabels(), rotation=45, ha='right')
plt.setp(heatmap_ax.get_yticklabels(), rotation=0)

plt.tight_layout()
plt.savefig('lstm_csv_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

## 12. Per-Genre Accuracy

In [None]:
# For each class id, keep only the test rows whose true label is that class
# and measure the share predicted correctly, as a percentage.
class_masks = (
    y_true_labels == class_idx for class_idx in range(len(genre_names))
)
per_genre_acc = [
    accuracy_score(y_true_labels[mask], y_pred_labels[mask]) * 100
    for mask in class_masks
]

# Bar chart of the per-genre percentages with the overall test accuracy drawn
# as a dashed reference line.
plt.figure(figsize=(12, 6))
acc_ax = plt.gca()
bars = acc_ax.bar(genre_names, per_genre_acc, color='steelblue', edgecolor='black')
acc_ax.axhline(
    y=test_acc*100,
    color='red',
    linestyle='--',
    label=f'Overall Accuracy: {test_acc*100:.2f}%',
    linewidth=2,
)
acc_ax.set_xlabel('Genre', fontsize=12, fontweight='bold')
acc_ax.set_ylabel('Accuracy (%)', fontsize=12, fontweight='bold')
acc_ax.set_title('Per-Genre Classification Accuracy', fontsize=14, fontweight='bold')
plt.setp(acc_ax.get_xticklabels(), rotation=45, ha='right')
acc_ax.set_ylim([0, 100])
acc_ax.grid(True, alpha=0.3, axis='y')
acc_ax.legend()

# Write each percentage just above the top of its bar, centred horizontally.
for rect, pct in zip(bars, per_genre_acc):
    bar_top = rect.get_height()
    bar_centre = rect.get_x() + rect.get_width()/2.
    acc_ax.text(bar_centre, bar_top,
                f'{pct:.1f}%', ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.savefig('lstm_csv_per_genre_accuracy.png', dpi=300, bbox_inches='tight')
plt.show()

## 13. Save Model

In [None]:
# Persist the trained network in the native Keras format.
model.save('lstm_csv_features.keras')
print("✓ Model saved: lstm_csv_features.keras")

# Persist the per-epoch metrics. history.history is a dict, so NumPy stores it
# as a pickled object array; reading it back needs
# np.load(..., allow_pickle=True).
np.save('lstm_csv_history.npy', history.history)
print("✓ Training history saved: lstm_csv_history.npy")

# Persist the fitted preprocessing objects so new data can be transformed and
# predictions decoded exactly as during training.
import joblib
for artifact, artifact_path in (
    (scaler, 'lstm_scaler.pkl'),
    (label_encoder, 'lstm_label_encoder.pkl'),
):
    joblib.dump(artifact, artifact_path)
print("✓ Scaler and label encoder saved")

## Summary

This improved LSTM model uses pre-extracted CSV features:

**Data Strategy:**
- Training: 3-second segment features (more data samples)
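- Testing: 30-second aggregated features (full-track evaluation). Caveat: if these rows describe the same tracks that the 3-second segments were cut from, the test score is not a held-out estimate and will be optimistic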
- No need to extract audio features (much faster)

**Model Architecture:**
- Bidirectional LSTM layers (128 → 64 units)
- Regular LSTM layer (32 units)
- Batch normalization for stable training
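- Dropout after every block for regularization (rates 0.3, 0.3, 0.4, 0.4, 0.3 from input to output)
- Dense classification head (64 → 32 → one softmax unit per genre; 10 when the dataset has ten genres)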

**Training Strategy:**
- Adam optimizer with LR=0.001
- ReduceLROnPlateau callback
- Early stopping with patience=20
- Batch size=64 for faster training

**Expected Performance:**
- Target accuracy: 75-85% (depending on feature quality)
- Much faster than extracting features from audio
- Better generalization with pre-extracted features is the goal, but it should be confirmed on tracks that contributed no segments to training