# Method 1: Kaggle LSTM Implementation

**Reference**: [Kaggle Notebook](https://www.kaggle.com/code/gtessier/lstm-implementation-grad)

**Architecture**:
- Input: 13 MFCC features per segment (the means of MFCC 1-13, i.e. columns `mfcc1_mean` ... `mfcc13_mean`)
- Reshaping: `(N, 13, 1)` - Treating the 13 coefficients as time steps.
- Model:
    - LSTM(64, return_sequences=True)
    - LSTM(64)
    - Dense(64, activation='relu')
    - Dropout(0.3)
    - Dense(10, activation='softmax')

**Improvement**:
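- We use **song-ID-based splitting** instead of random splitting: all segments of a song are assigned to the same split (train, validation, or test), which strictly prevents data leakage between splits.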

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Activation
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Use one fixed seed for both NumPy's and TensorFlow's global random generators.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Show which TensorFlow release this notebook is running against.
print(f'TensorFlow version: {tf.__version__}')

## 1. Load Data & Select MFCC Features

In [None]:
# Load the feature table (presumably one row per 3-second segment, judging by
# the file name - confirm against the dataset).
df = pd.read_csv('../data/gtzan/features_3_sec.csv')

# Song ID = the first two dot-separated parts of the filename. Rows sharing
# this ID are treated as one group when the data is split.
df['song_id'] = ['.'.join(name.split('.')[:2]) for name in df['filename']]

# Keep only the mean columns of MFCC 1..13 to match the reference notebook.
mfcc_cols = [f'mfcc{idx}_mean' for idx in range(1, 14)]
print(f"Selected Features: {mfcc_cols}")

# Feature matrix, string labels, and the grouping key used for the splits.
X, y, groups = df[mfcc_cols], df['label'], df['song_id']

# Map labels to integer codes, then one-hot encode them for the softmax output.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)
num_classes = len(label_encoder.classes_)
y_cat = to_categorical(y_encoded, num_classes=num_classes)

## 2. Train/Test Split (Grouped by Song)

In [None]:
# Two-stage grouped split. Stage 1 holds out 30% of the songs; stage 2 halves
# that hold-out into validation and test. The fractions apply to groups
# (songs), so the row counts printed below are only approximately 70/15/15.
splitter_outer = GroupShuffleSplit(n_splits=1, test_size=0.30, random_state=42)
train_idx, temp_idx = next(splitter_outer.split(X, y, groups=groups))

# Training portion.
X_train, y_train = X.iloc[train_idx], y_cat[train_idx]

# Hold-out portion; its song IDs are kept so the second split is grouped too.
X_temp, y_temp = X.iloc[temp_idx], y_cat[temp_idx]
groups_temp = groups.iloc[temp_idx]

splitter_inner = GroupShuffleSplit(n_splits=1, test_size=0.50, random_state=42)
val_idx, test_idx = next(splitter_inner.split(X_temp, y_temp, groups=groups_temp))

# Validation portion.
X_val, y_val = X_temp.iloc[val_idx], y_temp[val_idx]

# Test portion; the song IDs are retained for song-level scoring later on.
X_test, y_test = X_temp.iloc[test_idx], y_temp[test_idx]
groups_test = groups_temp.iloc[test_idx]

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

## 3. Preprocessing & Reshaping
Standardize the features (the scaler is fit on the training split only), then reshape each split to `(N, 13, 1)`.

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# transform to the training, validation and test splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# LSTM layers expect (samples, timesteps, features). The 13 coefficients are
# used as 13 timesteps carrying a single feature each.
X_train_lstm = X_train_scaled.reshape(len(X_train_scaled), 13, 1)
X_val_lstm = X_val_scaled.reshape(len(X_val_scaled), 13, 1)
X_test_lstm = X_test_scaled.reshape(len(X_test_scaled), 13, 1)

print(f"LSTM Input Shape: {X_train_lstm.shape}")

## 4. Build Model (Kaggle Architecture)

In [None]:
def build_kaggle_lstm_model(input_shape, num_classes, learning_rate=0.0001, dropout_rate=0.3):
    """Build and compile the stacked-LSTM classifier.

    Layer stack: LSTM(64, return_sequences=True) -> LSTM(64) ->
    Dense(64, relu) -> Dropout -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape of a single sample, ``(timesteps, features)``.
        num_classes: Number of output classes (width of the softmax layer).
        learning_rate: Learning rate passed to the Adam optimizer.
        dropout_rate: Rate passed to the Dropout layer before the output layer.

    Returns:
        A ``Sequential`` model compiled with Adam, categorical cross-entropy
        loss and an accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to
        # the first layer; Keras 3 warns against the layer-argument form.
        tf.keras.Input(shape=input_shape),
        # First LSTM returns the full sequence so the second LSTM can consume it.
        LSTM(64, return_sequences=True),
        # Second LSTM returns only its final output.
        LSTM(64),
        Dense(64, activation='relu'),
        Dropout(dropout_rate),
        # One probability per class.
        Dense(num_classes, activation='softmax'),
    ])

    # categorical_crossentropy expects one-hot encoded targets.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

# Build the network for 13-step, single-feature sequences and print its layer table.
model = build_kaggle_lstm_model(input_shape=(13, 1), num_classes=num_classes)
model.summary()

In [None]:
# Stop after 10 epochs without a better validation loss and restore the best
# weights seen during training.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate after 5 epochs without a better validation loss,
# but never reduce it below 1e-6.
lr_on_plateau = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
)

callbacks = [early_stopping, lr_on_plateau]

# Train for at most 50 epochs, monitoring the validation split each epoch.
history = model.fit(
    x=X_train_lstm,
    y=y_train,
    batch_size=32,
    epochs=50,
    verbose=1,
    callbacks=callbacks,
    validation_data=(X_val_lstm, y_val),
)

## 5. Evaluation

In [None]:
# Segment-level evaluation: every test row counts as one prediction.
test_loss, test_acc = model.evaluate(X_test_lstm, y_test, verbose=0)
print(f"Segment Test Accuracy: {test_acc*100:.2f}%")

# Plot training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'], label='Train')
plt.plot(history.history['val_accuracy'], label='Val')
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Song-level evaluation: collapse the per-segment predictions of each song
# into a single label by majority vote.
y_pred_prob = model.predict(X_test_lstm)
y_pred_classes = np.argmax(y_pred_prob, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

results_df = pd.DataFrame({
    'song_id': groups_test.values,
    'true_label': y_true_classes,
    'pred_label': y_pred_classes
})

# mode() may return several values on a tie; [0] takes the first one.
song_results = results_df.groupby('song_id').agg(lambda x: x.mode()[0])
song_acc = accuracy_score(song_results['true_label'], song_results['pred_label'])

print(f"\nSong-Level Accuracy: {song_acc*100:.2f}%")
# Pass the full list of class indices explicitly. Without `labels`, the report
# only covers classes present in the true/predicted song labels, and a missing
# class would make `target_names` (one name per class) mismatch.
print(classification_report(
    song_results['true_label'],
    song_results['pred_label'],
    labels=np.arange(num_classes),
    target_names=label_encoder.classes_,
))