<a href="https://colab.research.google.com/github/SakshamSharma2006/Advance-Data-science/blob/main/LSTM(GTZAN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
## Libraries
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Define Keras layers
model = tf.keras.models.Sequential()
Dense = tf.keras.layers.Dense
Dropout = tf.keras.layers.Dropout
LSTM = tf.keras.layers.LSTM
Bidirectional = tf.keras.layers.Bidirectional

## Dataset
# The dataset is a CSV file containing audio features extracted from 3-second segments of music tracks across various genres.
# Each original song is split into approximately 10 segments, treated as sequences for LSTM input.
# Features include means and variances of chroma, RMS, spectral centroid, etc., up to MFCCs, totaling 57 features per segment.
df = pd.read_csv('features_3_sec.csv')

## Create train and test data
# Parse song ID from filename (e.g., 'blues.00000' from 'blues.00000.0.wav')
df['song_id'] = df['filename'].apply(lambda x: '.'.join(x.split('.')[:2]))

# Features columns (exclude filename, length, label, song_id)
feature_cols = [col for col in df.columns if col not in ['filename', 'length', 'label', 'song_id']]

# Scale features
scaler = StandardScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])

# Group by song_id, assuming ~10 segments per song
grouped = df.groupby('song_id')
sequences = []
labels = []

for song_id, group in grouped:
    # Sort by segment (last part of filename)
    group = group.sort_values(by='filename')
    seq = group[feature_cols].values  # Shape: (num_segments, num_features)
    label = group['label'].iloc[0]    # Same label for all segments
    sequences.append(seq)
    labels.append(label)

# Pad sequences to max length (assuming max 10 segments)
max_seq_len = max(len(seq) for seq in sequences)
padded_sequences = np.array([np.pad(seq, ((0, max_seq_len - len(seq)), (0, 0)), mode='constant') for seq in sequences])

# Encode labels
le = LabelEncoder()
encoded_labels = le.fit_transform(labels)

# Split into train/test
x_train, x_test, y_train, y_test = train_test_split(padded_sequences, encoded_labels, test_size=0.2, random_state=42)

# The features are scaled using StandardScaler for better model performance.
# Without scaling, the accuracy may decrease, and training could be less stable.
# The sequences are padded to ensure uniform input shape for LSTM.
# Input shape for LSTM will be (max_seq_len, num_features), e.g., (10, 57).

# Define the model
model.add(Bidirectional(LSTM(256, return_sequences=True, dropout=0.3), input_shape=(max_seq_len, len(feature_cols))))
model.add(Bidirectional(LSTM(256, dropout=0.3)))
model.add(Dropout(0.1))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(len(le.classes_), activation='softmax'))

# Define optimizer
optimizer = tf.keras.optimizers.AdamW(learning_rate=0.0005, weight_decay=1e-6)

# Compile model
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# The specification of loss='sparse_categorical_crossentropy' is crucial as our targets are integers, not one-hot encoded categories.
# Train the model
model.fit(x_train, y_train, epochs=50, batch_size=32, validation_data=(x_test, y_test))

# Evaluate the model
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

  super().__init__(**kwargs)


Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 231ms/step - accuracy: 0.2909 - loss: 2.0274 - val_accuracy: 0.4650 - val_loss: 1.5048
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 230ms/step - accuracy: 0.5246 - loss: 1.3453 - val_accuracy: 0.5750 - val_loss: 1.2323
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 200ms/step - accuracy: 0.6153 - loss: 1.0815 - val_accuracy: 0.6500 - val_loss: 1.0964
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 244ms/step - accuracy: 0.6179 - loss: 1.0305 - val_accuracy: 0.6400 - val_loss: 1.0808
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 235ms/step - accuracy: 0.6853 - loss: 0.9246 - val_accuracy: 0.6450 - val_loss: 1.0797
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 193ms/step - accuracy: 0.7007 - loss: 0.8672 - val_accuracy: 0.7000 - val_loss: 0.9693
Epoch 7/50
[1m25/25[0m 