# Loading Data


In [12]:
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import zipfile


In [None]:
# Split raw Beethoven MIDIs into train/dev/test (70/15/15).
# No Beethoven files were in the data set from Module 2, so they are copied in here.
# NOTE: BASE_DIR is rebound to '../data/processed_data' by the later
# configuration cell; in this cell it means the top-level data folder.
# ——— Configuration ———
BASE_DIR = Path('../data')
RAW_BEET = BASE_DIR / 'raw_Beethoven'
SPLITS   = ['train', 'dev', 'test']
LABEL    = 'beethoven'

# 1) Gather all .mid files under data/raw_Beethoven.
#    rglob() yields paths in filesystem order, which can differ between machines;
#    sorting first makes the seeded split below reproducible everywhere.
all_beet = sorted(RAW_BEET.rglob('*.mid'))
print(f"Found {len(all_beet)} raw Beethoven files in {RAW_BEET}")
if not all_beet:
    # Fail with an actionable message instead of an opaque sklearn error.
    raise FileNotFoundError(f"No .mid files found under {RAW_BEET}")

# Files are copied into one flat folder per split, so two raw files that share
# a name in different subfolders would overwrite each other. Count the names so
# that only the colliding ones get a disambiguated target name.
name_counts = {}
for f in all_beet:
    name_counts[f.name] = name_counts.get(f.name, 0) + 1

# 2) Split: 70% train, 15% dev, 15% test
train_files, temp_files = train_test_split(all_beet, test_size=0.30, random_state=42)
dev_files,   test_files = train_test_split(temp_files, test_size=0.50, random_state=42)

# 3) Create the target dirs and copy files
for split, files in zip(SPLITS, [train_files, dev_files, test_files]):
    target_dir = BASE_DIR / split / LABEL
    target_dir.mkdir(parents=True, exist_ok=True)
    for f in files:
        if name_counts[f.name] > 1:
            # e.g. raw_Beethoven/sonatas/op2.mid -> sonatas__op2.mid
            dest_name = '__'.join(f.relative_to(RAW_BEET).parts)
        else:
            dest_name = f.name
        shutil.copy(f, target_dir / dest_name)
    print(f"  • {split}: copied {len(files)} files to {target_dir}")

Found 215 raw Beethoven files in ../data/raw_Beethoven
  • train: copied 150 files to ../data/train/beethoven
  • dev: copied 32 files to ../data/dev/beethoven
  • test: copied 33 files to ../data/test/beethoven


In [23]:
# Paths
BASE_DIR   = Path('../data/processed_data')
OUTPUT_DIR = BASE_DIR                   # model checkpoints are written next to the processed data

# Hyperparameters
batch_size    = 32
learning_rate = 1e-3
num_epochs    = 20

# Reproducibility: seed the RNGs before any model is initialised or any
# DataLoader shuffles, so a Restart & Run All repeats the same run.
# (Some accelerator kernels may still be non-deterministic.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    pickle can execute arbitrary code while loading, so only use this on
    files produced by this project's own preprocessing step.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Load data: each split pickle holds an (X, y) pair.
X_train, y_train = _load_pickle(BASE_DIR / 'lstm_train.pkl')
X_dev,   y_dev   = _load_pickle(BASE_DIR / 'lstm_dev.pkl')
X_test,  y_test  = _load_pickle(BASE_DIR / 'lstm_test.pkl')

le = _load_pickle(BASE_DIR / 'label_encoder.pkl')
num_classes = len(le.classes_)
input_size  = X_train.shape[2]          # last axis of X = features per time step

# All splits must share the feature width the model is built for.
assert X_dev.shape[2] == input_size and X_test.shape[2] == input_size, \
    "train/dev/test feature dimensions differ"

# Device preference: Apple MPS, then CUDA, then CPU. The getattr guard keeps
# this working on PyTorch builds that do not ship the MPS backend module.
_mps_backend = getattr(torch.backends, 'mps', None)
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f"Device: {device}")
print(f"Train: {X_train.shape}, Dev: {X_dev.shape}, Test: {X_test.shape}")
print(f"Classes: {le.classes_}")

Device: mps
Train: (480044, 50, 3), Dev: (41034, 50, 3), Test: (39295, 50, 3)
Classes: ['bach' 'beethoven' 'chopin' 'mozart']


In [24]:
# Materialise the train/dev arrays as tensors on the selected device:
# float32 for the features, int64 (long) for the class indices.
def _on_device(values, dtype):
    """Copy ``values`` into a tensor of ``dtype`` and move it to ``device``."""
    return torch.tensor(values, dtype=dtype).to(device)


X_tr    = _on_device(X_train, torch.float32)
y_tr    = _on_device(y_train, torch.long)
X_dev_t = _on_device(X_dev,   torch.float32)
y_dev_t = _on_device(y_dev,   torch.long)

# Pair features with labels.
train_ds, dev_ds = TensorDataset(X_tr, y_tr), TensorDataset(X_dev_t, y_dev_t)

# Only the training loader shuffles; dev is iterated in a fixed order.
train_loader = DataLoader(dataset=train_ds, shuffle=True,  batch_size=batch_size)
dev_loader   = DataLoader(dataset=dev_ds,   shuffle=False, batch_size=batch_size)

# Sanity check: pull a single training batch and show its shapes.
batch = next(iter(train_loader))
first_X, first_y = batch
print("One training batch X shape:", first_X.shape)
print("One training batch y shape:", first_y.shape)

One training batch X shape: torch.Size([32, 50, 3])
One training batch y shape: torch.Size([32])


In [25]:
class MusicLSTM(nn.Module):
    """Sequence classifier: a stacked LSTM followed by a linear layer applied
    to the LSTM output at the final time step.

    Args:
        input_size: number of features per time step.
        hidden_size: size of each LSTM layer's hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output logits.
        dropout: dropout probability applied between stacked LSTM layers.
            Defaults to 0.3, the value that used to be hard-coded. It is
            forced to 0 when ``num_layers == 1``, because there is no
            between-layer connection to drop and PyTorch warns otherwise.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout=0.3):
        super().__init__()
        # Submodule names (`lstm`, `fc`) are kept so existing state_dict
        # checkpoints continue to load.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: tensor of shape (batch, seq_len, input_size).
        """
        lstm_out, _ = self.lstm(x)
        # Take the output from the last time step
        last_output = lstm_out[:, -1, :]     # shape: (batch, hidden_size)
        return self.fc(last_output)          # shape: (batch, num_classes)

# Model configuration: two stacked LSTM layers, each with a 128-unit hidden state.
num_layers  = 2
hidden_size = 128

# Build the classifier from the data-derived sizes and move it to the chosen device.
model = MusicLSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)
print(model)

MusicLSTM(
  (lstm): LSTM(3, 128, num_layers=2, batch_first=True, dropout=0.3)
  (fc): Linear(in_features=128, out_features=4, bias=True)
)


In [26]:
# Best dev accuracy seen so far; the training loop raises it each time it
# saves a new checkpoint. Re-running this cell resets it together with the optimizer.
best_dev_acc = 0.0

# Cross-entropy over the model's logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

print("Criterion:", criterion)
print("Optimizer:", optimizer)



Criterion: CrossEntropyLoss()
Optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)


In [27]:
# Train for `num_epochs` epochs; after each epoch evaluate on the dev set and
# checkpoint the weights whenever dev accuracy improves on `best_dev_acc`.
#
# `criterion` returns the mean loss of a batch. Averaging those batch means
# directly would over-weight the (smaller) final batch, so each batch loss is
# weighted by its sample count to obtain the true per-sample epoch loss.
for epoch in range(1, num_epochs + 1):
    # ——— Training ———
    model.train()
    train_losses = []                    # per-batch mean losses (kept for inspection)
    train_loss_total, train_seen = 0.0, 0
    for Xb, yb in train_loader:
        optimizer.zero_grad()            # reset gradients
        logits = model(Xb)               # forward pass
        loss   = criterion(logits, yb)   # compute loss
        loss.backward()                  # backpropagate
        optimizer.step()                 # update weights

        batch_loss = loss.item()
        batch_n    = yb.size(0)
        train_losses.append(batch_loss)
        train_loss_total += batch_loss * batch_n
        train_seen       += batch_n

    avg_train_loss = train_loss_total / train_seen

    # ——— Validation ———
    model.eval()                         # disables dropout for evaluation
    dev_preds, dev_true, dev_losses = [], [], []
    dev_loss_total, dev_seen = 0.0, 0
    with torch.no_grad():
        for Xb, yb in dev_loader:
            logits = model(Xb)
            loss   = criterion(logits, yb)

            batch_loss = loss.item()
            batch_n    = yb.size(0)
            dev_losses.append(batch_loss)
            dev_loss_total += batch_loss * batch_n
            dev_seen       += batch_n

            preds = logits.argmax(dim=1).cpu().numpy()
            dev_preds.extend(preds)
            dev_true.extend(yb.cpu().numpy())

    avg_dev_loss = dev_loss_total / dev_seen
    dev_acc      = accuracy_score(dev_true, dev_preds)

    # Epoch results
    print(f"Epoch {epoch:2d}/{num_epochs}  "
          f"Train Loss: {avg_train_loss:.4f}  "
          f"Dev Loss: {avg_dev_loss:.4f}  "
          f"Dev Acc: {dev_acc:.4f}")

    # Save best model by dev accuracy (strict improvement only, so ties keep
    # the earlier checkpoint).
    if dev_acc > best_dev_acc:
        best_dev_acc = dev_acc
        torch.save(model.state_dict(), OUTPUT_DIR / 'best_model.pth')
        print("  🔖 New best model saved")

Epoch  1/20  Train Loss: 0.1066  Dev Loss: 0.0332  Dev Acc: 0.9871
  🔖 New best model saved
Epoch  2/20  Train Loss: 0.0486  Dev Loss: 0.0535  Dev Acc: 0.9806
Epoch  3/20  Train Loss: 0.0327  Dev Loss: 0.0278  Dev Acc: 0.9898
  🔖 New best model saved
Epoch  4/20  Train Loss: 0.0265  Dev Loss: 0.0498  Dev Acc: 0.9841
Epoch  5/20  Train Loss: 0.0232  Dev Loss: 0.0337  Dev Acc: 0.9892
Epoch  6/20  Train Loss: 0.0197  Dev Loss: 0.0241  Dev Acc: 0.9914
  🔖 New best model saved
Epoch  7/20  Train Loss: 0.0186  Dev Loss: 0.1263  Dev Acc: 0.9631
Epoch  8/20  Train Loss: 0.0185  Dev Loss: 0.0516  Dev Acc: 0.9885
Epoch  9/20  Train Loss: 0.0186  Dev Loss: 0.0547  Dev Acc: 0.9835
Epoch 10/20  Train Loss: 0.0171  Dev Loss: 0.0437  Dev Acc: 0.9864
Epoch 11/20  Train Loss: 0.0168  Dev Loss: 0.0259  Dev Acc: 0.9920
  🔖 New best model saved
Epoch 12/20  Train Loss: 0.0155  Dev Loss: 0.0521  Dev Acc: 0.9836
Epoch 13/20  Train Loss: 0.0172  Dev Loss: 0.0230  Dev Acc: 0.9920
Epoch 14/20  Train Loss: 0.01

In [28]:
# Load the best model checkpoint.
# map_location remaps the stored tensors onto this session's `device`, so a
# checkpoint written on one accelerator (e.g. MPS) can be restored on another
# machine (e.g. CPU-only).
best_model = MusicLSTM(input_size, hidden_size, num_layers, num_classes).to(device)
best_model.load_state_dict(
    torch.load(OUTPUT_DIR / 'best_model.pth', map_location=device)
)
best_model.eval()

# Prepare test DataLoader (no shuffling: order does not matter for metrics)
X_te = torch.tensor(X_test, dtype=torch.float32).to(device)
y_te = torch.tensor(y_test, dtype=torch.long).to(device)
test_loader = DataLoader(TensorDataset(X_te, y_te), batch_size=batch_size)

# Collect predictions & true labels
test_preds, test_true = [], []
with torch.no_grad():
    for Xb, yb in test_loader:
        logits = best_model(Xb)
        preds  = logits.argmax(dim=1).cpu().numpy()
        test_preds.extend(preds)
        test_true.extend(yb.cpu().numpy())

# accuracy
test_acc = accuracy_score(test_true, test_preds)
print(f"\nTest Accuracy: {test_acc:.4f}\n")

labels = list(range(num_classes))    # [0, 1, 2, 3]
names  = le.classes_                 # ['bach', 'beethoven', 'chopin', 'mozart']

# Every class is forced into the report via `labels=`. A class with no test
# samples then shows up as an all-zero row and lowers the macro average, which
# is easy to misread as a model failure — so call it out explicitly.
present_labels  = {int(t) for t in test_true}
missing_classes = [str(names[i]) for i in labels if i not in present_labels]
if missing_classes:
    print(f"WARNING: the test set has no samples for class(es) {missing_classes}; "
          "their report rows are all zeros and the macro average is lowered accordingly.\n")

print("Classification Report:\n",
      classification_report(
          test_true,
          test_preds,
          labels=labels,
          target_names=names,
          zero_division=0
      ))

print("\nConfusion Matrix:\n",
      confusion_matrix(
          test_true,
          test_preds,
          labels=labels
      ))


Test Accuracy: 0.9694

Classification Report:
               precision    recall  f1-score   support

        bach       0.98      1.00      0.99     11313
   beethoven       0.00      0.00      0.00         0
      chopin       0.99      0.88      0.93      9581
      mozart       0.95      1.00      0.97     18401

    accuracy                           0.97     39295
   macro avg       0.73      0.72      0.72     39295
weighted avg       0.97      0.97      0.97     39295


Confusion Matrix:
 [[11312     0     0     1]
 [    0     0     0     0]
 [  225     0  8445   911]
 [    0     0    66 18335]]
