In [None]:
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix



In [4]:

# --- Notebook configuration -------------------------------------------------

# Directory holding the pickled splits read below and the .npy files written later.
BASE_DIR = Path('../data/processed_data')

# Number of time steps per window; must agree with the LSTM window size.
WINDOW_LEN = 50

# Inclusive pitch bounds covered by the piano-roll
# (presumably MIDI note numbers - confirm against the preprocessing step).
PITCH_START, PITCH_END = 21, 108

# Row count of the piano-roll; both bounds are inclusive, hence the +1 (88 rows).
NUM_PITCHES = (PITCH_END + 1) - PITCH_START

### Define Piano-Roll Conversion

In [5]:
def seq_to_pianoroll(seq,
                     pitch_start=PITCH_START,
                     num_pitches=NUM_PITCHES):
    """
    Convert one note window into a binary piano-roll.

    Parameters
    ----------
    seq : array-like of shape (T, F)
        One row per time step; column 0 holds the pitch. Pitch values are
        truncated toward zero when converted to integers.
    pitch_start : int
        Pitch value that maps to row 0 of the piano-roll.
    num_pitches : int
        Number of pitch rows in the piano-roll.

    Returns
    -------
    np.ndarray, float32, shape (1, num_pitches, T)
        ``out[0, p - pitch_start, t] == 1.0`` when step ``t`` has pitch ``p``
        and ``p`` lies inside the pitch range; every other cell is 0.

    Notes
    -----
    Steps whose pitch lies outside ``[pitch_start, pitch_start + num_pitches)``
    are left silent. Without this check a pitch below ``pitch_start`` yields a
    negative row index, which NumPy wraps around to the top of the roll (so a
    pitch of 0 would light an unrelated high row), and a pitch above the range
    raises ``IndexError``.
    """
    seq = np.asarray(seq)
    num_steps = seq.shape[0]
    pr = np.zeros((num_pitches, num_steps), dtype=np.float32)
    if num_steps == 0:
        # Nothing to mark; also avoids column-indexing an empty 1-D input.
        return pr[np.newaxis, :, :]

    # Row index of every step, computed in one shot instead of a Python loop.
    rows = seq[:, 0].astype(np.int64) - pitch_start
    in_range = (rows >= 0) & (rows < num_pitches)
    steps = np.flatnonzero(in_range)
    pr[rows[steps], steps] = 1.0

    # Leading axis of size 1 is the channel dimension.
    return pr[np.newaxis, :, :]

### Load LSTM Windows & Build CNN Features

In [6]:
# --- Read the pickled LSTM splits (windows, labels) and the label encoder ---
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point BASE_DIR at files produced by this project's own preprocessing.
with (BASE_DIR / 'lstm_train.pkl').open('rb') as fh:
    X_train_l, y_train = pickle.load(fh)
with (BASE_DIR / 'lstm_dev.pkl').open('rb') as fh:
    X_dev_l, y_dev = pickle.load(fh)
with (BASE_DIR / 'lstm_test.pkl').open('rb') as fh:
    X_test_l, y_test = pickle.load(fh)
with (BASE_DIR / 'label_encoder.pkl').open('rb') as fh:
    le = pickle.load(fh)
print("Label classes:", le.classes_)

# --- Derive the CNN inputs: one piano-roll per LSTM window, stacked per split ---
X_train_c, X_dev_c, X_test_c = (
    np.stack([seq_to_pianoroll(window) for window in split], axis=0)
    for split in (X_train_l, X_dev_l, X_test_l)
)

# --- Report the resulting array shapes ---
print("LSTM shapes:", X_train_l.shape, X_dev_l.shape, X_test_l.shape)
print("CNN shapes: ", X_train_c.shape, X_dev_c.shape, X_test_c.shape)
print("Labels train/dev/test:", y_train.shape, y_dev.shape, y_test.shape)


Label classes: ['bach' 'beethoven' 'chopin' 'mozart']
LSTM shapes: (480044, 50, 3) (41034, 50, 3) (39295, 50, 3)
CNN shapes:  (480044, 1, 88, 50) (41034, 1, 88, 50) (39295, 1, 88, 50)
Labels train/dev/test: (480044,) (41034,) (39295,)


In [13]:

# Persist each split's piano-roll features and labels as .npy files under BASE_DIR.
# Files are written in the order listed.
for filename, array in (
    ('cnn_train_data.npy',   X_train_c),
    ('cnn_train_labels.npy', y_train),
    ('cnn_dev_data.npy',     X_dev_c),
    ('cnn_dev_labels.npy',   y_dev),
    ('cnn_test_data.npy',    X_test_c),
    ('cnn_test_labels.npy',  y_test),
):
    np.save(BASE_DIR / filename, array)
print("✅ CNN feature files written.")

✅ CNN feature files written.


### Create Hybrid Dataset and DataLoaders

In [12]:
# Hybrid Dataset & DataLoaders
import torch
from torch.utils.data import Dataset, DataLoader

class HybridDataset(Dataset):
    """
    Dataset yielding ``(lstm_window, piano_roll, label)`` triples.

    Parameters
    ----------
    lstm_data : array-like
        Per-sample sequence features; stored as float32.
    cnn_data : array-like
        Per-sample piano-roll features; stored as float32.
    labels : array-like
        Per-sample integer class ids; stored as int64 (``torch.long``).

    Raises
    ------
    ValueError
        If the three inputs do not contain the same number of samples.

    Notes
    -----
    ``torch.as_tensor`` is used instead of ``torch.tensor`` so that an input
    NumPy array whose dtype already matches is wrapped without being copied.
    The resulting tensor then shares memory with that array, so the array
    must not be modified in place while the dataset is in use.
    """

    def __init__(self, lstm_data, cnn_data, labels):
        self.lstm = torch.as_tensor(lstm_data, dtype=torch.float32)
        self.cnn = torch.as_tensor(cnn_data, dtype=torch.float32)
        self.labels = torch.as_tensor(labels, dtype=torch.long)

        # Fail fast: __len__ trusts the labels, so a shorter feature array
        # would otherwise only surface as an IndexError in the middle of an epoch.
        n_lstm, n_cnn, n_labels = len(self.lstm), len(self.cnn), len(self.labels)
        if not (n_lstm == n_cnn == n_labels):
            raise ValueError(
                "lstm_data, cnn_data and labels must have the same number of "
                f"samples, got {n_lstm}, {n_cnn} and {n_labels}"
            )

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(lstm, cnn, label)`` tensors stored at position ``idx``."""
        return self.lstm[idx], self.cnn[idx], self.labels[idx]

# Pair each split's note windows with its piano-rolls and labels.
train_ds = HybridDataset(lstm_data=X_train_l, cnn_data=X_train_c, labels=y_train)
dev_ds   = HybridDataset(lstm_data=X_dev_l,   cnn_data=X_dev_c,   labels=y_dev)
test_ds  = HybridDataset(lstm_data=X_test_l,  cnn_data=X_test_c,  labels=y_test)

# Mini-batch loaders: only the training split is shuffled;
# dev and test are iterated in dataset order.
batch_size = 32
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
dev_loader, test_loader = (
    DataLoader(split_ds, batch_size=batch_size, shuffle=False)
    for split_ds in (dev_ds, test_ds)
)

# Sanity check: pull one training batch and show the shape of each component.
lstm_b, cnn_b, y_b = next(iter(train_loader))
for caption, batch in (
    ("LSTM batch shape:", lstm_b),
    ("CNN batch shape: ", cnn_b),
    ("Label batch shape:", y_b),
):
    print(caption, batch.shape)

LSTM batch shape: torch.Size([32, 50, 3])
CNN batch shape:  torch.Size([32, 1, 88, 50])
Label batch shape: torch.Size([32])


### Defining a Hybrid Model

In [None]:
# Choose the compute backend, in order of preference: MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device_name = 'mps'
elif torch.cuda.is_available():
    device_name = 'cuda'
else:
    device_name = 'cpu'
device = torch.device(device_name)
print("Using device:", device)

class HybridNet(nn.Module):
    """
    Two-branch classifier: a small CNN over the piano-roll and a single-layer
    LSTM over the note sequence, whose features are concatenated and fed to
    one linear layer producing class logits.

    Parameters
    ----------
    num_classes : int
        Number of output logits.
    lstm_input_size : int, default 3
        Feature count per time step of the LSTM input.
    lstm_hidden_size : int, default 64
        Hidden-state width of the LSTM.
    cnn_channels : tuple of two ints, default (16, 32)
        Output channels of the first and second convolution.

    Notes
    -----
    Sub-module names and layer order (``cnn_branch``, ``lstm_branch``, ``fc``)
    are fixed, so with the default arguments the ``state_dict`` layout matches
    previously saved checkpoints.
    """

    def __init__(self, num_classes, lstm_input_size=3, lstm_hidden_size=64,
                 cnn_channels=(16, 32)):
        super().__init__()
        conv1_out, conv2_out = cnn_channels
        # CNN branch: conv -> ReLU -> 2x2 max-pool -> conv -> ReLU -> global average pool.
        # The final pool collapses both spatial axes to 1x1, so the branch
        # yields conv2_out features regardless of the input height/width.
        self.cnn_branch = nn.Sequential(
            nn.Conv2d(1, conv1_out, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2,2)),
            nn.Conv2d(conv1_out, conv2_out, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1,1))
        )
        # LSTM branch: batch_first=True, so inputs are (batch, time, features).
        self.lstm_branch = nn.LSTM(
            input_size=lstm_input_size,
            hidden_size=lstm_hidden_size,
            num_layers=1,
            batch_first=True
        )
        # Classifier over the concatenated [LSTM | CNN] feature vector.
        self.fc = nn.Linear(conv2_out + lstm_hidden_size, num_classes)

    def forward(self, x_lstm, x_cnn):
        """
        Parameters
        ----------
        x_lstm : Tensor of shape (batch, time, lstm_input_size)
        x_cnn : Tensor of shape (batch, 1, height, width)

        Returns
        -------
        Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # LSTM path: keep only the output at the final time step.
        out_l, _ = self.lstm_branch(x_lstm)
        feat_l   = out_l[:, -1, :]
        # CNN path: (batch, C, 1, 1) -> (batch, C).
        feat_c   = self.cnn_branch(x_cnn)
        feat_c   = feat_c.view(feat_c.size(0), -1)
        # Concatenation order (LSTM first, then CNN) fixes the column layout of fc.
        combined = torch.cat([feat_l, feat_c], dim=1)
        return self.fc(combined)

# Build the network with one output logit per label class, then move it
# to the selected device and show its layer structure.
num_classes = len(le.classes_)
model = HybridNet(num_classes)
model = model.to(device)
print(model)

Using device: mps
HybridNet(
  (cnn_branch): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (lstm_branch): LSTM(3, 64, batch_first=True)
  (fc): Linear(in_features=96, out_features=4, bias=True)
)


### Training Setup

In [None]:
# --- Optimisation settings ---
num_epochs    = 20
learning_rate = 1e-3
batch_size    = 32   # not read by anything else in this cell

# Cross-entropy over the class logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Highest validation accuracy seen so far; the training loop compares against it.
best_dev_acc = 0.0

print(f"Training for {num_epochs} epochs on device {device}")

Training for 20 epochs on device mps


### Hybrid Model Training Loop

In [13]:
for epoch in range(1, num_epochs + 1):
    # ---- Training pass: one optimiser step per mini-batch ----
    model.train()
    train_losses = []
    for x_lstm, x_cnn, yb in train_loader:
        # Move the batch onto the model's device.
        x_lstm = x_lstm.to(device)
        x_cnn  = x_cnn.to(device)
        yb     = yb.to(device)

        optimizer.zero_grad()
        logits = model(x_lstm, x_cnn)
        loss   = criterion(logits, yb)
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

    # Unweighted mean of the per-batch losses.
    avg_train_loss = np.mean(train_losses)

    # ---- Validation pass: no gradients, collect predictions for accuracy ----
    model.eval()
    val_preds, val_true, val_losses = [], [], []
    with torch.no_grad():
        for x_lstm, x_cnn, yb in dev_loader:
            x_lstm = x_lstm.to(device)
            x_cnn  = x_cnn.to(device)
            yb     = yb.to(device)

            logits = model(x_lstm, x_cnn)
            loss   = criterion(logits, yb)
            val_losses.append(loss.item())

            # Predicted class = index of the largest logit.
            preds = logits.argmax(dim=1).cpu().numpy()
            val_preds.extend(preds)
            val_true.extend(yb.cpu().numpy())

    avg_val_loss = np.mean(val_losses)
    val_acc      = accuracy_score(val_true, val_preds)

    epoch_summary = (f"Epoch {epoch:2d}/{num_epochs}  "
                     f"Train Loss: {avg_train_loss:.4f}  "
                     f"Val Loss: {avg_val_loss:.4f}  "
                     f"Val Acc: {val_acc:.4f}")
    print(epoch_summary)

    # Checkpoint only on a strict improvement in validation accuracy.
    if val_acc > best_dev_acc:
        best_dev_acc = val_acc
        torch.save(model.state_dict(), BASE_DIR / 'best_hybrid.pth')
        print("  🔖 New best hybrid model saved")

Epoch  1/20  Train Loss: 0.1003  Val Loss: 0.0239  Val Acc: 0.9922
  🔖 New best hybrid model saved
Epoch  2/20  Train Loss: 0.0429  Val Loss: 0.0329  Val Acc: 0.9903
Epoch  3/20  Train Loss: 0.0317  Val Loss: 0.0720  Val Acc: 0.9731
Epoch  4/20  Train Loss: 0.0249  Val Loss: 0.0249  Val Acc: 0.9934
  🔖 New best hybrid model saved
Epoch  5/20  Train Loss: 0.0203  Val Loss: 0.0265  Val Acc: 0.9911
Epoch  6/20  Train Loss: 0.0172  Val Loss: 0.0238  Val Acc: 0.9905
Epoch  7/20  Train Loss: 0.0155  Val Loss: 0.0266  Val Acc: 0.9916
Epoch  8/20  Train Loss: 0.0144  Val Loss: 0.0365  Val Acc: 0.9910
Epoch  9/20  Train Loss: 0.0132  Val Loss: 0.0313  Val Acc: 0.9919
Epoch 10/20  Train Loss: 0.0118  Val Loss: 0.0342  Val Acc: 0.9916
Epoch 11/20  Train Loss: 0.0109  Val Loss: 0.0328  Val Acc: 0.9927
Epoch 12/20  Train Loss: 0.0104  Val Loss: 0.0240  Val Acc: 0.9929
Epoch 13/20  Train Loss: 0.0099  Val Loss: 0.0377  Val Acc: 0.9910
Epoch 14/20  Train Loss: 0.0089  Val Loss: 0.0335  Val Acc: 0.991