**Introduction**

In this assignment, you will implement a Recurrent Neural Network (RNN) for music generation.  

For this, you will use the Irish Massive ABC Notation (IrishMAN) dataset, which contains a collection of Irish folk tunes in ABC notation.  

The goal is to train an RNN to generate new tunes based on the patterns learned from the dataset.

**Dataset:**  
The IrishMAN dataset is available at [https://huggingface.co/datasets/sander-wood/irishman](https://huggingface.co/datasets/sander-wood/irishman).


In [None]:
import random
from bisect import bisect_right
from datetime import datetime
from itertools import accumulate

import torch
import torch.nn as nn
import torch.optim as optim
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
start_time = datetime.now()  # reference point for the elapsed-time report printed later

# 0. Control the data size for quick testing
SAMPLE_SIZE = 50000     # number of tunes to use (set to None for full dataset)

# 1. Set parameters
SEQ_LENGTH = 75       # number of characters per training sequence
BATCH_SIZE = 32       # how many sequences per batch
EMBED_DIM = 64        # size of character embeddings
HIDDEN_DIM = 128      # size of LSTM hidden state
NUM_LAYERS = 1        # number of LSTM layers
LEARNING_RATE = 0.001 # optimizer learning rate
EPOCHS = 20           # maximum number of passes through the data
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. Load data from Hugging Face
print(f"Loading data on {DEVICE}...")
dataset = load_dataset('sander-wood/irishman', split='train')
column = dataset['abc notation']  # one ABC-notation string per tune
# Always materialize a plain Python list: the corpus is shuffled in place
# further down, which needs a mutable sequence. Without this, `texts` is only
# guaranteed to be a list when SAMPLE_SIZE is set (slicing); with
# SAMPLE_SIZE = None it would be whatever `dataset[...]` returns.
# NOTE(review): newer `datasets` releases appear to return a read-only column
# object here rather than a list - confirm against the installed version.
texts = list(column if SAMPLE_SIZE is None else column[:SAMPLE_SIZE])
print(f"Using {len(texts)} tunes for training")

# 3. Build a character-level vocabulary
# Collect characters tune by tune instead of joining the whole corpus into one
# giant temporary string first; the resulting sorted vocabulary is identical.
vocab = sorted({ch for tune in texts for ch in tune})
char2idx = {ch: i for i, ch in enumerate(vocab)}
idx2char = {i: ch for ch, i in char2idx.items()}
VocabSize = len(vocab)
print(f"Vocab size: {VocabSize}")

# 4. Create a simple Dataset class
class MusicDataset(Dataset):
    """Sliding-window character dataset for next-character prediction.

    Each item is an ``(input, target)`` pair of ``int64`` tensors of length
    ``seq_length`` taken from a single tune, where ``target`` is ``input``
    shifted one character to the right. Windows never span two tunes, and
    tunes with ``seq_length`` characters or fewer contribute no items.

    Every tune is encoded once and windows are sliced on demand, so memory
    scales with the corpus size rather than with
    ``corpus size * 2 * seq_length`` as it would if every window were
    stored as its own pair of Python lists.

    Args:
        texts: Iterable of tune strings.
        seq_length: Window length; defaults to the module-level ``SEQ_LENGTH``.
        vocab_map: Character-to-index mapping; defaults to the module-level
            ``char2idx``. Characters missing from the mapping are encoded as
            index 0.
    """

    def __init__(self, texts, seq_length=None, vocab_map=None):
        self.seq_length = SEQ_LENGTH if seq_length is None else seq_length
        mapping = char2idx if vocab_map is None else vocab_map

        self.tunes = []      # one 1-D int64 tensor per tune that yields >= 1 window
        window_counts = []   # number of windows contributed by each kept tune
        for t in texts:
            n_windows = len(t) - self.seq_length
            if n_windows <= 0:
                # Too short to provide an input window plus its shifted target.
                continue
            encoded = [mapping.get(c, 0) for c in t]
            self.tunes.append(torch.tensor(encoded, dtype=torch.long))
            window_counts.append(n_windows)

        # Running totals: _window_ends[k] is the number of windows contained in
        # tunes 0..k, which lets __getitem__ locate a tune by binary search.
        self._window_ends = list(accumulate(window_counts))

    def __len__(self):
        return self._window_ends[-1] if self._window_ends else 0

    def __getitem__(self, idx):
        total = len(self)
        if idx < 0:
            idx += total  # accept negative indices like a list would
        if not 0 <= idx < total:
            raise IndexError("MusicDataset index out of range")

        tune_no = bisect_right(self._window_ends, idx)
        windows_before = self._window_ends[tune_no - 1] if tune_no else 0
        start = idx - windows_before
        stop = start + self.seq_length

        tune = self.tunes[tune_no]
        # Slices are views into the stored tune tensor; batching copies them.
        return tune[start:stop], tune[start + 1:stop + 1]

# 5. Split into train/val
# Shuffle whole tunes (in place) before cutting, so the 90/10 split is made
# at tune level and no tune contributes windows to both sides.
random.shuffle(texts)
split_idx = int(len(texts) * 0.9)
train_texts, val_texts = texts[:split_idx], texts[split_idx:]

train_ds, val_ds = MusicDataset(train_texts), MusicDataset(val_texts)

# Only the training loader reshuffles its windows each epoch.
train_loader = DataLoader(dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=BATCH_SIZE, shuffle=False)

# 6. Define the LSTM model
class SimpleRNN(nn.Module):
    """Character-level language model: embedding -> LSTM -> linear projection.

    All layer sizes are taken from the module-level constants ``VocabSize``,
    ``EMBED_DIM``, ``HIDDEN_DIM`` and ``NUM_LAYERS``.
    """

    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(num_embeddings=VocabSize, embedding_dim=EMBED_DIM)
        self.lstm = nn.LSTM(
            input_size=EMBED_DIM,
            hidden_size=HIDDEN_DIM,
            num_layers=NUM_LAYERS,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=HIDDEN_DIM, out_features=VocabSize)

    def forward(self, x, hidden=None):
        """Run the network over a batch of index sequences.

        Args:
            x: Tensor of character indices, batch dimension first.
            hidden: Optional LSTM state to continue from; ``None`` lets the
                LSTM start from its default initial state.

        Returns:
            A ``(logits, hidden)`` pair. ``logits`` is two-dimensional with one
            row of ``VocabSize`` scores per timestep (the LSTM output is
            flattened to ``(-1, HIDDEN_DIM)`` before the projection);
            ``hidden`` is the state returned by the LSTM.
        """
        embedded = self.embed(x)
        lstm_out, next_hidden = self.lstm(embedded, hidden)
        flat = lstm_out.reshape(-1, HIDDEN_DIM)
        return self.fc(flat), next_hidden

# Build the network on the target device, then the optimizer over its
# parameters and the per-character classification loss.
model = SimpleRNN()
model = model.to(DEVICE)
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

# 7. Training loop with early stopping on the validation loss
# Fail fast on an empty loader: the per-epoch averages below divide by the
# number of batches, so an empty validation set would otherwise only surface
# as a ZeroDivisionError after a full training epoch has already been spent.
if len(train_loader) == 0 or len(val_loader) == 0:
    raise ValueError(
        "Training and validation sets must each contain at least one "
        "sequence; increase SAMPLE_SIZE or lower SEQ_LENGTH."
    )

# Early stopping parameters
patience = 5              # how many epochs to wait without improvement
best_val_loss = float('inf')
epochs_no_improve = 0
early_stop = False        # set to True when training ends before EPOCHS

for epoch in range(1, EPOCHS+1):
    # --- Training pass ---
    model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        inputs = inputs.to(DEVICE)
        # Flatten targets so they line up with the flattened logits rows.
        targets = targets.to(DEVICE).view(-1)
        optimizer.zero_grad()
        logits, _ = model(inputs)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch}/{EPOCHS}, Loss: {avg_loss:.4f}")

    # --- Validation pass (no gradients, eval mode) ---
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE).view(-1)
            logits, _ = model(inputs)
            val_loss += criterion(logits, targets).item()
    val_loss /= len(val_loader)
    print(f"Validation Loss: {val_loss:.4f}")

    # --- Early-stopping bookkeeping ---
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        # Keep the weights of the best epoch so far on disk.
        torch.save(model.state_dict(), "best_model.pth")
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping triggered after {epoch} epochs.")
            early_stop = True
            break
# 8. Report timing, checkpoint the final state, then sample a tune
# Report the elapsed time before sampling so that it does not include the
# time spent generating text.
print("Training finished in:", datetime.now() - start_time)

# Save the last-epoch model and optimizer state under its own file name.
# "best_model.pth" is written by the training loop with the best-validation
# weights (as a bare state_dict); writing this dictionary to the same path
# would replace those weights with the last epoch's, in a different format.
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, 'final_checkpoint.pth')

# Generate a sample sequence with dynamic M, L, K
model.eval()
# Keep X and T fixed, but choose M, L, K randomly
M_options = ["M:6/8", "M:4/4", "M:3/4"]
L_options = ["L:1/8", "L:1/16", "L:1/4"]
K_options = ["K:D", "K:G", "K:C"]
# Build header
start = f"""X:1
T:Generated Tune
{random.choice(M_options)}
{random.choice(L_options)}
{random.choice(K_options)}
"""
# Convert header to indices
seq = [char2idx.get(c, 0) for c in start]

GEN_LENGTH = 800  # number of characters to sample after the header

# Prime the LSTM with the whole header once, then feed back only the newly
# sampled character together with the carried hidden state. Feeding a window
# of past characters *and* the carried state would make the network process
# the same context again on every step.
hidden = None
inp = torch.tensor(seq, dtype=torch.long, device=DEVICE).unsqueeze(0)
# no_grad: sampling needs no gradients, and without it the autograd graph
# would keep growing through `hidden` on every step.
with torch.no_grad():
    for _ in range(GEN_LENGTH):
        logits, hidden = model(inp, hidden)
        # The last row of the flattened logits belongs to the latest timestep.
        probs = torch.softmax(logits[-1], dim=0)
        idx = torch.multinomial(probs, 1).item()
        seq.append(idx)
        inp = torch.tensor([[idx]], dtype=torch.long, device=DEVICE)

# Convert back to characters and print
gen = ''.join(idx2char[i] for i in seq)
print("Generated ABC notation with dynamic headers:", gen)

Loading data on cuda...
Using 50000 tunes for training
Vocab size: 95
