In [10]:
from model import ChordModelV2
from data_processing import ChordDataset

0003
       Timestamp  Chroma_1  Chroma_2  Chroma_3  Chroma_4  Chroma_5  Chroma_6  \
2254  104.675556  1.214170  0.000000  0.561130  0.377481  0.518743  0.191375   
2853  132.493061  0.604096  0.323584  0.000000  0.000000  0.420399  0.249410   
377    17.507846  1.850250  0.000000  0.000000  0.000000  0.418672  0.000000   
2193  101.842721  0.004824  0.000000  0.579948  0.884349  0.520062  0.000000   
2147   99.706485  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   

      Chroma_7  Chroma_8  Chroma_9  Chroma_10  Chroma_11  Chroma_12  Chord  
2254  0.000000  0.000000  0.760100   1.126270        0.0   0.000000  A:maj  
2853  0.617099  1.046600  1.208380   0.398198        0.0   0.000000  A:min  
377   0.000000  0.114854  0.071723   0.000000        0.0   0.000000  A:min  
2193  0.436535  1.316590  0.253221   0.662340        0.0   0.020372  C:maj  
2147  0.000000  0.000000  0.000000   0.000000        0.0   0.000000  C:maj  
0004
       Timestamp  Chroma_1  Chroma_2  Chroma_3 

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
# Register ChordDataset as a safe global for torch's weights-only unpickler.
# NOTE(review): presumably needed so the torch.load("chord_dataset.pt") call in
# the loading cell accepts the pickled ChordDataset object — confirm against the
# installed torch version's default for `weights_only`.
torch.serialization.add_safe_globals([ChordDataset])

In [14]:
# Select the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Path of the serialized dataset, relative to the notebook's working directory.
data_path = "chord_dataset.pt"

# Load the serialized dataset object.
# A missing file is re-raised with a descriptive message instead of calling
# exit(): inside a notebook exit() shuts the kernel session down rather than
# reporting the problem, whereas an exception stops "Run All" at this cell and
# keeps the kernel (and its traceback) available for inspection.
try:
    dataset = torch.load(data_path)
except FileNotFoundError as err:
    raise FileNotFoundError(
        f"Dataset not found: {data_path!r} does not exist in the working directory"
    ) from err
else:
    print("Dataset successfully loaded")

Dataset successfully loaded


In [None]:
#Check dataset
# Print the first 10 samples (fewer if the dataset is shorter).
# The dataset is iterated once: calling next(iter(dataset)) inside the loop
# would build a fresh iterator on every pass and print the first sample 10 times.
# range(10) comes first in zip() so no extra sample is pulled after the 10th.
for _, data in zip(range(10), dataset):
    print(data)

In [23]:
# Instantiate the network and move it onto the selected device.
model = ChordModelV2()
model.to(device)

# Loss function and optimiser consumed by the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [24]:
#Split dataset
# 80% train / 10% validation / remainder test. The test size is computed by
# subtraction so the three sizes always sum to len(dataset) despite int() truncation.
SPLIT_SEED = 42  # fixed seed so the same samples land in each split on every run

total_size = len(dataset)
train_size = int(0.8 * total_size)
val_size = int(0.1 * total_size)
test_size = total_size - train_size - val_size

# A dedicated, seeded generator makes the split reproducible across kernel
# restarts; without it the held-out test set changes from run to run.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# Only the training loader shuffles; validation/test order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [25]:
from tqdm import tqdm

#Training loop
# Trains `model` for `num_epochs` epochs, validates after each epoch, lowers the
# learning rate when the validation loss plateaus, and checkpoints the weights
# with the lowest validation loss seen so far.
num_epochs = 20
best_val_loss = float('inf') #initialize with infinity
# `verbose=True` is deprecated on PyTorch LR schedulers, so it is not passed;
# learning-rate reductions are reported explicitly after scheduler.step() below.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.1)

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        inputs, labels = inputs.to(device), labels.to(device)

        #reset for next iteration
        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    #Loss for this epoch (mean of the per-batch losses)
    epoch_loss = train_loss / len(train_loader)

    #Validation
    model.eval()
    val_loss = 0.0
    #Don't track gradients during evaluation
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()

    val_loss /= len(val_loader)

    #Reduce learning rate if val_loss doesn't improve, and report any change
    previous_lrs = [group["lr"] for group in optimizer.param_groups]
    scheduler.step(val_loss)
    for group_idx, (old_lr, group) in enumerate(zip(previous_lrs, optimizer.param_groups)):
        if group["lr"] != old_lr:
            print(f"Reducing learning rate of group {group_idx} from {old_lr:.4e} to {group['lr']:.4e}")

    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}")

    #Save checkpoint whenever the validation loss reaches a new minimum
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "checkpoint_model.pt")
        print("Checkpoint saved")

#Weights after the last epoch (may differ from the best checkpoint above)
torch.save(model.state_dict(), "final_model.pt")
print("Training complete, model saved")


Epoch 1/20:   1%|          | 659/103484 [00:08<22:24, 76.50it/s]


KeyboardInterrupt: 