## This jupyter notebook was used to finetune the resnet model for guitar chords. 

In [1]:
import torch
import torchvision.models as models
import os
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

In [6]:
resnet = models.resnet152(weights="ResNet152_Weights.DEFAULT")

num_classes = 14

resnet.fc = torch.nn.Linear(resnet.fc.in_features, num_classes)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet.parameters(), lr=0.00175, momentum=0.9, weight_decay=0.01)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet = resnet.to(device)

Downloading: "https://download.pytorch.org/models/resnet152-f82ba261.pth" to C:\Users\slyft/.cache\torch\hub\checkpoints\resnet152-f82ba261.pth
100%|██████████| 230M/230M [00:21<00:00, 11.4MB/s] 


In [7]:
# Define the paths to the training and testing directories
train_dir = 'data/training'  # Path to the folder containing subfolders of training data
test_dir = 'data/test'  # Path to the folder containing subfolders of testing data

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Apply the transformations when loading the datasets
train_dataset = ImageFolder(train_dir, transform=train_transform)
validation_dataset = ImageFolder(test_dir, transform=val_transform)

batch_size = 64  # Define your preferred batch size

# Create data loaders for the training and validation sets
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

In [4]:
num_epochs = 20  # Adjust the number of training epochs as needed

best_val_loss = float('inf')

for epoch in range(num_epochs):
    running_loss = 0.0  # Initialize the running loss for the epoch

    # Wrap the train_dataloader with tqdm for the loading bar
    train_dataloader_with_bar = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
    
    # Training phase
    resnet.train()
    for images, labels in train_dataloader_with_bar:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()  # Accumulate the loss
        
        # Update the loading bar with the current loss value
        train_dataloader_with_bar.set_postfix({'Loss': loss.item()})
        
    epoch_loss = running_loss / len(train_dataloader)  # Calculate the average epoch loss
    print(f"Epoch {epoch+1}/{num_epochs}, Training Loss: {epoch_loss:.4f}")

    # Validation phase
    resnet.eval()  # Set the model to evaluation mode
    running_val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in validation_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = resnet(images)
            loss = criterion(outputs, labels)
            running_val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = running_val_loss / len(validation_dataloader)
    val_accuracy = 100 * correct / total

    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

    # Save the model if validation loss decreased

    model_folder = 'models'
    if val_loss < best_val_loss:
        # Create the model folder if it doesn't exist
        os.makedirs(model_folder, exist_ok=True)
        
        # Define the file path within the model folder
        model_path = os.path.join(model_folder, "best_model.pth")
        
        # Save the model to the specified file path
        torch.save(resnet.state_dict(), model_path)
        
        best_val_loss = val_loss


Epoch 1/20:   0%|          | 0/34 [00:00<?, ?it/s]

                                                                      

Epoch 1/20, Training Loss: 2.6175
Validation Loss: 2.5052, Validation Accuracy: 18.14%


                                                                      

Epoch 2/20, Training Loss: 2.3381
Validation Loss: 2.0930, Validation Accuracy: 30.79%


                                                                      

Epoch 3/20, Training Loss: 1.8375
Validation Loss: 1.5522, Validation Accuracy: 49.84%


                                                                       

Epoch 4/20, Training Loss: 1.1876
Validation Loss: 0.8188, Validation Accuracy: 75.04%


                                                                       

Epoch 5/20, Training Loss: 0.6681
Validation Loss: 0.5940, Validation Accuracy: 83.42%


                                                                       

Epoch 6/20, Training Loss: 0.3883
Validation Loss: 0.3052, Validation Accuracy: 92.69%


                                                                       

Epoch 7/20, Training Loss: 0.2236
Validation Loss: 0.1984, Validation Accuracy: 95.73%


                                                                        

Epoch 8/20, Training Loss: 0.1351
Validation Loss: 0.1447, Validation Accuracy: 95.98%


                                                                        

Epoch 9/20, Training Loss: 0.1071
Validation Loss: 0.1180, Validation Accuracy: 97.13%


                                                                         

Epoch 10/20, Training Loss: 0.0952
Validation Loss: 0.0761, Validation Accuracy: 98.28%


                                                                         

Epoch 11/20, Training Loss: 0.0829
Validation Loss: 0.0959, Validation Accuracy: 97.45%


                                                                         

Epoch 12/20, Training Loss: 0.0764
Validation Loss: 0.0961, Validation Accuracy: 97.29%


                                                                         

Epoch 13/20, Training Loss: 0.0663
Validation Loss: 0.0767, Validation Accuracy: 98.28%


                                                                         

Epoch 14/20, Training Loss: 0.0567
Validation Loss: 0.0871, Validation Accuracy: 97.78%


                                                                         

Epoch 15/20, Training Loss: 0.0377
Validation Loss: 0.0707, Validation Accuracy: 98.44%


                                                                         

Epoch 16/20, Training Loss: 0.0330
Validation Loss: 0.0606, Validation Accuracy: 97.70%


                                                                         

Epoch 17/20, Training Loss: 0.0297
Validation Loss: 0.0682, Validation Accuracy: 97.95%


                                                                          

Epoch 18/20, Training Loss: 0.0250
Validation Loss: 0.0717, Validation Accuracy: 97.70%


                                                                          

Epoch 19/20, Training Loss: 0.0329
Validation Loss: 0.0633, Validation Accuracy: 98.19%


                                                                          

Epoch 20/20, Training Loss: 0.0215
Validation Loss: 0.0762, Validation Accuracy: 97.37%


## Interpretation and Validation: 

The training loss decreases significantly over the epochs, from 2.6175 in the first epoch to 0.0215 in the last epoch, indicating that the network is learning effectively from the training data.

The validation loss also decreases over the epochs, from 2.5052 in the first epoch to 0.0762 in the last epoch. However, the validation loss does not decrease as steadily as the training loss, and even increases in some epochs (for example, between the 10th and 11th epoch, and the 19th and 20th epoch). This could potentially suggest the model is starting to overfit to the training data, as its performance on unseen data (validation set) is getting worse while its performance on the training data continues to improve.

The validation accuracy increases over the epochs, from 18.14% in the first epoch to 97.37% in the last epoch, indicating that the network's ability to correctly classify unseen images is improving. However, similar to the validation loss, the validation accuracy does not steadily increase, and decreases in some epochs, which could potentially suggest overfitting. Some techniques have been applied in the code above to counter overfitting:
- Data augmentation
- L2 Regularization
- Saving model with lowest validation loss 
- Hyperparameter tuning - 
    this process undoubtedly required the most time. The fine-tuning of hyperparameters, such as the batch size, learning rate, and number of epochs involved incremental modifications and observing their respective impact on the model's performance.


Overall, the network shows a decent performance with high accuracy on the validation set. An additional test with the 'unknown' dataset provides a more unbiased evaluation of the network's abilities (see below).

In [5]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# Load the fine-tuned model parameters
state_dict = torch.load("models/best_model.pth")

# Load the state dict into the resnet model
resnet.load_state_dict(state_dict)


# Define the path to the test directory
unknown_dir = 'data/unknown'  # Replace with the path to your folder containing subfolders of test data

# Define the transformations to apply to the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create the test dataset and data loader
unknown_dataset = ImageFolder(unknown_dir, transform=transform)
unknown_dataloader = DataLoader(unknown_dataset, batch_size=32, shuffle=False)

# Ensure model is in evaluation mode
resnet.eval()

correct = 0
total = 0

# Testing phase
with torch.no_grad():
    for images, labels in unknown_dataloader:
        images, labels = images.to(device), labels.to(device)
        outputs = resnet(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test accuracy: {accuracy}%")


Test accuracy: 71.42857142857143%
