In [223]:
# data loading
from sklearn.preprocessing import LabelEncoder
import os
import torch
import numpy as np

# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# below live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

def load_dataset(base_dir):
    """Load every ``.pt`` tensor found under ``base_dir/<genre>/``.

    Each immediate sub-directory of ``base_dir`` is treated as one class; its
    name becomes the label of every ``.pt`` file inside it. Entries that are
    not directories, and files that do not end in ``.pt``, are skipped.

    Args:
        base_dir: Path of the directory that contains one folder per genre.

    Returns:
        (features, labels): two NumPy arrays of equal length. ``features[i]``
        is the flattened tensor loaded from the i-th file and ``labels[i]`` is
        the name of the folder it was found in. Samples are ordered by genre
        name, then by file name.
    """
    features = []
    labels = []
    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting keeps the sample order (and therefore any seeded shuffle that
    # is applied later) identical from run to run.
    for genre in sorted(os.listdir(base_dir)):
        genre_dir = os.path.join(base_dir, genre)
        if not os.path.isdir(genre_dir):
            continue
        for fname in sorted(os.listdir(genre_dir)):
            if not fname.endswith('.pt'):
                continue
            path = os.path.join(genre_dir, fname)
            # NOTE(review): torch.load unpickles data; only load trusted files.
            # map_location keeps this working for tensors saved from a GPU.
            tensor = torch.load(path, map_location="cpu")
            # detach() so tensors saved with requires_grad=True still convert.
            features.append(tensor.detach().flatten().numpy())
            labels.append(genre)
    return np.array(features), np.array(labels)

file_dir = "/content/drive/MyDrive/split_data_small (1)/"

# Read the three splits in train / val / test order.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    load_dataset(file_dir + split) for split in ("train", "val", "test")
)

# Encode labels: the encoder is fitted on the training labels only and then
# reused, unchanged, for the validation and test labels.
le = LabelEncoder()
y_train, y_val, y_test = (
    le.fit_transform(y_train),
    le.transform(y_val),
    le.transform(y_test),
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [224]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler

def get_data_loader(
    target_classes,
    batch_size=64,
    root_dir="/content/drive/MyDrive/split_data_small (1)/",
    random_seed=1000
):
    """
    Build train/val/test DataLoaders for music genre classification from
    pre-processed .pt files.

    Side effect: the notebook-level label encoder ``le`` is re-fitted on the
    (optionally filtered) training labels, so ``le.classes_`` always matches
    the ``classes`` list returned here.

    Args:
        target_classes: List of genre classes to include (or None for all).
            Samples whose genre is not listed are dropped from every split.
        batch_size: samples per batch
        root_dir: path to directory containing train/val/test folders
            (a trailing slash is optional)
        random_seed: for reproducible results

    Returns:
        train_loader, val_loader, test_loader, classes

    Raises:
        ValueError: if no training sample is left after filtering.
    """

    # Set random seed for reproducibility
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)

    wanted = None
    if target_classes is not None:
        wanted = np.asarray(list(target_classes), dtype=str)

    # Load each split and keep only the requested genres.
    splits = {}
    for split in ("train", "val", "test"):
        # os.path.join works whether or not root_dir ends with a slash.
        X, y = load_dataset(os.path.join(root_dir, split))
        if wanted is not None and len(y) > 0:
            keep = np.isin(y, wanted)
            X, y = X[keep], y[keep]
        splits[split] = (X, y)

    if len(splits["train"][1]) == 0:
        raise ValueError(
            "No training samples found under {!r} for target_classes={!r}".format(
                root_dir, target_classes))

    # Fit the shared encoder on the training labels only; validation and test
    # labels are transformed with that same mapping.
    le.fit(splits["train"][1])

    datasets = {}
    for split, (X, y) in splits.items():
        y_tensor = torch.as_tensor(le.transform(y), dtype=torch.long)
        # Reshape to 224x224 single-channel images (inputs are confirmed to be
        # this size). Using the explicit sample count instead of -1 also
        # handles a split that ended up empty.
        X_tensor = torch.as_tensor(X, dtype=torch.float32).reshape(len(y), 1, 224, 224)
        datasets[split] = TensorDataset(X_tensor, y_tensor)

    train_dataset = datasets["train"]
    val_dataset = datasets["val"]
    test_dataset = datasets["test"]

    # Only the training data is shuffled.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Class names, in the order of their integer codes
    classes = le.classes_.tolist()

    print(f"Loaded {len(train_dataset)} training, {len(val_dataset)} validation, {len(test_dataset)} test samples")
    print(f"Classes: {classes}")

    return train_loader, val_loader, test_loader, classes

In [225]:
def evaluate_multiclass(model, dataloader, criterion):
    """Compute the error rate and mean batch loss of ``model`` on ``dataloader``.

    The model is switched to eval mode and left in that mode.

    Args:
        model: callable module mapping a batch of inputs to class scores of
            shape [batch, num_classes].
        dataloader: iterable of ``(inputs, labels)`` batches. It does not need
            to support ``len()``.
        criterion: loss function called as ``criterion(outputs, labels)``.

    Returns:
        (avg_err, avg_loss): fraction of misclassified samples, and the mean of
        the per-batch loss values.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    model.eval()
    total_loss = 0.0
    total_err = 0
    total_samples = 0
    num_batches = 0  # counted here so plain iterables work as well

    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            preds = torch.argmax(outputs, dim=1)
            total_err += (preds != labels).sum().item()
            total_loss += loss.item()
            total_samples += labels.size(0)
            num_batches += 1

    if total_samples == 0:
        # Previously surfaced as an unexplained ZeroDivisionError.
        raise ValueError("evaluate_multiclass: dataloader yielded no samples")

    avg_err = total_err / total_samples
    avg_loss = total_loss / num_batches
    return avg_err, avg_loss

from sklearn.metrics import classification_report

def evaluate_per_class(model, dataloader, le, criterion):
    """Print a per-class classification report for ``model`` on ``dataloader``.

    The model is switched to eval mode and left in that mode. Nothing is
    returned; the report and the mean batch loss are printed.

    Args:
        model: callable module mapping a batch of inputs to class scores of
            shape [batch, num_classes].
        dataloader: iterable of ``(inputs, labels)`` batches.
        le: fitted label encoder; ``le.classes_`` supplies the row names, with
            class ``i`` of the model corresponding to ``le.classes_[i]``.
        criterion: loss function called as ``criterion(outputs, labels)``.

    Raises:
        ValueError: if the dataloader yields no samples.
    """
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            total_loss += loss.item()
            num_batches += 1

    if not all_labels:
        raise ValueError("evaluate_per_class: dataloader yielded no samples")

    avg_loss = total_loss / num_batches
    # Pass the full list of class indices explicitly. Without `labels`,
    # classification_report infers the classes from the data and raises when a
    # class is missing from both the labels and the predictions, because
    # `target_names` would then have the wrong length.
    class_indices = list(range(len(le.classes_)))
    report = classification_report(
        all_labels,
        all_preds,
        labels=class_indices,
        target_names=le.classes_,
        zero_division=0,  # classes that were never predicted report 0, without warnings
    )
    print(f"\nTest Set Classification Report (Avg Loss: {avg_loss:.4f}):\n")
    print(report)

def get_model_name(name, batch_size, learning_rate, epoch, base_dir="models"):
    """Return the checkpoint path for one training configuration and epoch.

    ``base_dir`` is created if it does not exist yet. The file name encodes
    the model name, batch size, learning rate and epoch.
    """
    os.makedirs(base_dir, exist_ok=True)

    template = "model_{0}_bs{1}_lr{2}_epoch{3}.pt"
    filename = template.format(name, batch_size, learning_rate, epoch)
    return os.path.join(base_dir, filename)

In [226]:
#stuff for model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time

# normalize data (penalizing weights so to speak)
from sklearn.utils.class_weight import compute_class_weight

# activation function
import numpy as np
from scipy.special import erf

# (not used for now; get results with ReLU first)
def activation(x):
    """Experimental activation: ``-x * erf(exp(-x))``.

    Works on Python scalars and NumPy arrays (element-wise).
    """
    decayed = np.exp(-x)
    return -x * erf(decayed)

# pooling
def pool(dim, kernel, stride = 1, padding = 0):
    """Output length along one axis for a window of size ``kernel``.

    Computes ``(dim + 2*padding - kernel) // stride + 1``.
    """
    padded = dim + 2 * padding
    return (padded - kernel) // stride + 1

# model
class GlizzyNet(nn.Module):
    """Small CNN: three conv -> ReLU -> max-pool blocks, then two linear layers.

    Args:
        num_classes: number of output scores (one per class).
        input_dim: height and width of the square, single-channel input.
            Defaults to 224.

    The forward pass returns raw class scores of shape [batch, num_classes];
    no softmax is applied.
    """

    def __init__(self, num_classes, input_dim=224):
        super().__init__()
        self.name = "GlizzyNet"

        # Each block: 5x5 convolution (stride 1, no padding), ReLU, then a
        # 2x2 max-pool with stride 2.
        self.blockOne = nn.Sequential(
            nn.Conv2d(1, 5, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

        self.blockTwo = nn.Sequential(
            nn.Conv2d(5, 10, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

        self.blockThree = nn.Sequential(
            nn.Conv2d(10, 15, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

        # Measure the flattened feature size by pushing a dummy image through
        # the conv blocks, so it cannot drift from the layer definitions above.
        # For input_dim=224 this gives 15 * 24 * 24.
        with torch.no_grad():
            probe = torch.zeros(1, 1, input_dim, input_dim)
            flat_features = self._features(probe).flatten(1).size(1)

        self.fc1 = nn.Linear(flat_features, 32)  # 32 hidden neurons
        self.fc2 = nn.Linear(32, num_classes)    # one score per class

    def _features(self, x):
        """Run the three convolutional blocks in order."""
        return self.blockThree(self.blockTwo(self.blockOne(x)))

    # the forward pass function
    def forward(self, x):
        x = self._features(x)
        # flatten everything except the batch dimension
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

#training function
def train_net(net, batch_size=64, learning_rate=0.01, num_epochs=30, target_classes=None):
    """Train ``net`` on the music genre data and log per-epoch metrics.

    For every epoch the function prints train/validation error and loss and
    saves a checkpoint of ``net.state_dict()`` at the path given by
    ``get_model_name(net.name, batch_size, learning_rate, epoch)``. After the
    last epoch, the four metric histories are written next to the final
    checkpoint as ``<path>_train_err.csv``, ``<path>_train_loss.csv``,
    ``<path>_val_err.csv`` and ``<path>_val_loss.csv``.

    Args:
        net: model to train; must expose a ``name`` attribute.
        batch_size: samples per batch.
        learning_rate: Adam learning rate.
        num_epochs: number of passes over the training data (at least 1).
        target_classes: genre classes to train on; must not be None.

    Raises:
        ValueError: if ``target_classes`` is None or ``num_epochs`` < 1.
    """
    ########################################################################
    # Train a classifier on music genres
    if target_classes is None:
        raise ValueError("target_classes must be specified for music genre classification")
    if num_epochs < 1:
        # With zero epochs there would be no checkpoint path to attach the
        # metric logs to.
        raise ValueError("num_epochs must be at least 1")

    torch.manual_seed(1000)

    ########################################################################
    # Obtain the PyTorch data loader objects to load batches of the datasets
    train_loader, val_loader, _, _ = get_data_loader(target_classes, batch_size)

    ########################################################################
    # Define the Loss function and optimizer
    # Compute the class weights from the labels of the loader that is actually
    # trained on (a TensorDataset of (inputs, labels)), not from a notebook
    # global that may be stale or describe a different set of classes.
    train_labels = train_loader.dataset.tensors[1].cpu().numpy()
    class_weights = compute_class_weight(
        class_weight='balanced', classes=np.unique(train_labels), y=train_labels)
    class_weights_tensor = torch.FloatTensor(class_weights)

    # Use weighted loss so rare genres count as much as common ones
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    ########################################################################
    # Set up arrays to store training/validation metrics
    train_err = np.zeros(num_epochs)
    train_loss = np.zeros(num_epochs)
    val_err = np.zeros(num_epochs)
    val_loss = np.zeros(num_epochs)

    ########################################################################
    # Train the network
    start_time = time.time()
    for epoch in range(num_epochs):
        net.train()  # evaluate_multiclass leaves the model in eval mode
        total_train_loss = 0.0
        total_train_err = 0.0
        total_samples = 0
        num_batches = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = net(inputs)  # shape: [batch_size, num_classes]
            loss = criterion(outputs, labels)  # labels are class indices [0, ..., C-1]
            loss.backward()
            optimizer.step()

            # Compute number of incorrect predictions
            preds = torch.argmax(outputs, dim=1)
            total_train_err += (preds != labels).sum().item()
            total_train_loss += loss.item()
            total_samples += labels.size(0)
            num_batches += 1

        train_err[epoch] = total_train_err / total_samples
        train_loss[epoch] = total_train_loss / num_batches

        # Evaluate on validation set
        val_err[epoch], val_loss[epoch] = evaluate_multiclass(net, val_loader, criterion)

        print(f"Epoch {epoch+1}: "
              f"Train err: {train_err[epoch]:.4f}, Train loss: {train_loss[epoch]:.4f} | "
              f"Val err: {val_err[epoch]:.4f}, Val loss: {val_loss[epoch]:.4f}")

        # Save checkpoint
        model_path = get_model_name(net.name, batch_size, learning_rate, epoch)
        torch.save(net.state_dict(), model_path)

    print('Finished Training')
    print(f"Total time elapsed: {time.time() - start_time:.2f} seconds")

    # Save logs for plotting; model_path is the checkpoint of the last epoch
    np.savetxt(f"{model_path}_train_err.csv", train_err)
    np.savetxt(f"{model_path}_train_loss.csv", train_loss)
    np.savetxt(f"{model_path}_val_err.csv", val_err)
    np.savetxt(f"{model_path}_val_loss.csv", val_loss)

In [None]:
# Seed before the network is built: train_net only seeds after it has been
# handed an already-initialised model, so without this the initial weights
# differ on every run.
torch.manual_seed(1000)

target_classes = le.classes_.tolist()  # Use all classes
# Size the output layer from the class list instead of a hard-coded count.
net = GlizzyNet(num_classes=len(target_classes))
batch_size = 64
learning_rate = 0.0005
num_epochs = 30
train_net(net, batch_size=batch_size, learning_rate=learning_rate, num_epochs=num_epochs, target_classes=target_classes)

Loaded 470 training, 101 validation, 101 test samples
Classes: ['Electronic', 'Experimental', 'Folk', 'Hip-Hop', 'Instrumental', 'International', 'Pop', 'Rock']
Epoch 1: Train err: 0.8809, Train loss: 3.8881 | Val err: 0.8911, Val loss: 2.0822
Epoch 2: Train err: 0.8638, Train loss: 2.0833 | Val err: 0.8911, Val loss: 2.0833
Epoch 3: Train err: 0.8957, Train loss: 2.0908 | Val err: 0.8911, Val loss: 2.0796
Epoch 4: Train err: 0.8915, Train loss: 2.0860 | Val err: 0.8911, Val loss: 2.0843
Epoch 5: Train err: 0.8894, Train loss: 2.0821 | Val err: 0.8911, Val loss: 2.0748
Epoch 6: Train err: 0.8574, Train loss: 2.0652 | Val err: 0.8317, Val loss: 2.0562
Epoch 7: Train err: 0.8213, Train loss: 2.0512 | Val err: 0.8911, Val loss: 2.1091
Epoch 8: Train err: 0.8745, Train loss: 2.0396 | Val err: 0.8416, Val loss: 2.0593
Epoch 9: Train err: 0.7426, Train loss: 2.0231 | Val err: 0.8515, Val loss: 2.0616
Epoch 10: Train err: 0.7936, Train loss: 1.9919 | Val err: 0.8416, Val loss: 2.1039
Epoch 11

In [None]:
# Training Curve (borrowed from lab 2)
def plot_training_curve(path):
    """ Plots the training curve for a model run, given the csv files
    containing the train/validation error/loss.

    Two figures are shown: train vs validation error, then train vs
    validation loss, each against the 1-based epoch number.

    Args:
        path: The base path of the csv files produced during training
    """
    import matplotlib.pyplot as plt

    def load_metric(suffix):
        # ndmin=1 keeps a one-epoch log as a length-1 array; without it
        # np.loadtxt returns a 0-d array and len() fails.
        return np.loadtxt("{}_{}.csv".format(path, suffix), ndmin=1)

    train_err = load_metric("train_err")
    val_err = load_metric("val_err")
    train_loss = load_metric("train_loss")
    val_loss = load_metric("val_loss")

    n = len(train_err) # number of epochs
    epochs = range(1, n + 1)

    panels = (
        ("Train vs Validation Error", "Error", train_err, val_err),
        ("Train vs Validation Loss", "Loss", train_loss, val_loss),
    )
    for title, ylabel, train_values, val_values in panels:
        plt.title(title)
        plt.plot(epochs, train_values, label="Train")
        plt.plot(epochs, val_values, label="Validation")
        plt.xlabel("Epoch")
        plt.ylabel(ylabel)
        plt.legend(loc='best')
        plt.show()

# These values must match the run that was trained; `epoch` is the 0-based
# index of the last epoch, so adjust it when the number of epochs changes.
final_model_path = get_model_name(
    name='GlizzyNet',
    batch_size=64,
    learning_rate=0.0005,
    epoch=29,
)
plot_training_curve(path=final_model_path)

In [None]:
# Final check on the held-out test split once training has finished.
# The loss reported here is unweighted cross-entropy.
criterion = nn.CrossEntropyLoss()
# get_data_loader returns (train, val, test, classes); only the test loader is needed.
test_loader = get_data_loader(target_classes, batch_size)[2]
evaluate_per_class(model=net, dataloader=test_loader, le=le, criterion=criterion)