In [3]:
# MA416 Final Project Sprint 1 CNN
# Jacob Richardson, Kevin Cotellesso
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt


In [4]:
# -------------- NETWORK SETTINGS -------------- #
# --- Device setup ---
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
pin_memory = False  # forwarded to the DataLoaders

# --- Hyperparameters ---
num_epochs, batch_size, learning_rate = 10, 32, 0.001

# --- Data transformations ---
# Per-channel statistics given to Normalize: the standard ImageNet values,
# which is what the ImageNet-pretrained backbone expects.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize to 224x224, convert to a tensor, normalize.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

# --- Datasets and DataLoaders ---
# Root folder handed to ImageFolder (one sub-directory per class).
dataset_root = "../Datasets/SmellySongs23k"


In [None]:



# --- Load dataset ---
# ImageFolder treats every sub-directory of dataset_root as one class.
full_dataset = datasets.ImageFolder(root=dataset_root, transform=transform)
class_names = full_dataset.classes

# 80/20 train/validation split; the validation share is rounded down and the
# training split takes whatever is left.
dataset_size = len(full_dataset)
val_size = int(0.2 * dataset_size)
train_size = dataset_size - val_size

# Seed the global RNG right before splitting so the partition is repeatable.
torch.manual_seed(42)
split_lengths = [train_size, val_size]
train_data, val_data = torch.utils.data.random_split(full_dataset, split_lengths)

# Options shared by both loaders; only the training loader shuffles.
loader_options = {
    "batch_size": batch_size,
    "num_workers": 4,
    "pin_memory": pin_memory,
}
train_loader = DataLoader(train_data, shuffle=True, **loader_options)
val_loader = DataLoader(val_data, shuffle=False, **loader_options)

dataset_summary = f"Loaded {dataset_size} images from {dataset_root} ({len(class_names)} classes). Train/Val = {train_size}/{val_size}"
print(dataset_summary)

# --- Load pretrained VGG16 ---
model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# --- Freeze convolutional base ---
# Only the classifier head is optimized below; the feature extractor keeps
# its pretrained weights.
for param in model.features.parameters():
    param.requires_grad = False

# --- Replace the classifier (one output per dataset class) ---
# Size the head from the dataset instead of a hard-coded 2 so the model and
# the ImageFolder labels can never disagree on the number of classes.
num_classes = len(class_names)
num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, num_classes)

model = model.to(device)

# --- Loss and optimizer ---
criterion = nn.CrossEntropyLoss()
# NOTE(review): the settings cell defines learning_rate = 0.001, but this
# optimizer uses its own 1e-4 -- confirm which value is intended.
optimizer = optim.Adam(model.classifier.parameters(), lr=0.0001)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'data/train'

In [None]:
# --- Training loop ---
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is put in train mode and updated
    after every batch; otherwise the model is put in eval mode and the pass
    runs without gradient tracking. Both metrics are averaged over the number
    of samples actually seen, and are ``nan`` if the loader yields nothing.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum, num_correct, num_seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_count = labels.size(0)
            # criterion returns a batch mean; weight it by the batch size so
            # a smaller final batch does not skew the epoch average.
            loss_sum += loss.item() * batch_count
            # .item() keeps the counters as plain Python ints.
            num_correct += (outputs.argmax(dim=1) == labels).sum().item()
            num_seen += batch_count

    if num_seen == 0:
        return float("nan"), float("nan")
    return loss_sum / num_seen, num_correct / num_seen


# NOTE(review): the settings cell defines num_epochs = 10, but this loop uses
# its own count -- confirm which value is intended.
epochs = 5
for epoch in range(epochs):
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
    # Evaluate on the held-out split every epoch so overfitting is visible.
    val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
    print(
        f"Epoch {epoch+1}/{epochs} | Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}"
        f" | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}"
    )

# --- Save the trained model ---
# Only the parameters (state_dict) are stored; the architecture has to be
# rebuilt before they can be loaded again.
torch.save(model.state_dict(), "vgg16_spectrogram_ai_classifier.pth")

print("Training complete and model saved")


In [None]:
from PIL import Image
import torch.nn.functional as F

# Load model
# map_location lets a checkpoint written during a GPU run be restored on a
# CPU-only machine as well.
state_dict = torch.load("vgg16_spectrogram_ai_classifier.pth", map_location=device)

# Rebuild the architecture without pretrained weights, then restore the
# fine-tuned parameters from the checkpoint.
model = models.vgg16(weights=None)
num_features = model.classifier[6].in_features
# Size the head from the checkpoint so it always matches the saved weights.
num_outputs = state_dict["classifier.6.weight"].shape[0]
model.classifier[6] = nn.Linear(num_features, num_outputs)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

# Load image
img_path = "your_test_spectrogram.png"
# Open inside a context manager so the file handle is released right away;
# convert() returns an independent in-memory image.
with Image.open(img_path) as img_file:
    img = img_file.convert("RGB")
# Same preprocessing as training, plus a leading batch dimension.
img_t = transform(img).unsqueeze(0).to(device)

# Predict
with torch.no_grad():
    outputs = model(img_t)
    probs = F.softmax(outputs, dim=1)
    pred_class = torch.argmax(probs, dim=1).item()

# ImageFolder assigns class indices in sorted folder-name order. Reuse that
# ordering instead of a hand-written list, which could be silently inverted.
labels = list(class_names)
confidence = probs[0][pred_class].item()
print(f"Prediction: {labels[pred_class]} (confidence: {confidence:.2f})")


In [None]:
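# NOTE: ImageFolder's default PIL loader and the .convert("RGB") call in the
# inference cell already produce 3-channel images. If single-channel (grayscale)
# spectrograms are ever fed in through a custom loader, add
# transforms.Grayscale(num_output_channels=3) before transforms.ToTensor() in `transform`.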