In [None]:
import torch
import torchvision.models as models
import torch.nn as nn
from torchsummary import summary

import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

# Root directory that holds the image folders used by the dataset cells below.
# The original machine-specific location stays the default; set the
# APS360_DATA_PATH environment variable to run the notebook elsewhere without
# editing this cell.
data_path = os.environ.get(
    "APS360_DATA_PATH",
    "C:/Users/arwin/Documents/dev/APS360-PROJECT/data",
)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


In [None]:

class BigModel(nn.Module):
  """Two-branch network: a ResNet-18 feature extractor next to a bidirectional GRU.

  Both branches read the same image batch. Each one produces a 256-wide vector;
  the two vectors are concatenated (512 values) and mapped to 10 outputs by ``fc``.
  """

  def __init__(self):
    super().__init__()

    # --- CNN branch ---
    # Keep every child module of the pretrained ResNet-18 except the final two,
    # then append a global max-pool that reduces the feature map to 1x1.
    backbone = models.resnet18(pretrained=True)
    kept_layers = list(backbone.children())[:-2]
    self.features = nn.Sequential(*kept_layers)
    self.features.add_module("pool", nn.AdaptiveMaxPool2d((1, 1)))

    self.fc1_cnn = nn.Linear(512, 256)
    self.dropout_cnn = nn.Dropout(p=0.5)
    self.batch_norm_cnn = nn.BatchNorm1d(256)

    # --- RNN branch ---
    # forward() hands the GRU one step per image row with channels * width
    # values per step, so that product has to equal 672 for the input images.
    self.bi_gru = nn.GRU(
        input_size=672,
        hidden_size=256,
        num_layers=1,
        batch_first=True,
        bidirectional=True,
    )
    self.fc_gru = nn.Linear(512, 256)

    # --- Head applied to the concatenated branch outputs ---
    self.dropout = nn.Dropout(p=0.5)
    self.fc = nn.Linear(512, 10)  # one output per genre class (10 classes)

  def forward(self, x):
    """Run both branches on ``x`` (a 4-D batch: N, C, H, W) and return ``fc``'s raw outputs.

    No activation is applied to the result.
    """
    # CNN branch: backbone -> flatten -> linear -> dropout -> batch norm
    cnn_out = torch.flatten(self.features(x), 1)
    cnn_out = self.fc1_cnn(cnn_out)
    cnn_out = self.dropout_cnn(cnn_out)
    cnn_out = self.batch_norm_cnn(cnn_out)

    # GRU branch: reinterpret the raw batch as a sequence of H steps.
    # NOTE(review): view() only reinterprets memory (no permute), so with more
    # than one channel a step is not "one image row across all channels" -
    # confirm this is the intended sequence layout.
    n_samples, _, n_rows, _ = x.shape
    sequence = x.view(n_samples, n_rows, -1)
    gru_steps, _ = self.bi_gru(sequence)
    # Output channels [:256] belong to the forward direction and [256:] to the
    # backward one; take each direction's final state (last / first step).
    forward_final = gru_steps[:, -1, :256]
    backward_final = gru_steps[:, 0, 256:]
    gru_out = self.fc_gru(torch.cat((forward_final, backward_final), dim=1))

    # Fuse both branches and classify.
    fused = torch.cat((cnn_out, gru_out), -1)
    return self.fc(self.dropout(fused))

In [None]:
def get_model_name(batch_size, learning_rate, epoch, weight_decay):
    """Build a name for the model that encodes its hyperparameter values.

    Args:
        batch_size: Value written after the ``bs`` tag.
        learning_rate: Value written after the ``lr`` tag.
        epoch: Value written after the ``epoch`` tag.
        weight_decay: Value written after the ``wd`` tag.
    Returns:
        path: A string of the form
            ``transfermodel_bs<batch_size>_lr<learning_rate>_epoch<epoch>_wd<weight_decay>``.
    """
    name_parts = (
        "transfermodel",
        f"bs{batch_size}",
        f"lr{learning_rate}",
        f"epoch{epoch}",
        f"wd{weight_decay}",
    )
    return "_".join(name_parts)

In [None]:
def modify_model_for_binary_classification(model):
    """Freeze the feature extractors of ``model`` and install a binary output head.

    The parameters of ``model.features`` and ``model.bi_gru`` are excluded from
    gradient computation. Other submodules (for example ``fc1_cnn``, ``fc_gru``
    and ``batch_norm_cnn``) are not touched and keep their current
    ``requires_grad`` settings.

    ``model.fc`` is replaced by Linear(512, 256) -> ReLU -> Dropout(0.5) ->
    Linear(256, 1) -> Sigmoid, so the head consumes 512 features and emits a
    single value in [0, 1] per sample.

    Args:
        model: Module exposing ``features``, ``bi_gru`` and ``fc`` attributes.
    Returns:
        The same ``model`` object, modified in place.
    """
    # Freeze one branch after the other, in the order listed.
    for branch_name in ("features", "bi_gru"):
        for weight in getattr(model, branch_name).parameters():
            weight.requires_grad = False

    # Freshly initialised layers for the binary head.
    head_layers = [
        nn.Linear(512, 256),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(256, 1),
        nn.Sigmoid(),
    ]
    model.fc = nn.Sequential(*head_layers)

    return model

In [None]:
class CustomImageDataset(Dataset):
    """Binary image dataset assembled from per-label directories of ``.png`` files.

    Every ``.png`` file directly inside a directory becomes one sample. Samples
    from the directory registered under the key ``'hit'`` get label 1; samples
    from any other key get label 0.
    """

    def __init__(self, img_dirs, transform=None):
        """
        Args:
            img_dirs (dict): Dictionary with keys 'hit' and 'miss' and their respective image directory paths.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.img_labels = []
        self.img_files = []
        for label, dir_path in img_dirs.items():
            target = 1 if label == 'hit' else 0
            # os.listdir returns entries in arbitrary order. Sorting makes the
            # index -> file mapping identical on every run and machine, which
            # seeded splits of this dataset rely on to be reproducible.
            for file in sorted(os.listdir(dir_path)):
                if file.endswith('.png'):
                    self.img_files.append(os.path.join(dir_path, file))
                    self.img_labels.append(target)
        self.transform = transform

    def __len__(self):
        """Return the number of image files found at construction time."""
        return len(self.img_files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened as RGB and passed through ``transform`` when one was
        given; the label is a float tensor holding 0 or 1.
        """
        img_path = self.img_files[idx]
        # convert() returns a new image, so the source file can be closed as soon
        # as the conversion is done instead of waiting for garbage collection.
        with Image.open(img_path) as source:
            image = source.convert('RGB')
        label = self.img_labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.float)


In [None]:
# Images are only converted to tensors; no resizing is applied.
transform = transforms.Compose([transforms.ToTensor()])

# Label name -> directory that holds the images for that label.
img_dirs = {
    label: f"{data_path}/{folder}"
    for label, folder in (("hit", "billboard_spec"), ("miss", "random_spec"))
}

# Build one dataset spanning both directories.
dataset = CustomImageDataset(img_dirs=img_dirs, transform=transform)


In [None]:
# Roughly 70% / 15% / 15% train / validation / test; the test split absorbs the
# rounding remainder so the three sizes always sum to len(dataset).
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Seed the split explicitly. An unseeded random_split hands out different samples
# on every kernel restart, so the "test" set would change between runs.
# NOTE(review): the split is only reproducible if the dataset enumerates its
# samples in the same order each time, and the notebook that trained the model
# must use the same seed for this test split to stay held-out - confirm.
split_seed = 0
split_generator = torch.Generator().manual_seed(split_seed)

# Two-stage split: carve off the training set first, then divide the remainder.
train_dataset, val_test_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size + test_size], generator=split_generator
)
val_dataset, test_dataset = torch.utils.data.random_split(
    val_test_dataset, [val_size, test_size], generator=split_generator
)


In [None]:
# Build the original 10-output architecture first so the saved state dict matches
# its parameter names, load the weights, then swap in the binary head.
model = BigModel()
path_to_saved_model = 'C:/Users/arwin/Documents/dev/APS360-PROJECT/genre_classifier/genre_ensemble_model_batch256_lr0.001_weightdecay3e-05.pth'
# map_location: `device` falls back to the CPU when CUDA is unavailable; without
# remapping, a checkpoint that holds CUDA tensors cannot be loaded on such a machine.
model.load_state_dict(torch.load(path_to_saved_model, map_location=device))
model = modify_model_for_binary_classification(model)
# NOTE(review): the `fc` head installed above is newly initialised and no other
# weights are loaded in this cell - confirm the head is trained (or a fine-tuned
# checkpoint is loaded) before the test metrics below are interpreted.
model = model.to(device)

# Hyperparameters. Only batch_size is consumed in this cell; the remaining values
# are kept for any other cells that read them.
batch_size = 128
learning_rate = 0.0005
num_epochs = 75
weight_decay = 0
save = True

# Evaluation order does not matter, so the test loader is not shuffled.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Evaluate the binary classifier on the test split.
#
# This cell is self-contained: it previously relied on `criterion` and
# `get_accuracy`, neither of which is defined anywhere in this notebook, so it
# failed on a fresh kernel.
# NOTE(review): if those helpers implemented something other than binary
# cross-entropy and a 0.5 decision threshold, adjust the two marked lines.
#
# The model's `fc` head ends in Linear(256, 1) + Sigmoid, so it emits one
# probability per sample with shape (batch, 1).
test_criterion = nn.BCELoss()  # (review) loss matching the sigmoid output

model.eval()  # disable dropout and use the BatchNorm running statistics
test_running_loss = 0.0
test_running_corrects = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        # BCELoss needs float targets with the same shape as the outputs
        # (the previous `.long()` cast with shape (batch,) does not satisfy that).
        labels = labels.to(device).float().view(-1, 1)

        outputs = model(inputs)
        loss = test_criterion(outputs, labels)
        predictions = (outputs >= 0.5).float()  # (review) decision threshold

        # The loss is a per-batch mean, so weight it by the batch size before
        # summing; correct predictions are counted directly.
        test_running_loss += loss.item() * inputs.size(0)
        test_running_corrects += (predictions == labels).sum().item()

# Per-sample averages over the whole test split.
test_loss = test_running_loss / len(test_loader.dataset)
test_acc = test_running_corrects / len(test_loader.dataset)

In [None]:
# Display the test accuracy computed by the evaluation cell above.
test_acc