This is the data set loader

In [None]:
#Custom Data Set Loader to deal with multi label data-set

from torch.utils.data import Dataset
from PIL import Image
import pandas as pd
import os
import torch
#Custom Data Set Loader to deal with multi label data-set
class MultiLabelDataset(Dataset):
    """Image dataset whose labels are read from the columns of a CSV file.

    Column 0 of the CSV is used as the image file name (joined onto
    ``root_dir``); all remaining columns of a row are converted to one
    float32 label tensor for that image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.annotations = pd.read_csv(csv_file)  # one row per image
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of annotated images, i.e. rows in the CSV."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for the row at position ``idx``.

        The image is loaded from disk and converted to RGB; ``transform`` is
        applied to it when one was supplied. ``labels`` is a float32 tensor
        built from every column after the file-name column.
        """
        file_name = self.annotations.iloc[idx, 0]
        image = Image.open(os.path.join(self.root_dir, file_name)).convert('RGB')

        label_values = self.annotations.iloc[idx, 1:].values.astype('float32')
        labels = torch.tensor(label_values)

        if self.transform:
            image = self.transform(image)

        return image, labels




: 

In [34]:
#This is the model 
import torch.nn as nn
import torch.nn.functional as F
import torch
import numpy as np
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
import torchvision.models as models
from torchvision.models import VGG16_Weights



class ImageClassificationBase(nn.Module):
    """Training helpers shared by the classifiers in this notebook.

    Subclasses provide ``forward``; its output is treated as raw logits
    (the sigmoid is folded into the loss).
    """

    def training_step(self, batch):
        """Compute the binary cross-entropy-with-logits loss for one batch.

        Args:
            batch: An ``(images, labels)`` pair.

        Returns:
            The loss tensor for the batch.
        """
        inputs, targets = batch
        logits = self(inputs)
        return F.binary_cross_entropy_with_logits(logits, targets)

    def epoch_end(self, epoch, result):
        """Print the epoch index together with that epoch's result."""
        summary = f'Epoch: {epoch}, Last Result: {result}'
        print(summary)


class MusicPosterClassification(ImageClassificationBase):
    """Multi-label poster classifier built on an ImageNet-pretrained VGG16.

    The convolutional feature extractor is frozen; only the replacement
    classifier head is trained. ``forward`` returns raw logits, one per class.
    """

    def __init__(self, num_classes):
        """
        Args:
            num_classes (int): Number of output logits (one per label).
        """
        super().__init__()
        self.vgg16 = models.vgg16(weights=VGG16_Weights.IMAGENET1K_V1)

        # Freeze the convolutional layers so only the new head receives gradients.
        for param in self.vgg16.features.parameters():
            param.requires_grad = False

        # Replace the ImageNet classifier with a head sized for our label set.
        self.vgg16.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),  # First fully connected layer
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),  # Second fully connected layer
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes)  # Final layer with num_classes outputs
        )

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for an image batch."""
        x = self.vgg16.features(x)
        # VGG16's adaptive average pool always yields a 7x7 map, so the head's
        # 512*7*7 input size holds for any input resolution. For 224x224 input
        # the feature map is already 7x7 and this is a no-op. The pool has no
        # parameters, so previously saved state dicts remain loadable.
        x = self.vgg16.avgpool(x)
        # flatten (unlike view) does not require a contiguous tensor.
        x = torch.flatten(x, 1)
        return self.vgg16.classifier(x)
    


This section defines the evaluation and training functions used to train the model.

In [None]:
# Evaluation and training functions
import torch
import os
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split
from torch.utils.data.dataloader import DataLoader
import matplotlib.pyplot as plt
from torchvision.utils import make_grid
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from torchvision.transforms import Compose, Resize, ToTensor
from torch.utils.data import DataLoader
import torch.optim as optim
from torchvision.utils import make_grid
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score

# --- Training configuration (notebook-level constants) ---

# Training batch size; the validation loader in the training cell uses twice this value.
BATCH_SIZE = 50  # Adjust as needed to avoid remainder error
# Sigmoid-probability cutoff above which a class counts as predicted.
THRESHOLD = 0.1
# Number of output labels the classifier is built with.
NUM_CLASSES = 16
# Annotation CSV handed to MultiLabelDataset (same file name that
# create_stratified_subset writes).
CSV_FILE = 'subset_dataset.csv'
# Directory containing the image files referenced by the CSV.
ROOT_DIR = 'dataset4'
# Directory where TrainModel saves the final model weights.
MODEL_DIR = "model"
# TensorBoard log directory used by fit().
LOGS_DIR = "logs"
# Directory where fit() writes one checkpoint per epoch.
CHECKPOINT_DIR = "checkpoints"
# Maximum number of training epochs passed to fit().
NUM_EPOCHS: int = 30
# Learning rate intended for the optimizer used during training.
LEARNING_RATE: float = 0.001
# (height, width) every image is resized to before being fed to the model.
DATA_IMAGE_SIZE = (224,224)


@torch.no_grad()
def evaluate(model, val_loader):
    """Evaluate ``model`` on ``val_loader`` and return loss and multi-label metrics.

    The model is switched to eval mode (callers that continue training must
    call ``model.train()`` again). Predictions are obtained by thresholding
    the sigmoid of the logits at ``THRESHOLD``.

    Args:
        model: Module returning raw logits for a batch of images.
        val_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        dict with keys ``'val_loss'`` (mean of the per-batch BCE losses),
        ``'precision'``, ``'recall'`` and ``'val_score'`` (F1), all computed
        with weighted averaging.

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    model.eval()

    losses = []
    all_preds = []
    all_labels = []
    all_probs = []

    for images, labels in val_loader:
        output = model(images)

        loss = F.binary_cross_entropy_with_logits(output, labels)
        losses.append(loss.item())

        prob = torch.sigmoid(output)
        preds = (prob > THRESHOLD).float()
        all_preds.append(preds.cpu().numpy())
        all_labels.append(labels.cpu().numpy())
        all_probs.append(prob.cpu().numpy())  # Save the probabilities for ROC AUC

    # Fail with a clear message instead of np.vstack's opaque
    # "need at least one array to concatenate".
    if not losses:
        raise ValueError("evaluate() received an empty val_loader; cannot compute metrics")

    # Combine losses and compute metrics.
    # NOTE: `np` is imported in the model cell, not in this one.
    epoch_loss = np.mean(losses)
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)
    all_probs = np.vstack(all_probs)  # kept for the (currently disabled) ROC AUC metric below
    # Weighted (rather than macro) averaging is used because of class imbalance.
    precision = precision_score(y_true=all_labels, y_pred=all_preds, average='weighted', zero_division=1)
    recall = recall_score(y_true=all_labels, y_pred=all_preds, average='weighted', zero_division=1)
    val_score = f1_score(y_true=all_labels, y_pred=all_preds, average='weighted', zero_division=1)
    #roc_auc = roc_auc_score(y_true=all_labels, y_score=all_probs, average='weighted', multi_class='ovr')

    return {'val_loss': epoch_loss, 'precision': precision, 'recall': recall, 'val_score': val_score}

def fit(epochs, model, train_loader, val_loader, opt_func=torch.optim.SGD, lr=None):
    """Train ``model`` with per-epoch validation, checkpoints and early stopping.

    Each epoch: train over ``train_loader``, evaluate on ``val_loader``, log
    the metrics to TensorBoard (``LOGS_DIR``), save ``best_model2.pth`` when
    the validation loss improves, and write a checkpoint into
    ``CHECKPOINT_DIR``. Training stops early (saving
    ``early_stopped_model2.pth``) after 10 consecutive epochs without a
    validation-loss improvement.

    Args:
        epochs (int): Maximum number of epochs.
        model: Module exposing ``training_step(batch)`` and ``epoch_end(epoch, result)``.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of validation batches, passed to ``evaluate``.
        opt_func: Optimizer class; called as ``opt_func(params, lr=lr)``.
        lr (float, optional): Learning rate. Defaults to ``LEARNING_RATE``.

    Returns:
        list[dict]: One result dict per completed epoch (the ``evaluate``
        output plus ``'train_loss'``), including the epoch that triggered
        early stopping.
    """
    if lr is None:
        lr = LEARNING_RATE

    # torch.save does not create missing directories.
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)

    writer = SummaryWriter(LOGS_DIR)
    history = []
    optimizer = opt_func(model.parameters(), lr=lr)

    # Start at +inf so the first epoch always counts as an improvement.
    # (Starting at 0 meant no loss could ever be "better", so the best model
    # was never saved and early stopping always fired after `patience` epochs.)
    best_val_loss = float('inf')
    patience = 10
    total_images_processed = 0
    epochs_without_improvement = 0

    try:
        for epoch in range(epochs):
            model.train()  # evaluate() leaves the model in eval mode
            train_losses = []
            for bnum, batch in enumerate(train_loader, start=1):
                images, _ = batch
                total_images_processed += images.size(0)

                loss = model.training_step(batch)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                # Store a plain float: keeping the loss tensor would keep the
                # whole autograd graph of every batch alive until epoch end.
                batch_loss = loss.item()
                train_losses.append(batch_loss)

                print(f'Epoch [{epoch}/{epochs}], Batch [{bnum}/{len(train_loader)}], Loss: {batch_loss:.4f}, Total Images Processed: {total_images_processed}')

            result = evaluate(model, val_loader)
            result['train_loss'] = (
                sum(train_losses) / len(train_losses) if train_losses else float('nan')
            )

            # TensorBoard
            writer.add_scalar('Train Loss', result['train_loss'], epoch)
            writer.add_scalar('Validation Loss', result['val_loss'], epoch)
            writer.add_scalar('F1 Score', result['val_score'], epoch)
            writer.add_scalar('Precision', result['precision'], epoch)
            writer.add_scalar('Recall', result['recall'], epoch)

            if result['val_loss'] < best_val_loss:
                # Plain float keeps the checkpoint free of numpy scalar objects.
                best_val_loss = float(result['val_loss'])
                epochs_without_improvement = 0
                torch.save(model.state_dict(), 'best_model2.pth')
            else:
                epochs_without_improvement += 1

            checkpoint_path = os.path.join(CHECKPOINT_DIR, f'checkpoint_epoch_{epoch}.pth')
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'best_val_loss': best_val_loss,
                },
                checkpoint_path,
            )

            model.epoch_end(epoch, result)
            history.append(result)

            # Checked last so the final epoch is still checkpointed and recorded.
            if epochs_without_improvement >= patience:
                print("Early stopping!")
                torch.save(model.state_dict(), 'early_stopped_model2.pth')
                break
    finally:
        # Flush and close the TensorBoard writer even if training raised.
        writer.close()

    return history


def TrainModel(model_output_name, train_dataloader, validation_dataloader):
    """Create a new classifier, train it, and save its final weights.

    Prints the metrics of the freshly constructed model on the validation
    loader, runs ``fit`` for ``NUM_EPOCHS`` epochs with Adam, prints the
    returned history, and writes the model's state dict to
    ``MODEL_DIR/model_output_name`` (creating ``MODEL_DIR`` if needed).

    Args:
        model_output_name (str): File name for the saved weights.
        train_dataloader: Loader of training batches.
        validation_dataloader: Loader of validation batches.
    """
    classifier = MusicPosterClassification(NUM_CLASSES)

    # Metrics before any training, as a baseline.
    baseline = evaluate(classifier, validation_dataloader)
    print(f"INFO: model evaluation {baseline}")

    training_history = fit(
        NUM_EPOCHS,
        classifier,
        train_dataloader,
        validation_dataloader,
        opt_func=torch.optim.Adam,
    )
    print(f"INFO: model history {training_history}")

    os.makedirs(MODEL_DIR, exist_ok=True)
    torch.save(classifier.state_dict(), os.path.join(MODEL_DIR, model_output_name))


In [None]:
# This function draws a stratified subset (10% by default) of the full dataset, preserving its label distribution.
import pandas as pd
from sklearn.model_selection import train_test_split

def create_stratified_subset(csv_path, subset_size=0.1, random_state=None,
                             output_path='subset_dataset.csv'):
    """
    Creates a stratified subset of a given dataset and saves it as CSV.

    The first column of the input CSV is treated as the file name; all
    remaining columns are used as the stratification labels.

    Parameters:
    - csv_path (str): Path to the full dataset CSV file.
    - subset_size (float): Fraction of the dataset to include in the subset (default 0.1 for 10%).
    - random_state (int or None): Controls the shuffling applied to the data before applying the split.
                                  Pass an int for reproducible output across multiple function calls.
    - output_path (str): Where to write the subset CSV (default 'subset_dataset.csv',
                         the file name previously hard-coded here).

    NOTE(review): with several label columns the split is presumably stratified
    on each distinct label combination, and scikit-learn rejects combinations
    that occur only once — confirm against the dataset.
    """
    full_df = pd.read_csv(csv_path)

    # Exclude the filename column; everything else is a label.
    labels = full_df.iloc[:, 1:]

    # The "train" part of the split is the subset we keep; the rest is discarded.
    subset_df, _ = train_test_split(
        full_df,
        test_size=1-subset_size,
        stratify=labels,
        random_state=random_state,
    )
    subset_df.to_csv(output_path, index=False)

csv_path = 'dataset_encoded.csv'
# Keep a 10% stratified subset (subset_size=0.1); random_state is fixed so the split is reproducible.
create_stratified_subset(csv_path, subset_size=0.1, random_state=45)

In [None]:
# Here we can train the model adjusting different parameters 
# The keyword must be `root_dir` (the parameter name of MultiLabelDataset.__init__);
# `ROOT_DIR=` raised "TypeError: unexpected keyword argument".
dataset = MultiLabelDataset(csv_file=CSV_FILE, root_dir=ROOT_DIR, transform=transforms.Compose([
    transforms.Resize(DATA_IMAGE_SIZE), transforms.ToTensor()
]))

validation_percent = 0.2  # fraction of the samples held out for validation (20%)
num_val = int(len(dataset) * validation_percent)
num_train = len(dataset) - num_val

# Split dataset into training and validation sets.
# A seeded generator makes the split identical on every Restart & Run All.
SPLIT_SEED = 42
train_ds, val_ds = random_split(
    dataset,
    [num_train, num_val],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoaders for training and validation sets.
# Validation needs no gradients, so it can afford a batch twice as large.
train_dl = DataLoader(train_ds, BATCH_SIZE, shuffle=True, num_workers=0, pin_memory=True)
val_dl = DataLoader(val_ds, BATCH_SIZE*2, num_workers=0, pin_memory=True)

# Train and save the final weights as MODEL_DIR/model4.pth
TrainModel("model4.pth", train_dl, val_dl)


#WE TRIED TO RUN IT ON MICROSOFT AZURE AI

: 

This cell preprocesses an image to the input size the model expects and maps predictions to class names.

In [75]:
from torchvision import transforms
from PIL import Image

def preprocess_image(image_path):
    """Load an image file and turn it into a single-image batch for the model.

    The image is converted to RGB (as the training dataset loader does), so
    grayscale, palette and RGBA files also yield exactly three channels. It is
    then resized to ``DATA_IMAGE_SIZE`` and converted to a tensor.

    Args:
        image_path (str): Path to the image file.

    Returns:
        Tensor of shape ``(1, 3, H, W)`` where ``(H, W)`` is ``DATA_IMAGE_SIZE``.
    """
    transform = transforms.Compose([
        transforms.Resize(DATA_IMAGE_SIZE),
        transforms.ToTensor()
    ])

    # convert() loads the pixel data into a new image, so the file handle can
    # be closed right away.
    with Image.open(image_path) as image:
        rgb_image = image.convert('RGB')

    # Add batch dimension
    return transform(rgb_image).unsqueeze(0)

def match_classes_with_confidence(predictions, probabilities, class_names):
    """
    Match the binary predictions of the model to the class names and include confidence levels,
    rounded to 2 decimal places and formatted with a '%' sign.

    Parameters:
    - predictions: A PyTorch tensor of shape (1, num_classes) with binary predictions.
    - probabilities: A PyTorch tensor of shape (1, num_classes) indicating the confidence levels of predictions.
    - class_names: A list of class names corresponding to each position in the tensors.

    Returns:
    - List of tuples with predicted class names and their confidence percentages, for those predicted,
      formatted as 'XX.XX%', ordered from highest to lowest confidence. Empty if nothing was predicted.
    """
    # Work on 1-D views so indexing is uniform whatever the number of hits.
    flat_predictions = predictions.reshape(-1)
    flat_probabilities = probabilities.reshape(-1)

    # Positions of the predicted classes; flatten() always gives a 1-D tensor,
    # so tolist() returns a list even for zero or one hit.
    predicted_indices = (flat_predictions > 0).nonzero(as_tuple=False).flatten().tolist()

    # Sort on the numeric probability. Sorting the formatted strings compares
    # them lexicographically, which ranks "9.00%" above "85.00%" and "100.00%".
    ranked = sorted(
        ((flat_probabilities[i].item(), class_names[i]) for i in predicted_indices),
        key=lambda pair: pair[0],
        reverse=True,
    )

    return [(name, f"{confidence * 100:.2f}%") for confidence, name in ranked]


In [None]:
# Forward slash works on every platform; the previous "model\model6.pth"
# relied on the invalid escape sequence "\m" and only resolved on Windows.
model_path = "model/model6.pth"
poster_path = "BurningRome.jpg"  # Replace this with the path to your poster

# Class names, in the same order as the model's output logits.
class_names = [
    "Blues", "Classical", "Country", "Electronic", "Folk", "HipHop",
    "Jazz", "LoFi", "Metal", "Pop", "Punk", "Reggae",
    "Reggaeton", "Rock", "Soul", "Techno"
]

# Size the head from the class list so the two cannot drift apart.
model = MusicPosterClassification(num_classes=len(class_names))
# map_location lets weights saved on a GPU machine load on a CPU-only one.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()  # disable dropout for inference

image = preprocess_image(poster_path)

with torch.no_grad():  # No gradients needed
    outputs = model(image)  # Get raw scores from the model
    probabilities = torch.sigmoid(outputs)  # Apply sigmoid to convert scores to probabilities
    predictions = (probabilities > THRESHOLD).int()  # Apply threshold to get binary predictions

# test loaded model on provided image
print(match_classes_with_confidence(predictions=predictions, probabilities=probabilities, class_names=class_names))

: 

In [1]:
import tensorflow as tf
import datetime




In [1]:
def show_batch(dl):
    """Plot the images of the first batch of ``dl`` as a grid, 10 per row.

    Nothing is drawn when the loader yields no batch; ``plt.show()`` is
    called either way.
    """
    first_batch = next(iter(dl), None)

    if first_batch is not None:
        images, _ = first_batch  # labels are not displayed
        _, axis = plt.subplots(figsize = (16,12))
        axis.set_xticks([])
        axis.set_yticks([])
        # make_grid returns (C, H, W); imshow expects (H, W, C).
        grid = make_grid(images,nrow=10)
        axis.imshow(grid.permute(1,2,0))

    plt.show()

In [None]:
import torch
from torchviz import make_dot

# Render the model's computation graph with torchviz.
# NOTE: MusicPosterClassification comes from the model cell earlier in this
# notebook; run that cell first, otherwise this cell fails with the NameError
# recorded in the output below.
# NOTE: this rebinds the notebook-level names `model` and `output`.

# Define the model with num_classes=16
model = MusicPosterClassification(num_classes=16)

# Dummy input data for visualization (batch size=1, 3 channels, 224x224 size)
dummy_input = torch.randn(1, 3, 224, 224)

# Forward pass to create the computation graph
output = model(dummy_input)

# Generate the visual diagram; passing the named parameters to make_dot
# (presumably so graph nodes carry parameter names — see torchviz docs)
dot = make_dot(output, params=dict(model.named_parameters()))

# Save the diagram as an image (PNG format)
dot.format = 'png'
dot.render('music_poster_classification_diagram')


NameError: name 'MusicPosterClassification' is not defined