In [1]:
import xml.etree.ElementTree as ET
from PIL import Image
import numpy as np
import torch
import matplotlib.pyplot as plt
import os
from Models import CustomResNet34
from sklearn.metrics import classification_report, log_loss, accuracy_score,  confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import torch.nn as nn
import torch.optim as optim
from Models import CustomResNet34, CustomResNet18, CustomResNet50
import seaborn as sns
import shutil
import pandas as pd
import time

This here function parses the XML file annotation for a given path to the XML

The file are structures like so; For every object:
- name tag for class name
- colour tag for colour name(not used)
- bounding box tag for the x and y min and max coordinates of the bounding box

This function returns a list of objects containing class names and bounding box coordinates

In [2]:
def parse_xml(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    
    objects = []
    
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        
        object_dict = {
            'name': name,
            'xmin': xmin,
            'ymin': ymin,
            'xmax': xmax,
            'ymax': ymax
        }
        objects.append(object_dict)
    
    return objects

Using the afore mentioned list, it will go through each item and crop the big image with the provided bounding boxes

This function will return another list of objects, with the name of the class and the np array reprezentation of the image

In [3]:
# Function to crop image based on coordinates and return a list of dictionaries with names and cropped images
def crop_image(image_path, objects):
    image = Image.open(image_path)
    cropped_images = []
    
    for obj in objects:
        name = obj['name']
        xmin = obj['xmin']
        ymin = obj['ymin']
        xmax = obj['xmax']
        ymax = obj['ymax']
        
        cropped_image = image.crop((xmin, ymin, xmax, ymax))
        cropped_image_np = np.array(cropped_image)
        
        cropped_images.append({
            'name': name,
            'image': cropped_image_np
        })
    
    return cropped_images

Transform it into a tensor

In [4]:
# Function to preprocess image for the model
def preprocess_image(image):
    preprocess = transforms.Compose([
        transforms.Grayscale(num_output_channels = 3),
        transforms.Resize((224, 224)),  # Resize to the required input size of your model
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    image = Image.fromarray(image)
    image = preprocess(image).unsqueeze(0)  # Add batch dimension
    return image

Load the model

In [5]:
# Function to load the model
def load_model(model_path):
    model = torch.load(model_path)
    model.eval()  # Set model to evaluation mode
    return model

In [6]:
# Function to create a directory
def create_directory(path, folder_name):
    directory = os.path.join(path, folder_name)
    if not os.path.exists(directory):
        os.makedirs(directory)
    return directory

# Function to save metrics in a text file
def save_metrics_to_file(metrics, directory):
    text_path = os.path.join(directory, 'metrics.txt')
    with open(text_path, 'w') as f:
        for metric, value in metrics.items():
            f.write(f"{metric}: {value:.4f}\n")

# Function to save confusion matrix as an image
def save_confusion_matrix_image(cm, classes, directory):
    plt.figure(figsize=(20, 20))
    sns.heatmap(cm, annot=False, cmap='Blues', xticklabels=classes, yticklabels=classes)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    
    image_path = os.path.join(directory, 'confusion_matrix.png')

    # Convert confusion matrix to a DataFrame
    df_confusion_matrix = pd.DataFrame(cm)

    csv_path = os.path.join(directory, "confusion_matrix.csv")

    # Save DataFrame to a CSV file
    df_confusion_matrix.to_csv(csv_path, index=False)

    
    plt.savefig(image_path)
    plt.close()


# Main script

def save_metrics(path, folder_name, y_true, y_pred, time=0, worst=0):
    # Create directory
    directory = create_directory(path, folder_name)

    # Convert lists or arrays to numpy arrays if not already
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    classes = [str(i) for i in range(200)]  # List of class labels
    save_confusion_matrix_image(cm, classes, directory)
    
    # Compute metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    
    metrics = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'time': time,
        'worst': worst
    }

    # Save metrics to file
    save_metrics_to_file(metrics, directory)

In [7]:
XML_path = "/home/gras/Documents/University/ComputerVision/BigDataset/annotations/"
img_path = "/home/gras/Documents/University/ComputerVision/BigDataset/images/"
model_path = "/home/gras/Documents/University/ComputerVision/resnet34-occlusion_1.pth"

model = load_model(model_path)

classes = os.listdir("/home/gras/Documents/University/ComputerVision/archive/64/")

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

overall_true = []
overall_pred = []

worst = 100

# Start the timer
start_time = time.time()

for index in range(200): # range(0, len(os.listdir(XML_path))):
    XML_file = str(index)+".xml"
    img_file = str(index)+".png"
    
    XML_objs = parse_xml(os.path.join(XML_path, XML_file))

    croped_images = crop_image(os.path.join(img_path, img_file), XML_objs)

    y_pred = []
    for img in croped_images:
        with torch.no_grad():
            tensor = preprocess_image(img['image']).to(device)
            output = model(tensor)
            _, predicted = torch.max(output, 1)

            y_pred.append(predicted.item())

    pred_names = [classes[i] for i in y_pred]
    true_names = [obj["name"] for obj in croped_images]

    f1 = f1_score(true_names, pred_names, average='macro')
    worst = min(f1, worst)

    overall_pred.append(pred_names)
    overall_true.append(true_names)
    
    save_metrics('./', 'resnet34_occlusion_bigdataset_'+str(index), true_names, pred_names)

save_metrics('./', 'resnet34_occlusion_overall', np.array(overall_true).flatten(), np.array(overall_pred).flatten(), (time.time() - start_time), worst)


  return F.conv2d(input, weight, bias, self.stride,
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))