In [1]:
!pip install onnxruntime-gpu torchvision timm matplotlib seaborn psutil

Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime-gpu)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime_gpu-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (291.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m291.5/291.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m6.6 MB/s[0m eta [36m0:

In [None]:
import os
import time
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import psutil
import onnxruntime as ort
from onnxruntime.quantization import quantize_dynamic, QuantType
from torchvision.models import efficientnet_b0

class DefectDataset(Dataset):
    def __init__(self, folder, transform=None):
        self.folder = folder
        self.transform = transform
        self.images = [f for f in os.listdir(folder) if f.endswith('.png') and not f.endswith('_GT.png')]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = os.path.join(self.folder, self.images[idx])
        gt_name = os.path.join(self.folder, self.images[idx].replace('.png', '_GT.png'))
        image = Image.open(img_name).convert('RGB')
        label = 0  # label is defaulting to 0 (no defect)
        if os.path.exists(gt_name):
            label_image = plt.imread(gt_name)
            label = int(np.max(label_image) > 0)  # ensuring that the label is binary
        if self.transform:
            image = self.transform(image)
        return image, label

# utility function for metrics
def calculate_metrics(true_labels, pred_labels):
    true_labels = np.array(true_labels).astype(int)
    pred_labels = np.array(pred_labels).astype(int)

    if len(np.unique(true_labels)) > 2 or len(np.unique(pred_labels)) > 2:
        raise ValueError("Labels must be binary (0 or 1). Found more than two classes.")

    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, zero_division=1)
    recall = recall_score(true_labels, pred_labels, zero_division=1)
    f1 = f1_score(true_labels, pred_labels, zero_division=1)
    cm = confusion_matrix(true_labels, pred_labels)
    return accuracy, precision, recall, f1, cm

def pytorch_inference(model_path, test_folder, device):
    image_size = (224, 224)
    test_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    test_dataset = DefectDataset(test_folder, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    model = efficientnet_b0(weights=None)
    num_classes = 1
    model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, num_classes)
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    true_labels, pred_labels = [], []
    start_time = time.time()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = torch.sigmoid(outputs).cpu().numpy().round().astype(int)
            true_labels.extend(labels.cpu().numpy())
            pred_labels.extend(preds.flatten())
    end_time = time.time()

    metrics = calculate_metrics(true_labels, pred_labels)
    memory_usage = psutil.virtual_memory().used / (1024 ** 3)
    return metrics, end_time - start_time, memory_usage

def export_to_onnx(pytorch_model_path, device):
    model = efficientnet_b0(weights=None)
    num_classes = 1
    model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, num_classes)
    state_dict = torch.load(pytorch_model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    onnx_model_path = input("Enter the path to save the ONNX model (including filename, e.g., /kaggle/working/model.onnx): ")
    if not onnx_model_path.endswith(".onnx"):
        onnx_model_path += ".onnx"

    dummy_input = torch.randn(1, 3, 224, 224, device=device)
    torch.onnx.export(
        model,
        dummy_input,
        onnx_model_path,
        opset_version=11,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}
    )

    print(f"ONNX model exported to {onnx_model_path}")
    return onnx_model_path

def quantized_onnx_inference(onnx_model_path, test_folder):
    image_size = (224, 224)
    test_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    test_dataset = DefectDataset(test_folder, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    quantized_model_path = input("Enter the path to save the quantized ONNX model (including filename, e.g., /kaggle/working/quantized_model.onnx): ")
    quantize_dynamic(onnx_model_path, quantized_model_path, weight_type=QuantType.QUInt8)
    print(f"Quantized ONNX model saved to {quantized_model_path}")

    ort_session = ort.InferenceSession(quantized_model_path, providers=["CUDAExecutionProvider"])

    true_labels, pred_labels = [], []
    start_time = time.time()
    for inputs, labels in test_loader:
        inputs = inputs.numpy()
        ort_inputs = {ort_session.get_inputs()[0].name: inputs}
        ort_outs = ort_session.run(None, ort_inputs)

        # applying sigmoid activation to ensure binary predictions
        sigmoid_outputs = 1 / (1 + np.exp(-ort_outs[0]))
        preds = np.round(sigmoid_outputs).astype(int)
        
        true_labels.extend(labels.numpy())
        pred_labels.extend(preds.flatten())
    end_time = time.time()

    metrics = calculate_metrics(true_labels, pred_labels)
    memory_usage = psutil.virtual_memory().used / (1024 ** 3)
    return metrics, end_time - start_time, memory_usage

if __name__ == "__main__":
    pytorch_model_path = input("Enter the PyTorch model path: ")
    test_folder = input("Enter the test data folder path: ")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    pytorch_metrics, pytorch_time, pytorch_memory = pytorch_inference(pytorch_model_path, test_folder, device)
    onnx_model_path = export_to_onnx(pytorch_model_path, device)
    quantized_metrics, quantized_time, quantized_memory = quantized_onnx_inference(onnx_model_path, test_folder)

    print("\nPyTorch Inference Results:")
    print(f"Accuracy: {pytorch_metrics[0]:.4f}, Precision: {pytorch_metrics[1]:.4f}, Recall: {pytorch_metrics[2]:.4f}, F1 Score: {pytorch_metrics[3]:.4f}")
    print(f"Time Taken: {pytorch_time:.2f}s, Memory Usage: {pytorch_memory:.2f}GB")

    print("\nQuantized ONNX Inference Results:")
    print(f"Accuracy: {quantized_metrics[0]:.4f}, Precision: {quantized_metrics[1]:.4f}, Recall: {quantized_metrics[2]:.4f}, F1 Score: {quantized_metrics[3]:.4f}")
    print(f"Time Taken: {quantized_time:.2f}s, Memory Usage: {quantized_memory:.2f}GB")


Enter the PyTorch model path:  /kaggle/input/imagesurfacedefectclassification/pytorch/efficientnet-b0-defect-classification-v1/1/fine_tuned_efficientnet.pth
Enter the test data folder path:  /kaggle/input/testdataimageclassificationsurfacedefect/test


  state_dict = torch.load(model_path, map_location=device)
  state_dict = torch.load(pytorch_model_path, map_location=device)


Enter the path to save the ONNX model (including filename, e.g., /kaggle/working/model.onnx):  /kaggle/working/Working_Model.onnx


ONNX model exported to /kaggle/working/Working_Model.onnx


Enter the path to save the quantized ONNX model (including filename, e.g., /kaggle/working/quantized_model.onnx):  /kaggle/working/Quantized_Working_Model.onnx


Quantized ONNX model saved to /kaggle/working/Quantized_Working_Model.onnx


[0;93m2024-11-28 12:39:16.763371063 [W:onnxruntime:, transformer_memcpy.cc:74 ApplyImpl] 245 Memcpy nodes are added to the graph main_graph for CUDAExecutionProvider. It might have negative impact on performance (including unable to run CUDA graph). Set session_options.log_severity_level=1 to see the detail logs before this message.[m
[0;93m2024-11-28 12:39:16.769833843 [W:onnxruntime:, session_state.cc:1168 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.[m
[0;93m2024-11-28 12:39:16.769851776 [W:onnxruntime:, session_state.cc:1170 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.[m
  sigmoid_outputs = 1 / (1 + np.exp(-ort_outs[0]))



PyTorch Inference Results:
Accuracy: 0.9671, Precision: 0.9873, Recall: 0.7091, F1 Score: 0.8254
Time Taken: 11.33s, Memory Usage: 2.79GB

Quantized ONNX Inference Results:
Accuracy: 0.7570, Precision: 0.1650, Recall: 0.3000, F1 Score: 0.2129
Time Taken: 60.62s, Memory Usage: 3.01GB


In [None]:
import os
import time
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import psutil
import onnx
import onnxruntime as ort
from onnxruntime.transformers import optimizer
from torchvision.models import efficientnet_b0

class DefectDataset(Dataset):
    def __init__(self, folder, transform=None):
        self.folder = folder
        self.transform = transform
        self.images = [f for f in os.listdir(folder) if f.endswith('.png') and not f.endswith('_GT.png')]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = os.path.join(self.folder, self.images[idx])
        gt_name = os.path.join(self.folder, self.images[idx].replace('.png', '_GT.png'))
        image = Image.open(img_name).convert('RGB')
        label = 0  # label is defaulting to 0
        if os.path.exists(gt_name):
            label_image = plt.imread(gt_name)
            label = int(np.max(label_image) > 0)  # ensuring that the label is binary
        if self.transform:
            image = self.transform(image)
        return image, label

#utility function for calculating metrics
def calculate_metrics(true_labels, pred_labels):
    true_labels = np.array(true_labels).astype(int)
    pred_labels = np.array(pred_labels).astype(int)

    if len(np.unique(true_labels)) > 2 or len(np.unique(pred_labels)) > 2:
        raise ValueError("Labels must be binary (0 or 1). Found more than two classes.")

    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, zero_division=1)
    recall = recall_score(true_labels, pred_labels, zero_division=1)
    f1 = f1_score(true_labels, pred_labels, zero_division=1)
    cm = confusion_matrix(true_labels, pred_labels)
    return accuracy, precision, recall, f1, cm

def pytorch_inference(model_path, test_folder, device):
    image_size = (224, 224)
    test_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    test_dataset = DefectDataset(test_folder, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    model = efficientnet_b0(weights=None)
    num_classes = 1
    model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, num_classes)
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    true_labels, pred_labels = [], []
    start_time = time.time()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = torch.sigmoid(outputs).cpu().numpy().round().astype(int)
            true_labels.extend(labels.cpu().numpy())
            pred_labels.extend(preds.flatten())
    end_time = time.time()

    metrics = calculate_metrics(true_labels, pred_labels)
    memory_usage = psutil.virtual_memory().used / (1024 ** 3)
    return metrics, end_time - start_time, memory_usage

def export_to_onnx(pytorch_model_path, device):
    model = efficientnet_b0(weights=None)
    num_classes = 1
    model.classifier[1] = torch.nn.Linear(model.classifier[1].in_features, num_classes)
    state_dict = torch.load(pytorch_model_path, map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)
    model.eval()

    onnx_model_path = input("Enter the path to save the ONNX model (including filename, e.g., /kaggle/working/model.onnx): ")
    if not onnx_model_path.endswith(".onnx"):
        onnx_model_path += ".onnx"

    dummy_input = torch.randn(1, 3, 224, 224, device=device) #ensure size is 224 by 224 pixels for smooth inference
    torch.onnx.export(
        model,
        dummy_input,
        onnx_model_path,
        opset_version=11,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}
    )

    print(f"ONNX model exported to {onnx_model_path}")
    return onnx_model_path

def optimized_onnx_inference(onnx_model_path, test_folder):
    image_size = (224, 224)
    test_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    test_dataset = DefectDataset(test_folder, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # optimizing ONNX model using GraphOptimizationLevel, enabling all of them
    optimized_model_path = onnx_model_path.replace(".onnx", "_optimized.onnx")
    session_options = ort.SessionOptions()
    session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

    # setting ONNX Runtime session with CUDA
    ort_session = ort.InferenceSession(onnx_model_path, session_options, providers=["CUDAExecutionProvider"])

    true_labels, pred_labels = [], []
    start_time = time.time()
    for inputs, labels in test_loader:
        inputs = inputs.numpy()
        ort_inputs = {ort_session.get_inputs()[0].name: inputs}
        ort_outs = ort_session.run(None, ort_inputs)

        # applying sigmoid activation to ensure binary predictions
        sigmoid_outputs = 1 / (1 + np.exp(-ort_outs[0]))
        preds = np.round(sigmoid_outputs).astype(int)
        
        true_labels.extend(labels.numpy())
        pred_labels.extend(preds.flatten())
    end_time = time.time()

    metrics = calculate_metrics(true_labels, pred_labels)
    memory_usage = psutil.virtual_memory().used / (1024 ** 3)
    return metrics, end_time - start_time, memory_usage

if __name__ == "__main__":
    pytorch_model_path = input("Enter the PyTorch model path: ")
    test_folder = input("Enter the test data folder path: ")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    pytorch_metrics, pytorch_time, pytorch_memory = pytorch_inference(pytorch_model_path, test_folder, device)
    onnx_model_path = export_to_onnx(pytorch_model_path, device)
    optimized_metrics, optimized_time, optimized_memory = optimized_onnx_inference(onnx_model_path, test_folder)

    print("\nPyTorch Inference Results:")
    print(f"Accuracy: {pytorch_metrics[0]:.4f}, Precision: {pytorch_metrics[1]:.4f}, Recall: {pytorch_metrics[2]:.4f}, F1 Score: {pytorch_metrics[3]:.4f}")
    print(f"Time Taken: {pytorch_time:.2f}s, Memory Usage: {pytorch_memory:.2f}GB")

    print("\nOptimized ONNX Inference Results:")
    print(f"Accuracy: {optimized_metrics[0]:.4f}, Precision: {optimized_metrics[1]:.4f}, Recall: {optimized_metrics[2]:.4f}, F1 Score: {optimized_metrics[3]:.4f}")
    print(f"Time Taken: {optimized_time:.2f}s, Memory Usage: {optimized_memory:.2f}GB")


Enter the PyTorch model path:  /kaggle/input/imagesurfacedefectclassification/pytorch/efficientnet-b0-defect-classification-v1/1/fine_tuned_efficientnet.pth
Enter the test data folder path:  /kaggle/input/testdataimageclassificationsurfacedefect/test


  state_dict = torch.load(model_path, map_location=device)
  state_dict = torch.load(pytorch_model_path, map_location=device)


Enter the path to save the ONNX model (including filename, e.g., /kaggle/working/model.onnx):  /kaggle/working/Fine-Tuned_EffNet.onnx


ONNX model exported to /kaggle/working/Fine-Tuned_EffNet.onnx

PyTorch Inference Results:
Accuracy: 0.9671, Precision: 0.9873, Recall: 0.7091, F1 Score: 0.8254
Time Taken: 10.06s, Memory Usage: 2.81GB

Optimized ONNX Inference Results:
Accuracy: 0.9671, Precision: 0.9873, Recall: 0.7091, F1 Score: 0.8254
Time Taken: 10.17s, Memory Usage: 2.82GB


Here we see there is not much difference between the time and memory usage of Optimized ONNX model compared to Pytorch. And the results are the same.