### Answer 1:
Train a deep learning model which would classify the vegetables based on the
images provided. The dataset can be accessed from the given link.
Link-
https://www.kaggle.com/datasets/misrakahmed/vegetable-image-dataset


Note -
1. Use PyTorch as the framework for training model
2. Use Distributed Parallel Training technique to optimize training time.
3. Achieve an accuracy of at least 85% on the validation dataset.
4. Use albumentations library for image transformation
5. Use TensorBoard logging for visualizing training performance
6. Use custom modular Python scripts to train model


In [None]:
!pip install kaggle

import os

# Kaggle credentials are taken from the environment; when one is missing the
# user is prompted for it, so no secret is ever stored in the notebook itself.
import getpass

for _kaggle_var in ('KAGGLE_USERNAME', 'KAGGLE_KEY'):
    if not os.environ.get(_kaggle_var):
        os.environ[_kaggle_var] = getpass.getpass(f'{_kaggle_var}: ')

!kaggle datasets download -d misrakahmed/vegetable-image-dataset

!unzip vegetable-image-dataset.zip

In [None]:
!pip install albumentations
!pip install tensorboard

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18
from torch.utils.tensorboard import SummaryWriter

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Training pipeline: fixed 224x224 input, random horizontal flip as the only
# augmentation, then scaling from [0, 1] to [-1, 1].
train_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Validation pipeline: same as training, minus the random flip.
val_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Locations of the extracted dataset splits.
train_dataset_path = "/content/Vegetable Images/train"
val_dataset_path = "/content/Vegetable Images/validation"

# One sub-folder per class; ImageFolder derives the labels from folder names.
train_dataset = ImageFolder(root=train_dataset_path, transform=train_transforms)
val_dataset = ImageFolder(root=val_dataset_path, transform=val_transforms)

# Only the training split is shuffled.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True, num_workers=4
)
val_loader = DataLoader(
    dataset=val_dataset, batch_size=64, shuffle=False, num_workers=4
)

# The classifier head is sized from the number of class folders found.
num_classes = len(train_dataset.classes)

# Start from a pre-trained ResNet-18 backbone.
model = resnet18(pretrained=True)
num_ftrs = model.fc.in_features

# Swap the final fully connected layer for one with `num_classes` outputs.
model.fc = nn.Linear(in_features=num_ftrs, out_features=num_classes)
model = model.to(device)

# Cross-entropy objective optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# TensorBoard writer (default log directory).
writer = SummaryWriter()

# Training loop: one optimisation pass over `train_loader` per epoch, followed
# by a validation pass that reports per-class and overall accuracy.
num_epochs = 10
total_steps = len(train_loader)
class_names = train_dataset.classes
for epoch in range(num_epochs):
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log against a global step so consecutive epochs form a single curve.
        writer.add_scalar("Training Loss", loss.item(), epoch * total_steps + i)

        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_steps}], Loss: {loss.item():.4f}")

    # Validation pass. Counts are accumulated as tensors so each batch needs a
    # single histogram per statistic instead of one `.item()` call per class.
    model.eval()
    correct_per_class = torch.zeros(num_classes, dtype=torch.long)
    total_per_class = torch.zeros(num_classes, dtype=torch.long)
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total_per_class += torch.bincount(labels, minlength=num_classes).cpu()
            # Histogram of the labels of correctly classified samples only.
            correct_per_class += torch.bincount(
                labels[predicted == labels], minlength=num_classes
            ).cpu()

    print("Validation Accuracy:")
    for class_name, n_correct, n_total in zip(
        class_names, correct_per_class.tolist(), total_per_class.tolist()
    ):
        if n_total == 0:
            # A class without validation samples has no defined accuracy.
            print(f"{class_name}: n/a (no validation samples)")
            continue
        accuracy = 100 * n_correct / n_total
        print(f"{class_name}: {accuracy:.2f}%")

    # Overall accuracy across all classes; this is the value sent to TensorBoard.
    total_seen = int(total_per_class.sum())
    val_accuracy = 100 * int(correct_per_class.sum()) / total_seen if total_seen else 0.0
    print(f"Overall: {val_accuracy:.2f}%")

    # Track validation accuracy
    writer.add_scalar("Validation Accuracy", val_accuracy, epoch)

# Save the trained model
torch.save(model.state_dict(), "vegetable_classification_model.pt")

# Close the TensorBoard writer
writer.close()

Epoch [1/10], Step [100/235], Loss: 0.1740
Epoch [1/10], Step [200/235], Loss: 0.0582
Validation Accuracy:
Bean: 98.00%
Bitter_Gourd: 100.00%
Bottle_Gourd: 100.00%
Brinjal: 95.50%
Broccoli: 98.00%
Cabbage: 100.00%
Capsicum: 97.50%
Carrot: 100.00%
Cauliflower: 99.50%
Cucumber: 93.50%
Papaya: 96.00%
Potato: 91.00%
Pumpkin: 99.00%
Radish: 99.50%
Tomato: 99.00%
Epoch [2/10], Step [100/235], Loss: 0.0815
Epoch [2/10], Step [200/235], Loss: 0.0342
Validation Accuracy:
Bean: 97.00%
Bitter_Gourd: 100.00%
Bottle_Gourd: 99.50%
Brinjal: 90.00%
Broccoli: 98.50%
Cabbage: 96.50%
Capsicum: 95.00%
Carrot: 100.00%
Cauliflower: 99.50%
Cucumber: 91.00%
Papaya: 96.50%
Potato: 100.00%
Pumpkin: 98.00%
Radish: 99.50%
Tomato: 97.00%
Epoch [3/10], Step [100/235], Loss: 0.0029
Epoch [3/10], Step [200/235], Loss: 0.0569
Validation Accuracy:
Bean: 99.50%
Bitter_Gourd: 100.00%
Bottle_Gourd: 99.50%
Brinjal: 91.50%
Broccoli: 99.00%
Cabbage: 99.50%
Capsicum: 100.00%
Carrot: 95.00%
Cauliflower: 100.00%
Cucumber: 92.00

In [None]:
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Same preprocessing as the validation pipeline used during training
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Define dataset path
test_dataset_path = "/content/Vegetable Images/test"

# Load the test dataset
test_dataset = ImageFolder(test_dataset_path, transform=test_transforms)

# One image per batch so each prediction can be printed individually
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

# Rebuild the architecture and load the trained weights. `map_location` lets a
# checkpoint that was saved on a GPU be loaded on a CPU-only machine.
model = resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(test_dataset.classes))
model.load_state_dict(
    torch.load("vegetable_classification_model.pt", map_location=device)
)
model = model.to(device)
model.eval()

# Classify vegetable images and keep score against the ground-truth labels
class_names = test_dataset.classes
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        class_name = class_names[predicted.item()]
        print(f"Predicted class: {class_name}")
        correct += int(predicted.item() == labels.item())
        total += 1

if total:
    print(f"Test accuracy: {100 * correct / total:.2f}% ({correct}/{total})")




Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Capsicum
Predicted class: Broccoli
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Broccoli
Predicted class: Bean
Predicted class: Broccoli
Predicted class: Broccoli
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Broccoli
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Cucumber
Predicted class: Cucumber
Predicted class: Broccoli
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: Bean
Predicted class: B


### Answer 2:
From Question 1, you would get a trained model which would classify the
vegetables based on the classes. You need to convert the trained model to ONNX
format and achieve faster inference
Note -
1. There is no set inference time, but try to achieve as low an inference time as
possible
2. Create a web app to interact with the model, where the user can upload the
image and get predictions
3. Try to reduce the model size considerably so that inference time can be faster
4. Use modular Python scripts to train and infer the model


In [None]:
pip install onnx

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting onnx
  Downloading onnx-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m96.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx
Successfully installed onnx-1.14.0


In [None]:
import torch
import torch.onnx as onnx
from torchvision.models import resnet18


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Rebuild the architecture and load the trained weights.
# `num_classes` is the value computed in the training cell.
# `map_location` lets a GPU-saved checkpoint load on a CPU-only machine.
model = resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, num_classes)
model.load_state_dict(
    torch.load("vegetable_classification_model.pt", map_location=device)
)
model = model.to(device)
model.eval()


# Export the model to ONNX format. Inputs and outputs get stable names, and the
# batch dimension is marked dynamic so the exported graph is not tied to
# batch size 1.
dummy_input = torch.randn(1, 3, 224, 224).to(device)
onnx_path = "vegetable_classification_model.onnx"
torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    input_names=["input"],
    output_names=["logits"],
    dynamic_axes={"input": {0: "batch"}, "logits": {0: "batch"}},
)

print("Model exported to ONNX format successfully.")

verbose: False, log level: Level.ERROR

Model exported to ONNX format successfully.


app.py

In [None]:
pip install onnxruntime

In [None]:
import os
from flask import Flask, request, render_template
from PIL import Image
import torch
from torchvision import transforms
import onnxruntime

# Pick the torch device (GPU when available, CPU otherwise).
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing applied to every uploaded image: 224x224 resize, tensor
# conversion, then scaling from [0, 1] to [-1, 1].
image_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Open the exported ONNX graph with ONNX Runtime.
model_path = "vegetable_classification_model.onnx"
model = onnxruntime.InferenceSession(model_path)

# Look up the first input and first output tensor names from the session.
first_input = model.get_inputs()[0]
first_output = model.get_outputs()[0]
input_name = first_input.name
output_name = first_output.name

# The Flask application object that the routes are registered on.
app = Flask(__name__)

# Site root
@app.route('/')
def home():
    """Render and return the ``index.html`` template."""
    page = render_template('index.html')
    return page

# Class labels in model-output index order, matching the class order printed
# by the training run (Bean ... Tomato).
CLASS_NAMES = (
    "Bean", "Bitter_Gourd", "Bottle_Gourd", "Brinjal", "Broccoli",
    "Cabbage", "Capsicum", "Carrot", "Cauliflower", "Cucumber",
    "Papaya", "Potato", "Pumpkin", "Radish", "Tomato",
)


# Define route for the prediction
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the image uploaded in the ``file`` form field.

    Returns:
        A plain-text response: ``"Predicted class: <name>"`` on success, or
        one of ``"No file uploaded"``, ``"Empty file uploaded"`` and
        ``"Invalid file format"`` when the upload cannot be used.
    """
    # Check if a file was uploaded
    if 'file' not in request.files:
        return "No file uploaded"

    file = request.files['file']

    # Check if the file is empty
    if file.filename == '':
        return "Empty file uploaded"

    # Reject uploads whose extension is not an accepted image type
    if not file or not allowed_file(file.filename):
        return "Invalid file format"

    # An accepted extension does not guarantee decodable image data; Pillow
    # signals unreadable content with an OSError subclass.
    try:
        image = Image.open(file).convert('RGB')
    except OSError:
        return "Invalid file format"

    # Shape (1, 3, 224, 224) float32 array, as expected by the exported graph
    batch = image_transforms(image).unsqueeze(0).numpy()

    # Perform inference using the ONNX model; the output is already a NumPy
    # array, so the arg-max is taken directly without a torch round-trip.
    outputs = model.run([output_name], {input_name: batch})
    predicted_index = int(outputs[0].argmax(axis=1)[0])

    predicted_class_name = CLASS_NAMES[predicted_index]
    return f"Predicted class: {predicted_class_name}"

# Extension whitelist check for uploads
def allowed_file(filename):
    """Return True when ``filename`` ends in ``.png``, ``.jpg`` or ``.jpeg``.

    The comparison is case-insensitive and only looks at the text after the
    last dot; a name without any dot is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in {'png', 'jpg', 'jpeg'}

# Run the app
if __name__ == '__main__':
    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, so debug mode is opt-in (FLASK_DEBUG=1) rather than
    # always on.
    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:werkzeug: * Restarting with stat


### Answer 3:
Scrape the images from popular e-commerce websites for various product images sold
on those websites. Your goal is to fetch the images from the website, create
categories of different product classes and train a deep learning model to classify the
same based on the user input.
Note -
1. You can use any framework of your choice like TensorFlow or PyTorch
2. You must not use any pre-trained model, but instead create your own custom
architecture and then train the model.
3. Write code comments wherever needed for understanding
4. Try to use a reasonably large dataset so that the model can generalize
5. Write modular Python scripts to train and infer the model


In [None]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import tensorflow as tf

# Scrape images from popular e-commerce websites
from urllib.parse import urljoin

# Define the websites to scrape from
websites = ["amazon.com", "ebay.com", "etsy.com"]

# Create a list to store the image URLs
image_urls = []

# Loop over the websites
for website in websites:
    # Get the home page of the website. The timeout keeps one unresponsive
    # host from hanging the run, and HTTP error pages are skipped rather
    # than parsed as if they were the real home page.
    url = "https://" + website
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as error:
        print(f"Skipping {url}: {error}")
        continue

    # Parse the HTML of the home page
    soup = BeautifulSoup(response.content, "html.parser")

    # Find all the image tags on the home page
    images = soup.find_all("img")

    # Add the image URLs to the list. Relative and protocol-relative `src`
    # values are resolved against the page URL so every entry is fetchable.
    for image in images:
        src = image.get("src")
        if src:
            image_urls.append(urljoin(url, src))

# Group the scraped URLs by product class.

# The label set; the list order also fixes the class index used by the model.
product_classes = ["clothes", "electronics", "home & garden", "toys & games"]

# Map each class name to the URLs that contain that name as a substring.
# A URL matching no class name is dropped; a class with no match gets [].
image_urls_by_class = {
    product_class: [
        image_url for image_url in image_urls if product_class in image_url
    ]
    for product_class in product_classes
}

# Build a small CNN classifier and run it on a single image.
# NOTE(review): no `model.fit` call appears in this cell, so the prediction
# below comes from freshly initialised weights - confirm whether training
# happens elsewhere.

# Architecture: two conv + max-pool stages, then a dense head with one
# softmax output per product class.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation="relu", input_shape=(224, 224, 3)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation="relu"))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation="relu"))
model.add(tf.keras.layers.Dense(len(product_classes), activation="softmax"))

# Adam optimiser with a categorical cross-entropy objective.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Read the query image at the network's input resolution.
image_path = "/content/clothesline-g92d7b8fc2_1280.jpg"
input_image = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
input_image = tf.keras.preprocessing.image.img_to_array(input_image)

# Add the leading batch axis, then scale pixel values into [0, 1].
input_image = np.expand_dims(input_image, axis=0)
input_image = input_image / 255.0

# Forward pass: one probability per product class.
prediction = model.predict(input_image)

# The highest-scoring index selects the class label.
predicted_class_index = np.argmax(prediction)
predicted_class = product_classes[predicted_class_index]

# Report the result.
print("Predicted Class:", predicted_class)

Predicted Class: home & garden


### Answer 6:
You have to train a custom segmentation model based on Detectron 2 framework.
Your goal is to segment the given images based on the user input into the different
classes
Link -
https://www.kaggle.com/competitions/open-images-2019-instance-segmentation/data
Note -
1. For this, only the Jupyter Notebook is fine
2. Labels are in COCO format.
3. Write code comments wherever needed for understanding


In [None]:
# !pip install kaggle

import os

os.environ['KAGGLE_USERNAME'] = 'shreepardeshi'
os.environ['KAGGLE_KEY'] = '312af3bd6030bd4d9c716920a412cb60'

# !kaggle datasets download -d misrakahmed/vegetable-image-dataset
!kaggle competitions download -c open-images-2019-instance-segmentation
!unzip vegetable-image-dataset.zip

403 - Forbidden
unzip:  cannot find or open vegetable-image-dataset.zip, vegetable-image-dataset.zip.zip or vegetable-image-dataset.zip.ZIP.


### Answer 8:
You have to train a custom object detection model based on DETR (Detection
Transformer)
Link - https://www.kaggle.com/datasets/andrewmvd/helmet-detection
Note -
1. You need to use HuggingFace PyTorch as the framework
2. The dataset is about detecting football players from the images provided
3. Data Annotations are already in COCO format.
4. Write custom Python scripts for training.


In [None]:
!pip install kaggle

import os

# Kaggle credentials are taken from the environment; when one is missing the
# user is prompted for it, so no secret is ever stored in the notebook itself.
import getpass

for _kaggle_var in ('KAGGLE_USERNAME', 'KAGGLE_KEY'):
    if not os.environ.get(_kaggle_var):
        os.environ[_kaggle_var] = getpass.getpass(f'{_kaggle_var}: ')

!kaggle datasets download -d andrewmvd/helmet-detection

In [None]:
import zipfile

# Unpack the downloaded archive into ./helmet-detection
with zipfile.ZipFile('helmet-detection.zip', mode='r') as archive:
    archive.extractall(path='./helmet-detection')

In [None]:
pip install torch torchvision cython torchtext omegaconf tqdm

In [None]:
pip install transformers

In [None]:
pip install timm

In [None]:
pip install detr

In [None]:
pip install torchvision

In [None]:

import torch
import torchvision
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import DataLoader
from pycocotools.coco import COCO
from PIL import Image
from torchvision.models.detection import detr

# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset layout: images and annotations live in sibling folders under the
# extracted archive.
dataset_dir = "/content/helmet-detection"
image_dir = "/content/helmet-detection/images"
annotation_dir = "/content/helmet-detection/annotations"

# COCO-style annotation file for the training split.
# NOTE(review): `os` is not imported in this cell; it is assumed to be in
# scope from an earlier cell - confirm.
train_annotation_file = os.path.join(annotation_dir, 'train.json')

# Define the dataset class
class CustomDataset(torch.utils.data.Dataset):
    """Detection dataset backed by a COCO annotation file.

    Each item is an ``(image, target)`` pair, where ``image`` is an RGB PIL
    image (or whatever ``transforms`` returns for it) and ``target`` is a
    dict of tensors with keys ``boxes``, ``labels``, ``image_id``, ``area``
    and ``iscrowd``.

    Args:
        image_dir: Directory containing the image files named in the
            annotation file.
        annotation_file: Path to the COCO-format JSON annotation file.
        transforms: Optional callable invoked as ``transforms(image, target)``
            that returns the transformed ``(image, target)`` pair.
    """

    def __init__(self, image_dir, annotation_file, transforms=None):
        self.image_dir = image_dir
        self.coco = COCO(annotation_file)
        self.transforms = transforms
        # Snapshot the image ids once: the index -> image mapping stays fixed
        # and the id list is not rebuilt on every access.
        self.image_ids = list(self.coco.getImgIds())

    def _build_target(self, image_id):
        """Collect the annotations of ``image_id`` into a dict of tensors."""
        annotation_ids = self.coco.getAnnIds(imgIds=image_id)
        annotations = self.coco.loadAnns(annotation_ids)

        # Boxes are passed through exactly as stored in each annotation's
        # ``bbox`` field (no format conversion happens here).
        boxes = torch.as_tensor(
            [ann['bbox'] for ann in annotations], dtype=torch.float32
        )
        return {
            # The explicit reshape keeps shape (0, 4) for images without any
            # annotation, so downstream column indexing still works.
            'boxes': boxes.reshape(len(annotations), 4),
            'labels': torch.as_tensor(
                [ann['category_id'] for ann in annotations], dtype=torch.long
            ),
            'image_id': torch.as_tensor([image_id], dtype=torch.long),
            'area': torch.as_tensor(
                [ann['area'] for ann in annotations], dtype=torch.float32
            ),
            'iscrowd': torch.as_tensor(
                [ann['iscrowd'] for ann in annotations], dtype=torch.bool
            ),
        }

    def __getitem__(self, index):
        """Return the ``(image, target)`` pair stored at position ``index``."""
        # Load the image
        image_id = self.image_ids[index]
        image_info = self.coco.loadImgs(image_id)[0]
        image = Image.open(os.path.join(self.image_dir, image_info['file_name'])).convert('RGB')

        # Load the annotations and convert them to targets
        target = self._build_target(image_id)

        # Apply transforms
        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.image_ids)

# Define the transforms
def get_transform(size=(800, 800), flip_prob=0.5):
    """Build the joint image/target transform used by ``CustomDataset``.

    The dataset invokes its transform as ``transforms(image, target)``, so the
    returned callable takes both and keeps the boxes consistent with the
    geometric changes applied to the image.

    Args:
        size: Output ``(height, width)`` of the image.
        flip_prob: Probability of a horizontal flip.

    Returns:
        A callable ``(image, target) -> (image_tensor, target)``. The input
        ``target`` dict is not modified; a shallow copy with updated ``boxes``
        and ``area`` is returned.
    """
    to_tensor = T.ToTensor()
    resize = T.Resize(size)
    out_height, out_width = size

    def joint_transform(image, target):
        image = to_tensor(image)
        in_height, in_width = image.shape[-2:]
        boxes = target['boxes'].clone()
        area = target['area'].clone()

        # NOTE(review): boxes are assumed to be COCO-style
        # [x, y, width, height] in pixels, as read from the annotation
        # ``bbox`` field - confirm.
        if torch.rand(1).item() < flip_prob:
            image = image.flip(-1)
            if boxes.numel():
                # Mirroring moves only the left edge; width and height stay.
                boxes[:, 0] = in_width - boxes[:, 0] - boxes[:, 2]

        image = resize(image)
        scale_x = out_width / in_width
        scale_y = out_height / in_height
        if boxes.numel():
            boxes[:, 0::2] *= scale_x  # x and width
            boxes[:, 1::2] *= scale_y  # y and height
        area = area * (scale_x * scale_y)

        updated = dict(target)
        updated['boxes'] = boxes
        updated['area'] = area
        return image, updated

    return joint_transform

# Load the dataset
train_dataset = CustomDataset(image_dir, train_annotation_file, get_transform())

# Define the data loader
# NOTE(review): no `collate_fn` is given, so the default collation is used;
# it presumably cannot batch targets whose images have different numbers of
# boxes - confirm.
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

# Load the DETR model
# NOTE(review): `detr` is imported from `torchvision.models.detection` in this
# cell; verify that module exists in the installed torchvision. The task
# statement asks for the HuggingFace implementation - TODO confirm.
model = detr.detr_resnet50(pretrained=True)
model = model.to(device)

# Set the optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

# Set the learning rate scheduler: multiplies the learning rate by 0.1 every
# 3 calls to `lr_scheduler.step()`
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Training objective
def loss_fn(outputs, targets):
    """Return classification cross-entropy plus box L1 loss.

    Args:
        outputs: Mapping with ``pred_logits`` and ``pred_boxes`` tensors.
        targets: Mapping with ``labels`` and ``boxes`` tensors.

    Returns:
        The unweighted sum of the two loss terms.

    NOTE(review): predictions and targets are compared element-wise, so they
    are assumed to be already aligned one-to-one - confirm against the
    model's output format.
    """
    classification_term = F.cross_entropy(
        input=outputs['pred_logits'], target=targets['labels']
    )
    regression_term = F.l1_loss(
        input=outputs['pred_boxes'], target=targets['boxes']
    )
    return classification_term + regression_term

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    num_batches = 0
    for images, targets in train_loader:
        images = images.to(device)
        targets = {k: v.to(device) for k, v in targets.items()}

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()

        # Accumulate the running loss so the epoch average can be reported.
        train_loss += loss.item()
        num_batches += 1

    # The StepLR schedule only takes effect when it is stepped once per epoch.
    lr_scheduler.step()

    mean_loss = train_loss / max(num_batches, 1)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}")


### Answer 4:
You have to train a custom YOLO V7 model on the dataset which is linked below.
Your goal is to detect different products based on the given classes based on the
user input
Link -
https://drive.google.com/file/d/1MEgDYJwO_PVVfAbyfjaRHXt7qoiBBHYt/view?usp=share_link
Note -
1. You have to use PyTorch implementation of YOLO V7
2. The dataset consists of 102 classes with train, validation, and test images
already in the respective folders.
3. Labeling is already done and provided with the dataset, so there is no need for annotation
4. Since the dataset is small, try to achieve at least an mAP of 85
5. Write modular Python scripts to train the model
6. Write code comments wherever needed for understanding
Computer Vision Assessment iNeuron 3
7. Only Jupyter Notebook will not be allowed 


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from model import YOLOv7 # import the YOLOv7 model architecture

# Preprocessing shared by all three splits: 448x448 resize, tensor conversion,
# then per-channel normalisation.
data_transforms = transforms.Compose([
    transforms.Resize(size=(448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# One ImageFolder per split.
# NOTE(review): ImageFolder yields (image, class index) pairs rather than
# bounding-box labels - confirm this matches what the model and loss expect.
train_dataset = ImageFolder(root='train/', transform=data_transforms)
val_dataset = ImageFolder(root='val/', transform=data_transforms)
test_dataset = ImageFolder(root='test/', transform=data_transforms)

# Batches of 16, shuffled, for every split.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=True)

# Instantiate the detector for the 102 dataset classes.
model = YOLOv7(num_classes=102, pretrained=True)

# Freeze the first 282 parameter tensors; the rest remain trainable.
for param in list(model.parameters())[:282]:
    param.requires_grad = False

# Loss and optimiser; only parameters that still require gradients are
# handed to Adam.
# NOTE(review): `YOLOLoss` is not imported in this cell - confirm where it is
# defined.
criterion = YOLOLoss()
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

def _accuracy(model, loader):
    """Return the top-1 accuracy (in percent) of ``model`` over ``loader``.

    The model is switched to eval mode for the measurement and restored to
    its previous mode afterwards. An empty loader yields ``0.0``.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    model.train(was_training)
    return 100 * correct / total if total else 0.0


# Train the model
for epoch in range(10):
    # Evaluation below leaves training mode, so it is re-entered each epoch.
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Report the mean loss of every block of 100 batches.
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

    # Evaluate the model on the validation set
    print('Accuracy on validation set: %d %%' % _accuracy(model, val_loader))

# Test the model on the test set
print('Accuracy on test set: %d %%' % _accuracy(model, test_loader))

### Answer 5: 
From Question 4, you would have a custom-trained YOLO model. Your goal is to
convert the model to ONNX format and reduce the inference time.
Note -
1. Reduce the inference time as much as possible
2. Try to reduce the model size by using techniques like Quantization, etc.
3. Create a web app for users to interact with your model where users can upload
images and get predictions.
4. Write modular Python scripts to infer the model.



In [None]:
pip install torch
pip install onnx
pip install onnxruntime
pip install Flask

In [None]:
import torch
import onnx
import onnxruntime as ort
from onnxruntime.quantization import quantize_static
from flask import Flask, request, jsonify

# Load the trained PyTorch model.
# NOTE: torch.load unpickles the file, which can execute arbitrary code - only
# load checkpoints from a trusted source.
# The model is mapped to the CPU because the dummy export input below lives on
# the CPU, and switched to eval mode so dropout/batch-norm are exported in
# inference behaviour.
model = torch.load('custom_yolo.pt', map_location='cpu')
model.eval()

# Convert the PyTorch model to ONNX format
input_names = ['input']
output_names = ['output']
dummy_input = torch.randn(1, 3, 448, 448)
torch.onnx.export(model, dummy_input, 'custom_yolo.onnx', input_names=input_names, output_names=output_names)

# Shrink the ONNX model with dynamic (weight-only) quantization. Static
# quantization needs a calibration data reader, which this script does not
# have. The quantize call writes the output file and returns nothing, so the
# path is kept as the handle to the quantized model.
from onnxruntime.quantization import QuantType, quantize_dynamic

quantize_dynamic('custom_yolo.onnx', 'custom_yolo_quantized.onnx', weight_type=QuantType.QUInt8)
quantized_model = 'custom_yolo_quantized.onnx'

# Initialize the ONNX Runtime session
session = ort.InferenceSession(quantized_model)

# Define a Flask app
app = Flask(__name__)

# Define a function to preprocess the input image
def preprocess_image(image):
    """Turn a PIL image into a normalised ``(1, 3, 448, 448)`` tensor.

    Args:
        image: A ``PIL.Image.Image`` in any mode.

    Returns:
        A float tensor with a leading batch dimension of 1.
    """
    # Imported here so the function does not depend on another cell having
    # brought `transforms` into scope.
    from torchvision import transforms

    # Force three channels first: grayscale or RGBA uploads would not match
    # the 3-channel normalisation statistics below.
    image = image.convert('RGB').resize((448, 448))
    # Convert the image to a PyTorch tensor
    tensor = transforms.ToTensor()(image)
    # Normalize the image
    tensor = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(tensor)
    # Add a batch dimension to the tensor
    return tensor.unsqueeze(0)

# Define a Flask route for inference
@app.route('/predict', methods=['POST'])
def predict():
    """Run the quantized ONNX model on the image uploaded as ``image``.

    Returns:
        A JSON list with one nested list per model output, or a JSON error
        object with status 400 when the upload cannot be decoded as an image.
    """
    from PIL import Image

    # Get the input image from the request
    upload = request.files['image']

    # The upload is a file-like object and must be decoded into a PIL image
    # before preprocessing; Pillow reports unreadable data as an OSError.
    try:
        image = preprocess_image(Image.open(upload.stream))
    except OSError:
        return jsonify({'error': 'Uploaded file is not a readable image'}), 400

    # Run the inference on the optimized ONNX model (None requests all outputs)
    inputs = {input_names[0]: image.numpy()}
    outputs = session.run(None, inputs)

    # The raw output arrays are returned as nested lists so they can be
    # serialised as JSON.
    # NOTE(review): decoding them into boxes/classes depends on the model head,
    # which is not visible here - TODO.
    predictions = [output.tolist() for output in outputs]

    # Return the predictions as a JSON response
    return jsonify(predictions)

# Start Flask's built-in server when this file is executed as a script
if __name__ == "__main__":
    app.run()