In [1]:
#pip install flask

# IMPORTING LIBRARIES

In [2]:
import base64
import io
import json
import os

import torch
import torchvision.ops as ops
from flask import Flask, request, jsonify, render_template
from PIL import Image, ImageDraw, ImageFont
from torchvision import transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn, ssd300_vgg16
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.ssd import SSDClassificationHead

In [3]:
# Create the Flask app; the route handlers in later cells register themselves on this object
app = Flask(__name__)

In [4]:
# Select the compute device: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'--> {device} <--')

--> cuda <--


In [5]:
# Paths to the saved models.
# Each path can be overridden through an environment variable so the notebook is
# not tied to one machine's directory layout; when the variable is unset the
# original location is used.
SSD_path = os.environ.get(
    'SSD_MODEL_PATH',
    r'C:\Users\krish\Documents\Summer\Project\model\Sho.pth',
)
FastRCNN_path = os.environ.get(
    'FASTRCNN_MODEL_PATH',
    r'C:\Users\krish\Documents\Summer\Project\model\Luna.pth',
)

# LABEL ENCODING

In [6]:
# Read the COCO annotation file; only its 'categories' list is used below
with open('coco/instances_train2017.json') as f:
    coco_data = json.load(f)

categories = coco_data['categories']

# Direct lookup from COCO category id to class name (used for the SSD labels)
category_id_to_name = dict((category['id'], category['name']) for category in categories)

# COCO category ids have gaps, so renumber them consecutively from 0 in file order
# (used for the Faster R-CNN labels)
original_ids = [category['id'] for category in categories]
id_to_sequential_id = dict(zip(original_ids, range(len(original_ids))))
sequential_id_to_name = dict(enumerate(category['name'] for category in categories))

# Inverse of id_to_sequential_id: consecutive index back to the COCO category id
sequential_id_to_original_id = dict(
    (sequential_id, original_id) for original_id, sequential_id in id_to_sequential_id.items()
)

In [7]:
# Spot-check the 0-based mapping: index 45 corresponds to COCO category id 51 ('bowl')
sequential_id_to_name[45]

'bowl'

In [8]:
# Show the COCO category id -> consecutive 0-based index mapping
print(id_to_sequential_id)

{1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 13: 11, 14: 12, 15: 13, 16: 14, 17: 15, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21, 24: 22, 25: 23, 27: 24, 28: 25, 31: 26, 32: 27, 33: 28, 34: 29, 35: 30, 36: 31, 37: 32, 38: 33, 39: 34, 40: 35, 41: 36, 42: 37, 43: 38, 44: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 52: 46, 53: 47, 54: 48, 55: 49, 56: 50, 57: 51, 58: 52, 59: 53, 60: 54, 61: 55, 62: 56, 63: 57, 64: 58, 65: 59, 67: 60, 70: 61, 72: 62, 73: 63, 74: 64, 75: 65, 76: 66, 77: 67, 78: 68, 79: 69, 80: 70, 81: 71, 82: 72, 84: 73, 85: 74, 86: 75, 87: 76, 88: 77, 89: 78, 90: 79}


In [9]:
# Show the consecutive 0-based index -> class name mapping
print(sequential_id_to_name)

{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

In [10]:
# Show the original COCO category id -> class name mapping (ids are not contiguous)
print(category_id_to_name)

{1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', 56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', 61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed', 67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse', 75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'micro

# MODEL WEIGHT LOADING FROM ".PTH" FILE

In [11]:
# Build a Faster R-CNN detector and restore its saved weights
def load_fastrcnn_model(model_path, num_classes):
    """Create a Faster R-CNN (ResNet-50 FPN) model and load a saved state dict into it.

    The stock box predictor is replaced with a ``FastRCNNPredictor`` sized for
    ``num_classes`` before the state dict stored at ``model_path`` is loaded.

    Args:
        model_path: Path to the file holding the saved state dict.
        num_classes: Number of outputs for the replacement box predictor.

    Returns:
        The model, moved to the global ``device`` and switched to eval mode.
    """
    detector = fasterrcnn_resnet50_fpn(pretrained=False)
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    state_dict = torch.load(model_path, map_location=device)
    detector.load_state_dict(state_dict)
    # Both .to() and .eval() return the module itself, so they can be chained
    return detector.to(device).eval()

In [12]:
# Define the function to load the SSD model
def load_ssd_model(model_path, num_classes):
    """Create an SSD300 (VGG16) model and load a saved state dict into it.

    Args:
        model_path: Path to the file holding the saved state dict.
        num_classes: Number of classes the model is built with. It is forwarded
            to the model constructor so the architecture matches the checkpoint;
            previously this argument was accepted but ignored.

    Returns:
        The model, moved to the global ``device`` and switched to eval mode.
    """
    model = ssd300_vgg16(pretrained=False, num_classes=num_classes)
    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    return model

In [13]:
# Load the models with classes
# NOTE(review): 81 presumably means 80 COCO categories + 1 background class, and 91
# presumably matches the raw COCO category id range — confirm against how each
# checkpoint was trained.
FastRCNN = load_fastrcnn_model(FastRCNN_path, num_classes=81)
SSD = load_ssd_model(SSD_path, num_classes=91)



In [14]:
def transform_image(image_bytes):
    """Decode uploaded image bytes into a model input tensor and a PIL image.

    Args:
        image_bytes: Raw bytes of an encoded image file.

    Returns:
        A tuple ``(tensor, image)``: the RGB image converted with ``ToTensor`` and
        given a leading batch dimension, and the decoded RGB PIL image itself.

    Raises:
        ValueError: If the bytes cannot be identified as an image.
    """
    # Imported here because the notebook's import cell does not bring this name
    # into scope; without it the ``except`` clause below fails with NameError
    # instead of producing the intended ValueError.
    from PIL import UnidentifiedImageError

    try:
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
    except UnidentifiedImageError as exc:
        raise ValueError("The uploaded file is not a valid image.") from exc

    to_tensor = transforms.ToTensor()
    return to_tensor(image).unsqueeze(0), image

# SCORE FILTERING AND NON-MAX SUPPRESSION

In [15]:
# Score-threshold the detections, then run non-max suppression on what is left
def filter_and_nms(predictions, score_threshold=0.7, iou_threshold=0.7):
    """Drop low-confidence detections and suppress overlapping boxes.

    Args:
        predictions: Iterable of dicts with 'boxes', 'scores' and 'labels' entries.
        score_threshold: Detections scoring below this value are discarded.
        iou_threshold: IoU threshold passed to ``ops.nms``.

    Returns:
        A list with one dict per input prediction, holding the 'boxes', 'scores'
        and 'labels' that survive both steps.
    """
    def _prune(detection):
        # Pull the three fields out in the same order the mask is applied to them
        all_boxes = detection['boxes']
        all_scores = detection['scores']
        all_labels = detection['labels']

        # Step 1: keep only detections at or above the score threshold
        confident = all_scores >= score_threshold
        kept_boxes, kept_scores, kept_labels = (
            all_boxes[confident],
            all_scores[confident],
            all_labels[confident],
        )

        # Step 2: NMS over the remaining boxes (labels are not passed to ops.nms)
        survivors = ops.nms(kept_boxes, kept_scores, iou_threshold)
        return {
            'boxes': kept_boxes[survivors],
            'scores': kept_scores[survivors],
            'labels': kept_labels[survivors],
        }

    return [_prune(detection) for detection in predictions]

In [16]:
# Function to draw bounding boxes on the image
def draw_boxes(image, predictions, label_map):
    """Draw each detection's box and a "<name>: <score>" caption onto ``image``.

    The image is modified in place and also returned.

    Args:
        image: PIL image to draw on.
        predictions: Iterable of dicts with 'boxes', 'scores' and 'labels' tensors.
        label_map: Mapping from label id to display name; ids missing from it
            are captioned 'Unknown'.

    Returns:
        The same ``image`` object, with boxes and captions drawn on it.
    """
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for prediction in predictions:
        # Convert each tensor to plain Python values once instead of per element
        detections = zip(
            prediction["boxes"].tolist(),
            prediction["scores"].tolist(),
            prediction["labels"].tolist(),
        )
        for box, score, label in detections:
            x0, y0, x1, y1 = box
            draw.rectangle([x0, y0, x1, y1], outline="red", width=2)

            text = f"{label_map.get(label, 'Unknown')}: {score:.2f}"
            left, top, right, bottom = draw.textbbox((x0, y0), text, font=font)
            text_width = right - left
            text_height = bottom - top

            # Place the caption just above the box, but never above the top edge
            # of the image, where it would be drawn off-canvas and lost.
            text_y = max(y0 - text_height, 0)
            draw.rectangle([x0, text_y, x0 + text_width, text_y + text_height], fill="red")
            draw.text((x0, text_y), text, fill="white", font=font)
    return image

# FLASK ROUTES: MODEL INFERENCE AND DETECTION

In [17]:
# Landing page route: serves the HTML form
@app.route("/", methods=["GET"])
def index():
    """Render the ``index.html`` template for GET requests to ``/``."""
    page = render_template("index.html")
    return page

In [18]:
@app.route("/predict/fastrcnn", methods=["POST"])
def predict_fastrcnn():
    """Run the Faster R-CNN model on an uploaded image and return detections as JSON.

    Expects the image in the multipart form field 'file'.

    Returns:
        * HTTP 400 with ``{"error": ...}`` when no file is supplied or the upload
          is not a valid image.
        * ``{"prediction": [{"score": "NN.NN%", "label": name}, ...],
          "image": <base64 JPEG with boxes drawn>}`` when at least one detection
          survives filtering.
        * ``{"prediction": "No objects detected", "image": None}`` otherwise.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400
    image_bytes = request.files['file'].read()

    try:
        image_tensor, image = transform_image(image_bytes)
    except ValueError as exc:
        # transform_image signals an undecodable upload with ValueError; report it
        # as a client error instead of letting it surface as a 500.
        return jsonify({"error": str(exc)}), 400
    image_tensor = image_tensor.to(device)

    with torch.no_grad():
        prediction = FastRCNN(image_tensor)

    # Filter and apply non-max suppression
    nms_prediction = filter_and_nms(prediction, score_threshold=0.7, iou_threshold=0.7)

    # Nothing left after filtering: answer without an annotated image
    if not any(len(pred['boxes']) > 0 for pred in nms_prediction):
        return jsonify({"prediction": "No objects detected", "image": None})

    # Draw bounding boxes on the image
    image_with_boxes = draw_boxes(image, nms_prediction, sequential_id_to_name)

    # Convert image to base64
    buffered = io.BytesIO()
    image_with_boxes.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # NOTE(review): labels are treated as 0-based indices into the COCO category
    # list; confirm this matches the label encoding used when the model was trained.
    results = []
    for element in nms_prediction:
        for score, label in zip(element["scores"].tolist(), element["labels"].tolist()):
            results.append({
                "score": f"{score * 100:.2f}%",
                # Same lookup the drawn captions use, so the JSON and the image
                # agree; an unmapped label yields 'Unknown' instead of a KeyError.
                "label": sequential_id_to_name.get(label, 'Unknown'),
            })

    return jsonify({"prediction": results, "image": img_str})

In [19]:
@app.route("/predict/ssd300", methods=["POST"])
def predict_ssd300():
    """Run the SSD300 model on an uploaded image and return detections as JSON.

    Expects the image in the multipart form field 'file'.

    Returns:
        * HTTP 400 with ``{"error": ...}`` when no file is supplied or the upload
          is not a valid image.
        * ``{"prediction": [{"score": "NN.NN%", "label": name}, ...],
          "image": <base64 JPEG with boxes drawn>}`` when at least one detection
          survives filtering.
        * ``{"prediction": "No objects detected", "image": None}`` otherwise.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400
    image_bytes = request.files['file'].read()

    try:
        image_tensor, image = transform_image(image_bytes)
    except ValueError as exc:
        # transform_image signals an undecodable upload with ValueError; report it
        # as a client error instead of letting it surface as a 500.
        return jsonify({"error": str(exc)}), 400
    image_tensor = image_tensor.to(device)

    with torch.no_grad():
        prediction = SSD(image_tensor)

    # Filter and apply non-max suppression
    nms_prediction = filter_and_nms(prediction, score_threshold=0.7, iou_threshold=0.7)

    # Nothing left after filtering: answer without an annotated image
    if not any(len(pred['boxes']) > 0 for pred in nms_prediction):
        return jsonify({"prediction": "No objects detected", "image": None})

    # Draw bounding boxes on the image
    image_with_boxes = draw_boxes(image, nms_prediction, category_id_to_name)

    # Convert image to base64
    buffered = io.BytesIO()
    image_with_boxes.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

    results = []
    for element in nms_prediction:
        for score, label in zip(element["scores"].tolist(), element["labels"].tolist()):
            results.append({
                "score": f"{score * 100:.2f}%",
                # COCO category ids are not contiguous, so a predicted id may have
                # no name; report 'Unknown' (as the drawn caption does) instead of
                # raising KeyError.
                "label": category_id_to_name.get(label, 'Unknown'),
            })

    return jsonify({"prediction": results, "image": img_str})

In [20]:
# Run the app
# host="0.0.0.0" makes the server listen on all network interfaces (not only
# localhost), so it is reachable from other machines on the network on port 8000.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8000
 * Running on http://192.168.1.63:8000
Press CTRL+C to quit
127.0.0.1 - - [05/Jul/2024 15:51:13] "GET / HTTP/1.1" 200 -
