In [None]:
# Mount Google Drive at /content/drive; the checkpoint and image paths used
# further down in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torchvision.transforms.functional as F
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Class name -> integer class id (id 0 is not used here; it is reserved for background)
label_map = dict(
    arrow_line_down=1,
    decision=2,
    process=3,
    start_end=4,
    arrow_line_left=5,
    arrow_line_right=6,
    arrow_line_up=8,
    print=9,
    scan=7,
)
# Inverse lookup: integer class id -> class name
reverse_label_map = dict(zip(label_map.values(), label_map.keys()))

# Pick the inference device first so the checkpoint can be mapped straight onto it
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build Faster R-CNN (ResNet-50 FPN) starting from the COCO weights
num_classes = len(label_map) + 1  # Include background as class 0
model = fasterrcnn_resnet50_fpn(weights='FasterRCNN_ResNet50_FPN_Weights.COCO_V1')

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one (number of classes is the number of unique labels in your dataset + background)
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the trained model weights. map_location remaps tensors that were saved
# from a GPU, so the load also succeeds on a CPU-only runtime.
model.load_state_dict(
    torch.load('/content/drive/MyDrive/faster_rcnn_flowchart.pth', map_location=device)
)
model.eval()

# Move model to the device
model.to(device)

# Preprocess the image
def preprocess_image(image_path):
    """Read an image file and turn it into a tensor with ``F.to_tensor``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The result of ``F.to_tensor`` applied to the RGB-converted image.
    """
    # The context manager closes the underlying file deterministically.
    # convert() returns a separate image object, so it stays usable afterwards.
    with Image.open(image_path) as img_file:
        rgb_img = img_file.convert("RGB")
    return F.to_tensor(rgb_img)

# Postprocess the output
def postprocess_output(prediction, threshold=0.5):
    """Keep the detections of the first image whose score exceeds ``threshold``.

    Reads ``boxes``, ``labels`` and ``scores`` from ``prediction[0]`` and returns
    them as a ``(boxes, labels, scores)`` tuple filtered by ``scores > threshold``
    (strictly greater, so a score equal to the threshold is dropped).
    """
    detections = prediction[0]
    all_boxes = detections['boxes']
    all_labels = detections['labels']
    all_scores = detections['scores']

    # One boolean mask, reused for all three tensors
    keep = all_scores > threshold

    return all_boxes[keep], all_labels[keep], all_scores[keep]

# Run the detector on one sample image
image_path = '/content/drive/MyDrive/4107.jpg'

# Load the image, add the batch dimension, and place it on the model's device
img_tensor = preprocess_image(image_path)
img_tensor = img_tensor.unsqueeze(0).to(device)

# Forward pass without gradient tracking
with torch.no_grad():
    prediction = model(img_tensor)

# Keep only detections scoring above 0.5
boxes, labels, scores = postprocess_output(prediction, threshold=0.5)

# Bring boxes and labels back to the CPU (scores are left where they are)
boxes, labels = boxes.cpu(), labels.cpu()

# Output the shapes with their coordinates
def output_shapes_and_coordinates(boxes, labels, reverse_label_map):
    """Describe every detection as a dict holding its class name and box corners.

    Each returned entry has the form
    ``{"shape": <name>, "coordinates": {"xmin", "ymin", "xmax", "ymax"}}``.
    The name is looked up in ``reverse_label_map`` from ``labels[i].item()``;
    the corners are the first four values of ``boxes[i]`` passed through ``int``.
    """
    corner_keys = ("xmin", "ymin", "xmax", "ymax")
    return [
        {
            "shape": reverse_label_map[labels[idx].item()],
            "coordinates": {
                key: int(value) for key, value in zip(corner_keys, row.numpy())
            },
        }
        for idx, row in enumerate(boxes)
    ]

# Convert the filtered boxes/labels into a list of {"shape", "coordinates"} dicts
shapes = output_shapes_and_coordinates(boxes, labels, reverse_label_map)

# Print one line per detection: its class name followed by its corner dict
for shape in shapes:
    print(f"Shape: {shape['shape']}, Coordinates: {shape['coordinates']}")

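# Earlier draft of visualize_results, kept commented out; the active definition
# further down additionally skips boxes labelled "process".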
# def visualize_results(image_path, boxes, labels, reverse_label_map):
#     img = Image.open(image_path).convert("RGB")
#     plt.figure(figsize=(12, 12))
#     plt.imshow(img)
#     ax = plt.gca()

#     for i in range(len(boxes)):
#         box = boxes[i].numpy()
#         label = labels[i].item()
#         color = 'r'
#         rect = patches.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1], linewidth=2, edgecolor=color, facecolor='none')
#         ax.add_patch(rect)
#         plt.text(box[0], box[1], reverse_label_map[label], color=color, fontsize=12, bbox=dict(facecolor='yellow', alpha=0.5))

#     plt.show()

from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def visualize_results(image_path, boxes, labels, reverse_label_map, skip_labels=("process",)):
    """Show the image with the detected boxes and their class names drawn on it.

    Args:
        image_path: Path of the image file to display.
        boxes: Indexable of per-detection boxes; each entry must support
            ``.numpy()`` and hold ``xmin, ymin, xmax, ymax`` in that order.
        labels: Indexable of per-detection class ids; each entry must support
            ``.item()``.
        reverse_label_map: Mapping from class id to class name.
        skip_labels: Collection of class names that are not drawn. The default
            ``("process",)`` reproduces the previously hard-coded behaviour;
            pass ``()`` to draw every detection.
    """
    # Close the file handle deterministically; convert() returns a separate image object
    with Image.open(image_path) as img_file:
        img = img_file.convert("RGB")

    plt.figure(figsize=(12, 12))
    plt.imshow(img)
    ax = plt.gca()

    color = 'r'
    for i in range(len(boxes)):
        # Resolve the class name once; it drives both the skip test and the caption
        name = reverse_label_map[labels[i].item()]
        if name in skip_labels:
            continue

        box = boxes[i].numpy()
        # Rectangle takes the top-left corner plus width and height
        rect = patches.Rectangle(
            (box[0], box[1]),
            box[2] - box[0],
            box[3] - box[1],
            linewidth=2,
            edgecolor=color,
            facecolor='none',
        )
        ax.add_patch(rect)
        plt.text(box[0], box[1], name, color=color, fontsize=12, bbox=dict(facecolor='yellow', alpha=0.5))

    plt.show()

# Draw the filtered detections on the test image
# (visualize_results leaves out boxes labelled "process")
visualize_results(image_path, boxes, labels, reverse_label_map)


In [None]:
import torch
import torchvision.transforms.functional as F
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Class name -> integer class id, listed as (name, id) pairs
label_map = dict([
    ("arrow_line_down", 1),
    ("decision", 2),
    ("process", 3),
    ("start_end", 4),
    ("arrow_line_left", 5),
    ("arrow_line_right", 6),
    ("arrow_line_up", 8),
    ("print", 9),
    ("scan", 7),
])
# Inverse lookup: integer class id -> class name
reverse_label_map = {class_id: class_name for class_name, class_id in label_map.items()}

# Build Faster R-CNN (ResNet-50 FPN) starting from the COCO weights.
# `pretrained=True` is the deprecated torchvision interface; `weights=` requests
# the same COCO_V1 weights without the deprecation warning.
num_classes = len(label_map) + 1  # Include background as class 0
model = fasterrcnn_resnet50_fpn(weights='FasterRCNN_ResNet50_FPN_Weights.COCO_V1')

# Replace the COCO box predictor with one sized for this dataset's classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the trained model weights onto the CPU first so the load works with or without a GPU
model.load_state_dict(torch.load('/content/drive/MyDrive/faster_rcnn_flowchart.pth', map_location=torch.device('cpu')))
model.eval()

# Move model to the device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Preprocess the image
def preprocess_image(image_path):
    """Read an image file and turn it into a tensor with ``F.to_tensor``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The result of ``F.to_tensor`` applied to the RGB-converted image.
    """
    # The context manager closes the underlying file deterministically.
    # convert() returns a separate image object, so it stays usable afterwards.
    with Image.open(image_path) as img_file:
        rgb_img = img_file.convert("RGB")
    return F.to_tensor(rgb_img)

# Postprocess the output
def postprocess_output(prediction, threshold=0.5):
    """Return the boxes and labels of the first image that score above ``threshold``.

    Reads ``boxes``, ``labels`` and ``scores`` from ``prediction[0]`` and returns
    ``(boxes, labels)`` filtered by ``scores > threshold`` (strictly greater).
    The scores themselves are not returned.
    """
    detections = prediction[0]
    all_boxes = detections['boxes']
    all_labels = detections['labels']

    # One boolean mask, shared by both tensors
    keep = detections['scores'] > threshold

    return all_boxes[keep], all_labels[keep]

# Function to perform inference and extract shapes and coordinates
def detect_shapes(image_path, threshold=0.5):
    """Run the detector on one image and list the detected shapes.

    Args:
        image_path: Path of the image file to analyse.
        threshold: Score cut-off forwarded to ``postprocess_output``. Defaults
            to 0.5, the value that was previously hard-coded.

    Returns:
        A pair ``(ex_shape, ex_coor)`` of equally long lists. ``ex_shape[i]``
        is the class name of detection ``i`` (looked up in
        ``reverse_label_map``). ``ex_coor[i]`` is a dict with ``xmin``,
        ``ymin``, ``xmax`` and ``ymax`` entries, each box value passed
        through ``int``.
    """
    # Preprocess the image, add the batch dimension, and move it to the model's device
    img_tensor = preprocess_image(image_path).unsqueeze(0).to(device)

    # Get predictions without tracking gradients
    with torch.no_grad():
        prediction = model(img_tensor)

    # Drop low-confidence detections and bring the rest back to the CPU
    boxes, labels = postprocess_output(prediction, threshold=threshold)
    boxes = boxes.cpu()
    labels = labels.cpu()

    # Collect class names and corner coordinates in two parallel lists
    ex_shape = []
    ex_coor = []
    for i in range(len(boxes)):
        box = boxes[i].numpy()
        ex_shape.append(reverse_label_map[labels[i].item()])
        ex_coor.append({
            "xmin": int(box[0]),
            "ymin": int(box[1]),
            "xmax": int(box[2]),
            "ymax": int(box[3]),
        })

    return ex_shape, ex_coor





Detected Shapes: ['start_end', 'start_end', 'arrow_line_down', 'arrow_line_down', 'arrow_line_down', 'arrow_line_down', 'decision', 'process', 'arrow_line_left', 'scan', 'scan', 'arrow_line_down', 'process', 'scan', 'arrow_line_right']
Coordinates for Shapes: [{'xmin': 39, 'ymin': 6, 'xmax': 150, 'ymax': 63}, {'xmin': 39, 'ymin': 437, 'xmax': 147, 'ymax': 493}, {'xmin': 88, 'ymin': 410, 'xmax': 102, 'ymax': 439}, {'xmin': 88, 'ymin': 159, 'xmax': 102, 'ymax': 200}, {'xmin': 88, 'ymin': 309, 'xmax': 103, 'ymax': 350}, {'xmin': 88, 'ymin': 59, 'xmax': 102, 'ymax': 100}, {'xmin': 23, 'ymin': 193, 'xmax': 166, 'ymax': 316}, {'xmin': 72, 'ymin': 128, 'xmax': 121, 'ymax': 144}, {'xmin': 146, 'ymin': 459, 'xmax': 269, 'ymax': 471}, {'xmin': 202, 'ymin': 345, 'xmax': 338, 'ymax': 419}, {'xmin': 17, 'ymin': 346, 'xmax': 172, 'ymax': 419}, {'xmin': 263, 'ymin': 258, 'xmax': 278, 'ymax': 353}, {'xmin': 16, 'ymin': 97, 'xmax': 180, 'ymax': 165}, {'xmin': 0, 'ymin': 96, 'xmax': 188, 'ymax': 169}, {