In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
os.chdir('/content/drive/MyDrive/Flow_Chart_Code/images')

In [None]:
import cv2
import os
from tqdm import tqdm

def preprocess_images(input_dir, output_dir, img_size=(1024, 1024)):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for img_name in tqdm(os.listdir(input_dir)):
        img_path = os.path.join(input_dir, img_name)
        img = cv2.imread(img_path)

        if img is None:
            print(f"Failed to load image: {img_path}")
            continue

        img_resized = cv2.resize(img, img_size)
        cv2.imwrite(os.path.join(output_dir, img_name), img_resized)

input_dir = '/content/drive/MyDrive/Flow_Chart_Code/images'
output_dir = '/content/drive/MyDrive/Flow_Chart_Code/Pre_P_Images'
preprocess_images(input_dir, output_dir)


100%|██████████| 620/620 [06:03<00:00,  1.71it/s]


In [None]:
import xml.etree.ElementTree as ET

def update_xml_annotations(xml_dir, output_dir, img_size=(1024, 1024)):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for xml_file in tqdm(os.listdir(xml_dir)):
        xml_path = os.path.join(xml_dir, xml_file)
        tree = ET.parse(xml_path)
        root = tree.getroot()

        size = root.find('size')
        original_width = int(size.find('width').text)
        original_height = int(size.find('height').text)

        for obj in root.iter('object'):
            bndbox = obj.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)

            xmin = int(xmin * img_size[0] / original_width)
            ymin = int(ymin * img_size[1] / original_height)
            xmax = int(xmax * img_size[0] / original_width)
            ymax = int(ymax * img_size[1] / original_height)

            bndbox.find('xmin').text = str(xmin)
            bndbox.find('ymin').text = str(ymin)
            bndbox.find('xmax').text = str(xmax)
            bndbox.find('ymax').text = str(ymax)

        size.find('width').text = str(img_size[0])
        size.find('height').text = str(img_size[1])

        tree.write(os.path.join(output_dir, xml_file))

xml_dir = '/content/drive/MyDrive/Flow_Chart_Code/annots'
output_xml_dir = '/content/drive/MyDrive/Flow_Chart_Code/Pre_A_XMLs'
update_xml_annotations(xml_dir, output_xml_dir)


100%|██████████| 601/601 [03:09<00:00,  3.17it/s]


In [None]:
!pip install torch torchvision


Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:
import os
import xml.etree.ElementTree as ET

xml_dir = '/content/drive/MyDrive/Flow_Chart_Code/Pre_A_XMLs'

def find_unique_labels(xml_dir):
    unique_labels = set()
    xml_files = os.listdir(xml_dir)

    for xml_file in xml_files:
        xml_path = os.path.join(xml_dir, xml_file)
        tree = ET.parse(xml_path)
        root = tree.getroot()

        for obj in root.iter('object'):
            label = obj.find('name').text
            unique_labels.add(label)

    return unique_labels

unique_labels = find_unique_labels(xml_dir)
print(unique_labels)


{'process', 'start_end', 'scan', 'print', 'arrow_line_right', 'arrow_line_up', 'decision', 'arrow_line_down', 'arrow_line_left'}


In [None]:
label_map = {
    "arrow_line_down": 1,
    "decision": 2,
    "process": 3,
    "start_end": 4,
    "arrow_line_left": 5,
    "arrow_line_right": 6,
    "arrow_line_up":8,
    "print":9,
    "scan": 7  # Add all other unique labels you found here
}


In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader, Dataset
import xml.etree.ElementTree as ET
import cv2
import os

class FlowchartDataset(Dataset):
    def __init__(self, img_dir, xml_dir, label_map, transforms=None):
        self.img_dir = img_dir
        self.xml_dir = xml_dir
        self.transforms = transforms
        self.imgs = sorted([img for img in os.listdir(img_dir) if img.endswith('.jpg')])
        self.xmls = sorted([xml for xml in os.listdir(xml_dir) if xml.endswith('.xml')])
        self.label_map = label_map

        # Ensure the dataset length matches by finding common base names
        self.imgs = [img for img in self.imgs if img.replace('.jpg', '.xml') in self.xmls]
        self.xmls = [xml for xml in self.xmls if xml.replace('.xml', '.jpg') in self.imgs]

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.imgs[idx])
        xml_path = os.path.join(self.xml_dir, self.xmls[idx])

        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = F.to_tensor(img)

        tree = ET.parse(xml_path)
        root = tree.getroot()

        boxes = []
        labels = []
        for obj in root.iter('object'):
            label = obj.find('name').text
            if label not in self.label_map:
                continue  # Skip labels not in the label_map
            bndbox = obj.find('bndbox')
            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(self.label_map[label])

        if len(boxes) == 0:
            # Return dummy data in case there are no valid boxes for this image
            boxes = [[0, 0, 1, 1]]
            labels = [0]  # Background class

        target = {}
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)

        return img, target

# Define the label map
label_map = {
    "arrow_line_down": 1,
    "decision": 2,
    "process": 3,
    "start_end": 4,
    "arrow_line_left": 5,
    "arrow_line_right": 6,
    "arrow_line_up":8,
    "print":9,
    "scan": 7  # Add all other unique labels you found here
}


# Load the dataset
output_dir = '/content/drive/MyDrive/Flow_Chart_Code/Pre_P_Images'
output_xml_dir = '/content/drive/MyDrive/Flow_Chart_Code/Pre_A_XMLs'
dataset = FlowchartDataset(output_dir, output_xml_dir, label_map)
data_loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2, collate_fn=lambda x: tuple(zip(*x)))

# Load the model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='FasterRCNN_ResNet50_FPN_Weights.COCO_V1')

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one (number of classes is the number of unique labels in your dataset + background)
num_classes = len(label_map) + 1  # Include background as class 0
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Train the model
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    for images, targets in data_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    lr_scheduler.step()

    print(f"Epoch {epoch}: Loss: {losses.item()}")

# Save the model weights
save_path = '/content/drive/MyDrive/Flow_Chart_Code/faster_rcnn_flowchart.pth'
os.makedirs(os.path.dirname(save_path), exist_ok=True)  # Ensure the directory exists
torch.save(model.state_dict(), save_path)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 186MB/s]


Epoch 0: Loss: 0.5750427842140198
Epoch 1: Loss: 0.4788261651992798
Epoch 2: Loss: 0.33107757568359375
Epoch 3: Loss: 0.2575295567512512
Epoch 4: Loss: 0.36668702960014343
Epoch 5: Loss: 0.3312632143497467
Epoch 6: Loss: 0.2637271583080292
Epoch 7: Loss: 0.2664109170436859
Epoch 8: Loss: 0.3633718490600586
Epoch 9: Loss: 0.3815934658050537


In [None]:
import torch
import torchvision.transforms.functional as F
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Define the correct number of classes used during training
num_classes_trained = 10  # Update this to the actual number of classes used during training

# Load the pre-trained model
model = fasterrcnn_resnet50_fpn(weights='FasterRCNN_ResNet50_FPN_Weights.COCO_V1')
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one with the same number of classes as used during training
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes_trained)

# Load the trained model weights
model.load_state_dict(torch.load('faster_rcnn_flowchart.pth'))
model.eval()

# Now modify the head to the desired number of classes for your use case
num_classes_current = 8  # Update this to the actual number of classes you need now
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes_current)


In [1]:
import torch
import torchvision.transforms.functional as F
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Define the label map
label_map = {
    "arrow_line_down": 1,
    "decision": 2,
    "process": 3,
    "start_end": 4,
    "arrow_line_left": 5,
    "arrow_line_right": 6,
    "arrow_line_up":8,
    "print":9,
    "scan": 7  # Add all other unique labels you found here
}
reverse_label_map = {v: k for k, v in label_map.items()}

# Load the pre-trained model
num_classes = len(label_map) + 1  # Include background as class 0
model = fasterrcnn_resnet50_fpn(weights='FasterRCNN_ResNet50_FPN_Weights.COCO_V1')

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one (number of classes is the number of unique labels in your dataset + background)
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the trained model weights
model.load_state_dict(torch.load('/content/drive/MyDrive/Flow_Chart_Code/faster_rcnn_flowchart.pth'))
model.eval()

# Move model to the device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Preprocess the image
def preprocess_image(image_path):
    img = Image.open(image_path).convert("RGB")
    img_tensor = F.to_tensor(img)
    return img_tensor

# Postprocess the output
def postprocess_output(prediction, threshold=0.5):
    boxes = prediction[0]['boxes']
    labels = prediction[0]['labels']
    scores = prediction[0]['scores']

    # Filter out low confidence predictions
    boxes = boxes[scores > threshold]
    labels = labels[scores > threshold]
    scores = scores[scores > threshold]

    return boxes, labels, scores

# Test the model on a single image
image_path = '/content/drive/MyDrive/Given_FlowCharts/System_Generated/4107.jpg'
img_tensor = preprocess_image(image_path)
img_tensor = img_tensor.unsqueeze(0)  # Add batch dimension

# Move the image tensor to the device
img_tensor = img_tensor.to(device)

# Get predictions
with torch.no_grad():
    prediction = model(img_tensor)

# Postprocess the output
boxes, labels, scores = postprocess_output(prediction, threshold=0.5)

# Move boxes and labels to the CPU
boxes = boxes.cpu()
labels = labels.cpu()

# Output the shapes with their coordinates
def output_shapes_and_coordinates(boxes, labels, reverse_label_map):
    shapes = []
    for i in range(len(boxes)):
        box = boxes[i].numpy()
        label = labels[i].item()
        shapes.append({
            "shape": reverse_label_map[label],
            "coordinates": {
                "xmin": int(box[0]),
                "ymin": int(box[1]),
                "xmax": int(box[2]),
                "ymax": int(box[3])
            }
        })
    return shapes

shapes = output_shapes_and_coordinates(boxes, labels, reverse_label_map)

# Print the shapes and their coordinates
for shape in shapes:
    print(f"Shape: {shape['shape']}, Coordinates: {shape['coordinates']}")

# Optionally visualize the results
def visualize_results(image_path, boxes, labels, reverse_label_map):
    img = Image.open(image_path).convert("RGB")
    plt.figure(figsize=(12, 12))
    plt.imshow(img)
    ax = plt.gca()

    for i in range(len(boxes)):
        box = boxes[i].numpy()
        label = labels[i].item()
        color = 'r'
        rect = patches.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1], linewidth=2, edgecolor=color, facecolor='none')
        ax.add_patch(rect)
        plt.text(box[0], box[1], reverse_label_map[label], color=color, fontsize=12, bbox=dict(facecolor='yellow', alpha=0.5))

    plt.show()

visualize_results(image_path, boxes, labels, reverse_label_map)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:02<00:00, 69.4MB/s]


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Flow_Chart_Code/faster_rcnn_flowchart.pth'