In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.models import resnet18
from PIL import Image
import pandas as pd
import os


In [18]:
import os

def load_annotations(yolo_annotations_folder):
    annotations = []
    for filename in os.listdir(yolo_annotations_folder):
        if filename.endswith(".txt"):
            image_filename = os.path.splitext(filename)[0]  # Remove the extension if it exists
            image_filename += ".JPG"  # Add the extension back

            with open(os.path.join(yolo_annotations_folder, filename), 'r') as file:
                for line in file:
                    class_label, x, y, width, height = line.strip().split()
                    x, y, width, height = float(x), float(y), float(width), float(height)
                    class_label = int(class_label)  # Convert class label to integer

                    annotations.append((image_filename, x, y, width, height, class_label))
    return annotations


In [19]:
# Implement data augmentation and preprocessing (resize and normalization)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [20]:
# Create a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, annotations, image_folder, transform=None):
        self.annotations = annotations
        self.image_folder = image_folder
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        image_filename, x, y, width, height, class_label = self.annotations[index]
        image_path = os.path.join(self.image_folder, image_filename)
        image = Image.open(image_path).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor([x, y, width, height]), torch.tensor(class_label)

# Load your dataset
annotations = load_annotations('/kaggle/input/annotation-plates/obj_train_data')
image_folder = '/kaggle/input/pakistani-number-plates1/Cars'
dataset = CustomDataset(annotations, image_folder, transform=transform)

# Split dataset into training and validation sets
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)


In [21]:
annotations

[('DSC_1047.JPG', 0.585955, 0.675205, 0.139755, 0.11728, 0),
 ('DSC_1027.JPG', 0.662659, 0.688763, 0.117313, 0.108255, 0),
 ('DSC_0995.JPG', 0.611427, 0.642829, 0.157466, 0.088885, 0),
 ('DSC_1002.JPG', 0.447084, 0.620645, 0.158837, 0.095267, 0),
 ('DSC_1037.JPG', 0.612611, 0.707584, 0.150373, 0.12178, 0),
 ('DSC_1066.JPG', 0.651747, 0.671544, 0.131674, 0.118782, 0),
 ('DSC_1089.JPG', 0.590535, 0.641472, 0.147953, 0.124786, 0),
 ('DSC_1089.JPG', 0.93685, 0.416691, 0.04493, 0.045105, 0),
 ('DSC_1089.JPG', 0.065581, 0.389631, 0.03398, 0.036089, 0),
 ('DSC_0973.JPG', 0.452083, 0.695282, 0.1743, 0.104757, 0),
 ('DSC_1041.JPG', 0.533733, 0.673084, 0.162803, 0.109763, 0),
 ('DSC_1052.JPG', 0.584708, 0.634732, 0.137894, 0.084197, 0),
 ('DSC_1033.JPG', 0.653379, 0.65053, 0.128697, 0.109755, 0),
 ('DSC_0988.JPG', 0.636738, 0.706389, 0.121065, 0.101587, 0),
 ('DSC_1106.JPG', 0.591448, 0.656197, 0.133508, 0.085924, 0),
 ('DSC_1096.JPG', 0.60783, 0.697769, 0.150895, 0.100744, 0),
 ('DSC_1054.JPG',

In [12]:
image_folder

'/kaggle/input/pakistani-number-plates/Cars'

In [21]:
# Modify ResNet18 for object detection by adding custom regression and classification heads
class ResNet18ObjectDetection(nn.Module):
    def __init__(self, num_classes):
        super(ResNet18ObjectDetection, self).__init__()
        self.resnet18 = resnet18(pretrained=True)
        in_features = self.resnet18.fc.in_features
        self.resnet18.fc = nn.Linear(in_features, num_classes)

        # Add additional layers for bounding box regression
        self.bbox_regression = nn.Linear(in_features, 4)  # 4 for (x, y, width, height)

    def forward(self, x):
        x = self.resnet18(x)
        class_scores = x
        bbox_regression = self.bbox_regression(x)
        return class_scores, bbox_regression

# Initialize the model
num_classes = 1  # Assuming only one class (number plate)
model = ResNet18ObjectDetection(num_classes)


In [22]:
import torch.nn as nn
import torchvision.models as models

# Modify ResNet18 for object detection by adding custom regression and classification heads
class ResNet18ObjectDetection(nn.Module):
    def __init__(self, num_classes):
        super(ResNet18ObjectDetection, self).__init__()
        self.resnet18 = models.resnet18(pretrained=True)
        in_features = self.resnet18.fc.in_features

        # Classification head
        self.classification_head = nn.Linear(in_features, num_classes)

        # Bounding box regression head
        self.bbox_regression_head = nn.Linear(in_features, 4)  # 4 for (x, y, width, height)

    def forward(self, x):
        x = self.resnet18.conv1(x)
        x = self.resnet18.bn1(x)
        x = self.resnet18.relu(x)
        x = self.resnet18.maxpool(x)

        x = self.resnet18.layer1(x)
        x = self.resnet18.layer2(x)
        x = self.resnet18.layer3(x)
        x = self.resnet18.layer4(x)

        # Global average pooling
        x = self.resnet18.avgpool(x)
        x = x.view(x.size(0), -1)

        class_scores = self.classification_head(x)
        bbox_regression = self.bbox_regression_head(x)

        return class_scores, bbox_regression

# Initialize the model
num_classes = 1  # Assuming only one class (number plate)
model = ResNet18ObjectDetection(num_classes)


In [38]:
# Training loop
num_epochs = 2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(num_epochs):
    model.train()
    for images, targets, labels in train_loader:
        images, targets, labels = images.to(device), targets.to(device), labels.to(device)

        optimizer.zero_grad()
        class_scores, bbox_regression = model(images)

        # Calculate losses
        class_loss = criterion(class_scores, labels)
        bbox_loss = bbox_criterion(bbox_regression, targets)
        total_loss = class_loss + bbox_loss

        # Backpropagation and optimization
        total_loss.backward()
        optimizer.step()

    scheduler.step()
    
    

    # Validation after each epoch (optional)
    model.eval()
    val_class_loss = 0.0
    val_bbox_loss = 0.0
    total_val_samples = 0

    with torch.no_grad():
        for val_images, val_targets, val_labels in val_loader:
            val_images, val_targets, val_labels = val_images.to(device), val_targets.to(device), val_labels.to(device)
            val_class_scores, val_bbox_regression = model(val_images)

            # Calculate validation losses
            val_class_loss += criterion(val_class_scores, val_labels).item()
            val_bbox_loss += bbox_criterion(val_bbox_regression, val_targets).item()

            total_val_samples += val_labels.size(0)

    # Average validation losses
    avg_val_class_loss = val_class_loss / total_val_samples
    avg_val_bbox_loss = val_bbox_loss / total_val_samples

    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {total_loss.item():.4f}, "
          f"Val Class Loss: {avg_val_class_loss:.4f}, Val BBox Loss: {avg_val_bbox_loss:.4f}")

# Training is complete
print("Training complete!")


Epoch 3/3, Train Loss: 0.0006, Val Accuracy: 1.0000, Val BBox Loss: 0.0001


In [39]:
# Initialize early stopping variables
best_val_loss = float('inf')
patience = 3  # Number of epochs to wait for improvement
early_stopping_counter = 0


# Set up loss function, optimizer, and learning rate scheduler
criterion = nn.CrossEntropyLoss()
bbox_criterion = nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Training loop
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(num_epochs):
    model.train()
    train_class_loss = 0.0
    train_bbox_loss = 0.0

    for images, targets, labels in train_loader:
        images, targets, labels = images.to(device), targets.to(device), labels.to(device)

        optimizer.zero_grad()
        class_scores, bbox_regression = model(images)

        # Calculate losses
        class_loss = criterion(class_scores, labels)
        bbox_loss = bbox_criterion(bbox_regression, targets)
        total_loss = class_loss + bbox_loss

        # Backpropagation and optimization
        total_loss.backward()
        optimizer.step()

        train_class_loss += class_loss.item()
        train_bbox_loss += bbox_loss.item()

    avg_train_class_loss = train_class_loss / len(train_loader)
    avg_train_bbox_loss = train_bbox_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {total_loss.item():.4f}, "
          f"Train Class Loss: {avg_train_class_loss:.4f}, Train BBox Loss: {avg_train_bbox_loss:.4f}")

    scheduler.step()

    # Validation after each epoch
    model.eval()
    val_class_loss = 0.0
    val_bbox_loss = 0.0
    total_val_samples = 0

    with torch.no_grad():
        for val_images, val_targets, val_labels in val_loader:
            val_images, val_targets, val_labels = val_images.to(device), val_targets.to(device), val_labels.to(device)
            val_class_scores, val_bbox_regression = model(val_images)

            # Calculate validation losses
            val_class_loss += criterion(val_class_scores, val_labels).item()
            val_bbox_loss += bbox_criterion(val_bbox_regression, val_targets).item()

            total_val_samples += val_labels.size(0)

    # Average validation losses
    avg_val_class_loss = val_class_loss / total_val_samples
    avg_val_bbox_loss = val_bbox_loss / total_val_samples

    print(f"Epoch {epoch + 1}/{num_epochs}, Val Class Loss: {avg_val_class_loss:.4f}, Val BBox Loss: {avg_val_bbox_loss:.4f}")

    # Check for early stopping
    val_loss = avg_val_class_loss + avg_val_bbox_loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1

    if early_stopping_counter >= patience:
        print("Early stopping triggered. Training stopped.")
        break

# Training is complete
print("Training complete!")


Epoch 1/10, Train Loss: 0.0170, Train Class Loss: 0.0000, Train BBox Loss: 0.0783
Epoch 1/10, Val Class Loss: 0.0000, Val BBox Loss: 0.0373
Epoch 2/10, Train Loss: 0.0206, Train Class Loss: 0.0000, Train BBox Loss: 0.0362
Epoch 2/10, Val Class Loss: 0.0000, Val BBox Loss: 0.0439
Epoch 3/10, Train Loss: 0.0033, Train Class Loss: 0.0000, Train BBox Loss: 0.0054
Epoch 3/10, Val Class Loss: 0.0000, Val BBox Loss: 0.0177
Epoch 4/10, Train Loss: 0.0035, Train Class Loss: 0.0000, Train BBox Loss: 0.0042
Epoch 4/10, Val Class Loss: 0.0000, Val BBox Loss: 0.0034
Epoch 5/10, Train Loss: 0.0019, Train Class Loss: 0.0000, Train BBox Loss: 0.0031
Epoch 5/10, Val Class Loss: 0.0000, Val BBox Loss: 0.0036
Epoch 6/10, Train Loss: 0.0025, Train Class Loss: 0.0000, Train BBox Loss: 0.0054
Epoch 6/10, Val Class Loss: 0.0000, Val BBox Loss: 0.0009
Epoch 7/10, Train Loss: 0.0073, Train Class Loss: 0.0000, Train BBox Loss: 0.0049
Epoch 7/10, Val Class Loss: 0.0000, Val BBox Loss: 0.0070
Epoch 8/10, Train Lo

In [None]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    train_class_loss = 0.0
    train_bbox_loss = 0.0
    detected_regions = []  # List to store the detected regions for each image

    for images, targets, labels in train_loader:
        images, targets, labels = images.to(device), targets.to(device), labels.to(device)

        optimizer.zero_grad()
        class_scores, bbox_regression = model(images)

        # Calculate losses
        class_loss = criterion(class_scores, labels)
        bbox_loss = bbox_criterion(bbox_regression, targets)
        total_loss = class_loss + bbox_loss

        # Backpropagation and optimization
        total_loss.backward()
        optimizer.step()

        train_class_loss += class_loss.item()
        train_bbox_loss += bbox_loss.item()

        # Post-processing to get detected regions (assuming class_scores contains confidence scores)
        conf_scores, pred_boxes = torch.max(class_scores, dim=1)
        detected_images_regions = []
        for i in range(images.size(0)):
            if conf_scores[i] > confidence_threshold:  # Set a threshold for confidence score
                x, y, width, height = pred_boxes[i]  # Assuming pred_boxes contains predicted bounding boxes
                detected_images_regions.append((x.item(), y.item(), width.item(), height.item()))
        detected_regions.extend(detected_images_regions)

    avg_train_class_loss = train_class_loss / len(train_loader)
    avg_train_bbox_loss = train_bbox_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {total_loss.item():.4f}, "
          f"Train Class Loss: {avg_train_class_loss:.4f}, Train BBox Loss: {avg_train_bbox_loss:.4f}")

    scheduler.step()


In [25]:
# # Initialize early stopping variables
# best_val_loss = float('inf')
# patience = 3  # Number of epochs to wait for improvement
# early_stopping_counter = 0


# Set up loss function, optimizer, and learning rate scheduler
criterion = nn.CrossEntropyLoss()
bbox_criterion = nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
confidence_threshold = 0.5
# Rest of your code...

# Training loop
for epoch in range(num_epochs):
    model.train()
    train_class_loss = 0.0
    train_bbox_loss = 0.0
    detected_regions = []  # List to store the detected regions for each image

    for images, targets, labels in train_loader:
        images, targets, labels = images.to(device), targets.to(device), labels.to(device)

        optimizer.zero_grad()
        class_scores, bbox_regression = model(images)

        # Calculate losses
        class_loss = criterion(class_scores, labels)
        bbox_loss = bbox_criterion(bbox_regression, targets)
        total_loss = class_loss + bbox_loss

        # Backpropagation and optimization
        total_loss.backward()
        optimizer.step()

        train_class_loss += class_loss.item()
        train_bbox_loss += bbox_loss.item()

        # Post-processing to get detected regions (assuming class_scores contains confidence scores)
        conf_scores, pred_boxes = torch.max(class_scores, dim=1)
        detected_images_regions = []
        for i in range(images.size(0)):
            if conf_scores[i] > confidence_threshold:  # Set a threshold for confidence score
                x, y, width, height = pred_boxes[i]  # Assuming pred_boxes contains predicted bounding boxes
                detected_images_regions.append((x.item(), y.item(), width.item(), height.item()))
        detected_regions.extend(detected_images_regions)

    avg_train_class_loss = train_class_loss / len(train_loader)
    avg_train_bbox_loss = train_bbox_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {total_loss.item():.4f}, "
          f"Train Class Loss: {avg_train_class_loss:.4f}, Train BBox Loss: {avg_train_bbox_loss:.4f}")

    scheduler.step()

    # Validation after each epoch
    model.eval()
    val_class_loss = 0.0
    val_bbox_loss = 0.0
    total_val_samples = 0
    detected_regions_val = []  # List to store the detected regions for each validation image

    with torch.no_grad():
        for val_images, val_targets, val_labels in val_loader:
            val_images, val_targets, val_labels = val_images.to(device), val_targets.to(device), val_labels.to(device)
            val_class_scores, val_bbox_regression = model(val_images)

            # Calculate validation losses
            val_class_loss += criterion(val_class_scores, val_labels).item()
            val_bbox_loss += bbox_criterion(val_bbox_regression, val_targets).item()

            total_val_samples += val_labels.size(0)

            # Post-processing to get detected regions in validation (assuming class_scores contains confidence scores)
            conf_scores_val, pred_boxes_val = torch.max(val_class_scores, dim=1)
            detected_images_regions_val = []
            for i in range(val_images.size(0)):
                if conf_scores_val[i] > confidence_threshold:  # Set a threshold for confidence score
                    x_val, y_val, width_val, height_val = pred_boxes_val[i]  # Assuming pred_boxes contains predicted bounding boxes
                    detected_images_regions_val.append((x_val.item(), y_val.item(), width_val.item(), height_val.item()))
            detected_regions_val.extend(detected_images_regions_val)

    # Average validation losses
    avg_val_class_loss = val_class_loss / total_val_samples
    avg_val_bbox_loss = val_bbox_loss / total_val_samples

    print(f"Epoch {epoch + 1}/{num_epochs}, Val Class Loss: {avg_val_class_loss:.4f}, Val BBox Loss: {avg_val_bbox_loss:.4f}")

    # Check for early stopping
    val_loss = avg_val_class_loss + avg_val_bbox_loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stopping_counter = 0
        patience =3

        # Save the best model
        torch.save(model.state_dict(), "best_model.pth")
    else:
        early_stopping_counter += 1

    if early_stopping_counter >= patience:
        print("Early stopping triggered. Training stopped.")
        break

# Training is complete
print("Training complete!")


TypeError: iteration over a 0-d tensor

In [None]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    train_class_loss = 0.0
    train_bbox_loss = 0.0
    detected_regions = []  # List to store the detected regions for each image

    for images, targets, labels in train_loader:
        images, targets, labels = images.to(device), targets.to(device), labels.to(device)

        optimizer.zero_grad()
        class_scores, bbox_regression = model(images)

        # Calculate losses
        class_loss = criterion(class_scores, labels)
        bbox_loss = bbox_criterion(bbox_regression, targets)
        total_loss = class_loss + bbox_loss

        # Backpropagation and optimization
        total_loss.backward()
        optimizer.step()

        train_class_loss += class_loss.item()
        train_bbox_loss += bbox_loss.item()

        # Post-processing to get detected regions (assuming class_scores contains confidence scores)
        conf_scores, pred_boxes = torch.max(class_scores, dim=1)
        detected_images_regions = []
        for i in range(images.size(0)):
            # Extract the indices of elements greater than the confidence threshold
            detected_indices = torch.nonzero(conf_scores[i] > confidence_threshold).squeeze(1)
            for idx in detected_indices:
                x, y, width, height = pred_boxes[i][idx]
                detected_images_regions.append((x.item(), y.item(), width.item(), height.item()))
        detected_regions.extend(detected_images_regions)

    avg_train_class_loss = train_class_loss / len(train_loader)
    avg_train_bbox_loss = train_bbox_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {total_loss.item():.4f}, "
          f"Train Class Loss: {avg_train_class_loss:.4f}, Train BBox Loss: {avg_train_bbox_loss:.4f}")

    scheduler.step()


In [None]:

# After the training loop, you can use the 'detected_regions' list for OCR on the training data

# Perform OCR on the detected number plate regions in the training data
train_ocr_results = ocr_on_number_plates(detected_regions, train_images)

# Display OCR results for the training data
for i, results in enumerate(train_ocr_results):
    print(f"OCR Results for Training Image {i+1}:")
    for (x, y, width, height, plate_text) in results:
        print(f"Plate Coordinates: (x={x}, y={y}), Width: {width}, Height: {height}")
        print("Plate Text:", plate_text)

In [None]:
model.eval()
test_class_loss = 0.0
test_bbox_loss = 0.0
total_test_samples = 0

with torch.no_grad():
    for images, targets, labels in test_loader:
        images, targets, labels = images.to(device), targets.to(device), labels.to(device)
        class_scores, bbox_regression = model(images)

        # Calculate test losses
        class_loss = criterion(class_scores, labels)
        bbox_loss = bbox_criterion(bbox_regression, targets)
        total_loss = class_loss + bbox_loss

        test_class_loss += class_loss.item()
        test_bbox_loss += bbox_loss.item()

        total_test_samples += labels.size(0)

# Average test losses
avg_test_class_loss = test_class_loss / total_test_samples
avg_test_bbox_loss = test_bbox_loss / total_test_samples

print(f"Test Class Loss: {avg_test_class_loss:.4f}, Test BBox Loss: {avg_test_bbox_loss:.4f}")
