Load CSV Files

In [4]:
import os

import cv2
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader

# Directory holding the annotation CSVs. Set the LICPLATES_DATA_DIR environment
# variable to run the notebook on another machine; the default is the original
# local folder, so an existing setup keeps working unchanged.
DATA_DIR = os.environ.get("LICPLATES_DATA_DIR", "D:\\Projects\\soulpage proj")

# Paths to the CSV files
detection_train_csv_path = os.path.join(DATA_DIR, "Licplatesdetection_train.csv")
recognition_train_csv_path = os.path.join(DATA_DIR, "Licplatesrecognition_train.csv")

# Load the CSV files
annotations_train1 = pd.read_csv(detection_train_csv_path)
annotations_train2 = pd.read_csv(recognition_train_csv_path)

# Fail early, with a readable message, if the files do not have the columns
# the rest of the notebook reads.
assert {"img_id", "ymin", "xmin", "ymax", "xmax"} <= set(annotations_train1.columns), \
    "detection CSV is missing one of: img_id, ymin, xmin, ymax, xmax"
assert {"img_id", "text"} <= set(annotations_train2.columns), \
    "recognition CSV is missing one of: img_id, text"

# Display first few annotations to understand the data structure
print("Annotations Train1 (License Plate Detection):")
print(annotations_train1.head())
print("\nAnnotations Train2 (License Plate Recognition):")
print(annotations_train2.head())


Annotations Train1 (License Plate Detection):
    img_id  ymin  xmin  ymax  xmax
0    1.jpg   276    94   326   169
1   10.jpg   311   395   344   444
2  100.jpg   406   263   450   434
3  101.jpg   283   363   315   494
4  102.jpg   139    42   280   222

Annotations Train2 (License Plate Recognition):
    img_id      text
0    0.jpg  117T3989
1    1.jpg  128T8086
2   10.jpg   94T3458
3  100.jpg  133T6719
4  101.jpg   68T5979


License Plate Detection Dataset

In [11]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import cv2

# annotations_train1 has the columns ['img_id', 'ymin', 'xmin', 'ymax', 'xmax']
# (corner coordinates). Add a 'bbox' column holding [x, y, width, height] lists.
# Built column-wise rather than with a row-wise apply, so it is vectorized and
# also behaves correctly on an empty frame.
_bbox_xywh = pd.DataFrame({
    'x': annotations_train1['xmin'],
    'y': annotations_train1['ymin'],
    'w': annotations_train1['xmax'] - annotations_train1['xmin'],
    'h': annotations_train1['ymax'] - annotations_train1['ymin'],
})
annotations_train1['bbox'] = pd.Series(
    _bbox_xywh.to_numpy().tolist(),
    index=annotations_train1.index,
    dtype=object,
)

class LicensePlateDataset(Dataset):
    """Detection dataset yielding ``(image, bbox)`` pairs.

    Parameters
    ----------
    img_dir : str
        Directory that contains the image files.
    annotations : pandas.DataFrame
        One row per image, with a file-name column and a ``bbox`` column.
    transform : callable, optional
        Applied to the float32 RGB image (H x W x 3, values scaled by 1/255).
    filename_col : str, optional
        Name of the column holding the image file name. When omitted,
        ``'filename'`` is used if present, otherwise ``'img_id'``.

    Raises
    ------
    KeyError
        At construction, if no file-name column can be found.
    """

    def __init__(self, img_dir, annotations, transform=None, filename_col=None):
        self.img_dir = img_dir
        self.annotations = annotations
        self.transform = transform
        self.filename_col = self._resolve_filename_col(annotations, filename_col)

    @staticmethod
    def _resolve_filename_col(annotations, filename_col):
        """Return the name of the column that stores image file names."""
        if filename_col is not None:
            candidates = (filename_col,)
        else:
            candidates = ('filename', 'img_id')
        for name in candidates:
            if name in annotations.columns:
                return name
        raise KeyError(
            f"No image file-name column found; tried {list(candidates)}, "
            f"available columns: {list(annotations.columns)}"
        )

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, bbox)``.

        Raises FileNotFoundError when the image file does not exist and
        ValueError when OpenCV cannot decode it.
        """
        row = self.annotations.iloc[idx]
        img_path = os.path.join(self.img_dir, str(row[self.filename_col]))

        # cv2.imread returns None instead of raising, so check explicitly and
        # report the offending path.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image not found: {img_path}")
        image = cv2.imread(img_path)
        if image is None:
            raise ValueError(f"Could not decode image: {img_path}")

        # Cast before dividing so the result stays float32; dividing the uint8
        # array by a Python float would give float64.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        bbox = np.array(row['bbox'], dtype=np.float32)

        # NOTE(review): only the image goes through `transform`; bbox is
        # returned exactly as stored, so a resizing transform leaves it in the
        # coordinates of the original image.
        if self.transform:
            image = self.transform(image)

        return image, bbox

# Detection preprocessing: convert the image to a tensor, then resize to 224x224.
# NOTE(review): the resize is applied to the image only; the bbox returned by
# LicensePlateDataset is not rescaled to the 224x224 frame.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224))
])

# Image directory. Set LICPLATES_DATA_DIR to point at another data root; the
# default resolves to the original local folder.
train1_images_path = os.path.join(
    os.environ.get("LICPLATES_DATA_DIR", "D:\\Projects\\soulpage proj"),
    "train1_images",
)
train_dataset = LicensePlateDataset(train1_images_path, annotations_train1, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)


  image = cv2.imread("D:\siva resume_coe\recent\29.jpg")


License Plate Recognition Dataset

In [12]:
class CharacterRecognitionDataset(Dataset):
    """Recognition dataset yielding ``(image, label)`` pairs.

    Parameters
    ----------
    img_dir : str
        Directory that contains the plate images.
    annotations : pandas.DataFrame
        One row per image, with a file-name column and a ``text`` column.
    transform : callable, optional
        Applied to the float32 grayscale image (224 x 224 x 1, scaled by 1/255).
    filename_col : str, optional
        Name of the column holding the image file name. When omitted,
        ``'filename'`` is used if present, otherwise ``'img_id'``.

    Raises
    ------
    KeyError
        At construction, if no file-name column can be found.
    """

    def __init__(self, img_dir, annotations, transform=None, filename_col=None):
        self.img_dir = img_dir
        self.annotations = annotations
        self.transform = transform
        self.filename_col = self._resolve_filename_col(annotations, filename_col)

    @staticmethod
    def _resolve_filename_col(annotations, filename_col):
        """Return the name of the column that stores image file names."""
        if filename_col is not None:
            candidates = (filename_col,)
        else:
            candidates = ('filename', 'img_id')
        for name in candidates:
            if name in annotations.columns:
                return name
        raise KeyError(
            f"No image file-name column found; tried {list(candidates)}, "
            f"available columns: {list(annotations.columns)}"
        )

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        ``label`` is the raw value of the ``text`` column. Raises
        FileNotFoundError when the image file does not exist and ValueError
        when OpenCV cannot decode it.
        """
        row = self.annotations.iloc[idx]
        img_path = os.path.join(self.img_dir, str(row[self.filename_col]))

        # cv2.imread returns None instead of raising, so check explicitly and
        # report the offending path.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image not found: {img_path}")
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Could not decode image: {img_path}")

        image = cv2.resize(image, (224, 224))
        # Add a trailing channel axis and keep the scaled image in float32
        # (dividing the uint8 array directly would give float64).
        image = np.expand_dims(image, axis=2).astype(np.float32) / 255.0
        label = row['text']

        if self.transform:
            image = self.transform(image)

        return image, label

# Recognition preprocessing: the dataset already resizes and scales the image,
# so the only remaining step is the conversion to a tensor.
transform_cr = transforms.Compose([
    transforms.ToTensor()
])

# Image directory. Set LICPLATES_DATA_DIR to point at another data root; the
# default resolves to the original local folder.
train2_images_path = os.path.join(
    os.environ.get("LICPLATES_DATA_DIR", "D:\\Projects\\soulpage proj"),
    "train2_images",
)
train_dataset_cr = CharacterRecognitionDataset(train2_images_path, annotations_train2, transform=transform_cr)
train_loader_cr = DataLoader(train_dataset_cr, batch_size=32, shuffle=True)



License Plate Detection Model Training

In [None]:
from transformers import DetrForObjectDetection, DetrImageProcessor
from transformers import Trainer, TrainingArguments

# Load the model and processor
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# Define training arguments.
# `evaluation_strategy` is intentionally not set: the Trainer in this notebook
# is built without an eval_dataset, so a per-epoch evaluation could not run.
# Leaving the argument out keeps the default, which is no evaluation.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=10,
    save_total_limit=2,
    # Samples are (image, bbox) tuples assembled by a custom collator, so
    # Trainer's automatic column pruning is switched off.
    remove_unused_columns=False,
)

# Custom collate function
def collate_fn(batch):
    """Collate ``(image, bbox)`` samples into a DETR training batch.

    Parameters
    ----------
    batch : sequence of (Tensor, array-like)
        Each item is an image tensor and the four numbers of its bounding box
        (or an ``(n, 4)`` array of boxes). All images must share one shape.

    Returns
    -------
    dict
        ``pixel_values``: float32 tensor of the stacked images.
        ``labels``: one dict per image with ``boxes`` (float32, shape
        ``(n, 4)``) and ``class_labels`` (int64, shape ``(n,)``, all zeros
        because there is a single object class).
    """
    # Force float32 so a float64 image (e.g. a uint8 array divided by 255.0)
    # does not reach the model as a DoubleTensor.
    pixel_values = torch.stack(
        [torch.as_tensor(item[0], dtype=torch.float32) for item in batch]
    )

    targets = []
    for item in batch:
        # as_tensor converts an ndarray directly; the original wrapped it in a
        # Python list first, which is a slow list-of-arrays conversion.
        boxes = torch.as_tensor(item[1], dtype=torch.float32).reshape(-1, 4)
        targets.append({
            # The DETR loss needs a class id for every box; without this key
            # the forward pass cannot compute the loss.
            "class_labels": torch.zeros(boxes.shape[0], dtype=torch.long),
            "boxes": boxes,
        })

    # NOTE(review): the box values are passed through unchanged. DETR is
    # documented to expect normalized (center_x, center_y, width, height);
    # confirm the dataset produces that format.
    return {"pixel_values": pixel_values, "labels": targets}

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_dataset,
)

# Train the model
trainer.train()


License Plate Recognition Model

In [None]:
from transformers import ViTForImageClassification, ViTImageProcessor

# Load the model and processor
model_cr = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224")
processor_cr = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")

# Define training arguments.
# `evaluation_strategy` is intentionally not set: the Trainer in this notebook
# is built without an eval_dataset, so a per-epoch evaluation could not run.
# Leaving the argument out keeps the default, which is no evaluation.
training_args_cr = TrainingArguments(
    output_dir="./results_cr",
    per_device_train_batch_size=32,
    num_train_epochs=10,
    save_total_limit=2,
    # Samples are (image, label) tuples assembled by a custom collator, so
    # Trainer's automatic column pruning is switched off.
    remove_unused_columns=False,
)

# Custom collate function for character recognition
def collate_fn_cr(batch):
    """Collate ``(image, label)`` samples for the recognition model.

    The image tensors are stacked along a new leading batch axis; the labels
    are gathered, in order, into a plain Python list.

    NOTE(review): labels are passed through exactly as the dataset returns
    them - confirm they are in the form the model's loss expects.
    """
    image_column, label_column = (list(column) for column in zip(*batch))
    collated = {"pixel_values": torch.stack(image_column)}
    collated["labels"] = label_column
    return collated

# Trainer setup for character recognition
# Collect the constructor arguments, then build the recognition Trainer
recognition_trainer_kwargs = {
    "model": model_cr,
    "args": training_args_cr,
    "data_collator": collate_fn_cr,
    "train_dataset": train_dataset_cr,
}
trainer_cr = Trainer(**recognition_trainer_kwargs)

# Run the training loop for the recognition model
trainer_cr.train()
