In [1]:
import os
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.models as models
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, classification_report
from PIL import Image
from tqdm import tqdm 

In [2]:
# Data Preparation
data_dir = "./data/"
train_dir, test_dir = (os.path.join(data_dir, split) for split in ("train", "val"))

# Deterministic preprocessing shared by both splits (no random augmentation):
# resize to 224x224, convert to a tensor, then normalise each channel.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Training split: one sub-folder per class, shuffled by the loader.
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Held-out split (read from the "val" folder): iterated in a fixed order.
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [3]:
# Start from a ResNet-18 backbone with its pre-trained weights.
pretrained_resnet = models.resnet18(pretrained=True)

# Replace the stock classifier with a small two-layer head for character
# recognition. The layers stay positional so the state-dict keys are unchanged.
num_classes = 36  # 36 classes (0-9, A-Z)
head_layers = [
    nn.Linear(pretrained_resnet.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(512, num_classes),
]
pretrained_resnet.fc = nn.Sequential(*head_layers)



In [4]:
# The network to fine-tune, exposed under the generic name used by later cells.
model = pretrained_resnet

# Objective: cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()
# Optimiser: Adam over every parameter of the model (backbone and new head).
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Training
num_epochs = 10

for epoch in range(num_epochs):
    model.train()  # train mode, so the Dropout layer in the head is active
    running_loss = 0.0  # sum of per-sample losses over this epoch
    samples_seen = 0    # number of samples those losses were computed on
    for data, target in tqdm(train_loader):  # tqdm renders a per-batch progress bar
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # The criterion reports the mean over the batch, so weight it by the
        # batch size; otherwise a short final batch counts as much as a full one.
        n_batch_samples = target.size(0)
        running_loss += loss.item() * n_batch_samples
        samples_seen += n_batch_samples

    # Per-sample average loss for the epoch
    average_loss = running_loss / samples_seen

    # Report the epoch's average loss
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {average_loss:.4f}")

In [None]:
# Evaluation
test_predictions, test_labels = [], []

model.eval()  # inference mode for the model (e.g. the Dropout layer is disabled)
with torch.no_grad():  # no gradients are needed when only predicting
    for data, target in tqdm(test_loader):  # progress bar over evaluation batches
        output = model(data)
        # Index of the highest-scoring class for every sample in the batch
        predicted = output.argmax(dim=1)
        test_predictions.extend(predicted.cpu().numpy())
        test_labels.extend(target.cpu().numpy())

In [None]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, test_predictions)

# Classification report
class_names = train_dataset.classes
# Pass the full label set explicitly. Without `labels`, the report infers the
# labels from the data and raises ValueError when a class appears in neither the
# evaluation labels nor the predictions, because `target_names` would then be
# longer than the inferred label list.
class_indices = list(range(len(class_names)))
report = classification_report(
    test_labels,
    test_predictions,
    labels=class_indices,
    target_names=class_names,
)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

# To predict

In [6]:
# Rebuild the architecture, then restore the fine-tuned weights from disk.
# The backbone is created with its default arguments (no pre-trained weights
# requested): every parameter is overwritten by the checkpoint below, so
# fetching pre-trained weights first would be wasted work.
loaded_model = models.resnet18()
loaded_model.fc = nn.Sequential(
    nn.Linear(loaded_model.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, num_classes)
)  # Must match the head used during training so the state-dict keys line up

# NOTE: torch.load unpickles the file - only load checkpoints you trust.
# map_location="cpu" lets a checkpoint saved from a GPU load on a CPU-only machine.
checkpoint_state = torch.load('your_model_state_dict.pth', map_location="cpu")
loaded_model.load_state_dict(checkpoint_state)

<All keys matched successfully>

In [7]:
# NOTE(review): this whole cell is a bare string literal, i.e. disabled code;
# nothing inside it executes. It sketches single-character inference:
#   1. crop one bounding box out of `roi` and preview it with OpenCV,
#   2. apply the same resize / to-tensor / normalise pipeline used for training,
#   3. run `loaded_model` in eval mode under no_grad,
#   4. map the highest-scoring class index to a character ('0'-'9', 'A'-'Z').
# It cannot be re-enabled as-is: `cv2`, `roi` and `letters_bboxes` are not
# imported or defined in any visible cell - TODO confirm where they come from.
"""
bbox = letters_bboxes[5]
aux = roi[bbox[1]:bbox[1]+bbox[3], bbox[0]:bbox[0]+bbox[2]]
cv2.imshow('Image with Bounding Boxes', aux)
cv2.waitKey(0)
cv2.destroyAllWindows()

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
cv2_rgb_image = cv2.cvtColor(aux, cv2.COLOR_BGR2RGB)  # Convert to RGB format
pil_image = Image.fromarray(cv2_rgb_image)  # Convert to PIL image
preprocessed_image = transform(pil_image).unsqueeze(0)  # Preprocess and add batch dimension

_, uwu = cv2.threshold(cv2_rgb_image, 120, 255, cv2.THRESH_BINARY)
cv2.imshow('Image with Bounding Boxes', uwu)
cv2.waitKey(0)
cv2.destroyAllWindows()

with torch.no_grad():
    loaded_model.eval()
    output = loaded_model(preprocessed_image)

# Map the predicted class index to the corresponding letter (e.g., 0->'0', 1->'1', ..., 35->'Z')
class_names = [str(i) if i < 10 else chr(i + 55) for i in range(36)]
_, predicted = output.max(1)
predicted_letter = class_names[predicted.item()]
"""

"\nbbox = letters_bboxes[5]\naux = roi[bbox[1]:bbox[1]+bbox[3], bbox[0]:bbox[0]+bbox[2]]\ncv2.imshow('Image with Bounding Boxes', aux)\ncv2.waitKey(0)\ncv2.destroyAllWindows()\n"