In [1]:
# Configure Kaggle API credentials for the `kaggle` command-line tool.
#
# Credentials are deliberately NOT stored in the notebook. They are read from
# the KAGGLE_USERNAME / KAGGLE_KEY environment variables, falling back to an
# interactive prompt when those are not set.
# NOTE: any API key that was previously hard-coded in this cell should be
# treated as leaked and regenerated from the Kaggle account settings page.
import getpass
import json
import os

username = os.environ.get('KAGGLE_USERNAME') or input('Kaggle username: ')
api_key = os.environ.get('KAGGLE_KEY') or getpass.getpass('Kaggle API key: ')

api_token = {"username":username,"key":api_key}

# Resolve the config location from the current user's home directory instead
# of assuming the notebook runs as root.
kaggle_directory = os.path.expanduser('~/.kaggle')
kaggle_config_path = os.path.join(kaggle_directory, 'kaggle.json')

# exist_ok=True keeps the cell re-runnable (plain `mkdir` fails with
# "File exists" on the second run).
os.makedirs(kaggle_directory, exist_ok=True)

# Create the file with owner-only permissions from the start so the key is
# never briefly readable by other users.
config_fd = os.open(kaggle_config_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(config_fd, 'w') as file:
    json.dump(api_token, file)

# The mode passed to os.open only applies to newly created files, so tighten
# the permissions explicitly in case kaggle.json already existed.
os.chmod(kaggle_config_path, 0o600)



mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [2]:
# Download the license-plate dataset archive from Kaggle into the current working directory.
# The recorded output shows the CLI skipping the download when a more recent local copy already exists.
!kaggle datasets download -d ronakgohil/license-plate-dataset


Dataset URL: https://www.kaggle.com/datasets/ronakgohil/license-plate-dataset
License(s): CC0-1.0
license-plate-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [3]:
!unzip -q license-plate-dataset.zip -d data

replace data/archive/classes.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [4]:
#verify that a GPU is visible to the runtime by printing the driver/device status table
!nvidia-smi

Sun Apr 13 00:49:30 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8             11W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
#import some libraries that may be useful

#standard libraries
import glob
import json
import os
import random

#data processing and visualization libraries
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image

#machine learning libraries
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

#PyTorch libraries
import torch
from torch.utils.data import Dataset
from torch.utils.data import random_split
from torchvision import transforms

#TensorFlow/ keras libraries
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [6]:
#define data paths
#paths are relative to the notebook's working directory; 'data' is the unzip target used earlier
#NOTE: the train_* names use singular 'image'/'label' while the val_* names use plural
#'images'/'labels' -- the loading cell references these exact names, so keep them in sync
data_directory= 'data/archive'
train_image_directory= os.path.join(data_directory, 'images/train')
val_images_directory= os.path.join(data_directory, 'images/val')
train_label_directory= os.path.join(data_directory, 'labels/train')
val_labels_directory= os.path.join(data_directory, 'labels/val')

In [7]:
#convert YOLO format annotations to bounding box coordinates
def convert_yolo_to_coor(label_path, image_width, image_height):
  """Convert a YOLO label file into pixel-space corner bounding boxes.

  Each non-blank line must hold exactly five whitespace-separated numbers,
  ``class x_center y_center width height``, where the four box values are
  fractions of the image size (they are multiplied by the image dimensions).

  Args:
    label_path: Path to the YOLO ``.txt`` label file.
    image_width: Image width in pixels, used to scale the x values.
    image_height: Image height in pixels, used to scale the y values.

  Returns:
    A list of ``[x_min, y_min, x_max, y_max]`` lists in pixels, one per
    annotation line in file order. Empty when the file has no annotations.

  Raises:
    ValueError: If a non-blank line does not contain exactly five numbers.
  """
  boxes= []

  with open(label_path, 'r') as f:
    for line_number, line in enumerate(f, start=1):
      fields= line.split()

      #blank lines (e.g. a trailing newline) carry no annotation
      if not fields:
        continue

      if len(fields) != 5:
        raise ValueError(
            f"{label_path}:{line_number}: expected 5 values "
            f"(class x_center y_center width height), got {len(fields)}")

      #the class id is parsed for validation but not needed for box geometry
      _, x_center, y_center, width, height= map(float, fields)

      #scale the normalized values to pixels
      x_center *= image_width
      y_center *= image_height
      width *= image_width
      height *= image_height

      #convert center/size representation to corner representation
      x_min= x_center - width / 2
      y_min= y_center - height / 2
      x_max= x_center + width / 2
      y_max= y_center + height / 2

      boxes.append([x_min, y_min, x_max, y_max])

  return boxes

In [8]:
#pair images with object locations
def load_dataset(image_directory, label_directory):
  """Load every ``*.jpg`` image together with its first annotated bounding box.

  For each image, the label file with the same base name and a ``.txt``
  extension is looked up in ``label_directory``. Samples that cannot be used
  (missing label file, unreadable image, or a label file with no boxes) are
  skipped with a warning instead of aborting the whole load.

  Args:
    image_directory: Directory containing the ``.jpg`` images.
    label_directory: Directory containing the YOLO ``.txt`` label files.

  Returns:
    A tuple ``(images, labels)`` of equal-length lists. ``images`` holds RGB
    arrays as returned by OpenCV; ``labels`` holds one
    ``[x_min, y_min, x_max, y_max]`` pixel box per image. Only the first box
    of each label file is kept.
  """
  import warnings  #local import: only needed to report skipped samples

  images= []
  labels= []
  #sorted so the sample order is deterministic across runs
  image_paths= sorted(glob.glob(os.path.join(image_directory, '*.jpg')))

  for image_path in image_paths:
    label_filename= os.path.splitext(os.path.basename(image_path))[0] + '.txt'
    label_path= os.path.join(label_directory, label_filename)

    #check the label first so unlabeled images are never decoded
    if not os.path.isfile(label_path):
      warnings.warn(f"Skipping {image_path}: label file {label_path} not found")
      continue

    #cv2.imread signals failure by returning None rather than raising
    image= cv2.imread(image_path)
    if image is None:
      warnings.warn(f"Skipping {image_path}: image could not be read")
      continue

    #OpenCV loads BGR; convert to RGB
    image= cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_height, image_width= image.shape[:2]

    boxes= convert_yolo_to_coor(label_path, image_width, image_height)
    if not boxes:
      warnings.warn(f"Skipping {image_path}: no bounding boxes in {label_path}")
      continue

    #only the first annotated box is used as the label for this image
    images.append(image)
    labels.append(boxes[0])

  return images, labels

In [9]:
#load in the training and validation data
#each call returns parallel lists: RGB image arrays and one pixel-space box per image
train_images, train_labels= load_dataset(train_image_directory, train_label_directory)
#image and label counts are printed side by side as a quick sanity check that they match
print(f"Loaded {len(train_images)} training images and {len(train_labels)} training labels")

val_images, val_labels= load_dataset(val_images_directory, val_labels_directory)
print(f"Loaded {len(val_images)} validation images and {len(val_labels)} validation labels")

Loaded 1526 training images and 1526 training labels
Loaded 169 validation images and 169 validation labels


In [13]:
#create a train, val, test split
#hold out 10% of the training folder as the test set; random_state is fixed so the split is reproducible
X_train, X_test, y_train, y_test= train_test_split(train_images,
                                                   train_labels,
                                                   test_size= 0.1,
                                                   random_state= 24)
#the dataset's own validation folder is used unchanged as the validation set
X_val, y_val= val_images, val_labels

#report the size of each subset
print(f"Training set {len(X_train)} images")
print(f"Validation set {len(X_val)} images")
print(f"Test set {len(X_test)} images")

Training set 1373 images
Validation set 169 images
Test set 153 images


# Preprocess Data

In [14]:
#preprocessing parameters
#IMAGE_SIZE is (width, height): it is passed to cv2.resize, and index 0 scales x coordinates while index 1 scales y coordinates
IMAGE_SIZE= (128, 128)
#BATCH_SIZE is not referenced in the cells shown here -- presumably consumed by a later training cell (TODO confirm)
BATCH_SIZE= 32

In [22]:
#resize one image, scale its pixels, and map its box into the resized frame
def preprocess_image_and_label(image, box, image_size=IMAGE_SIZE):
  """Resize an image and rescale its bounding box to match.

  Args:
    image: Image array whose first two dimensions are (height, width).
    box: Four values ``x_min, y_min, x_max, y_max`` in the original image's
      pixel coordinates.
    image_size: Target ``(width, height)`` handed to ``cv2.resize``.

  Returns:
    A tuple of the resized image divided by 255.0 and a NumPy array
    ``[x_min, y_min, x_max, y_max]`` expressed in the resized image's pixels.
  """
  source_height, source_width= image.shape[:2]

  #resize first, then bring pixel values down by a factor of 255
  resized= cv2.resize(image, image_size) / 255.0

  target_width= image_size[0]
  target_height= image_size[1]

  left, top, right, bottom= box

  #x values follow the width ratio, y values follow the height ratio
  scaled_box= np.array([
      left * target_width / source_width,
      top * target_height / source_height,
      right * target_width / source_width,
      bottom * target_height / source_height,
  ])

  return resized, scaled_box

In [23]:
#run the per-sample preprocessing over a whole subset
def preprocess_dataset(images, labels):
  """Preprocess every (image, box) pair and stack the results.

  Args:
    images: Sequence of image arrays.
    labels: Sequence of bounding boxes, paired with ``images`` by position.

  Returns:
    A tuple ``(images_array, labels_array)`` of NumPy arrays built from the
    outputs of ``preprocess_image_and_label`` for each pair, in input order.
  """
  #each entry is the (image, box) tuple produced for one sample
  processed_pairs= [
      preprocess_image_and_label(sample_image, sample_box)
      for sample_image, sample_box in zip(images, labels)
  ]

  image_stack= np.array([pair[0] for pair in processed_pairs])
  box_stack= np.array([pair[1] for pair in processed_pairs])

  return image_stack, box_stack

In [24]:
#preprocess all data subsets
#NOTE: this overwrites X_*/y_* in place, so the cell is not idempotent -- running it a
#second time without re-running the split cell feeds the already-resized arrays back
#through preprocessing and divides the pixel values by 255 again
X_train, y_train= preprocess_dataset(X_train, y_train)
X_val, y_val= preprocess_dataset(X_val, y_val)
X_test, y_test= preprocess_dataset(X_test, y_test)