In [31]:
import os
import zipfile

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential

In [32]:
# Unpack the detection training archive into a folder named after it.
with zipfile.ZipFile('Licplatesdetection_train.zip', mode='r') as detection_archive:
    detection_archive.extractall(path='Licplatesdetection_train')

In [33]:
# Unpack the recognition training archive into a folder named after it.
with zipfile.ZipFile('Licplatesrecognition_train.zip', mode='r') as recognition_archive:
    recognition_archive.extractall(path='Licplatesrecognition_train')

In [35]:
# Load the two annotation tables: bounding boxes for detection and plate text
# for recognition.
detection_annotations = pd.read_csv(
    filepath_or_buffer='Licplatesdetection_train.csv',
)
recognition_annotations = pd.read_csv(
    filepath_or_buffer='Licplatesrecognition_train.csv',
)

In [36]:
# Display the detection annotations table (img_id plus the ymin/xmin/ymax/xmax box columns).
detection_annotations

Unnamed: 0,img_id,ymin,xmin,ymax,xmax
0,1.jpg,276,94,326,169
1,10.jpg,311,395,344,444
2,100.jpg,406,263,450,434
3,101.jpg,283,363,315,494
4,102.jpg,139,42,280,222
...,...,...,...,...,...
895,95.jpg,426,34,508,140
896,96.jpg,356,378,457,548
897,97.jpg,229,149,283,217
898,98.jpg,272,252,300,383


In [37]:
# Display the recognition annotations table (img_id plus the plate text).
recognition_annotations

Unnamed: 0,img_id,text
0,0.jpg,117T3989
1,1.jpg,128T8086
2,10.jpg,94T3458
3,100.jpg,133T6719
4,101.jpg,68T5979
...,...,...
895,95.jpg,39T8707
896,96.jpg,92T589
897,97.jpg,180T706
898,98.jpg,87T7369


In [38]:
# Print the exact column labels of the detection annotations.
print("Detection annotations columns:", detection_annotations.columns)

Detection annotations columns: Index(['img_id', 'ymin', 'xmin', 'ymax', 'xmax'], dtype='object')


In [39]:
# Print the exact column labels of the recognition annotations.
print("Recognition annotations columns:", recognition_annotations.columns)

Recognition annotations columns: Index(['img_id', 'text'], dtype='object')


In [40]:
# Remove leading/trailing whitespace from the detection table's column labels
# so that later lookups by exact name cannot miss because of padding.
detection_annotations.columns = detection_annotations.columns.str.strip()

In [41]:
# Remove leading/trailing whitespace from the recognition table's column labels
# so that later lookups by exact name cannot miss because of padding.
recognition_annotations.columns = recognition_annotations.columns.str.strip()

In [42]:
# Directories holding the images; these are the same names the two zip
# archives were extracted into.
detection_image_dir = 'Licplatesdetection_train'
recognition_image_dir = 'Licplatesrecognition_train'

In [43]:
# Name of the column that holds the image file name in each annotation table.
filename_column_detection = 'img_id'  
filename_column_recognition = 'img_id'

In [44]:
# Turn every annotated file name into a path inside the matching image directory.
detection_image_paths = [
    os.path.join(detection_image_dir, file_name)
    for file_name in detection_annotations[filename_column_detection]
]
recognition_image_paths = [
    os.path.join(recognition_image_dir, file_name)
    for file_name in recognition_annotations[filename_column_recognition]
]

In [45]:
def load_images(image_paths):
    """Read images from disk and return them as RGB arrays.

    Parameters
    ----------
    image_paths : iterable of str
        Paths handed to ``cv2.imread`` one by one.

    Returns
    -------
    list
        One RGB image per path that could be read, in input order. Paths that
        ``cv2.imread`` cannot read are skipped, so the result can be shorter
        than ``image_paths``.

    Warns
    -----
    UserWarning
        When at least one path was skipped. A skipped file means the returned
        list no longer lines up index-by-index with the paths (and therefore
        with any annotation rows the paths were built from).
    """
    import warnings

    images = []
    unreadable = []
    for img_path in image_paths:
        img = cv2.imread(img_path)  # returns None instead of raising on failure
        if img is None:
            unreadable.append(img_path)
            continue
        # OpenCV decodes to BGR; convert to the RGB order used downstream.
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    if unreadable:
        # Surface the drop instead of silently returning a shorter list.
        warnings.warn(
            f"load_images: skipped {len(unreadable)} unreadable image(s), "
            f"e.g. {unreadable[:5]}; the result is no longer aligned with the input paths",
            stacklevel=2,
        )
    return images

In [46]:
# Load every detection image as an RGB array; files that cannot be read are
# skipped by load_images, so the list may be shorter than the path list.
detection_images = load_images(detection_image_paths)


In [47]:
# Load every recognition image as an RGB array; files that cannot be read are
# skipped by load_images, so the list may be shorter than the path list.
recognition_images = load_images(recognition_image_paths)

In [48]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [49]:
# Resize an image and scale its pixel values by 1/255.
def preprocess_image(image, target_size):
    """Return ``image`` resized to ``target_size`` with values divided by 255.

    Parameters
    ----------
    image : array
        Image passed straight to ``cv2.resize``.
    target_size : tuple
        Size argument forwarded to ``cv2.resize``.

    Returns
    -------
    array
        The resized image divided by 255.0 (so 8-bit input ends up in [0, 1]).
    """
    resized = cv2.resize(image, target_size)
    return resized / 255.0


In [50]:
# Resize every loaded image to 224x224 and scale it to [0, 1], matching the
# (224, 224, 3) input of the detection CNN.
# NOTE(review): the bounding boxes in detection_annotations are not rescaled
# here and the displayed rows contain coordinates above 224, so labels and
# resized images appear to be in different coordinate systems — confirm and
# rescale the boxes before training.
# NOTE(review): both lists are overwritten in place, so re-running this cell
# divides the pixel values by 255 a second time.
detection_images = [
    preprocess_image(image=raw_image, target_size=(224, 224))
    for raw_image in detection_images
]
recognition_images = [
    preprocess_image(image=raw_image, target_size=(224, 224))
    for raw_image in recognition_images
]

In [None]:
#  Prepare Data for Training

In [52]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the detection samples for testing; the fixed seed keeps the
# split reproducible.
detection_boxes = detection_annotations[['xmin', 'ymin', 'xmax', 'ymax']]
(
    detection_train_images,
    detection_test_images,
    detection_train_labels,
    detection_test_labels,
) = train_test_split(detection_images, detection_boxes, test_size=0.2, random_state=42)

In [None]:
# Split recognition data: hold out 20% for testing with a fixed seed.
# The plate string is stored in the 'text' column of recognition_annotations
# (its columns are 'img_id' and 'text'); there is no 'license_plate_text' column.
(
    recognition_train_images,
    recognition_test_images,
    recognition_train_labels,
    recognition_test_labels,
) = train_test_split(
    recognition_images, recognition_annotations['text'], test_size=0.2, random_state=42)

In [None]:
# Build a simple CNN for detection: two conv/pool stages followed by a dense
# head that regresses the four box coordinates from a 224x224 RGB image.
detection_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(4),  # Output: 4 coordinates (xmin, ymin, xmax, ymax)
])

# Plain regression setup: mean squared error over the four coordinates.
detection_model.compile(optimizer='adam', loss='mean_squared_error')
detection_model.summary()

# Train the detection model. The arrays are built as float32 up front so the
# stacked image tensor is half the size of the float64 default; 20% of the
# training data is used for validation.
detection_model.fit(
    np.array(detection_train_images, dtype=np.float32),
    np.array(detection_train_labels, dtype=np.float32),
    epochs=10,
    validation_split=0.2,
)

In [None]:
def extract_license_plate(image, bbox):
    """Crop the region described by ``bbox`` out of ``image``.

    Parameters
    ----------
    image : array
        Image whose ``shape`` starts with (height, width).
    bbox : sequence of four numbers
        ``(xmin, ymin, xmax, ymax)`` in pixel coordinates. Floats (e.g. raw
        model predictions) are rounded to the nearest integer.

    Returns
    -------
    array
        ``image[ymin:ymax, xmin:xmax]`` after rounding and clamping the box to
        the image bounds. The crop is empty when the box has no area inside
        the image.
    """
    height, width = image.shape[:2]
    # Slice indices must be integers; model outputs arrive as floats.
    xmin, ymin, xmax, ymax = (int(round(float(value))) for value in bbox)
    # Clamp to the image so negative coordinates cannot wrap around to the
    # far edge through Python's negative indexing.
    xmin = min(max(xmin, 0), width)
    xmax = min(max(xmax, 0), width)
    ymin = min(max(ymin, 0), height)
    ymax = min(max(ymax, 0), height)
    return image[ymin:ymax, xmin:xmax]

# Predict one box per held-out detection image, then crop each image to its
# predicted box.
predicted_boxes = detection_model.predict(np.array(detection_test_images))
detected_plates = [
    extract_license_plate(test_image, predicted_box)
    for test_image, predicted_box in zip(detection_test_images, predicted_boxes)
]


In [None]:
import pytesseract

# Recognize text using Tesseract OCR.
# The plates were cropped from images scaled to float [0, 1]; cv2.cvtColor
# does not accept float64 input and OCR expects 8-bit pixels, so each crop is
# converted back to uint8 first. Empty crops (a predicted box with no area)
# cannot be converted and yield an empty string instead.
recognized_texts = []
for plate in detected_plates:
    if plate.size == 0:
        recognized_texts.append('')
        continue
    if plate.dtype != np.uint8:
        plate = (np.clip(plate, 0.0, 1.0) * 255).astype(np.uint8)
    gray_plate = cv2.cvtColor(plate, cv2.COLOR_RGB2GRAY)
    # Clean the recognized text: strip surrounding whitespace/newlines.
    recognized_texts.append(pytesseract.image_to_string(gray_plate).strip())


In [None]:
from sklearn.metrics import accuracy_score

# Evaluate detection model (IoU or similar metric) — not implemented here.
# Evaluate recognition: exact-match accuracy between the true plate strings
# and the OCR output.
# NOTE(review): recognized_texts is produced from the detection test images
# while the labels come from the recognition split — confirm both refer to
# the same samples in the same order.
recognition_accuracy = accuracy_score(
    y_true=recognition_test_labels,
    y_pred=recognized_texts,
)
print(f'Recognition Model Accuracy: {recognition_accuracy}')


In [None]:
# recognized_texts holds one entry per held-out detection image, so the image
# ids must come from that same split (its labels keep the original row index)
# rather than from every row of the annotation table.
test_img_ids = detection_annotations.loc[detection_test_labels.index, 'img_id'].reset_index(drop=True)
if len(test_img_ids) != len(recognized_texts):
    raise ValueError(
        f"{len(recognized_texts)} recognized texts for {len(test_img_ids)} test images"
    )

submission = pd.DataFrame({'img_id': test_img_ids, 'license_plate_text': recognized_texts})

# Write the submission without the index column.
submission.to_csv('SampleSubmission.csv', index=False)
