In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Reshape, BatchNormalization, Dropout, Input, Lambda, Bidirectional, LSTM
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import ctc_batch_cost
from sklearn.model_selection import train_test_split


#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
#print("Available devices:", tf.config.list_physical_devices())

# Define paths
DATA_DIR = "c:\\mahidhar\\datasets"
TRAIN1_IMAGES_DIR = os.path.join(DATA_DIR, "C:\\Users\\viswa\\Downloads\\Licplatesdetection_train\\license_plates_detection_train")
TRAIN1_LABELS = os.path.join(DATA_DIR, "C:\\Users\\viswa\\Downloads\\Licplatesdetection_train.csv")
TRAIN2_IMAGES_DIR = os.path.join(DATA_DIR, "C:\\Users\\viswa\\Downloads\\Licplatesrecognition_train\\license_plates_recognition_train")
TRAIN2_LABELS = os.path.join(DATA_DIR, "C:\\Users\\viswa\\Downloads\\Licplatesrecognition_train.csv")
TEST_IMAGES_DIR = os.path.join(DATA_DIR, "C:\\Users\\viswa\\Downloads\\test\\test\\test")


# Resize dimensions
DETECTION_IMG_SIZE = (320, 640)  # Height x Width for detection
OCR_IMG_SIZE = (32, 128)  # Height x Width for OCR

# Character set for OCR
CHAR_LIST = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
CHAR_TO_INDEX = {char: i for i, char in enumerate(CHAR_LIST)}
NUM_CLASSES = len(CHAR_LIST) + 1  # +1 for CTC blank token

# Load Training Set 1 (Bounding Boxes)
def load_training_set1():
    if not os.path.exists(TRAIN1_LABELS):
        print(f"Error: Labels file {TRAIN1_LABELS} not found.")
        return None, None
    try:
        df = pd.read_csv(TRAIN1_LABELS)
        images, labels = [], []
        for idx, row in df.iterrows():
            img_path = os.path.join(TRAIN1_IMAGES_DIR, row['img_id'])
            if os.path.exists(img_path):
                img = cv2.imread(img_path)
                if img is None:
                    print(f"Warning: Could not load image {img_path}")
                    continue
                img = cv2.resize(img, DETECTION_IMG_SIZE[::-1])  # Width x Height
                images.append(img_to_array(img) / 255.0)
                labels.append([row['ymin'], row['xmin'], row['ymax'], row['xmax']])
            else:
                print(f"Warning: Missing file {img_path}")
        if not images:
            print("Error: No valid images loaded for detection training.")
            return None, None
        print(f"Loaded {len(images)} images for detection training.")
        return np.array(images), np.array(labels)
    except Exception as e:
        print(f"Error loading Training Set 1: {e}")
        return None, None

# Load Training Set 2 (Character Recognition)
def load_training_set2():
    if not os.path.exists(TRAIN2_LABELS):
        print(f"Error: Labels file {TRAIN2_LABELS} not found.")
        return None, None
    try:
        df = pd.read_csv(TRAIN2_LABELS)
        images, labels = [], []
        for idx, row in df.iterrows():
            img_path = os.path.join(TRAIN2_IMAGES_DIR, row['img_id'])
            if os.path.exists(img_path):
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    print(f"Warning: Could not load image {img_path}")
                    continue
                img = cv2.resize(img, OCR_IMG_SIZE[::-1])  # Width x Height
                images.append(np.expand_dims(img, axis=-1) / 255.0)
                labels.append(row['text'])
            else:
                print(f"Warning: Missing file {img_path}")
        if not images:
            print("Error: No valid images loaded for OCR training.")
            return None, None
        print(f"Loaded {len(images)} images for OCR training.")
        return np.array(images), labels
    except Exception as e:
        print(f"Error loading Training Set 2: {e}")
        return None, None

# Encode labels for OCR
def encode_labels(labels):
    max_len = max(len(text) for text in labels)
    encoded_labels = [[CHAR_TO_INDEX.get(c, -1) for c in text.upper()] for text in labels]
    for i, label in enumerate(encoded_labels):
        if -1 in label:
            print(f"Warning: Unknown characters in label '{labels[i]}' mapped to blank token.")
            encoded_labels[i] = [c if c != -1 else NUM_CLASSES - 1 for c in label]
    return pad_sequences(encoded_labels, maxlen=max_len, padding='post', value=NUM_CLASSES - 1)

# Load datasets
X_train1, y_train1 = load_training_set1()
X_train2, y_train2 = load_training_set2()

if X_train1 is None or X_train2 is None:
    print("Dataset loading failed. Exiting.")
    exit(1)

# Train License Plate Detection Model
detection_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(DETECTION_IMG_SIZE[0], DETECTION_IMG_SIZE[1], 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(4)  # Bounding box coordinates
])
detection_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
detection_model.fit(X_train1, y_train1, epochs=20, batch_size=32, validation_split=0.1, verbose=1)
detection_model.save("detection_model.h5")
print("Detection model training complete and saved as 'detection_model.h5'.")

# Train OCR Model with CTC Loss
max_len = max(len(text) for text in y_train2)
y_train2_encoded = encode_labels(y_train2)

# Define OCR model
input_layer = Input(shape=(OCR_IMG_SIZE[0], OCR_IMG_SIZE[1], 1), name='input')  # (32, 128, 1)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_layer)  # (32, 128, 32)
x = MaxPooling2D(pool_size=(2, 2))(x)  # (16, 64, 32)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)  # (16, 64, 64)
x = BatchNormalization()(x)  # (16, 64, 64)
x = MaxPooling2D(pool_size=(2, 2))(x)  # (8, 32, 64)

# Calculate feature dimension explicitly
feature_dim = (OCR_IMG_SIZE[0] // 4) * 64  # 8 * 64 = 512
x = Reshape(target_shape=(OCR_IMG_SIZE[1] // 4, feature_dim))(x)  # (32, 512)
x = Bidirectional(LSTM(128, return_sequences=True))(x)  # (32, 256)
x = Dropout(0.25)(x)  # (32, 256)
output_layer = Dense(NUM_CLASSES, activation='softmax', name='output')(x)  # (32, 37)

# CTC loss model
labels = Input(shape=(max_len,), name='labels')
input_length = Input(shape=(1,), name='input_length')
label_length = Input(shape=(1,), name='label_length')
ctc_loss = Lambda(lambda args: ctc_batch_cost(args[1], args[0], args[2], args[3]), name='ctc')([output_layer, labels, input_length, label_length])
ocr_training_model = Model(inputs=[input_layer, labels, input_length, label_length], outputs=ctc_loss)
ocr_training_model.compile(optimizer=Adam(learning_rate=0.001), loss=lambda y_true, y_pred: y_pred)

# Prepare training data
input_length_train = np.ones((len(X_train2), 1)) * (OCR_IMG_SIZE[1] // 4)  # 32 timesteps
label_length_train = np.array([[min(len(text), max_len)] for text in y_train2])

# Train OCR model
detection_model.fit(
    X_train1, y_train1, 
    epochs=20, 
    batch_size=8, 
    validation_split=0.1, 
    verbose=1, 
    steps_per_epoch=len(X_train1) // 32  # Ensure steps are defined
)


# Save models
ocr_training_model.save("ocr_training_model.h5")
ocr_prediction_model = Model(inputs=input_layer, outputs=output_layer)
ocr_prediction_model.save("ocr_prediction_model.h5")
print("OCR model training complete and saved as 'ocr_training_model.h5' and 'ocr_prediction_model.h5'.")


# Define the folder path where models will be saved
MODEL_SAVE_PATH = "saved_models"
os.makedirs(MODEL_SAVE_PATH, exist_ok=True)  # Create folder if it doesn't exist

# Save Detection Model
detection_model.save(os.path.join(MODEL_SAVE_PATH, "detection_model.h5"))

# Save OCR Training Model
ocr_training_model.save(os.path.join(MODEL_SAVE_PATH, "ocr_training_model.h5"))

# Save OCR Prediction Model
ocr_prediction_model.save(os.path.join(MODEL_SAVE_PATH, "ocr_prediction_model.h5"))

print(f"Models saved in '{MODEL_SAVE_PATH}' folder.")

import tensorflow as tf
import numpy as np

def decode_prediction(pred):
    """
    Decode CTC output from the OCR prediction model.
    Input:
        - pred: Dense tensor (batch_size, timesteps, num_classes) with softmax probabilities.
    Returns:
        - Decoded text as a string.
    """
    input_len = np.ones(pred.shape[0]) * pred.shape[1]  # timesteps = 32
    
    # Use CTC greedy decoder on the dense prediction tensor
    decoded, _ = tf.keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)

    # Ensure decoded is a list and get the first element
    if isinstance(decoded, (tuple, list)):
        decoded = decoded[0]  # Extract tensor if it's in a tuple/list

    # Convert tensor to numpy array (handling both SparseTensor and EagerTensor cases)
    if isinstance(decoded, tf.SparseTensor):
        decoded_dense = tf.sparse.to_dense(decoded, default_value=-1).numpy()
    else:
        decoded_dense = decoded.numpy()

    # Convert indices to characters
    result = []
    for seq in decoded_dense:
        chars = [CHAR_LIST[i] for i in seq if i != -1 and 0 <= i < len(CHAR_LIST)]
        result.append(''.join(chars))
    
    return result[0] if result else ""



print("Model training and setup complete!")

Loaded 900 images for detection training.
Loaded 900 images for OCR training.
Epoch 1/20


In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model

# Define paths
DATA_DIR = "E:\\mahidhar\\datasets"
TEST_IMAGES_DIR = os.path.join(DATA_DIR, "test")
MODEL_SAVE_PATH = "saved_models"

# Character set for OCR
CHAR_LIST = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Define the custom loss function
def custom_loss(y_true, y_pred):
    return tf.reduce_mean(tf.abs(y_true - y_pred))

# Load trained models
detection_model = load_model(
    os.path.join(MODEL_SAVE_PATH, "detection_model.h5"), 
    custom_objects={"custom_loss": custom_loss}
)

ocr_model = load_model(
    os.path.join(MODEL_SAVE_PATH, "ocr_prediction_model.h5"), 
    custom_objects={"custom_loss": custom_loss}
)

# Function to decode OCR predictions
def decode_prediction(pred):
    input_len = np.ones(pred.shape[0]) * pred.shape[1]  
    decoded, _ = tf.keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)
    if isinstance(decoded, (tuple, list)):
        decoded = decoded[0]  # Extract tensor if it's in a tuple/list
    decoded_dense = tf.sparse.to_dense(decoded, default_value=-1).numpy()
    result = []
    for seq in decoded_dense:
        chars = [CHAR_LIST[i] for i in seq if i != -1 and 0 <= i < len(CHAR_LIST)]
        result.append(''.join(chars))
    return result[0] if result else ""

# Function to detect the license plate
def predict_license_plate(image):
    image_resized = cv2.resize(image, (640, 320)) / 255.0  # Fix input shape
    image_resized = np.expand_dims(image_resized, axis=0)  # Add batch dimension
    
    bbox = detection_model.predict(image_resized)[0]  # Get bounding box prediction

    ymin, xmin, ymax, xmax = bbox

    ymin = int(ymin * 320)
    xmin = int(xmin * 640)
    ymax = int(ymax * 320)
    xmax = int(xmax * 640)

    print(f"Predicted BBox: ymin={ymin}, xmin={xmin}, ymax={ymax}, xmax={xmax}")

    return ymin, xmin, ymax, xmax

# Function to recognize text from cropped license plate
def recognize_text(plate_image):
    plate_image = cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    plate_image = cv2.resize(plate_image, (128, 32)) / 255.0  # Resize & normalize
    plate_image = np.expand_dims(plate_image, axis=(0, -1))  # Add batch & channel dimension

    predicted_text = ocr_model.predict(plate_image)
    return decode_prediction(predicted_text)

# Load test images
test_filenames = os.listdir(TEST_IMAGES_DIR)
X_test = []
for filename in test_filenames:
    img_path = os.path.join(TEST_IMAGES_DIR, filename)
    img = cv2.imread(img_path)
    if img is not None:
        X_test.append((filename, img))
    else:
        print(f"Warning: Could not read image {filename}")

# Process test images and save predictions
predictions = []
for filename, img in X_test:
    ymin, xmin, ymax, xmax = predict_license_plate(img)

    ymin, ymax = max(0, ymin), min(img.shape[0], ymax)
    xmin, xmax = max(0, xmin), min(img.shape[1], xmax)

    cropped_plate = img[ymin:ymax, xmin:xmax]  

    img_with_bbox = img.copy()
    cv2.rectangle(img_with_bbox, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)  
    cv2.imshow(f"Detected Plate - {filename}", img_with_bbox)  

    if cropped_plate.size == 0 or ymin == ymax or xmin == xmax:
        text_prediction = "ERROR"
    else:
        text_prediction = recognize_text(cropped_plate)

        cv2.putText(cropped_plate, text_prediction, (5, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
        cv2.imshow(f"OCR Output - {filename}", cropped_plate)  

    predictions.append((filename, text_prediction))

    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Save predictions to CSV
submission_df = pd.DataFrame(predictions, columns=["filename", "predicted_text"])
submission_df.to_csv("test_predictions.csv", index=False)

print("Test predictions saved in 'test_predictions.csv'")


OSError: Unable to open file (unable to open file: name = 'saved_models\detection_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)