In [None]:
!pip install transformers datasets torch torchvision pandas opencv-python matplotlib


In [None]:
! pip install flash_attn timm einops


In [None]:
! pip install pytesseract

In [None]:
!sudo apt-get install tesseract-ocr -y
!sudo apt-get install tesseract-ocr-ara  # Install Arabic language pack


In [None]:
import pandas as pd
import json
from datasets import DatasetDict, load_dataset
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from transformers import  AutoModelForCausalLM, AutoTokenizer, AutoImageProcessor, AutoProcessor
import torch
import os
from PIL import Image, ImageDraw, ImageFont
import torch
import numpy as np
import pytesseract

In [None]:



# Read the detection and recognition annotation tables into DataFrames.
detection_annotations_df, recognition_annotations_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/Licplatesdetection_train.csv',
        '/content/Licplatesrecognition_train.csv',
    )
)

# Preview the first rows of each table, detection first.
print(
    detection_annotations_df.head(),
    recognition_annotations_df.head(),
    sep="\n",
)

In [None]:

def convert_to_coco_format(df, category_id,
                           image_dir="/content/Licplatesdetection_train",
                           image_height=1024, image_width=1024):
    """Convert a bounding-box table into COCO-style ``images``/``annotations`` lists.

    Args:
        df: DataFrame with the columns ``img_id``, ``xmin``, ``ymin``, ``xmax``
            and ``ymax`` (one row per box).
        category_id: Value stored in every annotation's ``category_id``.
        image_dir: Directory prefix used to build each image's ``file_name``.
        image_height: Height recorded for every image entry.
        image_width: Width recorded for every image entry.

    Returns:
        ``(images, annotations)``. ``images`` holds one entry per distinct
        ``img_id`` in first-seen order; ``annotations`` holds one entry per row
        with ``bbox`` as ``[xmin, ymin, width, height]`` and ``id`` taken from
        the row's index label.
    """
    def _native(value):
        # NumPy scalars (which iterrows yields for all-numeric frames) are not
        # JSON serializable; .item() turns them into plain Python numbers.
        return value.item() if hasattr(value, "item") else value

    images = []
    annotations = []
    seen_image_ids = set()  # O(1) membership instead of rebuilding a list per row
    for idx, row in df.iterrows():
        image_id = _native(row['img_id'])
        if image_id not in seen_image_ids:
            seen_image_ids.add(image_id)
            images.append({
                "id": image_id,
                "file_name": f"{image_dir}/{image_id}",
                "height": image_height,
                "width": image_width,
            })

        xmin, ymin, xmax, ymax = (
            _native(row[column]) for column in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        box_width = xmax - xmin
        box_height = ymax - ymin
        annotations.append({
            "id": _native(idx),
            "image_id": image_id,
            "category_id": category_id,
            "bbox": [xmin, ymin, box_width, box_height],
            "area": box_width * box_height,
            "iscrowd": 0,
        })
    return images, annotations

# Detection boxes are exported under category id 1 (assumed).
detection_images, detection_annotations = convert_to_coco_format(
    detection_annotations_df, category_id=1
)

# Recognition data (category id 2, assumed): every CSV row yields one image
# entry and one plate-text entry, in row order.
recognition_images = [
    {
        "id": plate_row['img_id'],
        "file_name": f"/content/Licplatesrecognition_train/{plate_row['img_id']}",
        "height": 1024,
        "width": 1024,
    }
    for _, plate_row in recognition_annotations_df.iterrows()
]
recognition_annotations = [
    {
        "image_id": plate_row['img_id'],
        "plate_text": plate_row['text'],
    }
    for _, plate_row in recognition_annotations_df.iterrows()
]

# Assemble the two payloads, then write each one to its JSON file.
detection_coco_data = {
    "images": detection_images,
    "annotations": detection_annotations,
    "categories": [{"id": 1, "name": "detection"}],
}
recognition_coco_data = {
    "images": recognition_images,
    "annotations": recognition_annotations,
    "categories": [{"id": 2, "name": "recognition"}],
}

for coco_path, coco_payload in (
    ('detection_annotations_coco.json', detection_coco_data),
    ('recognition_annotations_coco.json', recognition_coco_data),
):
    with open(coco_path, 'w') as f:
        json.dump(coco_payload, f)


In [None]:

# Read both exported JSON files back in as plain Python dictionaries.
with open('detection_annotations_coco.json', 'r') as detection_file, \
        open('recognition_annotations_coco.json', 'r') as recognition_file:
    detection_dataset = json.load(detection_file)
    recognition_dataset = json.load(recognition_file)

# detection_dataset and recognition_dataset are now available for further processing


In [None]:


# Parse the "annotations" list of each exported JSON file as a `datasets` train split.
detection_dataset = load_dataset(
    'json',
    data_files={'train': '/content/detection_annotations_coco.json'},
    field='annotations',
)
recognition_dataset = load_dataset(
    'json',
    data_files={'train': '/content/recognition_annotations_coco.json'},
    field='annotations',
)

# Expose both train splits side by side under task-specific keys.
dataset_dict = DatasetDict(
    detection=detection_dataset['train'],
    recognition=recognition_dataset['train'],
)


/content/Licplatesdetection_train

In [None]:
# Function to preprocess the recognition dataset
def preprocess_recognition(image_path, processor, size=(1024, 1024)):
    """Load an image file, normalize it to a float tensor and run ``processor`` on it.

    Args:
        image_path: Path of the image file to open.
        processor: Callable invoked as
            ``processor(images=..., do_rescale=False, return_tensors="pt")``.
        size: ``(width, height)`` the image is resized to before conversion.
            Defaults to the previously hard-coded ``(1024, 1024)``.

    Returns:
        Whatever ``processor`` returns for the prepared tensor.
    """
    # The context manager releases the file handle even if decoding fails;
    # convert() hands back a loaded RGB image that stays usable afterwards.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    image = image.resize(size)

    # HWC uint8 pixels -> CHW float tensor scaled to [0, 1].
    image_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).float() / 255.0

    # The tensor is already in [0, 1], so the processor is told not to rescale again.
    encoded_inputs = processor(images=image_tensor, do_rescale=False, return_tensors="pt")

    return encoded_inputs





In [None]:
# Load the model and its processor
model_name = "microsoft/Florence-2-large"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).eval()
# A single AutoProcessor is loaded; a separate AutoImageProcessor assigned to the
# same name would simply be overwritten here, so that extra load is omitted.
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# Move model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The trailing semicolon stops the notebook from printing the whole module repr.
model.to(device);




In [None]:

def predict_license_plates(encoded_inputs):
    """Decode text from one forward pass of the notebook-level ``model``.

    Args:
        encoded_inputs: Mapping with a ``'pixel_values'`` tensor.

    Returns:
        The strings produced by ``processor.tokenizer.batch_decode`` for the
        arg-max token id at every output position (special tokens skipped).

    Relies on the notebook-level ``model``, ``processor`` and ``device``.
    NOTE(review): the model is called with ``pixel_values`` only; confirm the
    loaded checkpoint accepts a forward pass without text/decoder inputs.
    """
    with torch.no_grad():
        logits = model(pixel_values=encoded_inputs['pixel_values'].to(device)).logits

    # Most likely token per position, then back to text.
    token_ids = logits.argmax(dim=-1)
    return processor.tokenizer.batch_decode(token_ids, skip_special_tokens=True)


In [None]:
# Define the prediction function
def run_example(image, task_prompt, model, processor, device):
    """Run ``task_prompt`` on ``image`` and return the post-processed answer.

    Args:
        image: Image object exposing ``width`` and ``height``.
        task_prompt: Prompt text passed to the processor and to post-processing.
        model: Object providing ``generate``.
        processor: Object that encodes the inputs, decodes generated ids and
            provides ``post_process_generation``.
        device: Device every encoded input is moved to.

    Returns:
        The parsed answer dict. When it has an ``'<OD>'`` entry, that entry is
        replaced by one keeping only the boxes whose label equals
        ``'vehicle registration plate'`` (case-insensitive).
    """
    encoded = processor(text=task_prompt, images=image, return_tensors="pt")
    on_device = {name: tensor.to(device) for name, tensor in encoded.items()}

    generated_ids = model.generate(
        input_ids=on_device["input_ids"],
        pixel_values=on_device["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )

    decoded = processor.batch_decode(generated_ids, skip_special_tokens=False)
    parsed_answer = processor.post_process_generation(
        decoded[0],
        task=task_prompt,
        image_size=(image.width, image.height),
    )

    # Nothing to filter unless object-detection output is present.
    if '<OD>' not in parsed_answer:
        return parsed_answer

    detections = parsed_answer['<OD>']
    plate_pairs = [
        (bbox, label)
        for bbox, label in zip(detections['bboxes'], detections['labels'])
        if label.lower() == 'vehicle registration plate'
    ]
    parsed_answer['<OD>'] = {
        'bboxes': [bbox for bbox, _ in plate_pairs],
        'labels': [label for _, label in plate_pairs],
    }
    return parsed_answer

In [None]:
# Function to plot bounding boxes on an image
def plot_bbox(image, data, task='<OD>'):
    """Show ``image`` with the labelled bounding boxes stored under ``data[task]``.

    Args:
        image: Image passed to ``Axes.imshow``.
        data: Result dict; ``data[task]`` is expected to hold parallel
            ``'bboxes'`` (``x1, y1, x2, y2``) and ``'labels'`` lists.
        task: Key of the detections inside ``data``. Defaults to ``'<OD>'``.

    If ``data`` has no entry for ``task`` the image is shown without boxes
    instead of raising ``KeyError``.
    """
    fig, ax = plt.subplots()
    ax.imshow(image)

    detections = data.get(task) or {}
    for bbox, label in zip(detections.get('bboxes', []), detections.get('labels', [])):
        x1, y1, x2, y2 = bbox
        # Rectangle takes the top-left corner plus width and height.
        rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        # Draw on this Axes explicitly rather than on pyplot's "current" axes.
        ax.text(x1, y1, label, color='white', fontsize=8, bbox=dict(facecolor='red', alpha=0.5))

    # Remove the axis ticks and labels
    ax.axis('off')

    plt.show()

In [None]:
def extract_text_from_image(image, bbox, lang=None, config='--psm 13'):
    """OCR the region of ``image`` described by ``bbox``.

    Args:
        image: Image exposing ``width``, ``height`` and ``crop``.
        bbox: ``(x1, y1, x2, y2)``; values are truncated to integers.
        lang: Tesseract language code(s) forwarded to pytesseract, e.g.
            ``'ara'`` or ``'ara+eng'``. ``None`` keeps pytesseract's default.
        config: Extra Tesseract options. Defaults to ``'--psm 13'``.

    Returns:
        The recognized text with surrounding whitespace stripped, or ``""``
        when the box has no area inside the image.
    """
    x1, y1, x2, y2 = map(int, bbox)

    # Clamp to the image bounds so the crop never contains area outside the picture.
    left, top = max(x1, 0), max(y1, 0)
    right, bottom = min(x2, image.width), min(y2, image.height)

    # An empty region cannot be OCR'd; report "no text" instead of failing.
    if right <= left or bottom <= top:
        return ""

    cropped_image = image.crop((left, top, right, bottom))
    text = pytesseract.image_to_string(cropped_image, lang=lang, config=config)
    return text.strip()


In [None]:
def display_image_with_bboxes(image, bboxes, texts, font_path=None, font_size=14):
    """Show ``image`` with red boxes and their recognized texts drawn on top.

    Args:
        image: PIL image to annotate. It is copied first, so the caller's
            image is not modified.
        bboxes: Iterable of ``(x1, y1, x2, y2)`` boxes; values are truncated
            to integers.
        texts: Iterable of strings, paired with ``bboxes`` by position.
        font_path: Optional TrueType font file (e.g. one covering Arabic
            glyphs). When omitted or unreadable, PIL's default font is used.
        font_size: Point size used with ``font_path``.
    """
    # Draw on a copy: ImageDraw paints directly into the image it is given.
    canvas = image.copy()
    draw = ImageDraw.Draw(canvas)

    font = None
    if font_path is not None:
        try:
            font = ImageFont.truetype(font_path, size=font_size)
        except OSError:
            font = None  # unreadable font file: fall back below
    if font is None:
        font = ImageFont.load_default()

    for bbox, text in zip(bboxes, texts):
        x1, y1, x2, y2 = map(int, bbox)
        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
        try:
            draw.text((x1, y1), text, fill="red", font=font)
        except UnicodeEncodeError as e:
            # Best effort: keep the box and report the text the font could not render.
            print(f"UnicodeEncodeError: {e} | Text: {text}")

    plt.imshow(canvas)
    plt.axis('off')
    plt.show()

In [None]:
# Define the evaluation function
def evaluate_model_on_test_data(model, processor, test_data_dir, task_prompt, device):
    """Run detection + OCR on every image in ``test_data_dir`` and display each result.

    Args:
        model, processor, device: Forwarded unchanged to ``run_example``.
        test_data_dir: Directory scanned (non-recursively) for ``.jpg``,
            ``.jpeg`` and ``.png`` files; the extension match ignores case.
        task_prompt: Prompt forwarded to ``run_example``.

    Returns:
        A list of ``(file_name, result, license_plate_texts)`` tuples in sorted
        file-name order, where ``result`` is the dict from ``run_example`` and
        ``license_plate_texts`` has one OCR string per box under ``'<OD>'``.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    results = []

    # os.listdir returns entries in arbitrary order; sorting keeps runs reproducible.
    for file_name in sorted(os.listdir(test_data_dir)):
        if not file_name.lower().endswith(image_suffixes):
            continue

        file_path = os.path.join(test_data_dir, file_name)
        print(f"Evaluating file: {file_path}")
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB")

        # Get predictions
        result = run_example(image, task_prompt, model, processor, device)
        print(f"Parsed Answer: {result}")

        # A result without an '<OD>' entry is treated as "no plates found".
        bboxes = (result.get('<OD>') or {}).get('bboxes', [])

        # Extract text from license plates
        license_plate_texts = [extract_text_from_image(image, bbox) for bbox in bboxes]

        # Display the image with bounding boxes and detected text
        display_image_with_bboxes(image, bboxes, license_plate_texts)

        results.append((file_name, result, license_plate_texts))

    return results

In [None]:

# Example mapping of labels to columns: placeholder names "class_0".."class_9"
# map to columns 0..9.
class_to_index = {f"class_{column}": column for column in range(10)}
# Assuming "vehicle registration plate" is class_9, it shares column 9.
class_to_index["vehicle registration plate"] = 9

In [None]:
# Where the test images live, and the task prompt to run on each of them
test_data_dir = "/content/test"
task_prompt = '<OD>'

# Run detection + OCR over the whole test directory
results = evaluate_model_on_test_data(model, processor, test_data_dir, task_prompt, device)

# Empty submission table indexed by image id: columns 0-9 plus the OCR text
submission_df = pd.DataFrame(
    columns=["id", *range(10), "license_plate_text"]
).set_index("id")

In [None]:
# Build one record per evaluated image, then create the DataFrame in a single
# step. The table is rebuilt from `results` every time, so this cell can be
# re-run safely (no in-place index changes carried over between runs).
submission_records = []
for img_id, result, texts in results:
    row = np.zeros(10, dtype=int)
    for label in (result.get("<OD>") or {}).get("labels", []):
        # Lower-case before the lookup: the plate filter in run_example is
        # case-insensitive, while the keys of class_to_index are lower-case.
        column = class_to_index.get(label.lower())
        if column is not None:
            row[column] = 1

    record = {"id": img_id}
    record.update({column: int(flag) for column, flag in enumerate(row)})
    # Joining an empty list already yields "", so no special case is needed.
    record["license_plate_text"] = ", ".join(texts)
    submission_records.append(record)

submission_df = pd.DataFrame(
    submission_records,
    columns=["id", *range(10), "license_plate_text"],
)

# Save the DataFrame to a CSV file
submission_file_path = "/content/submission.csv"
submission_df.to_csv(submission_file_path, index=False)

print(f"Submission file saved to {submission_file_path}")

In [None]:
def calculate_accuracy(model, processor, test_data_dir, task_prompt, device):
    """Report the share of test images in which a license plate was detected.

    No ground truth is consulted: an image counts as "correct" when the
    ``'<OD>'`` labels returned by ``run_example`` contain
    ``'vehicle registration plate'`` (case-insensitive).

    Args:
        model, processor, device: Forwarded unchanged to ``run_example``.
        test_data_dir: Directory scanned (non-recursively) for ``.jpg``,
            ``.jpeg`` and ``.png`` files; the extension match ignores case.
        task_prompt: Prompt forwarded to ``run_example``.

    Returns:
        ``correct / total`` as a float, or ``None`` when the directory
        contains no matching image.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    correct_predictions = 0
    total_predictions = 0

    # Sorted so the progress output is the same on every run.
    for file_name in sorted(os.listdir(test_data_dir)):
        if not file_name.lower().endswith(image_suffixes):
            continue

        file_path = os.path.join(test_data_dir, file_name)
        print(f"Evaluating file: {file_path}")
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB")

        # Get predictions
        result = run_example(image, task_prompt, model, processor, device)

        labels = (result.get('<OD>') or {}).get('labels', [])
        if any(label.lower() == 'vehicle registration plate' for label in labels):
            correct_predictions += 1
        total_predictions += 1

    if total_predictions == 0:
        return None

    accuracy = correct_predictions / total_predictions
    print(f"Accuracy: {accuracy:.2%} ({correct_predictions}/{total_predictions} correct predictions)")
    return accuracy

# Example usage: score the images under the test directory
test_data_dir = "/content/test/"
accuracy = calculate_accuracy(model, processor, test_data_dir, task_prompt, device)

# None means no image was evaluated; otherwise show the overall figure.
if accuracy is None:
    print("No predictions made or no test data found.")
else:
    print(f"Overall Accuracy: {accuracy:.2%}")


In [None]:
# Create the output directory first: torch.save writes to the given path and
# fails if the parent folder does not exist yet.
os.makedirs('/content/models', exist_ok=True)

# Save the model weights (state_dict only)
torch.save(model.state_dict(), '/content/models/model.pth')


# Save processor
processor.save_pretrained('/content/models/')
