In [None]:
!pip install google-cloud-vision



In [None]:
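# SECURITY: a hard-coded Google API key was removed from this cell. Revoke the exposed key and load credentials from the environment or a secret manager instead of committing them.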

In [None]:
import os
# Set GOOGLE_APPLICATION_CREDENTIALS for this process so the Vision client
# created later can locate the credentials JSON file.
# NOTE(review): the path is hard-coded under /content (looks like a Colab
# upload location) — confirm it exists before running elsewhere.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/qyyim-436618-1805f9b72f40.json"

In [None]:
# Import necessary libraries
from google.cloud import vision
import io
import re

# Function to detect text using Google Vision API
def detect_text(image_path):
    """Run Google Vision text detection on an image file and return its text.

    Args:
        image_path: Filesystem path of the image to analyse.

    Returns:
        The ``description`` of the first text annotation in the response,
        or ``''`` when the response carries no text annotations.

    Raises:
        Exception: Carrying the API's error message when the response
            reports one.
    """
    annotator = vision.ImageAnnotatorClient()

    # The API takes the raw image bytes, so load the whole file up front.
    with io.open(image_path, 'rb') as handle:
        payload = handle.read()

    request_image = vision.Image(content=payload)
    result = annotator.text_detection(image=request_image)
    annotations = result.text_annotations

    # A non-empty error message means the request failed.
    if result.error.message:
        raise Exception(f'{result.error.message}')

    if not annotations:
        return ''
    return annotations[0].description

# Function to create label indices and label set
def create_label_indices(lines):
    """Locate the known field labels inside the OCR lines.

    Every line is scanned for each known label. Longer labels are tried
    first and blanked out of a working copy of the line once matched, so a
    short label that only appears as part of a longer one (for example
    'المالك' inside 'هوية المالك') is not indexed against that line.

    Args:
        lines: The OCR text lines.

    Returns:
        A ``(label_indices, label_set)`` tuple. ``label_indices`` maps each
        label that was found to the index of the last line containing it;
        ``label_set`` is the set of labels that were found.
    """
    labels = (
        'المالك', 'هوية المالك', 'رقم الهوية', 'رقم السجل', 'رقم الهيكل',
        'رقم اللوحة', 'ماركة المركبة', 'الماركة', 'الوزن', 'نوع التسجيل',
        'طراز المركبة', 'الموديل', 'حمولة المركبة', 'اللون', 'سنة الصنع',
        'اللون الأساسي',
    )
    # Built once instead of per line; longest first so that compound labels
    # claim their text before their shorter sub-labels are tested.
    longest_first = sorted(labels, key=len, reverse=True)

    label_indices = {}
    label_set = set()
    for idx, line in enumerate(lines):
        remaining = line.strip(':').strip()
        for label in longest_first:
            if label in remaining:
                label_indices[label] = idx
                label_set.add(label)
                # Replace with a space (not '') so the neighbouring text
                # cannot fuse into a new, spurious label.
                remaining = remaining.replace(label, ' ')
    return label_indices, label_set

# Extraction functions using label indices

def extract_owner_name(lines, label_indices):
    """Return the owner's name as a 'المالك: <name>' line.

    When the owner label was indexed, the name is the text following the
    label on that line (after the last colon, if any); if nothing follows,
    the next line is used. When the label was not indexed, the first line
    that looks like a multi-word Arabic name and is not known boilerplate
    is used instead.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.

    Returns:
        'المالك: <name>', or 'المالك: غير متوفر' when no name is found.
    """
    label = 'المالك'
    exclusion_list = ['القامة', 'سرباع', 'وزارة الداخلية', 'رخصة سير', 'التعامل مع الهوية']
    not_available = "المالك: غير متوفر"

    if label in label_indices:
        idx = label_indices[label]
        line = lines[idx]
        # Only look at what comes after the label, so a label line without
        # a colon no longer yields the label itself as the name.
        _, found, tail = line.partition(label)
        if not found:
            tail = line
        name = tail.split(':')[-1].strip()
        if not name and idx + 1 < len(lines):
            name = lines[idx + 1].strip()
        return f"المالك: {name}" if name else not_available

    # Label not found: fall back to the first plausible Arabic name line.
    for line in lines:
        line_clean = line.strip()
        if (re.match(r'^[\u0621-\u064A\s]{3,}$', line_clean) and
                line_clean not in exclusion_list and
                'وزارة' not in line_clean and
                'رخصة' not in line_clean and
                'التعامل' not in line_clean and
                len(line_clean.split()) > 2):
            return f"المالك: {line_clean}"
    return not_available

def extract_owner_id(lines, label_indices):
    """Return the owner's 10-digit ID as a 'هوية المالك: <id>' line.

    Each ID label is tried in order: its own line first, then the line
    after it. If no labelled line yields an ID, every line is scanned and
    the first stand-alone 10-digit number wins.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.

    Returns:
        'هوية المالك: <id>', or 'هوية المالك: غير متوفر' when none is found.
    """
    ten_digits = re.compile(r'\b\d{10}\b')

    for key in ('هوية المالك', 'رقم الهوية', 'رقم السجل'):
        if key not in label_indices:
            continue
        pos = label_indices[key]
        found = ten_digits.search(lines[pos])
        if not found and pos + 1 < len(lines):
            # The value is sometimes on the line below the label.
            found = ten_digits.search(lines[pos + 1])
        if found:
            return f"هوية المالك: {found.group()}"

    # No labelled hit: take the first 10-digit number anywhere in the text.
    for text in lines:
        found = ten_digits.search(text)
        if found:
            return f"هوية المالك: {found.group()}"
    return "هوية المالك: غير متوفر"

def extract_chassis_number(lines, label_indices):
    """Return the chassis number as a 'رقم الهيكل: <value>' line.

    When the chassis label was indexed, the value is the first token after
    the label on that line, or the whole next line if nothing follows the
    label. Otherwise (or if that yields nothing) the first line that is
    exactly a 17-character VIN-style string is used.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.

    Returns:
        'رقم الهيكل: <value>', or 'رقم الهيكل: غير متوفر' when none is found.
    """
    label = 'رقم الهيكل'
    if label in label_indices:
        idx = label_indices[label]
        # The value must start with a character that is neither whitespace
        # nor ':'; otherwise a bare "label:" line would backtrack into
        # capturing the colon itself as the chassis number.
        match = re.search(r'رقم الهيكل[:\s]*([^\s:]\S*)', lines[idx])
        if match:
            return f"رقم الهيكل: {match.group(1)}"
        if idx + 1 < len(lines):
            return f"رقم الهيكل: {lines[idx + 1].strip()}"

    # Fallback: a line consisting solely of 17 VIN characters (no I, O, Q).
    for line in lines:
        candidate = line.strip()
        if re.match(r'^[A-HJ-NPR-Z0-9]{17}$', candidate):
            return f"رقم الهيكل: {candidate}"
    return "رقم الهيكل: غير متوفر"

def extract_plate_number(lines, label_indices, label_set):
    """Return the plate number as a 'رقم اللوحة: <value>' line.

    The value is the text after the plate label on its own line, cut off
    before the next 'رقم' (the start of another numbered field). When the
    label line carries no value, the following lines are joined until a
    line containing any known label is reached.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.
        label_set: Set of labels found in the text, used to detect where
            the plate value ends.

    Returns:
        'رقم اللوحة: <value>', or 'رقم اللوحة: غير متوفر' when none is found.
    """
    label = 'رقم اللوحة'
    not_available = "رقم اللوحة: غير متوفر"
    if label not in label_indices:
        return not_available

    idx = label_indices[label]
    match = re.search(r'رقم اللوحة[:\s]*(.*?)(?:رقم|$)', lines[idx])
    if match:
        plate_info = match.group(1).strip()
        # The pattern also matches with an empty capture (via '$'), so an
        # empty value must fall through to the multi-line collection below.
        if plate_info:
            return f"رقم اللوحة: {plate_info}"

    plate_parts = []
    for next_line in lines[idx + 1:]:
        cleaned = next_line.strip(':').strip()
        # Stop at the next field, whether its label stands alone or shares
        # the line with its value.
        if any(known in cleaned for known in label_set):
            break
        if cleaned:
            plate_parts.append(next_line.strip())
    if plate_parts:
        return "رقم اللوحة: " + ' '.join(plate_parts)
    return not_available

def extract_vehicle_brand(lines, label_indices):
    """Return the vehicle brand as a 'ماركة المركبة: <brand>' line.

    The brand is read from the text after an indexed brand label, or from
    the next line when nothing follows the label. If no brand label was
    indexed, the lines are scanned for one of a few known brand names.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.

    Returns:
        'ماركة المركبة: <brand>', or 'ماركة المركبة: غير متوفر' when no
        label and no known brand is present.
    """
    brand_pattern = r'(?:ماركة المركبة|الماركة)[:\s]*(.*)'

    for key in ('ماركة المركبة', 'الماركة'):
        if key not in label_indices:
            continue
        pos = label_indices[key]
        hit = re.search(brand_pattern, lines[pos])
        if hit is None:
            continue
        value = hit.group(1).strip()
        if value == '' and pos + 1 < len(lines):
            # Label and value were split across two OCR lines.
            value = lines[pos + 1].strip()
        return f"ماركة المركبة: {value}"

    # No usable label: first known brand on the earliest line mentioning one.
    known_brands = ('فورد', 'تويوتا', 'نيسان', 'هوندا', 'شيفروليه', 'مرسيدس')
    spotted = next(
        (name for text in lines for name in known_brands if name in text),
        None,
    )
    if spotted is not None:
        return f"ماركة المركبة: {spotted}"
    return "ماركة المركبة: غير متوفر"

def extract_vehicle_weight(lines, label_indices):
    """Return the vehicle weight as a 'وزن المركبة: <digits>' line.

    The weight label's line and the line after it are joined with a space,
    and the first run of digits following the label is taken.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.

    Returns:
        'وزن المركبة: <digits>', or 'وزن المركبة: غير متوفر' when the label
        is missing or no digits follow it.
    """
    key = 'الوزن'
    if key in label_indices:
        start = label_indices[key]
        # Two-line window: the number may sit on the line below the label.
        window = ' '.join(lines[start:start + 2])
        found = re.search(r'الوزن[:\s]*(\d+)', window)
        if found is not None:
            return f"وزن المركبة: {found.group(1)}"
    return "وزن المركبة: غير متوفر"

def extract_registration_type(lines, label_indices):
    """Return the registration type as a 'نوع التسجيل: <type>' line.

    The value is the text after the label on its own line; when nothing
    follows the label there, the next line is used instead.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.

    Returns:
        'نوع التسجيل: <type>', or 'نوع التسجيل: غير متوفر' when the label is
        missing or no value can be found.
    """
    label = 'نوع التسجيل'
    if label in label_indices:
        idx = label_indices[label]
        match = re.search(r'نوع التسجيل[:\s]*(.*)', lines[idx])
        # '(.*)' also matches an empty string, so the match alone does not
        # prove a value is present; check the captured text instead.
        reg_type = match.group(1).strip() if match else ''
        if not reg_type and idx + 1 < len(lines):
            reg_type = lines[idx + 1].strip()
        if reg_type:
            return f"نوع التسجيل: {reg_type}"
    return "نوع التسجيل: غير متوفر"

def extract_vehicle_color(lines, label_indices):
    """Return the vehicle colour as a 'اللون: <colour>' line.

    Accepts either the plain colour label or the "primary colour" variant.
    The value is the text after the label on its own line; when nothing
    follows the label there, the next line is used instead.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.

    Returns:
        'اللون: <colour>', or 'اللون: غير متوفر' when no label is indexed or
        no value can be found.
    """
    labels = ['اللون', 'اللون الأساسي']
    for label in labels:
        if label not in label_indices:
            continue
        idx = label_indices[label]
        # The optional group consumes the "primary" suffix as part of the
        # label, so it is not returned as the colour when no colon follows.
        match = re.search(r'اللون(?:\s+الأساسي)?[:\s]*(.*)', lines[idx])
        color = match.group(1) if match else ''
        # Kept for lines where the suffix appears after a separator.
        color = color.replace('الأساسي:', '').strip()
        if not color and idx + 1 < len(lines):
            color = lines[idx + 1].strip()
        if color:
            return f"اللون: {color}"
    return "اللون: غير متوفر"

def extract_year_of_manufacture(lines, label_indices):
    """Return the manufacture year as a 'سنة الصنع: <year>' line.

    The year label's line and the line after it are joined with a space.
    Four digits directly after the label are preferred; failing that, any
    stand-alone 19xx/20xx number in that window is accepted.

    Args:
        lines: The OCR text lines.
        label_indices: Mapping of label -> line index.

    Returns:
        'سنة الصنع: <year>', or 'سنة الصنع: غير متوفر' when the label is
        missing or no year is found near it.
    """
    key = 'سنة الصنع'
    missing = "سنة الصنع: غير متوفر"
    if key not in label_indices:
        return missing

    start = label_indices[key]
    window = ' '.join(lines[start:start + 2])

    labelled = re.search(r'سنة الصنع[:\s]*(\d{4})', window)
    if labelled:
        return f"سنة الصنع: {labelled.group(1)}"

    # Looser pass: a year-shaped number anywhere in the two-line window.
    loose = re.search(r'\b(19|20)\d{2}\b', window)
    if loose:
        return f"سنة الصنع: {loose.group(0)}"
    return missing

# Main function to process the text and call each field extraction function
def format_vehicle_registration_text(detected_text):
    """Turn raw OCR text into one 'label: value' line per registration field.

    Args:
        detected_text: The full OCR text, with lines separated by '\\n'.

    Returns:
        A newline-joined string with, in order: owner, owner ID, chassis
        number, plate number, brand, weight, registration type, colour and
        year of manufacture.
    """
    # Trim every line and drop the ones that end up empty.
    trimmed = (raw.strip() for raw in detected_text.split('\n'))
    lines = [text for text in trimmed if text]

    label_indices, label_set = create_label_indices(lines)

    # Build the report field by field, in display order.
    report = []
    report.append(extract_owner_name(lines, label_indices))
    report.append(extract_owner_id(lines, label_indices))
    report.append(extract_chassis_number(lines, label_indices))
    report.append(extract_plate_number(lines, label_indices, label_set))
    report.append(extract_vehicle_brand(lines, label_indices))
    report.append(extract_vehicle_weight(lines, label_indices))
    report.append(extract_registration_type(lines, label_indices))
    report.append(extract_vehicle_color(lines, label_indices))
    report.append(extract_year_of_manufacture(lines, label_indices))

    return "\n".join(report)

# Driver: runs at cell/module level, calling the Vision API once and printing
# the parsed registration fields.

# Path to the image file to process (update with your uploaded image path).
image_path = "/content/VecRegBlack.jpg"  # Update this path with the actual image

# Step 1: Detect and extract the raw text from the image (network call).
detected_text = detect_text(image_path)

# Step 2: Parse the raw text into one "label: value" line per field.
formatted_text = format_vehicle_registration_text(detected_text)

# Step 3: Print the formatted text.
print(formatted_text)


المالك: سلطان بن محمد بن حامد الاحمدي الحربي
هوية المالك: ١٠٧٣١٥٥٦٠٦
رقم الهيكل: FAHP2D9XH141198
رقم اللوحة: د رص ۵۰۷۲
ماركة المركبة: فورد
وزن المركبة: ١٧٤٠
نوع التسجيل: خاص
اللون: اسود
سنة الصنع: ۲۰۱۷
