In [1]:
from PIL import Image
import pytesseract
from geopy.geocoders import Nominatim

# Path to the Tesseract executable that pytesseract invokes.
# This is a Windows install path; adjust it for your OS / install location.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Geocoder used by get_geolocation().
# NOTE(review): despite the "offline_geocoder" user agent, geopy's Nominatim
# class is a client for a Nominatim web service, so lookups presumably need
# network access -- confirm whether a local/offline instance was intended.
geolocator = Nominatim(user_agent="offline_geocoder")

# Small hard-coded table mapping full address strings to
# (latitude, longitude) tuples. get_geolocation() looks addresses up here by
# exact key match when the geocoder returns no result.
address_database = {
    "123 Elm Street, Springfield, IL": (39.7817, -89.6501),
    "789 Oak Avenue, Chicago, IL": (41.8781, -87.6298),
}

# Function to extract text using OCR
def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The string produced by ``pytesseract.image_to_string``, or ``None``
        if the image could not be opened or OCR raised an error (the error
        is printed rather than propagated).
    """
    try:
        # Open the image as a context manager so its underlying file handle
        # is released as soon as OCR finishes, even when OCR raises.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
    except Exception as e:
        # Best-effort boundary: report the problem and let the caller
        # decide what to do with a missing result.
        print(f"[ERROR] Unable to process image: {e}")
        return None

    print("[INFO] Extracted Text:")
    print(text)
    return text

# Function to find geolocation for an address
def get_geolocation(address):
    """Resolve an address string to a ``(latitude, longitude)`` tuple.

    The module-level ``geolocator`` is tried first. If it returns no match,
    or if the lookup itself fails (for example a timeout or an unreachable
    service), the module-level ``address_database`` is consulted using the
    address as an exact key.

    Args:
        address: Address string to look up.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when neither source
        knows the address.
    """
    location = None
    geocoder_failed = False
    try:
        # Only the geocoder call is guarded, so a failure here no longer
        # skips the local database fallback below.
        location = geolocator.geocode(address)
    except Exception as e:
        geocoder_failed = True
        print(f"[ERROR] Geocoding failed: {e}")

    if location:
        print(f"[INFO] Address Found: Latitude={location.latitude}, Longitude={location.longitude}")
        return location.latitude, location.longitude

    # Fallback to local database
    if geocoder_failed:
        print("[WARNING] Geocoder unavailable. Checking local database...")
    else:
        print("[WARNING] Address not found in geocoder. Checking local database...")

    coordinates = address_database.get(address)
    if coordinates:
        print(f"[INFO] Address Found in Database: Latitude={coordinates[0]}, Longitude={coordinates[1]}")
        return coordinates

    print("[ERROR] Address not found in any source.")
    return None

# Main Function
def main():
    """Run the demo pipeline: OCR an image, pick an address, geocode it.

    Progress and the final outcome are printed; nothing is returned.
    """
    # Raw string so the Windows backslashes are taken literally. In a normal
    # string, sequences such as "\V" or "\E" are invalid escapes that emit a
    # warning today and are slated to become errors.
    image_path = r"F:\Vit bhopal\year 3\EPICS project\Capture.PNG"  # Replace with your image file path

    # Step 1: Extract text from the image
    extracted_text = extract_text_from_image(image_path)
    if not extracted_text:
        # OCR failed or produced no text; there is nothing to geocode.
        return

    # Step 2: Extract address.
    # NOTE: placeholder -- the address is hard-coded and does not yet depend
    # on extracted_text. Replace this with parsing logic for the OCR text.
    address = "123 Elm Street, Springfield, IL"
    print(f"[INFO] Parsed Address: {address}")

    # Step 3: Get geolocation for the parsed address
    coordinates = get_geolocation(address)

    if coordinates:
        print(f"[SUCCESS] Coordinates for the address are: Latitude={coordinates[0]}, Longitude={coordinates[1]}")
    else:
        print("[FAILURE] Unable to find coordinates for the address.")


if __name__ == "__main__":
    main()


[INFO] Extracted Text:
DEED OF LAND TRANSFER

This Deed of Land Transfer is made on the Sth of October, 2024, between:

GRANTOR: John Smith

‘Address: 123 Elm Street, Springfield, IL, 62704

GRANTEE: Jane Doe

‘Address: 456 Oak Avenue, Springfield, IL, 62705

LEGAL DESCRIPTION OF PROPERTY:

‘The property situated at 789 Maple Road, Springfield, IL, 62703, legally described as Lot 5, Block 2,
Maple Subdivision.

The Grantor hereby conveys the property to the Grantee for the sum of $250,000.

‘Signed: John Smith

‘Witnessed: Mary Johnson

Date: 05-October-2024

[INFO] Parsed Address: 123 Elm Street, Springfield, IL
[INFO] Address Found in Database: Latitude=39.7817, Longitude=-89.6501
[SUCCESS] Coordinates for the address are: Latitude=39.7817, Longitude=-89.6501
