ENG-573 Capstone - GPS Localization via Street Signs

Prasanna Chandrasekar

In [None]:
!pip install google-cloud-vision
!pip install folium
!pip install requests

In [None]:
# Mount Google Drive at /content/drive; the credentials file, model weights and
# test media used by the later cells are all read from paths under this mount.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
from PIL import Image

import numpy as np
import cv2
from torchvision import transforms
import matplotlib.pyplot as plt
import os
import math

from google.colab.patches import cv2_imshow

from google.cloud import vision
import io
from io import BytesIO
import requests


import folium
from IPython.display import display

import os

# Service-account file picked up by the Google Cloud Vision client.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/content/drive/MyDrive/SUMMER_CAPSTONE/mygpsalt-05c5371e4966.json'

# The Google Maps key must not live in the notebook source. It is read from the
# GOOGLE_MAPS_API_KEY environment variable, falling back to an interactive
# prompt whose input is not echoed into the cell output.
# NOTE(review): the key that was previously committed here should be revoked.
from getpass import getpass

api_key = os.environ.get("GOOGLE_MAPS_API_KEY") or getpass("Google Maps API key: ")

In [None]:

"""
Load & Run the custom YOLOv5 model.
"""
# Load the custom-trained weights (best_v5.pt on Drive) through the
# 'ultralytics/yolov5' torch.hub entry point. The resulting global `model` is
# what processImageModel calls on each image/frame.
model = torch.hub.load('ultralytics/yolov5', 'custom', path='/content/drive/MyDrive/SUMMER_CAPSTONE/best_v5.pt')
# Put the model in evaluation mode before running inference.
model.eval()

In [None]:
def getStreet_google_OCR(Img, x1, y1, x2, y2):
    """
    Gets the street name from a given image using Google's OCR API.
    Args:
      Img: A PIL image.
      x1, y1, x2, y2: The coordinates of the bounding box around the street name.
    Returns:
      A list of strings with the description of every text annotation found
      in the cropped region. The list is empty when no text was detected or
      when the API reported an error (the error is printed).
    """
    # Only the cropped sign region is sent to the API, encoded in memory as PNG.
    buffered = BytesIO()
    Img.crop((x1, y1, x2, y2)).save(buffered, format="PNG")

    client = vision.ImageAnnotatorClient()
    image = vision.Image(content=buffered.getvalue())
    response = client.text_detection(image=image)

    # Request-level failures are carried in response.error; report them in the
    # same print-and-return style used by get_lat_lon instead of ignoring them.
    if response.error.message:
        print(f"Error: Vision OCR request failed - {response.error.message}")
        return []

    return [text.description for text in response.text_annotations]


In [None]:
def get_lat_lon(address, key=None, timeout=10):
    """
    Gets latitude and longitude from a given address using the Google Geocoding API.
    Args:
      address: A string containing the address as street, zip code, and country.
      key: Optional Google Maps API key. When omitted, the notebook-level
        `api_key` variable is used.
      timeout: Seconds to wait for the HTTP request before giving up.
    Returns:
      A dictionary with the keys "lat" and "lon" if successful, None otherwise
      (network failure, non-200 response, or no geocoding result).
    """

    # Base URL for Geocoding API
    url = "https://maps.googleapis.com/maps/api/geocode/json?"

    # Address parameters
    params = {
        "address": address,
        "key": api_key if key is None else key,
    }
    print(address)

    # Send the request; without a timeout a stalled connection would hang the
    # notebook indefinitely, and connection errors would abort the whole run.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    data = response.json()
    results = data.get('results')
    if not results:
        print("No results found for this address.")
        # The API answers HTTP 200 even for rejected requests; the reason is
        # carried in the JSON body, so surface it when it is present.
        if data.get('status'):
            print("Geocoding status:", data.get('status'), data.get('error_message', ''))
        return None

    # Get the first result (assuming it's the most relevant)
    location = results[0]['geometry']['location']
    print("Predicted Location: ", location['lat'], location['lng'])
    return {
        "lat": location['lat'],
        "lon": location['lng'],
    }

In [None]:
# Camera constants - defaults for the iPhone 14 wide angle (main camera) sensor.
# Both fields of view are stored in DEGREES and follow 2 * atan(extent / (2 * 26)).
# NOTE(review): 7.6 / 9.5 are presumably the sensor extents and 26 the focal
# length, all in mm - confirm against the camera specification.
FoV_horizontal = math.degrees(math.atan(7.6 / (2 * 26)) * 2)
FoV_vertical = math.degrees(math.atan(9.5 / (2 * 26)) * 2)

# Camera matrix K (3x3); K[0][0] is the entry used by getPlateDistance.
K = np.array(
    [
        [1565.7702703272157, 0.0, 964.2389356041999],
        [0.0, 1562.3561924508267, 537.4247202074102],
        [0.0, 0.0, 1.0],
    ]
)


def getPlateDistance(plateDims):
    """
    Estimate how far a detected license plate is from the camera.
    Args:
      plateDims - A list with the x1,y1,x2,y2 of a license plate.
    Returns:
      distCar - K[0][0] multiplied by the assumed real plate width and divided
                by the plate's width in pixels.
    """

    realPlateWidth = 0.3  # The width of a license Plate in meters
    realPlateHeight = 0.15  # the height of a license plate in meters (not used in the estimate)

    # Width of the detected plate in pixels, independent of corner ordering.
    left_x, right_x = plateDims[0], plateDims[2]
    pixelWidth = abs(right_x - left_x)

    focal_x = K[0][0]
    distCar = focal_x * realPlateWidth / pixelWidth
    print("This is the distance from license plate -> ", distCar)

    return distCar


def getSignDistance(plateDims, signDims, frameSize=(3840, 2160)):
    """
    A Function to get the distance of a street sign in a scene from the camera - this will be converted to add to the lat/lon values.
    Args:
      plateDims - A list with the x1,y1,x2,y2 of a license plate.
      signDims - A list with the x1,y1,x2,y2 of a sign.
      frameSize - (width, height) of the frame in pixels; defaults to the
                  3840 x 2160 resolution the notebook assumes for its inputs.
    Returns:
      A two-element list [lat, lon] of offsets in meters: the first is derived
      from the horizontal pixel gap, the second from the vertical pixel gap.

    """
    # The horizontal distance between a license plate and a sign in pixels.
    # The value is signed: it can be negative when the sign starts left of the plate.
    if signDims[0] >= plateDims[0]:
        h_dist = signDims[0] - plateDims[2]
    else:
        h_dist = signDims[2] - plateDims[0]

    # The vertical distance between a license plate and a sign in pixels.
    v_dist = abs(signDims[3] - plateDims[1])

    frameWidth, frameHeight = frameSize

    # The pixel gap is mapped to a fraction of the field of view. FoV_* are
    # stored in degrees while math.tan expects radians, so convert first.
    theta_H = math.radians((h_dist / frameWidth) * FoV_horizontal)
    theta_V = math.radians((v_dist / frameHeight) * FoV_vertical)

    distCar = getPlateDistance(plateDims)

    distCarSign_lat = distCar * math.tan(theta_H)
    distCarSign_lon = distCar * math.tan(theta_V)
    print("Horizontal and vertical changes -> ", distCarSign_lat, distCarSign_lon)

    return [distCarSign_lat, distCarSign_lon]


def meters_to_lat_lon(meters, latitude):
    """
    Convert a pair of metric offsets into offsets in decimal degrees.
    Args:
      meters - A list whose first element is the latitude offset and whose
               second element is the longitude offset, both in meters.
      latitude - the latitude (in degrees) of the geolocated starting point.
    Returns:
      A list [delta_lat, delta_lon] with the same offsets in decimal degrees.
    """
    # One degree of latitude is taken to be roughly this many meters.
    metersPerDegree = 111000
    delta_lat = meters[0] / metersPerDegree

    # The per-degree length for longitude is scaled by the cosine of the latitude.
    lonScale = metersPerDegree * math.cos(math.radians(latitude))
    delta_lon = meters[1] / lonScale

    print('Latitude and longitude for ', meters, ' meters is ', delta_lat, delta_lon)
    return [delta_lat, delta_lon]


def getNewLatLon(diffLatLon, latLon):
    """
    Shift the geolocated coordinates by the estimated offsets to get the final position.
    Args:
      diffLatLon - [delta_lat, delta_lon] offsets in decimal degrees.
      latLon - dictionary with the geolocated 'lat' and 'lon' values.
    Returns:
      A list [lat, lon] holding the final, shifted coordinates.
    """
    deltaLat, deltaLon = diffLatLon[0], diffLatLon[1]
    shifted = [latLon['lat'] + deltaLat, latLon['lon'] + deltaLon]

    print("This is the new lat and lon: ", shifted)

    return shifted


In [None]:
# Change it as needed to represent the area of the source data.
# Kept as a string: it is concatenated into the geocoding address, and ZIP
# codes are identifiers that may start with a zero.
zipCode = "68108"

def videoPath(file, frame_interval=60):
    """
    A function to process video inputs every 60th frame for applying the image model.
    Args:
      file - the video file used as input - it is assumed that the file is of resolution 3840 x 2160 at 30 fps.
      frame_interval - process every N-th frame (default 60).
    Returns:
      A list of the new latitude and longitude values for all the frame points
      processed. Frames for which processImageModel yields None are skipped.
    """
    cap = cv2.VideoCapture(file)
    if not cap.isOpened():
        print("Could not open video file:", file)

    frame_count = 0
    tempList = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1

            if frame_count % frame_interval != 0:
                continue

            # Convert the frame to RGB (OpenCV uses BGR by default)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Convert the frame to a PIL image
            PIL_img = Image.fromarray(frame_rgb)
            result = processImageModel(PIL_img)
            if result is not None:
                tempList.append(result)
    finally:
        # Always free the capture handle, even if a frame fails to process.
        cap.release()
    return tempList



def processImageModel(img):
    """
    A central function to process an input image/frame into the YOLO model and get the final coordinates for the vehicle from several other functions.
    Args:
      img - the input frame/image.
    Returns:
      A list of the new latitude and longitude values for the frame, or None
      when the frame cannot be localized: fewer than one license plate or two
      street signs above the confidence threshold, no OCR text for one of the
      first two signs, or no geocoding result for the intersection.
    """
    outputs = model(img)

    # One row per detection: x1, y1, x2, y2, confidence score, class label.
    boxes = outputs.xyxy[0].cpu().numpy()
    scores = boxes[:, 4]
    labels = boxes[:, 5]

    print("Boxes:", boxes)
    print("Scores:", scores)
    print("Labels:", labels)

    # Filter predictions by confidence score
    score_threshold = 0.75
    keep = scores > score_threshold

    # Annotated copy of the frame, only needed for the optional debug display below.
    img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    streetNames = []
    plateDims = []
    signDims = []
    # Collect and draw bounding boxes: label 1 is a street sign, label 0 a license plate.
    for box, score, label in zip(boxes[keep], scores[keep], labels[keep]):
        x1, y1, x2, y2 = map(int, box[:4])
        if label == 1:
            signDims.append([x1, y1, x2, y2])
            streetNames.append(getStreet_google_OCR(img, x1, y1, x2, y2))
            color = (25, 51, 255)
        elif label == 0:
            plateDims.append([x1, y1, x2, y2])
            color = (49, 245, 59)
        else:
            continue

        cv2.rectangle(img_cv, (x1, y1), (x2, y2), color, 4)
        cv2.putText(img_cv, f'{label}: {score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Show the resulting image (debugging aid):
    # cv2_imshow(img_cv)

    # An intersection needs two street names and the distance estimate needs a
    # plate; without them there is nothing to geocode or measure against.
    if len(plateDims) == 0 or len(signDims) < 2:
        print("Skipping frame: need at least one license plate and two street signs.")
        return None

    # The address is built from the first two signs, so both need OCR text.
    if any(len(s) == 0 for s in streetNames[:2]):
        print("Skipping frame: OCR returned no text for a detected street sign.")
        return None

    # Remove newlines from street names - the first element of each street list contains all the words on a street sign.
    for s in streetNames:
        if s:
            s[0] = s[0].replace("\n", " ")
    print(streetNames)

    # str() keeps the concatenation valid whether zipCode is an int or a string.
    address = streetNames[0][0] + ' and ' + streetNames[1][0] + ', ' + str(zipCode) + ', USA'
    position = get_lat_lon(address)
    if position is None:
        print("Skipping frame: the intersection could not be geocoded.")
        return None

    signDist = getSignDistance(plateDims[0], signDims[0])
    print(signDist)

    diffLatLon = meters_to_lat_lon(signDist, position['lat'])
    newLatLon = getNewLatLon(diffLatLon, position)
    return newLatLon


def FileParse(folder_dir):
    """
    Run the localization pipeline over every supported file in a folder.
    Args:
      folder_dir - path of the folder holding the test media. Files ending in
                   .MOV are processed as videos and files ending in .png as
                   single images (extension match is case-insensitive); any
                   other file is ignored.
    Returns:
      A list with one [latitude, longitude] entry per image/frame that could
      be localized. Entries that come back as None are left out.
    """
    coordinatesList = []
    # Sorted so that the order of the results does not depend on the file system.
    for files in sorted(os.listdir(folder_dir)):
        print(files)
        path = os.path.join(folder_dir, files)
        extension = os.path.splitext(files)[1].lower()
        if extension == '.mov':
            coordinatesList += [c for c in videoPath(path) if c is not None]
        elif extension == '.png':
            with Image.open(path) as img:
                result = processImageModel(img)
            if result is not None:
                coordinatesList.append(result)
    return coordinatesList

# Run the pipeline over the test folder on Drive; the resulting list of
# coordinates is what the map cell below reads its first entry from.
estimatedPosition = FileParse('/content/drive/MyDrive/SUMMER_CAPSTONE/TestCaseImgz/final_tests')


In [None]:
"""
Folium Map System - Enables the viewing of the different coordinates on a map.
"""

# Only entries that actually hold a position can be plotted.
validPositions = [p for p in estimatedPosition if p is not None]
if not validPositions:
    raise ValueError("No estimated positions available to plot - check the pipeline output above.")

location1 = validPositions[0]
# location2 = [41.2354329, -95.99383390000001]
# location3 = [41.24898, -96.02409]


zoom_level = 15

# Create a map object centred on the estimate. location3 is commented out
# above, so it cannot be used as the centre.
my_map = folium.Map(location=location1, zoom_start=zoom_level)

# Add a marker
folium.Marker(
    location1,
    popup='estimation',
    icon=folium.Icon(icon='star', color='green')
).add_to(my_map)

# # Add a marker
# folium.Marker(
#     location2,
#     popup='Geolocation',
#     icon=folium.Icon(icon='star', color='red')
# ).add_to(my_map)

# # Add a marker
# folium.Marker(
#     location3,
#     popup='My Pos',
#     icon=folium.Icon(icon='star')
# ).add_to(my_map)

# Display the map
display(my_map)
