## Image geolocalisation using StreetCLIP

This example demonstrates how to use StreetCLIP to estimate where a photograph was taken: for each image, the model first selects the most probable country and then the most probable city within that country.

### Packages

In [None]:
import os
import re

import geopandas as gpd
import pandas as pd
import torch
from folium import Map
from PIL import Image
from tqdm import tqdm
from transformers import CLIPModel, CLIPProcessor


### Functions

In [None]:
def dms_to_decimal(degrees, minutes, seconds, direction):
    """
    Convert a degrees/minutes/seconds coordinate to signed decimal degrees.

    The magnitude is ``degrees + minutes / 60 + seconds / 3600``. It is negated
    when ``direction`` is "S" or "W"; any other value leaves it positive.
    """
    magnitude = degrees + minutes / 60 + seconds / 3600

    # Southern and western hemispheres are negative in decimal notation
    return -magnitude if direction in ("S", "W") else magnitude


def parse_dms(dms_list):
    """
    Extract latitude/longitude pairs in decimal degrees from DMS coordinate strings.

    Every coordinate is expected in the form ``D°M'S" H``: integer degrees,
    integer minutes, seconds with an optional decimal part, one space, and a
    hemisphere letter (N, S, E or W). All strings are scanned together and the
    coordinates found are consumed in pairs, the first of each pair being taken
    as the latitude and the second as the longitude. The hemisphere letter is
    only used for the sign; it is not checked against the lat/lon position.

    Parameters
    ----------
    dms_list : list of str
        Strings containing the DMS coordinates.

    Returns
    -------
    tuple of (list of float, list of float)
        ``(latitudes, longitudes)``, one entry per coordinate pair found.

    Raises
    ------
    ValueError
        If an odd number of coordinates is found, i.e. a latitude has no
        matching longitude.
    """
    # Join list into one big string
    dms_string = "\n".join(dms_list)

    # The degree sign is matched as U+00B0, optionally preceded by U+00C2, the
    # stray character that shows up when a UTF-8 encoded degree sign has been
    # decoded as Latin-1. ASCII escapes keep the pattern immune to the encoding
    # of this source file.
    pattern = r"(\d+)\u00c2?\u00b0(\d+)'(\d+(?:\.\d+)?)\" ([NSEW])"
    matches = re.findall(pattern, dms_string)

    if len(matches) % 2:
        raise ValueError(
            f"Found {len(matches)} DMS coordinates; expected an even number (latitude/longitude pairs)."
        )

    latitudes = []
    longitudes = []

    # Each pair: lat, lon
    for lat_parts, lon_parts in zip(matches[::2], matches[1::2]):
        lat_deg, lat_min, lat_sec, lat_dir = lat_parts
        lon_deg, lon_min, lon_sec, lon_dir = lon_parts

        latitudes.append(dms_to_decimal(int(lat_deg), int(lat_min), float(lat_sec), lat_dir))
        longitudes.append(dms_to_decimal(int(lon_deg), int(lon_min), float(lon_sec), lon_dir))

    return latitudes, longitudes

### Settings

In [None]:
# Load the StreetCLIP pre-processor and model weights from the same checkpoint
processor = CLIPProcessor.from_pretrained("geolocal/StreetCLIP")
model = CLIPModel.from_pretrained("geolocal/StreetCLIP")

In [None]:
# All inputs live under <base>/data/geolocalisation, where <base> is two levels
# above the current working directory
dir_base = os.path.abspath(os.path.join(os.getcwd(), "..", ".."))
dir_path_images = os.path.join(dir_base, "data", "geolocalisation", "images")
file_path_locations = os.path.join(dir_base, "data", "geolocalisation", "coordinates.xlsx")
file_path_all_locations = os.path.join(
    dir_base,
    "data",
    "geolocalisation",
    "geometries",
    "simplemaps_worldcities_basicv1.901",
    "worldcities.xlsx",
)

# Image locations: the workbook stores DMS strings in the "coordinates" column,
# which are converted to decimal latitude/longitude columns
df_locations = pd.read_excel(file_path_locations)
latitudes, longitudes = parse_dms(df_locations["coordinates"].tolist())
df_locations["latitude"] = latitudes
df_locations["longitude"] = longitudes

# Point geometries in WGS 84 (x = longitude, y = latitude)
gdf_locations = gpd.GeoDataFrame(
    df_locations,
    geometry=gpd.points_from_xy(df_locations["longitude"], df_locations["latitude"]),
    crs="EPSG:4326",
)

# Candidate cities: keep only the columns used below and align their names
# with the image locations table
df_all_locations = pd.read_excel(file_path_all_locations)
df_all_locations = df_all_locations[["id", "city", "country", "lat", "lng"]]
df_all_locations = df_all_locations.rename(columns={"id": "ID", "lat": "latitude", "lng": "longitude"})
gdf_all_locations = gpd.GeoDataFrame(
    df_all_locations,
    geometry=gpd.points_from_xy(df_all_locations["longitude"], df_all_locations["latitude"]),
    crs="EPSG:4326",
)

In [None]:
# Preview the first five rows of the image locations table
df_locations.head(n=5)

In [None]:
# Preview the first five rows of the candidate cities table
df_all_locations.head(n=5)

### Geolocalisation

In [None]:
# Wrapper around the model
def model_wrapper(processor, model, choices, image):
    """
    Apply the model to the image and choices, returning the most probable label and its probability.

    Parameters
    ----------
    processor : callable
        Called as ``processor(text=choices, images=image, return_tensors="pt", padding=True)``;
        its result is unpacked as keyword arguments for ``model``.
    model : callable
        Called with the processor output; its result must expose ``logits_per_image``.
    choices : sequence of str
        Candidate labels (callers in this file pass lists of country or city names).
    image
        Image handed to the processor unchanged. Only the first row of the
        model output is read, so a single image is assumed.

    Returns
    -------
    tuple
        ``(label, probability)``: the entry of ``choices`` with the highest
        softmax probability, and that probability as a Python float.
    """
    # Prepare model inputs
    inputs = processor(text=choices, images=image, return_tensors="pt", padding=True)

    # Inference only: with autograd disabled no gradient graph is built or kept
    # alive for the forward pass
    with torch.no_grad():
        outputs = model(**inputs)

        # logits_per_image is the image-text similarity score; the softmax over
        # the choices turns row 0 (the single image) into label probabilities
        probs = outputs.logits_per_image.softmax(dim=1)[0]

        # Only the top entry is needed, so take the maximum instead of sorting
        best_prob, best_index = probs.max(dim=0)

    # Return most probable label and its probability
    return choices[best_index.item()], best_prob.item()


def geolocate_image(processor, model, gdf_all_locations, image):
    """
    Geolocate an image in two stages: pick the most probable country, then the
    most probable city within that country.

    Returns the rows of ``gdf_all_locations`` whose "country" and "city" match
    the selected labels. The probabilities reported by the model are not used.
    """
    # Stage 1: choose among every distinct country in the table
    candidate_countries = gdf_all_locations["country"].unique().tolist()
    best_country, _ = model_wrapper(processor, model, candidate_countries, image)
    in_country = gdf_all_locations["country"] == best_country
    gdf_in_country = gdf_all_locations[in_country]

    # Stage 2: choose among the distinct cities of that country only
    candidate_cities = gdf_in_country["city"].unique().tolist()
    best_city, _ = model_wrapper(processor, model, candidate_cities, image)
    in_city = gdf_in_country["city"] == best_city

    return gdf_in_country[in_city]


# Geolocate every image listed in df_locations and collect the predicted cities.
# Each response consists of rows taken from gdf_all_locations, so its "ID"
# column holds the id from the cities table, not the ID of the image.
gdf_responses_ls = []
for _, row in tqdm(df_locations.iterrows(), total=len(df_locations)):
    # Images are named after the zero-padded location ID, e.g. "007.jpg"
    file_path_image = os.path.join(dir_path_images, f"{row['ID']:03d}.jpg")

    # Open the image in a context manager so its file handle is released as
    # soon as the prediction is done instead of staying open for the whole run
    with Image.open(file_path_image) as image:
        gdf_response = geolocate_image(processor, model, gdf_all_locations, image)

    # Append results
    gdf_responses_ls.append(gdf_response)

# Stack the per-image results into a single GeoDataFrame in WGS 84
gdf_responses = gpd.GeoDataFrame(pd.concat(gdf_responses_ls, ignore_index=True), crs="EPSG:4326")

### Explore locations

In [None]:
# Draw both layers on one interactive map: the image locations as filled
# markers (legend disabled) and the model responses as hollow markers
m = Map()
gdf_locations.explore(
    m=m,
    name="Locations",
    column="ID",
    cmap="tab20",
    marker_kwds={"radius": 5},
    style_kwds={"fillOpacity": 1},
    legend=False,
)
gdf_responses.explore(
    m=m,
    name="Responses",
    column="ID",
    cmap="tab20",
    marker_kwds={"radius": 5},
    style_kwds={"fillOpacity": 0},
)
m
