## Geolocalisation using StreetCLIP

### Packages

In [None]:
# Load model directly
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import requests
import pandas as pd
import geopandas as gpd

### Settings

In [None]:
# Load the processor and the model from the same pretrained checkpoint id
checkpoint = "geolocal/StreetCLIP"
processor = CLIPProcessor.from_pretrained(checkpoint)
model = CLIPModel.from_pretrained(checkpoint)


In [None]:
# Read the world-cities table and derive the candidate country / city labels
# NOTE(review): the path below is absolute and machine-specific — adjust it when running elsewhere.
file_path_cities = r'c:\Users\white_rn\Documents\GitHub\image_registration\data\simplemaps_worldcities_basicv1.901\worldcities.csv'
df_cities = pd.read_csv(file_path_cities)

# Build point geometries from the lng/lat columns and attach them with an EPSG:4326 CRS
city_points = gpd.points_from_xy(df_cities["lng"], df_cities["lat"])
gdf_cities = gpd.GeoDataFrame(df_cities, geometry=city_points, crs="EPSG:4326")

# Distinct values of each label column, as plain Python lists
countries, cities = (
    gdf_cities[column].unique().tolist() for column in ("country", "city")
)
print(f"Total number of unique countries: {len(countries)}")
print(f"Total number of unique cities:    {len(cities)}")

In [None]:
# Get image
# `image_idx` selects which of the sample URLs below is downloaded.
image_idx = 1
urls = [
    "https://huggingface.co/geolocal/StreetCLIP/resolve/main/sanfrancisco.jpeg",
    "https://www.discoverholland.com/product-images/948e5a43-41f5-4d1d-ae2a-4e79ecaed08a.jpg",
    "https://media.indebuurt.nl/delft/2022/02/16141631/NieuweGracht2021.jpg",
    "https://media.indebuurt.nl/delft/2022/02/16124419/stationsplein2020googlestreetview.jpg",
    "https://media.indebuurt.nl/delft/2022/02/16141643/coenderstraat2021-1.jpg",
    "https://st.depositphotos.com/2461183/3549/i/950/depositphotos_35491923-stock-photo-view-of-the-streets-in.jpg",
    "https://thumbs.dreamstime.com/b/valencia-city-spain-street-view-beautiful-luxurious-building-palm-trees-sunny-day-102790750.jpg",
    "https://thumbs.dreamstime.com/b/street-view-historical-center-valencia-spain-november-city-84349150.jpg",
]

# A timeout keeps the cell from hanging forever on an unresponsive host.
response = requests.get(urls[image_idx], stream=True, timeout=30)
# Surface HTTP errors (404, 403, ...) here rather than as a confusing PIL
# "cannot identify image file" error on the error page body.
response.raise_for_status()
# The raw stream bypasses requests' automatic content decoding; ask urllib3 to
# undo any gzip/deflate Content-Encoding so PIL receives the actual image bytes.
response.raw.decode_content = True
image = Image.open(response.raw)

### Geolocalisation

In [None]:
# Wrapper around the model
def model_wrapper(processor, model, choices, image):
    """
    Rank candidate text labels by how well they match an image.

    The processor turns the labels and the image into model inputs, the model
    scores every label against the image, and the scores are converted to
    probabilities with a softmax over the labels.

    Parameters
    ----------
    processor : callable
        Called as ``processor(text=..., images=..., return_tensors="pt",
        padding=True)``; its result is unpacked into ``model`` as keyword
        arguments.
    model : callable
        Called as ``model(**inputs)``; the result must expose a
        ``logits_per_image`` tensor with one row per image and one column
        per label.
    choices : str or iterable of str
        Candidate labels. A single string is treated as one label; any other
        iterable is materialised into a list.
    image
        Image passed through to the processor unchanged.

    Returns
    -------
    tuple[list, list[float]]
        The labels sorted from most to least probable, and their
        probabilities in the same order.

    Raises
    ------
    ValueError
        If ``choices`` is empty.
    """
    # Local import: the surrounding file does not import torch at top level.
    import torch

    # Normalise the labels so they can be indexed below; a bare string would
    # otherwise be indexed character by character.
    if isinstance(choices, str):
        choices = [choices]
    else:
        choices = list(choices)
    if not choices:
        raise ValueError("choices must contain at least one label")

    # Prepare model inputs
    inputs = processor(text=choices, images=image, return_tensors="pt", padding=True)

    # Pure inference: disable autograd so no graph is built for the forward pass
    with torch.no_grad():
        outputs = model(**inputs)
        # Image-text similarity scores -> label probabilities for the first image
        probs = outputs.logits_per_image.softmax(dim=1)[0]

    # Sort by descending probability, using plain ints to index the Python list
    order = probs.argsort(descending=True).tolist()
    sorted_labels = [choices[i] for i in order]
    sorted_probs = probs[order].tolist()

    return sorted_labels, sorted_probs

# Stage 1: score every distinct country in the table against the image
country_column = gdf_cities["country"]
countries = country_column.unique().tolist()
print(f"Number of unique countries: {len(countries)}")

# `countries` is rebound to the ranked list, so index 0 is the best match
countries, probs = model_wrapper(processor, model, countries, image)
print(f"Most probable country: {countries[0]} ({probs[0]:.4f})")

# Stage 2: keep only the rows of the best-matching country and collect its cities
in_top_country = country_column == countries[0]
gdf_cities_in_country = gdf_cities.loc[in_top_country]
cities = gdf_cities_in_country["city"].unique().tolist()
print(f"Number of unique cities in {countries[0]}: {len(cities)}")

# Score those cities against the same image; `probs` now holds the city probabilities
cities, probs = model_wrapper(processor, model, cities, image)
print(f"Most probable city: {cities[0]} ({probs[0]:.4f})")