In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from geopy.geocoders import Nominatim

In [None]:
def auto_crop_black_borders(img, threshold=10):
    """
    Trim the black margin around an image down to its visible content.

    The tightest axis-aligned rectangle containing every pixel brighter than
    `threshold` is kept, so black borders are removed on all four sides
    (top, bottom, left and right).

    Parameters:
        img: Input image (NumPy array). A 3-D array is converted to a single
            intensity channel with cv2.COLOR_BGR2GRAY; any other array is
            used directly as the intensity map.
        threshold: Intensity a pixel must strictly exceed to count as
            "non-black"

    Returns:
        The cropped region of `img` (a slice of the input), or `img` itself
        when no pixel exceeds the threshold.
    """
    # Reduce to one intensity value per pixel.
    intensity = img if len(img.shape) != 3 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # True wherever the pixel counts as image content.
    content = intensity > threshold

    # Indices of the rows / columns that contain at least one content pixel.
    row_hits = np.nonzero(content.any(axis=1))[0]
    col_hits = np.nonzero(content.any(axis=0))[0]

    # Entirely black (or empty) image: there is nothing to crop.
    if row_hits.size == 0 or col_hits.size == 0:
        return img

    top, bottom = row_hits[0], row_hits[-1]
    left, right = col_hits[0], col_hits[-1]

    # Bounds are inclusive, hence the +1 on the slice ends.
    return img[top:bottom + 1, left:right + 1]

In [None]:
def equirectangular_to_perspective(equi_img, fov, theta, height, width):
    """
    Simplified conversion from equirectangular to perspective.
    Only horizontal rotation (theta). No vertical tilt or stretch correction.

    A ray is cast through every output pixel of a virtual pinhole camera,
    rotated about the vertical axis by `theta`, converted to longitude and
    latitude, and used to sample the panorama with bilinear interpolation.

    Parameters:
        equi_img: Equirectangular input image (OpenCV format)
        fov: Horizontal field of view in degrees; must lie strictly between
            0 and 180 for the projection to be defined
        theta: Yaw angle in degrees (0 = front, 90 = right, etc.)
        height, width: Output dimensions in pixels (both must be >= 1)

    Returns:
        Perspective view image

    Raises:
        ValueError: If `fov` is outside the open interval (0, 180) or if
            `height` / `width` is smaller than 1.
    """
    # tan(fov / 2) is zero, negative or astronomically large outside (0, 180),
    # which would silently produce a meaningless image, so reject it up front.
    if not 0 < fov < 180:
        raise ValueError(
            f"fov must be strictly between 0 and 180 degrees, got {fov!r}"
        )
    if height < 1 or width < 1:
        raise ValueError(
            f"height and width must be >= 1, got height={height!r}, width={width!r}"
        )

    equ_h, equ_w = equi_img.shape[:2]

    # Convert angles to radians
    fov_rad = np.deg2rad(fov)
    theta_rad = np.deg2rad(theta)

    # Grid of x, y on the image plane at z = 1. The horizontal extent follows
    # the requested field of view; the vertical extent is fixed at [-1, 1]
    # regardless of the output aspect ratio (no stretch correction).
    half_extent = np.tan(fov_rad / 2)
    x = np.linspace(-half_extent, half_extent, width)
    y = np.linspace(-1, 1, height)  # keep vertical stretch simple
    x, y = np.meshgrid(x, -y)  # flip y for image orientation
    z = np.ones_like(x)

    # Normalize direction vectors to unit length
    norm = np.sqrt(x**2 + y**2 + z**2)
    x /= norm
    y /= norm
    z /= norm

    # Rotate around Y axis (theta); y is unaffected by a pure yaw rotation
    x_rot = np.cos(theta_rad) * x + np.sin(theta_rad) * z
    z_rot = -np.sin(theta_rad) * x + np.cos(theta_rad) * z

    # Convert to spherical coordinates
    lon = np.arctan2(x_rot, z_rot)
    lat = np.arcsin(y)

    # Map to image coordinates: longitude spans the full panorama width,
    # latitude spans its height (top row = +90 degrees)
    u = (lon + np.pi) / (2 * np.pi) * equ_w
    v = (np.pi / 2 - lat) / np.pi * equ_h

    # Remap; cv2.remap needs float32 maps. BORDER_WRAP lets samples that fall
    # past the left/right edge wrap around the panorama seam.
    u = u.astype(np.float32)
    v = v.astype(np.float32)
    perspective = cv2.remap(equi_img, u, v, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_WRAP)

    return perspective

In [None]:
# Panorama to process. Other sample panoramas that can be substituted here:
#   'images/argentina/1741691006_-30.9927065_-68.8548332.jpg'
#   'images/italy/1741643380_46.4216176_8.4171613.jpg'
#   'images/new_zealand/1741634793_-45.0532313_168.8153141.jpg'
#   'images/austria/1741629943_47.1592688_12.9692941.jpg'
#   'images/austria/1741630217_46.952463_10.1505049.jpg'
#   'images/norway/1741632139_59.5316586_11.001019.jpg'
equi_path = 'images/new_zealand/1741634412_-45.0330189_168.8001333.jpg'

# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise surface as an opaque error inside cv2.cvtColor.
equi = cv2.imread(equi_path)
if equi is None:
    raise FileNotFoundError(f"Could not read panorama image: {equi_path}")

img = cv2.cvtColor(equi, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB for matplotlib

plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
# Strip the black padding from the panorama before reprojecting it.
img = auto_crop_black_borders(img)

# Display the cropped panorama without axis ticks or frame.
fig, ax = plt.subplots()
ax.imshow(img)
ax.axis('off')
plt.show()

In [None]:
# Front-facing view: yaw 0 degrees, 100 degree horizontal field of view, 600x480 output.
perspective_img = equirectangular_to_perspective(
    img,
    fov=100,
    theta=0,
    height=480,
    width=600,
)

# Display the reprojected view without axis ticks or frame.
fig, ax = plt.subplots()
ax.imshow(perspective_img)
ax.axis('off')
plt.show()

In [None]:
# perspective_img is in RGB order (the panorama was converted with
# cv2.COLOR_BGR2RGB for matplotlib), but cv2.imwrite interprets the array as
# BGR. Convert back first so the saved file does not have red and blue swapped.
output_path = 'test_image_new_zealand.jpg'

# cv2.imwrite reports failure through its boolean return value, so check it
# instead of silently continuing without a file on disk.
if not cv2.imwrite(output_path, cv2.cvtColor(perspective_img, cv2.COLOR_RGB2BGR)):
    raise IOError(f"Failed to write image to {output_path}")

In [None]:
import torch
from geoclip import GeoCLIP

# Instantiate GeoCLIP with its default constructor arguments; the prediction
# cell below calls `model.predict(...)` on this object.
# NOTE(review): presumably this loads pretrained weights and may be slow on
# first use — confirm against the geoclip package.
model = GeoCLIP()

In [None]:
# Reverse-geocoding client backed by the public Nominatim service.
# - user_agent: Nominatim's usage policy asks for a string that identifies the
#   application; replace it with your own app name/contact if you reuse this.
# - timeout: geopy's default is 1 second, which frequently raises
#   GeocoderTimedOut against the public server.
locator = Nominatim(user_agent="geoclip-panorama-notebook", timeout=10)

In [None]:

# NOTE(review): this file is not written by the cells above, which save
# 'test_image_new_zealand.jpg' — confirm this is the intended input.
image_path = "test_image_norway2.jpg"

# Single source of truth for how many candidates are requested and printed.
top_k = 5

top_pred_gps, top_pred_prob = model.predict(image_path, top_k=top_k)

print(f"Top {top_k} GPS Predictions")
print("=====================")
# Iterate over what the model actually returned rather than a fixed range, so
# changing top_k can neither raise IndexError nor drop predictions.
for rank, (gps, prob) in enumerate(zip(top_pred_gps, top_pred_prob), start=1):
    lat, lon = gps.tolist()
    # geopy accepts a (latitude, longitude) pair directly as the query.
    location = locator.reverse((lat, lon), language="en", addressdetails=False, zoom=0)
    print(f"Prediction {rank}: ({lat:.6f}, {lon:.6f})")
    print(f"Location: {location}")
    print(f"Probability: {float(prob):.6f}")
    print("")

In [None]:
# View at yaw 270 degrees (90 degrees to the left of front) with a 90 degree
# horizontal field of view, 600x480 output.
perspective_img = equirectangular_to_perspective(
    img,
    fov=90,
    theta=270,
    height=480,
    width=600,
)

# Display the reprojected view without axis ticks or frame.
fig, ax = plt.subplots()
ax.imshow(perspective_img)
ax.axis('off')
plt.show()