Iteration 1

In [None]:
from google.colab import files
from IPython.display import Image

# Upload an image through the Colab file picker and preview it.
uploaded = files.upload()

# The returned mapping is keyed by filename and may be empty (e.g. when the
# dialog is dismissed); fail with a clear message instead of an IndexError.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and choose an image.")

# Only the first uploaded file is used.
img_name = next(iter(uploaded))
display(Image(filename=img_name))

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Image

from PIL import Image as PILImage

# Prefer the file chosen in the upload step; only fall back to the sample
# name when no upload happened in this session. Unconditionally assigning the
# sample name here would make the upload step irrelevant.
img_name = globals().get("img_name") or "Ocean360Picture.jpg"

# Force 3-channel RGB so the (rows, cols, channels) unpacking below also works
# for grayscale, palette, or RGBA inputs.
with PILImage.open(img_name) as img:
    img_matrix = np.array(img.convert("RGB"))
rows, cols, channel = img_matrix.shape
print("Original image shape:", img_matrix.shape)  # (rows, cols, color_channels)
display(Image(filename=img_name))

# Grid resolution: the image is cut into num_tiles x num_tiles pieces.
num_tiles = 36
tile_height, tile_width = rows // num_tiles, cols // num_tiles

# tiles[r][c] is the sub-array for grid row r and grid column c.
# Because of the floor division above, any remainder rows/columns at the
# bottom/right edge of the image do not belong to any tile.
tiles = [
    [
        img_matrix[
            r * tile_height:(r + 1) * tile_height,
            c * tile_width:(c + 1) * tile_width
        ]
        for c in range(num_tiles)
    ]
    for r in range(num_tiles)
]

# Spot-check a single tile from the grid.
fig, ax = plt.subplots()
ax.imshow(tiles[20][20])
ax.set_title("Tile (20,20)")
ax.axis("off")
plt.show()

# Stitch every grid row left-to-right, then stack the rows top-to-bottom.
rows_combined = []
for tile_row in tiles:
    rows_combined.append(np.hstack(tile_row))
reconstructed = np.vstack(rows_combined)

plt.imshow(reconstructed)
plt.title("Reconstructed 360° Image from 36x36 Tiles")
plt.axis("off")

# Grid spacing is derived from the stitched image itself.
recon_height, recon_width = reconstructed.shape[0], reconstructed.shape[1]
num_tiles = 36
tile_height = recon_height // num_tiles
tile_width = recon_width // num_tiles

grid_style = dict(color='red', linewidth=0.5, alpha=0.6)

# Vertical tile boundaries
for x in range(0, recon_width, tile_width):
    plt.vlines(x, 0, recon_height, **grid_style)

# Horizontal tile boundaries
for y in range(0, recon_height, tile_height):
    plt.hlines(y, 0, recon_width, **grid_style)

plt.show()

Iteration 2

In [None]:
# ======================================
# 360° ERP Image → Spherical Projection Pipeline
# Steps 1–5
# ======================================

# STEP 1: Add and display image

# from google.colab import files

from IPython.display import Image as ColabImage
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Upload ERP image (e.g., Ocean360Picture.jpg)

# uploaded = files.upload()
# filename = list(uploaded.keys())[0]
# ColabImage(filename)

filename = "Ocean360Picture.jpg"

# Read the image and convert OpenCV's BGR channel order to RGB for matplotlib.
# cv2.imread signals a missing/unreadable file by returning None rather than
# raising, so check explicitly instead of failing later inside cvtColor.
erp = cv2.imread(filename)
if erp is None:
    raise FileNotFoundError(f"Could not read image: {filename}")
erp = cv2.cvtColor(erp, cv2.COLOR_BGR2RGB)
H, W, _ = erp.shape

print(f"Height is {H}, and width is {W}")

plt.figure(figsize=(10,5))
plt.imshow(erp)
plt.title("Step 1: Original ERP (Equirectangular Projection) Image")
plt.axis("off")
plt.show()

# ======================================
# STEP 2: Create 36x36 Matrix Placeholder
# ======================================
# Every cell starts as None. Each row is its own list object, so assigning
# one cell never affects another row.
num_tiles = 36
tile_matrix = [[None] * num_tiles for _ in range(num_tiles)]
print(f"Step 2: Created {num_tiles}x{num_tiles} matrix for spherical tiles.")

# ======================================
# STEP 3: Reconstruct Image as Sphere
# ======================================
# Sample the ERP on a num_tiles x num_tiles mesh of pixel coordinates.
u = np.linspace(0, W - 1, num_tiles)
v = np.linspace(0, H - 1, num_tiles)
U, V = np.meshgrid(u, v)

# Convert ERP pixel coords → spherical angles.
# U spans [0, W-1] and V spans [0, H-1], so normalise by (W-1) and (H-1):
# the last mesh column then lands exactly on λ = +π (coinciding with the
# first column at -π) and the last row on φ = -π/2. Dividing by W and H
# stops short of both, leaving the surface open at the seam and south pole.
# max(..., 1) only guards a degenerate 1-pixel-wide/high image.
lam = 2 * np.pi * (U / max(W - 1, 1) - 0.5)   # longitude [-π, π]
phi = np.pi * (0.5 - V / max(H - 1, 1))       # latitude [π/2, -π/2]

# Convert spherical → Cartesian coords (X,Y,Z) on the unit sphere
X = np.cos(phi) * np.cos(lam)
Y = np.sin(phi)
Z = np.cos(phi) * np.sin(lam)

# Sample colors from ERP for each mesh vertex (nearest lower pixel)
U_int = np.clip(U.astype(int), 0, W - 1)
V_int = np.clip(V.astype(int), 0, H - 1)
colors = erp[V_int, U_int, :] / 255.0  # normalize to [0,1] for matplotlib

# Plot 3D sphere
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, facecolors=colors, rstride=1, cstride=1, linewidth=0, antialiased=False)
ax.set_box_aspect([1,1,1])
ax.set_title("Step 3: Seamless 360° Spherical Reconstruction (36×36 Grid)")
ax.axis("off")
plt.show()

# ======================================
# STEP 4: Split ERP into Spherical Tiles
# ======================================
# Tile (j, i) covers longitude [-π + i·Δλ, -π + (i+1)·Δλ] and
# latitude [π/2 - (j+1)·Δφ, π/2 - j·Δφ].
delta_lambda = 2 * np.pi / num_tiles   # longitude step
delta_phi = np.pi / num_tiles          # latitude step

# Longitude and latitude are linear in ERP x and y, so the pixel edge of the
# k-th boundary is simply floor(k * size / num_tiles). Integer arithmetic
# avoids a float round-trip through the angles, where int() truncation can
# land one pixel short (including on the final edge, which must equal W / H).
# The edges are also loop-invariant, so they are computed once.
u_edges = [(k * W) // num_tiles for k in range(num_tiles + 1)]
v_edges = [(k * H) // num_tiles for k in range(num_tiles + 1)]

for j in range(num_tiles):
    v_min, v_max = v_edges[j], v_edges[j + 1]
    for i in range(num_tiles):
        u_min, u_max = u_edges[i], u_edges[i + 1]
        # Extract tile region from ERP (a view, not a copy)
        tile_matrix[j][i] = erp[v_min:v_max, u_min:u_max, :]

print("Step 4: Image successfully divided into 36×36 spherical tiles.")

# Quick check: display one random tile (unseeded, so it differs between runs)
test_i, test_j = np.random.randint(0, num_tiles, 2)
tile = tile_matrix[test_j][test_i]
plt.figure(figsize=(4,4))
plt.imshow(tile)
plt.title(f"Sample Tile [{test_j}, {test_i}] (Lat/Long patch on sphere)")
plt.axis("off")
plt.show()

# ======================================
# STEP 5: ERP Image with Non-Distorted Grid Lines (Equal Spherical Angles)
# ======================================

fig, ax = plt.subplots(figsize=(12,6))
ax.imshow(erp)
ax.set_title("Step 5: ERP Image with Spherical-Angle Grid (36×36)")
ax.axis("off")

# Number of divisions along each axis
num_tiles = 36
line_style = {"color": 'red', "linewidth": 0.7}

# Meridians: interior longitude boundaries, evenly spaced in λ
for x in np.arange(1, num_tiles) * (W / num_tiles):
    ax.plot([x, x], [0, H], **line_style)

# Parallels: num_tiles + 1 boundaries at equal steps in φ, from +π/2 to -π/2.
# The φ → y conversion is linear, so these are also evenly spaced in pixels.
phi_vals = np.linspace(np.pi/2, -np.pi/2, num_tiles + 1)
y_positions = H * (0.5 - (phi_vals / np.pi))

for y in y_positions:
    ax.plot([0, W], [y, y], **line_style)

plt.show()

# Report the integer pixel height of every latitude band.
for j, (band_top, band_bottom) in enumerate(zip(y_positions[:-1], y_positions[1:])):
    pixel_height = int(band_bottom) - int(band_top)
    print(f"Tile {j}: pixel height = {pixel_height}")



Iteration 3

In [None]:
# ======================================
# 360° ERP Image → Spherical Projection Pipeline
# Steps 1–5
# ======================================

# STEP 1: Add and display image

# from google.colab import files

from IPython.display import Image as ColabImage
import matplotlib.pyplot as plt
import numpy as np
import cv2
import math

# Upload ERP image (e.g., Ocean360Picture.jpg)

# uploaded = files.upload()
# filename = list(uploaded.keys())[0]
# ColabImage(filename)

filename = "Street360Picture.jpg"

# Read the image and convert OpenCV's BGR channel order to RGB for matplotlib.
# cv2.imread signals a missing/unreadable file by returning None rather than
# raising, so check explicitly instead of failing later inside cvtColor.
erp = cv2.imread(filename)
if erp is None:
    raise FileNotFoundError(f"Could not read image: {filename}")
erp = cv2.cvtColor(erp, cv2.COLOR_BGR2RGB)
H, W, _ = erp.shape

print(f"Height is {H}, and width is {W}")

plt.figure(figsize=(10,5))
plt.imshow(erp)
plt.title("Step 1: Original ERP (Equirectangular Projection) Image")
plt.axis("off")
plt.show()

# Unlike simply slicing a rectangular region from the original ERP image, which causes distortion
# (especially near the poles), this method defines each tile directly on the sphere using uniform
# latitude and longitude steps.
#
# It also differs from a forward approach where each ERP pixel is projected to the sphere and
# then back to ERP for each tile, which is more complex and can leave gaps.
#
# Here, each tile pixel is reverse-mapped to fractional coordinates in the original ERP image and
# sampled using interpolation. This ensures angular uniformity, reduces distortion, and produces
# rectified tiles that accurately represent the spherical surface.

# Angular size of one tile, in degrees of longitude and latitude.
widthDegree, heightDegree = 20, 20

# Number of tiles around the full 360° of longitude and 180° of latitude.
num_lon_tiles = int(360 / widthDegree)   # 18
num_lat_tiles = int(180 / heightDegree)  # 9

# Output tile size in pixels. Width and height are each derived independently
# from the image dimensions, rounded to the nearest whole pixel.
tileWidth = int(round(W / num_lon_tiles))
tileHeight = int(round(H / num_lat_tiles))

print(f"Wdith will be {tileWidth}, height will be {tileHeight}")

# finalTiles[lat][lon] will hold the image of each tile (not a list of
# pixels); indexing is [latitude band][longitude column].
finalTiles = [[None] * num_lon_tiles for _ in range(num_lat_tiles)]

# === Step 5: Loop over each tile (lat × lon grid) ===
#
# Every output pixel of a tile is reverse-mapped to a fractional (x, y)
# position in the source ERP and sampled there with bilinear interpolation.
#
# Sampling positions are the *centres* of the tile's pixels, i.e. fractions
# (k + 0.5) / n of the tile's angular extent. Using np.linspace with both
# endpoints would make neighbouring tiles sample the shared boundary twice,
# duplicating a column/row at every seam of the stitched image.
#
# NOTE(review): longitude/latitude map linearly to ERP x/y here, so each tile
# is effectively an interpolated rectangular crop of the ERP; no per-tile
# re-projection (e.g. gnomonic) is applied. Confirm this matches the intent
# described as "rectified".
col_frac = (np.arange(tileWidth) + 0.5) / tileWidth     # shape: (tileWidth,)
row_frac = (np.arange(tileHeight) + 0.5) / tileHeight   # shape: (tileHeight,)

for lat_i in range(num_lat_tiles):
    # Top edge of this latitude band (bands start at +90° and go south).
    lat_max = 90.0 - lat_i * heightDegree

    # Latitude of every pixel row in the tile, north to south.
    lat_vals = lat_max - row_frac * heightDegree

    # lat → ERP row. ERP row r has its centre at fraction (r + 0.5) / H of the
    # 180° range, hence the "* H - 0.5". The clip keeps rounding at the poles
    # from stepping outside the image, where BORDER_WRAP would otherwise wrap
    # vertically to the opposite pole.
    erp_y_vec = np.clip((90.0 - lat_vals) / 180.0 * H - 0.5, 0, H - 1)

    # cv2.remap needs full 2D maps: map_y[i, j] is the source row for output
    # pixel (i, j). It depends only on the latitude band, so it is built once
    # per band by repeating the column vector across tileWidth columns.
    map_y = np.repeat(erp_y_vec[:, np.newaxis], tileWidth, axis=1).astype(np.float32)

    for lon_j in range(num_lon_tiles):
        # Left edge of this longitude column (columns start at -180°).
        lon_min = lon_j * widthDegree - 180.0

        # Longitude of every pixel column in the tile, west to east.
        lon_vals = lon_min + col_frac * widthDegree

        # lon → ERP column, using the same pixel-centre convention as rows.
        erp_x_vec = (lon_vals + 180.0) / 360.0 * W - 0.5

        # map_x[i, j] is the source column for output pixel (i, j): the row
        # vector is copied tileHeight times.
        map_x = np.tile(erp_x_vec, (tileHeight, 1)).astype(np.float32)  # (tileHeight, tileWidth)

        # tile_img[i, j] = erp sampled at (map_x[i, j], map_y[i, j]).
        # BORDER_WRAP lets samples that fall slightly past the left/right edge
        # wrap around the ±180° seam.
        tile_img = cv2.remap(
            erp, map_x, map_y,
            interpolation=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_WRAP
        )

        # Save this tile
        finalTiles[lat_i][lon_j] = tile_img

# === Step 9: Combine tiles back into a single ERP-like image ===

# Join the tiles of every latitude band side by side, then stack the bands
# from north to south.
rows_combined = [np.hstack(band) for band in finalTiles]
erp_reconstructed = np.vstack(rows_combined)

print("Reconstructed ERP shape:", erp_reconstructed.shape)

# Show the stitched result followed by the original for visual comparison.
comparison = (
    (erp_reconstructed, "Reconstructed ERP from 20°×20° rectified tiles"),
    (erp, "Step 1: Original ERP (Equirectangular Projection) Image"),
)
for image, caption in comparison:
    plt.figure(figsize=(12, 6))
    plt.imshow(image)
    plt.title(caption)
    plt.axis("off")
plt.show()


# Tile to inspect, as (longitude column, latitude band) grid indices.
longNum = 41
latNum = 4

# finalTiles has only num_lon_tiles columns and num_lat_tiles rows, so an
# out-of-range request (such as 41 on an 18-column grid) would raise
# IndexError. Longitude is periodic, so wrap it; latitude is clamped.
longNum = longNum % num_lon_tiles
latNum = min(max(latNum, 0), num_lat_tiles - 1)

plt.figure(figsize=(12, 6))
plt.imshow(finalTiles[latNum][longNum])
plt.title("Reconstructed Tile")
plt.axis("off")

ogImage = np.array(erp)

# Plain rectangular crop of the source image at the same grid position.
plt.figure(figsize=(12, 6))
plt.imshow(ogImage[latNum*tileHeight:latNum*tileHeight + tileHeight, longNum*tileWidth:longNum*tileWidth + tileWidth])
plt.title("Original Tile")
plt.axis("off")
plt.show()



Iteration 4

In [None]:
# ============================================================
# 360° ERP Image → Rectified Tile Projection → Saliency Models
# DeepLabV3 | U²-NetP | BASNet
# ============================================================

from IPython.display import Image as ColabImage
import matplotlib.pyplot as plt
import numpy as np
import cv2
import math
import torch
import torch.nn.functional as F
from torchvision import transforms, models
import os

# ============================================================
# STEP 1: Load ERP Image
# ============================================================

filename = "Ocean360Picture.jpg"
# cv2.imread signals a missing/unreadable file by returning None rather than
# raising, so check explicitly instead of failing later inside cvtColor.
erp = cv2.imread(filename)
if erp is None:
    raise FileNotFoundError(f"Could not read image: {filename}")
erp = cv2.cvtColor(erp, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB
H, W, _ = erp.shape
print(f"Height = {H}, Width = {W}")

plt.figure(figsize=(10, 5))
plt.imshow(erp)
plt.title("Original ERP (Equirectangular Projection) Image")
plt.axis("off")
plt.show()

# ============================================================
# STEP 2: Rectified Tiling (20°×20°)
# ============================================================
# Each tile pixel is reverse-mapped to a fractional position in the ERP and
# sampled with bilinear interpolation (cv2.remap).

widthDegree, heightDegree = 20, 20
num_lon_tiles = int(360 / widthDegree)
num_lat_tiles = int(180 / heightDegree)

tileWidth = int(round(W / num_lon_tiles))
tileHeight = int(round(H / num_lat_tiles))
print(f"Tile width = {tileWidth}, Tile height = {tileHeight}")

# finalTiles[lat][lon] holds the image of each tile.
finalTiles = [[None for _ in range(num_lon_tiles)] for _ in range(num_lat_tiles)]

# Sample at tile pixel centres: fractions (k + 0.5) / n of the tile's angular
# extent. Using np.linspace with both endpoints would make neighbouring tiles
# sample the shared boundary twice, duplicating a column/row at every seam.
col_frac = (np.arange(tileWidth) + 0.5) / tileWidth
row_frac = (np.arange(tileHeight) + 0.5) / tileHeight

for lat_i in range(num_lat_tiles):
    lat_max = 90.0 - lat_i * heightDegree            # top edge of the band
    lat_vals = lat_max - row_frac * heightDegree     # per-row latitudes, north → south

    # ERP row r is centred at fraction (r + 0.5) / H of the 180° range, hence
    # "* H - 0.5". The clip stops rounding at the poles from stepping outside
    # the image, where BORDER_WRAP would wrap vertically.
    erp_y_vec = np.clip((90.0 - lat_vals) / 180.0 * H - 0.5, 0, H - 1)
    # Depends only on the latitude band, so build it once per band.
    map_y = np.repeat(erp_y_vec[:, np.newaxis], tileWidth, axis=1).astype(np.float32)

    for lon_j in range(num_lon_tiles):
        lon_min = lon_j * widthDegree - 180.0        # left edge of the column
        lon_vals = lon_min + col_frac * widthDegree  # per-column longitudes, west → east

        erp_x_vec = (lon_vals + 180.0) / 360.0 * W - 0.5
        map_x = np.tile(erp_x_vec, (tileHeight, 1)).astype(np.float32)

        # BORDER_WRAP lets samples slightly past the left/right edge wrap
        # around the ±180° seam.
        tile_img = cv2.remap(erp, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_WRAP)
        finalTiles[lat_i][lon_j] = tile_img

# ============================================================
# STEP 3: Reconstruct ERP from Rectified Tiles
# ============================================================

# Concatenate each latitude band horizontally, then stack the bands vertically.
stitched_bands = []
for band in finalTiles:
    stitched_bands.append(np.hstack(band))
erp_reconstructed = np.vstack(stitched_bands)
print("Reconstructed ERP shape:", erp_reconstructed.shape)

fig, ax = plt.subplots(figsize=(12, 6))
ax.imshow(erp_reconstructed)
ax.set_title("Reconstructed ERP from 20°×20° Rectified Tiles")
ax.axis("off")
plt.show()

# ============================================================
# STEP 4: DeepLabV3
# ============================================================
# DeepLabV3 is a semantic segmentation network, not a saliency model. Here
# "saliency" is taken to be the per-pixel probability of belonging to any
# non-background class.

plt.figure(figsize=(12, 6))
plt.imshow(erp_reconstructed)
plt.title("Reconstructed ERP from 20°×20° Rectified Tiles")
plt.axis("off")
plt.show()

deeplab = models.segmentation.deeplabv3_resnet50(pretrained=True).eval()
transform_dl = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # torchvision's pretrained weights expect ImageNet-normalised input
    # (see the torchvision model documentation).
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

saliencyTiles_dl = [[None for _ in range(num_lon_tiles)] for _ in range(num_lat_tiles)]

for lat_i in range(num_lat_tiles):
    for lon_j in range(num_lon_tiles):
        tile_img = finalTiles[lat_i][lon_j]
        input_tensor = transform_dl(tile_img).unsqueeze(0)
        with torch.no_grad():
            # Per-class logits for the single image in the batch.
            logits = deeplab(input_tensor)["out"][0]
            # Per the torchvision segmentation docs, class index 0 is the
            # background. sigmoid(logits[0]) would therefore be high on
            # *background* pixels, i.e. an inverted map. Use the foreground
            # probability 1 - P(background) instead.
            saliency = (1.0 - torch.softmax(logits, dim=0)[0]).cpu().numpy()
        # cv2.resize takes (width, height)
        saliency_resized = cv2.resize(saliency, (tileWidth, tileHeight))
        saliencyTiles_dl[lat_i][lon_j] = saliency_resized

# Stitch the per-tile maps and min-max normalise the whole mosaic at once.
erp_sal_dl = np.vstack([np.hstack(row) for row in saliencyTiles_dl])
erp_sal_dl = (erp_sal_dl - erp_sal_dl.min()) / (erp_sal_dl.max() - erp_sal_dl.min() + 1e-8)

plt.figure(figsize=(12, 6))
plt.imshow(erp_sal_dl, cmap="inferno")
plt.title("Saliency Map (DeepLabV3 on Reconstructed ERP Tiles)")
plt.axis("off")
plt.show()

# ============================================================
# STEP 5: U²-NetP
# ============================================================

plt.figure(figsize=(12, 6))
plt.imshow(erp_reconstructed)
plt.title("Reconstructed ERP from 20°×20° Rectified Tiles")
plt.axis("off")
plt.show()

!pip install -q gdown
!git clone -q https://github.com/xuebinqin/U-2-Net.git
%cd U-2-Net

from model.u2net import U2NETP
!gdown -q 1rbSTGKAE-MTxBYHd-51l2hMOQPT_7EPy -O u2netp.pth

u2netp = U2NETP(3, 1)
u2netp.load_state_dict(torch.load("u2netp.pth", map_location="cpu"))
u2netp.eval()

transform_u2 = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((320, 320)),
    transforms.ToTensor()
])

saliencyTiles_u2 = [[None for _ in range(num_lon_tiles)] for _ in range(num_lat_tiles)]

for lat_i in range(num_lat_tiles):
    for lon_j in range(num_lon_tiles):
        tile_img = finalTiles[lat_i][lon_j]
        if tile_img is None:
            continue
        input_tensor = transform_u2(tile_img).unsqueeze(0)
        with torch.no_grad():
            d1, *_ = u2netp(input_tensor)
            pred = F.interpolate(d1, size=(tileHeight, tileWidth), mode="bilinear", align_corners=False)
            saliency = pred.squeeze().cpu().numpy()
        saliency = (saliency - saliency.min()) / (saliency.max() - saliency.min() + 1e-8)
        saliencyTiles_u2[lat_i][lon_j] = saliency

erp_sal_u2 = np.vstack([np.hstack(row) for row in saliencyTiles_u2])
plt.figure(figsize=(12, 6))
plt.imshow(erp_sal_u2, cmap="inferno")
plt.title("Saliency Map (U²-NetP on Reconstructed ERP Tiles)")
plt.axis("off")
plt.show()


# ============================================================
# STEP 6: BASNet
# ============================================================

plt.figure(figsize=(12, 6))
plt.imshow(erp_reconstructed)
plt.title("Reconstructed ERP from 20°×20° Rectified Tiles")
plt.axis("off")
plt.show()

#!git clone -q https://github.com/xuebinqin/BASNet.git
#from BASNet.model import BASNet
#!gdown -q 1-Yg0cxgrNhHP-016FPdp902BR-kSsA4P -O BASNet.pth

!curl -L -o BASNet.pth "https://huggingface.co/creative-graphic-design/BASNet-checkpoints/resolve/main/basnet.pth"

from BASNet.model import BASNet
import torch

basnet = BASNet(3, 1)
basnet.load_state_dict(torch.load("BASNet.pth", map_location="cpu"))
basnet.eval()

transform_bas = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((320, 320)),
    transforms.ToTensor()
])

saliencyTiles_bas = [[None for _ in range(num_lon_tiles)] for _ in range(num_lat_tiles)]

for lat_i in range(num_lat_tiles):
    for lon_j in range(num_lon_tiles):
        tile_img = finalTiles[lat_i][lon_j]
        if tile_img is None:
            continue
        input_tensor = transform_bas(tile_img).unsqueeze(0)
        with torch.no_grad():
            d1, *_ = basnet(input_tensor)
            pred = F.interpolate(d1, size=(tileHeight, tileWidth), mode="bilinear", align_corners=False)
            saliency = pred.squeeze().cpu().numpy()
        saliency = (saliency - saliency.min()) / (saliency.max() - saliency.min() + 1e-8)
        saliencyTiles_bas[lat_i][lon_j] = saliency

erp_sal_bas = np.vstack([np.hstack(row) for row in saliencyTiles_bas])
plt.figure(figsize=(12, 6))
plt.imshow(erp_sal_bas, cmap="inferno")
plt.title("Saliency Map (BASNet on Reconstructed ERP Tiles)")
plt.axis("off")
plt.show()

Iteration 5

In [None]:
# ============================================================
# 360° ERP Image → Rectified Tile Projection → Saliency Models
# DeepLabV3 | U²-NetP | BASNet | MLNet | SalGAN
# ============================================================

from IPython.display import Image as ColabImage
import matplotlib.pyplot as plt
import numpy as np
import cv2
import math
import torch
import torch.nn.functional as F
from torchvision import transforms
import os

# ============================================================
# STEP 0: Setup
# ============================================================

!pip install -q gdown
!git clone -q https://github.com/xuebinqin/U-2-Net.git
%cd U-2-Net
from model.u2net import U2NETP
!gdown -q 1rbSTGKAE-MTxBYHd-51l2hMOQPT_7EPy -O u2netp.pth

u2netp = U2NETP(3, 1)
u2netp.load_state_dict(torch.load("u2netp.pth", map_location="cpu"))
u2netp.eval()

transform_u2 = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((320, 320)),
    transforms.ToTensor()
])

# ============================================================
# STEP 1–6: Define Processing Function
# ============================================================

def _minmax_normalize(arr):
    """Linearly rescale ``arr`` to [0, 1]; the 1e-8 term guards a constant map against 0/0."""
    return (arr - arr.min()) / (arr.max() - arr.min() + 1e-8)


def _mean_relative_diff_percent(a, b):
    """Return the mean symmetric relative difference between two maps, in percent.

    The denominator is the per-pixel mean of both maps, so for non-negative
    inputs every term is bounded by 200 %. Dividing by only one of the maps
    would explode wherever that map is ~0.
    """
    return np.mean(np.abs(a - b) / ((a + b) / 2 + 1e-8) * 100)


def _rectified_tiles(erp, widthDegree, heightDegree):
    """Cut an RGB ERP image into a grid of tiles of fixed angular size.

    Each tile pixel is reverse-mapped to a fractional ERP position and sampled
    with bilinear interpolation via cv2.remap.

    Returns:
        (tiles, tileWidth, tileHeight) where ``tiles[lat][lon]`` is the image
        of one tile and the sizes are in pixels.
    """
    H, W = erp.shape[:2]
    num_lon_tiles = int(360 / widthDegree)
    num_lat_tiles = int(180 / heightDegree)
    tileWidth = int(round(W / num_lon_tiles))
    tileHeight = int(round(H / num_lat_tiles))

    # Sample at tile pixel centres: fractions (k + 0.5) / n of the tile's
    # angular extent. Using np.linspace with both endpoints would make
    # neighbouring tiles sample the shared boundary twice, duplicating a
    # column/row at every seam.
    col_frac = (np.arange(tileWidth) + 0.5) / tileWidth
    row_frac = (np.arange(tileHeight) + 0.5) / tileHeight

    tiles = [[None for _ in range(num_lon_tiles)] for _ in range(num_lat_tiles)]
    for lat_i in range(num_lat_tiles):
        lat_max = 90.0 - lat_i * heightDegree
        lat_vals = lat_max - row_frac * heightDegree
        # ERP row r is centred at fraction (r + 0.5) / H of the 180° range.
        # The clip stops rounding at the poles from stepping outside the
        # image, where BORDER_WRAP would wrap vertically.
        erp_y_vec = np.clip((90.0 - lat_vals) / 180.0 * H - 0.5, 0, H - 1)
        # Depends only on the latitude band, so build it once per band.
        map_y = np.repeat(erp_y_vec[:, np.newaxis], tileWidth, axis=1).astype(np.float32)

        for lon_j in range(num_lon_tiles):
            lon_min = lon_j * widthDegree - 180.0
            lon_vals = lon_min + col_frac * widthDegree
            erp_x_vec = (lon_vals + 180.0) / 360.0 * W - 0.5
            map_x = np.tile(erp_x_vec, (tileHeight, 1)).astype(np.float32)

            # BORDER_WRAP lets samples slightly past the left/right edge wrap
            # around the ±180° seam.
            tiles[lat_i][lon_j] = cv2.remap(
                erp, map_x, map_y,
                interpolation=cv2.INTER_LINEAR,
                borderMode=cv2.BORDER_WRAP,
            )
    return tiles, tileWidth, tileHeight


def _u2netp_saliency(rgb_image, out_hw):
    """Run the module-level ``u2netp`` on one RGB image.

    Returns the first network output, bilinearly resized to ``out_hw``
    (height, width), as a 2D numpy array. No normalisation is applied.
    """
    input_tensor = transform_u2(rgb_image).unsqueeze(0)
    with torch.no_grad():
        d1, *_ = u2netp(input_tensor)
        pred = F.interpolate(d1, size=out_hw, mode="bilinear", align_corners=False)
    return pred[0, 0].cpu().numpy()


def process_image(filename):
    """Run the full ERP → tiles → U²-NetP saliency → fusion pipeline on one image.

    Loads the image, cuts it into 20°×20° tiles, computes a "global" saliency
    map (whole stitched image) and a "local" one (per tile, stitched), reports
    how much they differ, and shows an adaptively weighted fusion of the two.
    All results are plotted or printed; nothing is returned.

    Relies on the module-level ``u2netp`` model and ``transform_u2`` transform.

    Args:
        filename: Path of the equirectangular image. If it cannot be read, a
            warning is printed and the function returns early.
    """
    print("\n" + "="*60)
    print(f"Processing: {filename}")
    print("="*60)

    # STEP 1: Load ERP Image
    erp = cv2.imread(filename)
    if erp is None:
        print(f"⚠️ File not found: {filename}")
        return
    erp = cv2.cvtColor(erp, cv2.COLOR_BGR2RGB)
    H, W, _ = erp.shape
    print(f"Height = {H}, Width = {W}")

    plt.figure(figsize=(10, 5))
    plt.imshow(erp)
    plt.title(f"Original ERP: {os.path.basename(filename)}")
    plt.axis("off")
    plt.show()

    # STEP 2: Rectified Tiling (20°×20°)
    widthDegree, heightDegree = 20, 20
    finalTiles, tileWidth, tileHeight = _rectified_tiles(erp, widthDegree, heightDegree)
    print(f"Tile width = {tileWidth}, Tile height = {tileHeight}")

    # STEP 3: Reconstruct ERP from Rectified Tiles
    erp_reconstructed = np.vstack([np.hstack(row) for row in finalTiles])
    print("Reconstructed ERP shape:", erp_reconstructed.shape)

    plt.figure(figsize=(12, 6))
    plt.imshow(erp_reconstructed)
    plt.title("Reconstructed ERP from 20°×20° Rectified Tiles")
    plt.axis("off")
    plt.show()

    # STEP 5: Run U²-NetP (Global) on the whole stitched image
    saliency_full_resized = _minmax_normalize(_u2netp_saliency(erp_reconstructed, (H, W)))

    plt.figure(figsize=(12, 6))
    plt.imshow(saliency_full_resized, cmap="inferno")
    plt.title("U²-NetP Saliency Map (Full ERP)")
    plt.axis("off")
    plt.show()

    # STEP 5b: Run U²-NetP (Tiles)
    saliencyTiles_u2 = [
        [_u2netp_saliency(tile_img, (tileHeight, tileWidth)) for tile_img in row]
        for row in finalTiles
    ]

    # Normalise the stitched mosaic as a whole. Min-max stretching every tile
    # separately would force each tile to span [0, 1], so tiles with nothing
    # salient would look as salient as any other.
    erp_sal_u2 = _minmax_normalize(np.vstack([np.hstack(row) for row in saliencyTiles_u2]))

    plt.figure(figsize=(12, 6))
    plt.imshow(erp_sal_u2, cmap="inferno")
    plt.title("Saliency Map (U²-NetP on Reconstructed ERP Tiles)")
    plt.axis("off")
    plt.show()

    # STEP 6: Percentage Difference
    # The stitched map can differ from (H, W) by tile-size rounding, so resize
    # it once; cv2.resize takes (width, height).
    erp_sal_u2_resized = cv2.resize(erp_sal_u2, (W, H))
    # Symmetric relative difference (same measure as the fused-vs-global
    # comparison below). Dividing by the global map alone is unusable here:
    # after min-max normalisation its minimum is exactly 0.
    mean_diff = _mean_relative_diff_percent(saliency_full_resized, erp_sal_u2_resized)
    print(f"\nAverage U²-NetP saliency difference between full ERP and tile-based ERP: {mean_diff:.2f}%")

    # STEP 7: Fusion
    # Compute global scene complexity to adapt fusion weight (α)
    gray_img = cv2.cvtColor(erp_reconstructed, cv2.COLOR_RGB2GRAY)
    lap_var = cv2.Laplacian(gray_img, cv2.CV_64F).var()   # detail measure

    # Shannon entropy of the gray-level histogram. Use probabilities
    # (counts / total) from one bin per 8-bit level; density=True would return
    # densities that do not sum to 1 for these bin widths.
    counts, _ = np.histogram(gray_img, bins=256, range=(0, 256))
    probs = counts / max(counts.sum(), 1)
    entropy = -np.sum(probs * np.log2(probs + 1e-8))

    # Normalize both metrics to [0,1] scale. 8 bits is the maximum entropy of
    # a 256-level histogram; 500 is an empirical scale for the Laplacian
    # variance (NOTE(review): confirm this constant suits your images).
    lap_var_norm = np.clip(lap_var / 500.0, 0, 1)
    entropy_norm = np.clip(entropy / 8.0, 0, 1)

    # Adaptive fusion weight (α), always in [0.3, 1.0]
    # More detail → lower α (favor local)
    # Less detail → higher α (favor global)
    alpha = 0.3 + 0.7 * (1 - lap_var_norm * entropy_norm)

    print(f"Scene detail (Laplacian variance): {lap_var:.2f}, Entropy: {entropy:.2f}")
    print(f"Adaptive fusion weight α = {alpha:.2f}  (higher = more global, lower = more local)")

    saliency_fused = alpha * saliency_full_resized + (1 - alpha) * erp_sal_u2_resized
    saliency_fused = cv2.GaussianBlur(saliency_fused, (5, 5), 1)   # soften tile seams
    saliency_fused = _minmax_normalize(saliency_fused)

    plt.figure(figsize=(12, 6))
    plt.imshow(saliency_fused, cmap="inferno")
    plt.title(f"Adaptive Fused Saliency Map (α={alpha:.2f})")
    plt.axis("off")
    plt.show()

    # Compare fused vs. global
    mean_diff_fused = _mean_relative_diff_percent(saliency_full_resized, saliency_fused)
    print(f"Average relative difference between fused and global saliency: {mean_diff_fused:.2f}%")
    print("-" * 60)

# ============================================================
# STEP 8: Run for All Files
# ============================================================

# Images to process, in order. Unreadable paths are reported by
# process_image and skipped.
file_list = [
    "/content/Ocean360Picture.jpg",
    "/content/aerial-drone-panorama-view-village-located-near-river-hills-fields-godrays-clouds-moldova.jpg",
    "/content/new-york-city-manhattan.jpg",
]

for image_path in file_list:
    process_image(image_path)