# MERE GROUPING PROCESSINHG

In [7]:
import os
import json
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import vgg19, VGG19_Weights
from torchvision import transforms
from PIL import Image
import pickle
import re
showcase_dir = "/Users/wangzhuoyulucas/SMART /generatedImg/show-case-FID"
groupings_output = "/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json"

# 1) Define multiple regex patterns
patterns = [
    # a) original pattern: something containing "_cv.jpg" and ending "_gt.png"
    re.compile(r'_cv\.jpg.*_gt\.png$', re.IGNORECASE),
    
    # b) pattern for e.g. "34_45_r0_d0_cv.jpggt-Chicago"
    #    or "61_52_r0_d1_cv.jpggt-Mexico.png"
    #    We look for "_cv.jpggt-" + (some city text) + optional extension
    #    NOTE: Adjust character class [^.]+ if city names can have spaces, etc.
    re.compile(r'_cv\.jpggt-[^.]+(\.png|\.jpg|\.jpeg|\.tiff|\.bmp)?$', re.IGNORECASE),
]

groupings_list = []

for fname in os.listdir(showcase_dir):
    # Skip directories
    if os.path.isdir(os.path.join(showcase_dir, fname)):
        continue
    
    # Check each pattern; if any matches, we collect it
    for pat in patterns:
        if pat.search(fname):
            groupings_list.append(fname)
            break  # stop checking other patterns once matched

# Save to JSON
with open(groupings_output, 'w') as f:
    json.dump(groupings_list, f, indent=2)

print(f"Found {len(groupings_list)} matching filenames.")
print(f"Groupings extracted and saved to: {groupings_output}")

Found 299 matching filenames.
Groupings extracted and saved to: /Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json


# GROUPING + GENERATED IMAGE PATHS MAPPINGS


In [None]:
import os
import json
import re

# Define your directories
showcase_dir = "/Users/wangzhuoyulucas/SMART /generatedImg/show-case-FID"
groupings_output = "/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json"

# 1) Define multiple regex patterns
patterns = [
    # a) original pattern: something containing "_cv.jpg" and ending with "_gt.png"
    re.compile(r'_cv\.jpg.*_gt\.png$', re.IGNORECASE),
    
    # b) pattern for e.g. "34_45_r0_d0_cv.jpggt-Chicago" or "61_52_r0_d1_cv.jpggt-gt-Mexico.png"
    #    This regex captures the city name (removing the extension).
    re.compile(r'_cv\.jpggt-(.+?)(\.png|\.jpg|\.jpeg|\.tiff|\.bmp)$', re.IGNORECASE),
]

# This list will hold dictionaries for each grouping.
groupings_list = []

for fname in os.listdir(showcase_dir):
    # Skip directories
    if os.path.isdir(os.path.join(showcase_dir, fname)):
        continue
    
    # Check each pattern; if any matches, we process this file.
    for pat in patterns:
        m = pat.search(fname)
        if m:
            # Initialize variables
            base = None
            city = None

            # Case 1: filenames like "14_16_r0_d0_cv.jpgStockholm._gt.png"
            if "_cv.jpggt-" in fname:
                base = fname.split("_cv.jpggt-")[0]
                city = m.group(1)  # capture group from regex
                print(city)
                # If the city string starts with "gt-", remove it.
                if city.lower().startswith("gt-"):
                    city = city[3:]
            elif "_cv.jpg" in fname:
                base = fname.split("_cv.jpg")[0]
                remainder = fname.split("_cv.jpg")[1]
                if remainder.endswith("_gt.png"):
                    city = remainder[:-len("_gt.png")]
                else:
                    city = remainder
            # Case 2: filenames like "34_45_r0_d0_cv.jpggt-Chicago" or "61_52_r0_d1_cv.jpggt-gt-Mexico.png"
            
            else:
                continue

            # Clean up the city string (remove extra whitespace and trailing punctuation such as . or _ or -)
            city = city.strip().rstrip("._-")

            # Create generated image paths using the specified pattern:
            # {base}___{city}____{i}.png   for i in 0, 1, 2.
            generated_paths = [f"{base}___{city}____{i}.png" for i in range(3)]
            grouping_item = {
                "original": fname,
                "base": base,
                "city": city,
                "generated": generated_paths
            }
            groupings_list.append(grouping_item)
            break

output_data = {"groupings": groupings_list}

with open(groupings_output, 'w') as f:
    json.dump(output_data, f, indent=2)

print(f"Found {len(groupings_list)} matching filenames.")
print(f"Groupings extracted and saved to: {groupings_output}")


Mexico
Chicago
Found 299 matching filenames.
Groupings extracted and saved to: /Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json


# Real processing

## By group, ground truth vs generated, per group per specific city

In [2]:
import os
import json
import numpy as np
import pandas as pd
import torch
import torchmetrics
import torchvision.transforms as transforms
from PIL import Image
import concurrent.futures
from tqdm import tqdm  # progress bar

# Global base directory (all images are stored here)
base_dir = "/Users/wangzhuoyulucas/SMART /generatedImg/show-case-FID"

# -------------------------------
# Helper Function: Load and Preprocess Image
# -------------------------------
def load_image(image_path, device):
    """
    Load an image from disk, resize to 224x224, convert to tensor, and send it to device.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
    image = Image.open(image_path).convert("RGB")
    return transform(image).unsqueeze(0).to(device)

# -------------------------------
# Helper Functions to Compute Metrics from Preloaded Tensors
# -------------------------------
def compute_l2_distance_tensor(img1, img2):
    return torchmetrics.functional.pairwise_euclidean_distance(img1.flatten(1), img2.flatten(1)).item()

def compute_psnr_tensor(img1, img2, device):
    psnr_metric = torchmetrics.image.PeakSignalNoiseRatio().to(device)
    return psnr_metric(img1, img2).item()

def compute_ssim_tensor(img1, img2, device):
    ssim_metric = torchmetrics.image.StructuralSimilarityIndexMeasure().to(device)
    return ssim_metric(img1, img2).item()

def compute_lpips_tensor(img1, img2, device):
    lpips_metric = torchmetrics.image.LearnedPerceptualImagePatchSimilarity(net_type='alex').to(device)
    return lpips_metric(img1, img2).item()

# -------------------------------
# Function to Process a Single Grouping
# -------------------------------
def process_grouping(grouping):
    """
    Process one grouping: load the original and generated images (from base_dir),
    compute metrics for each generated image against the original, and return a dictionary
    with the results.
    
    The returned dict has keys:
      - base, city
      - For each metric in ["L2", "PSNR", "SSIM", "LPIPS"]:
          * img{i}_{metric} for each generated image (i starting at 1)
          * mean_{metric}, max_{metric}, up25per_{metric}
    """
    try:
        # We'll use CPU for all metric calculations.
        device = torch.device("cpu")
    
        # Retrieve info from the grouping.
        original_filename = grouping["original"]
        base_name = grouping.get("base", "")
        city = grouping.get("city", "")
    
        original_path = os.path.join(base_dir, original_filename)
        try:
            orig_tensor = load_image(original_path, device)
        except Exception as e:
            print(f"Error loading original image {original_filename}: {e}")
            return None
    
        # List to hold metric dictionaries for each generated image.
        gen_metrics = []
        for gen_filename in grouping.get("generated", []):
            generated_path = os.path.join(base_dir, gen_filename)
            try:
                gen_tensor = load_image(generated_path, device)
            except Exception as e:
                print(f"Error loading generated image {gen_filename}: {e}")
                continue
    
            # Compute metrics between original and generated image.
            l2   = compute_l2_distance_tensor(orig_tensor, gen_tensor)
            psnr = compute_psnr_tensor(orig_tensor, gen_tensor, device)
            ssim = compute_ssim_tensor(orig_tensor, gen_tensor, device)
            lpips = compute_lpips_tensor(orig_tensor, gen_tensor, device)
    
            gen_metrics.append({
                "L2": l2,
                "PSNR": psnr,
                "SSIM": ssim,
                "LPIPS": lpips
            })
    
        # Prepare the row dictionary to be inserted into the DataFrame.
        row = {"base": base_name, "city": city}
        metrics_names = ["L2", "PSNR", "SSIM", "LPIPS"]
    
        # For each metric, store individual generated image values and compute aggregate stats.
        for metric in metrics_names:
            # Extract the list of values for this metric.
            values = [m[metric] for m in gen_metrics]
            # Save individual image metric values as columns (img1_{metric}, img2_{metric}, etc.)
            for i, val in enumerate(values):
                row[f"img{i+1}_{metric}"] = val
            # If there are fewer than 3 generated images, fill missing columns with None.
            for i in range(len(values), 3):
                row[f"img{i+1}_{metric}"] = None
    
            # Compute aggregated statistics if values are available.
            if values:
                row[f"mean_{metric}"]   = np.mean(values)
                row[f"max_{metric}"]    = np.max(values)
                row[f"up25per_{metric}"] = np.percentile(values, 75)  # 75th percentile = upper 25%
            else:
                row[f"mean_{metric}"]   = None
                row[f"max_{metric}"]    = None
                row[f"up25per_{metric}"] = None
    
        return row
    except Exception as e:
        print(f"Error in process_grouping: {e}")
        return None

# -------------------------------
# Main Processing: Parallel over Groupings and Save Results
# -------------------------------
def main():
    # Path to your grouping JSON file.
    groupings_file = "/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json"
    
    # Load grouping data.
    with open(groupings_file, 'r') as f:
        groupings_data = json.load(f)
    groupings = groupings_data.get("groupings", [])
    print(f"Loaded {len(groupings)} groupings.")
    
    # Determine number of worker threads: available CPUs minus 3 (at least 1 worker).
    num_workers = max(os.cpu_count() - 3, 1)
    print(f"Using {num_workers} worker(s) for CPU processing (threads).")
    
    results = []
    # Use ThreadPoolExecutor to avoid pickling issues in interactive environments.
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Create futures and wrap with tqdm for a progress bar.
        futures = [executor.submit(process_grouping, grouping) for grouping in groupings]
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Processing groupings"):
            try:
                row = future.result()
                if row is not None:
                    results.append(row)
            except Exception as e:
                print(f"Worker raised an exception: {e}")
    
    # Create a DataFrame from the results.
    df = pd.DataFrame(results)
    
    # Optionally, reorder columns so that base and city come first.
    cols = df.columns.tolist()
    ordered_cols = [col for col in ["base", "city"] if col in cols] + [col for col in cols if col not in ["base", "city"]]
    df = df[ordered_cols]
    
    # Save the DataFrame to a CSV file.
    output_csv = "/Users/wangzhuoyulucas/SMART /generatedImg/groupings_metrics.csv"
    df.to_csv(output_csv, index=False)
    print(f"Saved results to {output_csv}")

if __name__ == '__main__':
    main()


  from .autonotebook import tqdm as notebook_tqdm


Loaded 299 groupings.
Using 5 worker(s) for CPU processing (threads).


  self.load_state_dict(torch.load(model_path, map_location="cpu"), strict=False)
Processing groupings: 100%|██████████| 299/299 [04:59<00:00,  1.00s/it]

Saved results to /Users/wangzhuoyulucas/SMART /generatedImg/groupings_metrics.csv





# Process the csv to get by city metrics

In [5]:
import pandas as pd
import numpy as np

# Path to the CSV created from your groupings
input_csv = "/Users/wangzhuoyulucas/SMART /generatedImg/groupings_metrics.csv"

# Read the CSV into a DataFrame
df = pd.read_csv(input_csv)

# List of image metrics you computed in your groupings
metrics = ["L2", "PSNR", "SSIM", "LPIPS"]

# Aggregated statistic types we want (similar to your evaluation CSV for population)
# Here we include:
# - "N": the number of groupings (count)
# - "mean": the average of the per-grouping aggregated value (e.g., mean_L2)
# - "max": the average of the per-grouping max value (you might also consider taking the overall max)
# - "up25per": the average of the per-grouping 75th percentile (upper 25%)
agg_stats = ["N", "mean", "max", "up25per"]

# First, compute overall (all groupings) aggregated stats.
overall = {}
overall["N"] = len(df)
for metric in metrics:
    overall[f"mean_{metric}"]   = df[f"mean_{metric}"].mean()
    overall[f"max_{metric}"]    = df[f"max_{metric}"].mean()      # or use .max() if desired
    overall[f"up25per_{metric}"] = df[f"up25per_{metric}"].mean()

# Get a sorted list of cities present in the CSV
cities = sorted(df["city"].unique())

# Compute aggregated stats for each city.
city_results = {}
for city in cities:
    city_df = df[df["city"] == city]
    stats = {}
    stats["N"] = len(city_df)
    for metric in metrics:
        stats[f"mean_{metric}"]   = city_df[f"mean_{metric}"].mean()
        stats[f"max_{metric}"]    = city_df[f"max_{metric}"].mean()
        stats[f"up25per_{metric}"] = city_df[f"up25per_{metric}"].mean()
    city_results[city] = stats

# Now build a new DataFrame with rows for each metric and aggregator type.
rows = []
for metric in metrics:
    for agg in agg_stats:
        row = {"Category": metric, "Metrics": agg}
        # Overall column:
        if agg == "N":
            row["over-all"] = overall["N"]
        else:
            row["over-all"] = overall[f"{agg}_{metric}"]
        # Columns for each city:
        for city in cities:
            if agg == "N":
                row[city] = city_results[city]["N"]
            else:
                row[city] = city_results[city][f"{agg}_{metric}"]
        rows.append(row)

# Create the aggregated DataFrame.
agg_df = pd.DataFrame(rows)

# Optional: reorder the columns if you want "Category" and "Metrics" first,
# then "over-all" and then each city.
cols = ["Category", "Metrics", "over-all"] + cities
agg_df = agg_df[cols]

# Save to a new CSV.
output_csv = "/Users/wangzhuoyulucas/SMART /generatedImg/evaluation_metrics_by_city_image.csv"
agg_df.to_csv(output_csv, index=False)

print(f"Aggregated image metrics saved to {output_csv}")
print(agg_df)


Aggregated image metrics saved to /Users/wangzhuoyulucas/SMART /generatedImg/evaluation_metrics_by_city_image.csv
   Category  Metrics    over-all     Chicago    HongKong     Kigali  \
0        L2        N  299.000000   28.000000   26.000000  28.000000   
1        L2     mean  107.665870  128.592145  110.178674  80.310286   
2        L2      max  113.930284  137.288838  114.751334  82.454910   
3        L2  up25per  110.351644  131.794583  112.887297  81.473838   
4      PSNR        N  299.000000   28.000000   26.000000  28.000000   
5      PSNR     mean   11.353975    9.632400   11.202151  13.785047   
6      PSNR      max   11.831617   10.114967   11.683435  14.061837   
7      PSNR  up25per   11.620405    9.963504   11.408916  13.910833   
8      SSIM        N  299.000000   28.000000   26.000000  28.000000   
9      SSIM     mean    0.114218    0.083126    0.094877   0.149616   
10     SSIM      max    0.128970    0.097078    0.105875   0.159843   
11     SSIM  up25per    0.121684  

## DIVERSITY CHECKS, generated2generated, 

In [3]:
import os
import json
import numpy as np
import pandas as pd
import torch
import torchmetrics
import torchvision.transforms as transforms
from PIL import Image
import concurrent.futures
from tqdm import tqdm
import tempfile

# Import ms_ssim from pytorch_msssim (install with pip install pytorch-msssim)
from pytorch_msssim import ms_ssim

# Global base directory (all images are stored here)
base_dir = "/Users/wangzhuoyulucas/SMART /generatedImg/show-case-FID"

# -------------------------------
# Helper Function: Load and Preprocess Image
# -------------------------------
def load_image(image_path, device):
    """
    Load an image from disk, resize to 224x224, convert to tensor (range [0,1]),
    and add a batch dimension.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
    image = Image.open(image_path).convert("RGB")
    return transform(image).unsqueeze(0).to(device)

# -------------------------------
# Function to Process a Single Grouping for Generated Images
# -------------------------------
def process_generated_grouping(grouping):
    """
    For a grouping containing three generated images, compute pairwise comparisons
    for SSIM, MS-SSIM, and LPIPS. The comparisons are:
      - img1 vs. img2  (prefix "img1_2")
      - img2 vs. img3  (prefix "img2_3")
      - img1 vs. img3  (prefix "img1_3")
    Then compute the mean over the three pairs.
    Returns a dict with:
      - base, city
      - For each metric:
          * e.g. "img1_2SSIM", "img2_3SSIM", "img1_3SSIM" and "mean_SSIM"
          * similarly for "MS_SSIM" and "LPIPS"
    """
    try:
        device = torch.device("cpu")
        base_name = grouping.get("base", "")
        city = grouping.get("city", "")
        gen_filenames = grouping.get("generated", [])
        
        # Ensure exactly three generated images: if less, fill with None; if more, take first three.
        if len(gen_filenames) < 3:
            gen_filenames = gen_filenames + [None]*(3 - len(gen_filenames))
        elif len(gen_filenames) > 3:
            gen_filenames = gen_filenames[:3]
        
        # Load the three images.
        images = []
        for fname in gen_filenames:
            if fname is None:
                images.append(None)
            else:
                full_path = os.path.join(base_dir, fname)
                try:
                    img_tensor = load_image(full_path, device)
                except Exception as e:
                    print(f"Error loading generated image {fname}: {e}")
                    img_tensor = None
                images.append(img_tensor)
        
        # If any image is missing, skip metric computation.
        if any(img is None for img in images):
            row = {"base": base_name, "city": city}
            for metric in ["SSIM", "MS_SSIM", "LPIPS"]:
                row["img1_2" + metric] = None
                row["img2_3" + metric] = None
                row["img1_3" + metric] = None
                row["mean_" + metric] = None
            return row

        # Initialize torchmetrics for SSIM and LPIPS.
        ssim_metric = torchmetrics.image.StructuralSimilarityIndexMeasure(data_range=1.0)
        lpips_metric = torchmetrics.image.LearnedPerceptualImagePatchSimilarity(net_type='alex')
        
        # Define the three pairs and their prefixes.
        pairs = [
            ((0, 1), "img1_2"),
            ((1, 2), "img2_3"),
            ((0, 2), "img1_3")
        ]
        
        results = {}
        for (i, j), prefix in pairs:
            # Compute SSIM
            ssim_val = ssim_metric(images[i], images[j]).item()
            # Compute MS-SSIM using pytorch-msssim (images already in [0,1], shape (1, C, H, W))
            ms_ssim_val = ms_ssim(images[i], images[j], data_range=1.0, size_average=True).item()
            # Compute LPIPS
            lpips_val = lpips_metric(images[i], images[j]).item()
            results[prefix + "SSIM"] = ssim_val
            results[prefix + "MS_SSIM"] = ms_ssim_val
            results[prefix + "LPIPS"] = lpips_val
        
        # Compute mean for each metric.
        mean_ssim = np.mean([results["img1_2SSIM"], results["img2_3SSIM"], results["img1_3SSIM"]])
        mean_ms_ssim = np.mean([results["img1_2MS_SSIM"], results["img2_3MS_SSIM"], results["img1_3MS_SSIM"]])
        mean_lpips = np.mean([results["img1_2LPIPS"], results["img2_3LPIPS"], results["img1_3LPIPS"]])
        
        row = {"base": base_name, "city": city}
        row.update(results)
        row["mean_SSIM"] = mean_ssim
        row["mean_MS_SSIM"] = mean_ms_ssim
        row["mean_LPIPS"] = mean_lpips
        
        return row
    except Exception as e:
        print("Error in process_generated_grouping:", e)
        return None

# -------------------------------
# Main Processing Pipeline
# -------------------------------
def main():
    # Path to your grouping JSON file (update path as needed)
    groupings_file = "/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json"
    with open(groupings_file, 'r') as f:
        groupings_data = json.load(f)
    groupings = groupings_data.get("groupings", [])
    print(f"Loaded {len(groupings)} groupings.")
    
    # Use a ThreadPoolExecutor to process in parallel.
    num_workers = max(os.cpu_count() - 3, 1)
    print(f"Using {num_workers} worker(s) (threads) for processing.")
    results = []
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(process_generated_grouping, grouping) for grouping in groupings]
        for future in tqdm(concurrent.futures.as_completed(futures),
                           total=len(futures),
                           desc="Processing groupings"):
            try:
                row = future.result()
                if row is not None:
                    results.append(row)
            except Exception as e:
                print(f"Worker raised an exception: {e}")
    
    # Create DataFrame from results.
    df = pd.DataFrame(results)
    
    # Optionally reorder columns so that 'base' and 'city' are first.
    cols = df.columns.tolist()
    ordered_cols = [col for col in ["base", "city"] if col in cols] + [col for col in cols if col not in ["base", "city"]]
    df = df[ordered_cols]
    
    # Save the DataFrame to CSV in a writable directory.
    output_csv = os.path.join("/Users/wangzhuoyulucas/SMART /generatedImg/groupings_generated_metrics.csv")
    df.to_csv(output_csv, index=False)
    print(f"Saved results to {output_csv}")

if __name__ == '__main__':
    main()


Loaded 299 groupings.
Using 5 worker(s) (threads) for processing.


  self.load_state_dict(torch.load(model_path, map_location="cpu"), strict=False)
Processing groupings: 100%|██████████| 299/299 [02:29<00:00,  2.00it/s]

Saved results to /Users/wangzhuoyulucas/SMART /generatedImg/groupings_generated_metrics.csv





# Consistency ResNet Predictions

In [None]:
import os
import json
import re
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm

# -------------------------------
# Settings and Device
# -------------------------------
# For prediction on CPU
device = torch.device("cpu")
print(f"Using device: {device}")

# Transformation (same as training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
# For de-normalization in prediction (max values)
max_values = [135.9, 5379.6, 38447.0]

# -------------------------------
# Model Setup
# -------------------------------
# Define model architecture (ResNet50 with regression head)
model = models.resnet50(weights=None)
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 3),
    nn.Sigmoid()
)
# Load trained weights (update path as needed)
model.load_state_dict(torch.load("/Users/wangzhuoyulucas/SMART /generatedImg/resnet50_regression.pth",
                                 map_location=device,
                                 weights_only=True))
model = model.to(device)
model.eval()

# -------------------------------
# Dataset for Generated Images (using groupings_newFID.json)
# -------------------------------
class GeneratedImagesDataset(Dataset):
    """
    This dataset loads generated images from a folder based on the list of generated
    filenames in a groupings JSON file (groupings_newFID.json). It filters out any filenames
    that contain "tifcon.png".
    
    Expected generated filename format (from groupings):
      "61_52_r0_d1__Mexico___0.png"
    where:
      - The base is the substring before the first "__"
      - The city is the substring between "__" and "___"
    """
    def __init__(self, image_folder, groupings_json_path, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        
        # Load the groupings JSON file and collect generated filenames
        with open(groupings_json_path, 'r') as f:
            data = json.load(f)
        groupings = data.get("groupings", [])
        
        filenames = []
        for grouping in groupings:
            filenames.extend(grouping.get("generated", []))
        # Remove duplicates
        filenames = list(set(filenames))
        # Filter out filenames that contain "tifcon.png"
        self.filenames = [f for f in filenames if "tifcon.png" not in f.lower()]
    
    def __len__(self):
        return len(self.filenames)
    
    def __getitem__(self, idx):
        fname = self.filenames[idx]
        img_path = os.path.join(self.image_folder, fname)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, fname

def extract_base_and_city(fname):
    """
    Expected filename format: base__city___index.png
    Returns (base, city)
    """
    parts = fname.split("__")
    if len(parts) >= 2:
        base = parts[0]
        city_part = parts[1]
        city = city_part.split("___")[0]
    else:
        base = fname
        city = ""
    return base, city

if __name__ == '__main__':
    # -------------------------------
    # Prediction Pipeline for Generated Images
    # -------------------------------
    generated_images_folder = "/Users/wangzhuoyulucas/SMART /generatedImg/show-case-FID"  # update as needed
    groupings_json_path = "/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json"  # update as needed

    # For interactive environments, set num_workers=0; for standalone scripts, you can use a higher number.
    num_workers = 0

    dataset = GeneratedImagesDataset(generated_images_folder, groupings_json_path, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=num_workers)

    predictions = []
    print("Predicting on generated images...")
    with torch.no_grad():
        for images, filenames in tqdm(dataloader, desc="Predicting", total=len(dataloader)):
            images = images.to(device)
            outputs = model(images).squeeze().cpu().numpy()
            # Denormalize predictions: multiply each output by corresponding max_value and convert to float
            predicted_values = [float(outputs[i] * max_values[i]) for i in range(3)]
            fname = filenames[0]
            base, city = extract_base_and_city(fname)
            predictions.append({
                "generated_image": os.path.join(generated_images_folder, fname),
                "base": base,
                "city": city,
                "predicted_values": predicted_values
            })

    # -------------------------------
    # Ground Truth Matching
    # -------------------------------
    # Create mapping from base to official city using groupings_newFID.json
    with open(groupings_json_path, 'r') as f:
        groupings_data = json.load(f)
    groupings_list = groupings_data.get("groupings", [])
    base_to_city = {}
    for grouping in groupings_list:
        b = grouping.get("base", "").lower()
        c = grouping.get("city", "").lower()
        if b:
            base_to_city[b] = c

    # Load the "all.json" file containing ground truth info (assumes JSON Lines format)
    all_json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/src/all.json"  # update as needed
    all_data = []
    with open(all_json_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                all_data.append(json.loads(line))

    # Function to extract three numbers from prompt (assumes order: area, volume, population)
    def extract_numbers_from_prompt(prompt):
        numbers = re.findall(r"[\d]+\.[\d]+", prompt)
        if len(numbers) >= 3:
            return [float(numbers[0]), float(numbers[1]), float(numbers[2])]
        return None

    final_results = []
    for pred in predictions:
        base = pred["base"]
        # Use the mapping to get the official city from the groupings
        city = base_to_city.get(base.lower(), pred["city"].lower())
        match = None
        for item in all_data:
            target_path = item.get("target", "").lower()
            # Both base and official city must appear in the target path.
            if (base.lower() in target_path) and (city in target_path):
                match = item
                break
        if match is not None:
            ground_truth = extract_numbers_from_prompt(match.get("prompt", ""))
            target_path = match.get("target", "")
            if ground_truth is not None:
                ground_truth = [float(val) for val in ground_truth]
        else:
            ground_truth = None
            target_path = None

        final_results.append({
            "base": base,
            "city": city,
            "generated_image": pred["generated_image"],
            "predicted_values": pred["predicted_values"],
            "target_path": target_path,
            "ground_truth": ground_truth
        })

    # -------------------------------
    # Save Final Results
    # -------------------------------
    output_json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_ResNet.json"
    with open(output_json_path, "w") as f:
        json.dump(final_results, f, indent=4)

    print(f"Final results saved to {output_json_path}")
    print("Done!")

In [3]:
import os
import json
import re
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm

# -------------------------------
# Settings and Device
# -------------------------------
# For prediction on CPU
device = torch.device("cpu")
print(f"Using device: {device}")

# Transformation (same as training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
# For de-normalization in prediction (max values)
max_values = [135.9, 5379.6, 38447.0]

# -------------------------------
# Model Setup
# -------------------------------
# Define model architecture (ResNet50 with regression head)
model = models.resnet50(weights=None)
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 3),
    nn.Sigmoid()
)
# Load trained weights (update path as needed)
model.load_state_dict(torch.load("/Users/wangzhuoyulucas/SMART /generatedImg/resnet50_regression.pth",
                                 map_location=device,
                                 weights_only=True))
model = model.to(device)
model.eval()

# -------------------------------
# Dataset for Generated Images (using groupings_newFID.json)
# -------------------------------
class GeneratedImagesDataset(Dataset):
    """
    This dataset loads generated images from a folder based on the list of generated
    filenames in a groupings JSON file (groupings_newFID.json). It filters out any filenames
    that contain "tifcon.png".
    
    Expected generated filename format (from groupings):
      "61_52_r0_d1__Mexico___0.png"
    where:
      - The base is the substring before the first "__"
      - The city is the substring between "__" and "___"
    """
    def __init__(self, image_folder, groupings_json_path, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        
        # Load the groupings JSON file and collect generated filenames
        with open(groupings_json_path, 'r') as f:
            data = json.load(f)
        groupings = data.get("groupings", [])
        
        filenames = []
        for grouping in groupings:
            filenames.extend(grouping.get("generated", []))
        # Remove duplicates
        filenames = list(set(filenames))
        # Filter out filenames that contain "tifcon.png"
        self.filenames = [f for f in filenames if "tifcon.png" not in f.lower()]
    
    def __len__(self):
        return len(self.filenames)
    
    def __getitem__(self, idx):
        fname = self.filenames[idx]
        img_path = os.path.join(self.image_folder, fname)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, fname

def extract_base_and_city_style(fname):
    """
    Expected filename format: base__city_style___index.png
    Returns (base, city_style)
    """
    parts = fname.split("__")
    if len(parts) >= 2:
        base = parts[0]
        city_style = parts[1].split("___")[0]
    else:
        base = fname
        city_style = ""
    return base, city_style

# -------------------------------
# Create mapping from base to official city using groupings_newFID.json
# -------------------------------
with open("/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json", 'r') as f:
    groupings_data = json.load(f)
groupings_list = groupings_data.get("groupings", [])
# Create a dictionary mapping base (in lowercase) to the official city (in lowercase)
base_to_city = {}
for grouping in groupings_list:
    b = grouping.get("base", "").lower()
    c = grouping.get("city", "").lower()
    if b:
        base_to_city[b] = c

if __name__ == '__main__':
    # -------------------------------
    # Prediction Pipeline for Generated Images
    # -------------------------------
    # Set the folder that contains generated images (update path as needed)
    generated_images_folder = "/Users/wangzhuoyulucas/SMART /generatedImg/show-case-FID"  # update as needed
    groupings_json_path = "/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json"   # update as needed

    # For interactive environments, set num_workers=0; for scripts, you can use a higher number (e.g. 5).
    num_workers = 0

    dataset = GeneratedImagesDataset(generated_images_folder, groupings_json_path, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=num_workers)

    # Predict using the model
    predictions = []  # list to store prediction results
    print("Predicting on generated images...")
    with torch.no_grad():
        for images, filenames in tqdm(dataloader, desc="Predicting", total=len(dataloader)):
            images = images.to(device)
            outputs = model(images).squeeze().cpu().numpy()
            # Denormalize predictions: multiply each output by corresponding max_value and convert to float
            predicted_values = [float(outputs[i] * max_values[i]) for i in range(3)]
            
            # Extract base and city_style from the filename.
            fname = filenames[0]
            base, city_style = extract_base_and_city_style(fname)
            
            predictions.append({
                "generated_image": os.path.join(generated_images_folder, fname),
                "base": base,
                "city_style": city_style,
                "predicted_values": predicted_values
            })

    # -------------------------------
    # Ground Truth Matching
    # -------------------------------
    # Load the "all.json" file containing ground truth info (JSON Lines format)
    all_json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/src/all.json"  # update path as needed
    all_data = []
    with open(all_json_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                all_data.append(json.loads(line))

    # Function to extract three numbers from prompt (assumes order: area, volume, population)
    def extract_numbers_from_prompt(prompt):
        numbers = re.findall(r"[\d]+\.[\d]+", prompt)
        if len(numbers) >= 3:
            return [float(numbers[0]), float(numbers[1]), float(numbers[2])]
        return None

    # For each prediction, update the city using the groupings mapping and search for a matching ground truth in all_data.
    final_results = []
    for pred in predictions:
        base = pred["base"]
        # Use the mapping to get the official city (if available)
        city = base_to_city.get(base.lower(), "").lower()
        # Also keep the city_style as extracted from the filename
        city_style = pred["city_style"].lower()
        match = None
        for item in all_data:
            target_path = item.get("target", "").lower()
            # Check if both base and the official city are in the target path.
            if (base.lower() in target_path) and (city in target_path):
                match = item
                break
        if match is not None:
            ground_truth = extract_numbers_from_prompt(match.get("prompt", ""))
            target_path = match.get("target", "")
            if ground_truth is not None:
                ground_truth = [float(val) for val in ground_truth]
        else:
            ground_truth = None
            target_path = None

        final_results.append({
            "base": base,
            "city_style": city_style,   # extracted from filename
            "city": city,               # official city from groupings
            "generated_image": pred["generated_image"],
            "predicted_values": pred["predicted_values"],
            "target_path": target_path,
            "ground_truth": ground_truth
        })

    # -------------------------------
    # Save Final Results
    # -------------------------------
    output_json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_ResNet.json"
    with open(output_json_path, "w") as f:
        json.dump(final_results, f, indent=4)

    print(f"Final results saved to {output_json_path}")

Using device: cpu
Predicting on generated images...


Predicting: 100%|██████████| 897/897 [01:05<00:00, 13.65it/s]


Final results saved to /Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_ResNet.json


## with prompt also extracted

In [6]:
import os
import json
import re
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm

# -------------------------------
# Settings and Device
# -------------------------------
# For prediction on CPU
device = torch.device("cpu")
print(f"Using device: {device}")

# Transformation (same as training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
# For de-normalization in prediction (max values)
max_values = [135.9, 5379.6, 38447.0]

# -------------------------------
# Model Setup
# -------------------------------
# Define model architecture (ResNet50 with regression head)
model = models.resnet50(weights=None)
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 3),
    nn.Sigmoid()
)
# Load trained weights (update path as needed)
model.load_state_dict(torch.load("/Users/wangzhuoyulucas/SMART /generatedImg/resnet50_regression.pth",
                                 map_location=device,
                                 weights_only=True))
model = model.to(device)
model.eval()

# -------------------------------
# Dataset for Generated Images (using groupings_newFID.json)
# -------------------------------
class GeneratedImagesDataset(Dataset):
    """
    This dataset loads generated images from a folder based on the list of generated
    filenames in a groupings JSON file (groupings_newFID.json). It filters out any filenames
    that contain "tifcon.png".
    
    Expected generated filename format (from groupings):
      "61_52_r0_d1__Mexico___0.png"
    where:
      - The base is the substring before the first "__"
      - The city is the substring between "__" and "___"
    """
    def __init__(self, image_folder, groupings_json_path, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        
        # Load the groupings JSON file and collect generated filenames
        with open(groupings_json_path, 'r') as f:
            data = json.load(f)
        groupings = data.get("groupings", [])
        
        filenames = []
        for grouping in groupings:
            filenames.extend(grouping.get("generated", []))
        # Remove duplicates
        filenames = list(set(filenames))
        # Filter out filenames that contain "tifcon.png"
        self.filenames = [f for f in filenames if "tifcon.png" not in f.lower()]
    
    def __len__(self):
        return len(self.filenames)
    
    def __getitem__(self, idx):
        fname = self.filenames[idx]
        img_path = os.path.join(self.image_folder, fname)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, fname

def extract_base_and_city_style(fname):
    """
    Expected filename format: base__city_style___index.png
    Returns (base, city_style)
    """
    parts = fname.split("__")
    if len(parts) >= 2:
        base = parts[0]
        city_style = parts[1].split("___")[0]
    else:
        base = fname
        city_style = ""
    return base, city_style

# -------------------------------
# Create mapping from base to official city using groupings_newFID.json
# -------------------------------
with open("/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json", 'r') as f:
    groupings_data = json.load(f)
groupings_list = groupings_data.get("groupings", [])
# Create a dictionary mapping base (in lowercase) to the official city (in lowercase)
base_to_city = {}
for grouping in groupings_list:
    b = grouping.get("base", "").lower()
    c = grouping.get("city", "").lower()
    if b:
        base_to_city[b] = c

if __name__ == '__main__':
    # -------------------------------
    # Prediction Pipeline for Generated Images
    # -------------------------------
    # Set the folder that contains generated images (update path as needed)
    generated_images_folder = "/Users/wangzhuoyulucas/SMART /generatedImg/show-case-FID"  # update as needed
    groupings_json_path = "/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json"   # update as needed

    # For interactive environments, set num_workers=0; for scripts, you can use a higher number (e.g. 5).
    num_workers = 0

    dataset = GeneratedImagesDataset(generated_images_folder, groupings_json_path, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=num_workers)

    # Predict using the model
    predictions = []  # list to store prediction results
    print("Predicting on generated images...")
    with torch.no_grad():
        for images, filenames in tqdm(dataloader, desc="Predicting", total=len(dataloader)):
            images = images.to(device)
            outputs = model(images).squeeze().cpu().numpy()
            # Denormalize predictions: multiply each output by corresponding max_value and convert to float
            predicted_values = [float(outputs[i] * max_values[i]) for i in range(3)]
            
            # Extract base and city_style from the filename.
            fname = filenames[0]
            base, city_style = extract_base_and_city_style(fname)
            
            predictions.append({
                "generated_image": os.path.join(generated_images_folder, fname),
                "base": base,
                "city_style": city_style,
                "predicted_values": predicted_values
            })

    # -------------------------------
    # Ground Truth Matching
    # -------------------------------
    # Load the "all.json" file containing ground truth info (JSON Lines format)
    all_json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/src/all.json"  # update path as needed
    all_data = []
    with open(all_json_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                all_data.append(json.loads(line))

    # Function to extract three numbers from prompt (assumes order: area, volume, population)
    def extract_numbers_from_prompt(prompt):
        numbers = re.findall(r"[\d]+\.[\d]+", prompt)
        if len(numbers) >= 3:
            return [float(numbers[0]), float(numbers[1]), float(numbers[2])]
        return None

    # For each prediction, update the city using the groupings mapping and search for a matching ground truth in all_data.
    final_results = []
    for pred in predictions:
        base = pred["base"]
        # Use the mapping to get the official city (if available)
        city = base_to_city.get(base.lower(), "").lower()
        # Also keep the city_style as extracted from the filename
        city_style = pred["city_style"].lower()
        match = None
        for item in all_data:
            target_path = item.get("target", "").lower()
            # Check if both base and the official city are in the target path.
            if (base.lower() in target_path) and (city in target_path):
                match = item
                break
        if match is not None:
            prompt = match.get("prompt", "")
            ground_truth = extract_numbers_from_prompt(prompt)
            target_path = match.get("target", "")
            if ground_truth is not None:
                ground_truth = [float(val) for val in ground_truth]
        else:
            ground_truth = None
            target_path = None
            prompt = None

        final_results.append({
            "base": base,
            "city_style": city_style,   # extracted from filename
            "city": city,               # official city from groupings
            "generated_image": pred["generated_image"],
            "predicted_values": pred["predicted_values"],
            "target_path": target_path,
            "ground_truth": ground_truth,
            "prompt": prompt          # added prompt from the matched item
        })

    # -------------------------------
    # Save Final Results
    # -------------------------------
    output_json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_ResNet.json"
    with open(output_json_path, "w") as f:
        json.dump(final_results, f, indent=4)

    print(f"Final results saved to {output_json_path}")


Using device: cpu
Predicting on generated images...


Predicting: 100%|██████████| 897/897 [01:13<00:00, 12.22it/s]


Final results saved to /Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_ResNet.json


# With clip score calculated (897 images about 40 mins)

In [11]:
import os
import json
import re
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
from tqdm import tqdm
import concurrent.futures
from torchmetrics.multimodal.clip_score import CLIPScore

# -------------------------------
# Settings and Device
# -------------------------------
device = torch.device("cpu")
print(f"Using device: {device}")

# -------------------------------
# Transformation for Prediction (ResNet)
# -------------------------------
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
max_values = [135.9, 5379.6, 38447.0]

# -------------------------------
# Model Setup: ResNet50 with Regression Head
# -------------------------------
model = models.resnet50(weights=None)
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 3),
    nn.Sigmoid()
)
model.load_state_dict(torch.load("/Users/wangzhuoyulucas/SMART /generatedImg/resnet50_regression.pth",
                                 map_location=device,
                                 weights_only=True))
model = model.to(device)
model.eval()

# -------------------------------
# Dataset for Generated Images
# -------------------------------
class GeneratedImagesDataset(Dataset):
    """
    Loads generated images based on filenames from a groupings JSON file.
    Filters out any filenames that contain "tifcon.png".
    """
    def __init__(self, image_folder, groupings_json_path, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        
        with open(groupings_json_path, 'r') as f:
            data = json.load(f)
        groupings = data.get("groupings", [])
        
        filenames = []
        for grouping in groupings:
            filenames.extend(grouping.get("generated", []))
        filenames = list(set(filenames))
        self.filenames = [f for f in filenames if "tifcon.png" not in f.lower()]
    
    def __len__(self):
        return len(self.filenames)
    
    def __getitem__(self, idx):
        fname = self.filenames[idx]
        img_path = os.path.join(self.image_folder, fname)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, fname

def extract_base_and_city_style(fname):
    """
    Expected filename format: base__city_style___index.png
    Returns (base, city_style)
    """
    parts = fname.split("__")
    if len(parts) >= 2:
        base = parts[0]
        city_style = parts[1].split("___")[0]
    else:
        base = fname
        city_style = ""
    return base, city_style

# -------------------------------
# Mapping from Base to Official City
# -------------------------------
with open("/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json", 'r') as f:
    groupings_data = json.load(f)
groupings_list = groupings_data.get("groupings", [])
base_to_city = {}
for grouping in groupings_list:
    b = grouping.get("base", "").lower()
    c = grouping.get("city", "").lower()
    if b:
        base_to_city[b] = c

# -------------------------------
# Main Pipeline
# -------------------------------
if __name__ == '__main__':
    # Paths
    generated_images_folder = "/Users/wangzhuoyulucas/SMART /generatedImg/show-case-FID"
    groupings_json_path = "/Users/wangzhuoyulucas/SMART /data_server/urban_data/tencities/GenAI_density/groupings_newFID.json"
    all_json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/src/all.json"
    
    # Create dataset and dataloader for prediction
    num_workers = 0  # For interactive environments; adjust as needed
    dataset = GeneratedImagesDataset(generated_images_folder, groupings_json_path, transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=num_workers)
    
    # -------------------------------
    # Prediction: Run the ResNet model on generated images
    # -------------------------------
    predictions = []
    print("Predicting on generated images...")
    with torch.no_grad():
        for images, filenames in tqdm(dataloader, desc="Predicting", total=len(dataloader)):
            images = images.to(device)
            outputs = model(images).squeeze().cpu().numpy()
            # Denormalize predictions
            predicted_values = [float(outputs[i] * max_values[i]) for i in range(3)]
            fname = filenames[0]
            base, city_style = extract_base_and_city_style(fname)
            predictions.append({
                "generated_image": os.path.join(generated_images_folder, fname),
                "base": base,
                "city_style": city_style,
                "predicted_values": predicted_values
            })
    
    # -------------------------------
    # Ground Truth Matching and Prompt Extraction
    # -------------------------------
    all_data = []
    with open(all_json_path, 'r') as f:
        for line in f:
            line = line.strip()
            if line:
                all_data.append(json.loads(line))
    
    def extract_numbers_from_prompt(prompt):
        numbers = re.findall(r"[\d]+\.[\d]+", prompt)
        if len(numbers) >= 3:
            return [float(numbers[0]), float(numbers[1]), float(numbers[2])]
        return None
    
    final_results = []
    for pred in predictions:
        base = pred["base"]
        city = base_to_city.get(base.lower(), "").lower()
        city_style = pred["city_style"].lower()
        match = None
        for item in all_data:
            target_path = item.get("target", "").lower()
            if (base.lower() in target_path) and (city in target_path):
                match = item
                break
        if match is not None:
            prompt = match.get("prompt", "")
            ground_truth = extract_numbers_from_prompt(prompt)
            target_path = match.get("target", "")
            if ground_truth is not None:
                ground_truth = [float(val) for val in ground_truth]
        else:
            ground_truth = None
            target_path = None
            prompt = None
    
        final_results.append({
            "base": base,
            "city_style": city_style,
            "city": city,
            "generated_image": pred["generated_image"],
            "predicted_values": pred["predicted_values"],
            "target_path": target_path,
            "ground_truth": ground_truth,
            "prompt": prompt
        })
    
    # -------------------------------
    # Define a simple transform for CLIP (resize to 224x224 and convert to tensor)
    # Note: transforms.ToTensor() will produce values between 0 and 1.
    # Thus, we set do_rescale=False when creating the CLIPScore metric.
    clip_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
    
    # -------------------------------
    # Compute CLIP Scores for Each Result using torchmetrics (CPU, Parallelized)
    # -------------------------------
    # We instantiate a new CLIPScore metric for each sample to avoid the warning
    # about calling compute before update.
    def compute_clip_score_for_result(result):
        if result["prompt"] is None:
            result["clip_score"] = None
            return result
        try:
            image = Image.open(result["generated_image"]).convert("RGB")
        except Exception as e:
            print(f"Error loading image {result['generated_image']}: {e}")
            result["clip_score"] = None
            return result
        image_tensor = clip_transform(image).unsqueeze(0).to(device)  # shape: (1, 3, 224, 224)
        # Instantiate a new CLIPScore metric with do_rescale=False for each sample.
        clip_metric_instance = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14").to(device)
        score = clip_metric_instance(image_tensor, result["prompt"])
        result["clip_score"] = score.item()
        return result
    
    # Use ThreadPoolExecutor with tqdm progress bar to process CLIP score computation in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        final_results = list(tqdm(
            executor.map(compute_clip_score_for_result, final_results),
            total=len(final_results),
            desc="Computing CLIP Scores"
        ))
    
    # -------------------------------
    # Save Final Results with CLIP Scores
    # -------------------------------
    output_json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_with_clip.json"
    with open(output_json_path, "w") as f:
        json.dump(final_results, f, indent=4)
    
    print(f"Final results with CLIP scores saved to {output_json_path}")


Using device: cpu
Predicting on generated images...


Predicting: 100%|██████████| 897/897 [00:55<00:00, 16.25it/s]
Computing CLIP Scores: 100%|██████████| 897/897 [29:45<00:00,  1.99s/it]

Final results with CLIP scores saved to /Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_with_clip.json





In [14]:
pip install openpyxl


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting openpyxl
  Using cached openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Using cached et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Using cached openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Using cached et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


## Process the clips to get top 5 bottom 5 and by city

In [1]:
import pandas as pd

# -------------------------------
# 1. Load the JSON file into a DataFrame
# -------------------------------
json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_with_clip.json"
df = pd.read_json(json_path)

# Ensure that 'clip_score' is numeric (it might be None or invalid in some rows)
df['clip_score'] = pd.to_numeric(df['clip_score'], errors='coerce')

# Optionally, filter out rows where clip_score is NaN:
df = df.dropna(subset=['clip_score'])

# -------------------------------
# 2. Compute overall statistics for clip_score
# -------------------------------
overall_mean = df['clip_score'].mean()
overall_max = df['clip_score'].max()
q25 = df['clip_score'].quantile(0.25)
q75 = df['clip_score'].quantile(0.75)

print("Overall clip_score statistics:")
print(f"  Mean: {overall_mean:.2f}")
print(f"  Max: {overall_max:.2f}")
print(f"  25th Percentile: {q25:.2f}")
print(f"  75th Percentile: {q75:.2f}")

# -------------------------------
# 3. Compute mean clip_score per city
# -------------------------------
city_means = df.groupby('city', as_index=False)['clip_score'].mean().rename(columns={'clip_score': 'mean_clip_score'})
print("\nMean clip_score per city:")
print(city_means)

# -------------------------------
# 4. For each city, get top 5 and bottom 5 entries by clip_score
# -------------------------------
def assign_top_bottom(group):
    """
    For each city's group, mark the top 5 rows (highest clip_score) as 'top5'
    and the bottom 5 rows (lowest clip_score) as 'bottom5'. If the group has fewer than 10 rows,
    rows may be in both categories.
    """
    group = group.copy()
    group = group.sort_values('clip_score', ascending=False)
    group['ranking'] = ''
    top5_index = group.head(5).index
    bottom5_index = group.tail(5).index
    group.loc[top5_index, 'ranking'] = 'top5'
    group.loc[bottom5_index, 'ranking'] = 'bottom5'
    return group

# Apply the ranking function per city.
df_ranked = df.groupby('city', group_keys=False).apply(assign_top_bottom)

# If you want to store only the top5 and bottom5 rows per city (i.e. remove any rows that are not in these groups)
df_top_bottom = df_ranked[df_ranked['ranking'] != '']

# -------------------------------
# 5. Save the results to CSV files
# -------------------------------
# Save the top5 and bottom5 detailed rows (with all columns) to CSV.
output_csv_path = "/Users/wangzhuoyulucas/SMART /generatedImg/clip_score_top_bottom_by_city.csv"
df_top_bottom.to_csv(output_csv_path, index=False)
print(f"\nDetailed top/bottom rows saved to: {output_csv_path}")

# Optionally, save overall and per-city summary stats to another CSV.
summary_csv_path = "/Users/wangzhuoyulucas/SMART /generatedImg/clip_score_summary.csv"
summary_stats = pd.DataFrame({
    "overall_mean": [overall_mean],
    "overall_max": [overall_max],
    "25th_percentile": [q25],
    "75th_percentile": [q75]
})
# Save city-level means and overall stats (they can be stored in separate sheets or files; here we append them)
with pd.ExcelWriter(summary_csv_path.replace('.csv', '.xlsx')) as writer:
    city_means.to_excel(writer, sheet_name="City_Means", index=False)
    summary_stats.to_excel(writer, sheet_name="Overall_Stats", index=False)
print(f"Summary statistics saved to: {summary_csv_path.replace('.csv', '.xlsx')}")


Overall clip_score statistics:
  Mean: 17.51
  Max: 19.12
  25th Percentile: 17.28
  75th Percentile: 17.77

Mean clip_score per city:
        city  mean_clip_score
0    chicago        17.324560
1   hongkong        17.437904
2     kigali        16.383949
3   kinshasa        17.364836
4     mexico        17.547715
5     munich        17.668256
6    orlando        18.446414
7   saopaulo        17.373903
8  singapore        17.562303
9  stockholm        17.533081

Detailed top/bottom rows saved to: /Users/wangzhuoyulucas/SMART /generatedImg/clip_score_top_bottom_by_city.csv
Summary statistics saved to: /Users/wangzhuoyulucas/SMART /generatedImg/clip_score_summary.xlsx


  df_ranked = df.groupby('city', group_keys=False).apply(assign_top_bottom)


# r2 for CONSISTENCY 

In [8]:
pip install scikit-learn


Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl (11.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hUsing cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.6.1 threadpoolctl-3.5.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
import json
import numpy as np
import pandas as pd
import math
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Load the final predictions JSON
with open("/Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_ResNet.json", "r") as f:
    results = json.load(f)

# Initialize lists for each target:
gt_surface = []  # total built-up surface area
pred_surface = []

gt_volume = []   # total built-up volume
pred_volume = []

gt_population = []  # population in this area
pred_population = []

# Loop through each prediction and extract values if ground_truth exists.
for item in results:
    gt = item.get("ground_truth")
    pred = item.get("predicted_values")
    # Only include items where both ground truth and predicted values are available.
    if gt is not None and pred is not None and len(gt) >= 3 and len(pred) >= 3:
        gt_surface.append(gt[0])
        pred_surface.append(pred[0])
        
        gt_volume.append(gt[1])
        pred_volume.append(pred[1])
        
        gt_population.append(gt[2])
        pred_population.append(pred[2])

# Convert lists to numpy arrays
gt_surface = np.array(gt_surface)
pred_surface = np.array(pred_surface)

gt_volume = np.array(gt_volume)
pred_volume = np.array(pred_volume)

gt_population = np.array(gt_population)
pred_population = np.array(pred_population)

# Function to compute metrics
def compute_metrics(y_true, y_pred):
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = math.sqrt(mse)
    return r2, mse, mae, rmse

# Compute metrics for each target
metrics_surface = compute_metrics(gt_surface, pred_surface)
metrics_volume = compute_metrics(gt_volume, pred_volume)
metrics_population = compute_metrics(gt_population, pred_population)

# Create a DataFrame with the results
data = {
    "Target": ["Total built-up surface area", "Total built-up volume", "Population"],
    "R2": [metrics_surface[0], metrics_volume[0], metrics_population[0]],
    "MSE": [metrics_surface[1], metrics_volume[1], metrics_population[1]],
    "MAE": [metrics_surface[2], metrics_volume[2], metrics_population[2]],
    "RMSE": [metrics_surface[3], metrics_volume[3], metrics_population[3]]
}

df = pd.DataFrame(data)

# Save the DataFrame to a CSV file
output_csv_path = "/Users/wangzhuoyulucas/SMART /generatedImg/evaluation_metrics.csv"
df.to_csv(output_csv_path, index=False)

print(f"Evaluation metrics saved to {output_csv_path}")
print(df.head())

Evaluation metrics saved to /Users/wangzhuoyulucas/SMART /generatedImg/evaluation_metrics.csv
                        Target        R2           MSE          MAE  \
0  Total built-up surface area  0.888232  6.546947e+01     5.830976   
1        Total built-up volume  0.500068  9.611077e+04   199.384547   
2                   Population  0.458813  5.048156e+06  1128.489559   

          RMSE  
0     8.091321  
1   310.017366  
2  2246.810225  


# By city and metrics

In [2]:
import json
import numpy as np
import pandas as pd
import math
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Load the final predictions JSON
json_path = "/Users/wangzhuoyulucas/SMART /generatedImg/final_predictions_ResNet.json"
with open(json_path, "r") as f:
    results = json.load(f)

# Initialize lists for overall metrics
overall_surface_gt = []
overall_surface_pred = []
overall_volume_gt = []
overall_volume_pred = []
overall_population_gt = []
overall_population_pred = []

# Dictionary to hold per-city data.
# Each city key will map to a dictionary holding lists for each target and a count N.
cities_data = {}

for item in results:
    city = item.get("city")
    gt = item.get("ground_truth")
    pred = item.get("predicted_values")
    
    # Only include items with valid ground_truth and predictions (with at least 3 values)
    if gt is None or pred is None or len(gt) < 3 or len(pred) < 3:
        continue
    
    # Append values to overall lists
    overall_surface_gt.append(gt[0])
    overall_surface_pred.append(pred[0])
    overall_volume_gt.append(gt[1])
    overall_volume_pred.append(pred[1])
    overall_population_gt.append(gt[2])
    overall_population_pred.append(pred[2])
    
    # Create or update the city entry
    if city not in cities_data:
        cities_data[city] = {
            "N": 0,
            "surface_gt": [],
            "surface_pred": [],
            "volume_gt": [],
            "volume_pred": [],
            "population_gt": [],
            "population_pred": []
        }
    cities_data[city]["N"] += 1
    cities_data[city]["surface_gt"].append(gt[0])
    cities_data[city]["surface_pred"].append(pred[0])
    cities_data[city]["volume_gt"].append(gt[1])
    cities_data[city]["volume_pred"].append(pred[1])
    cities_data[city]["population_gt"].append(gt[2])
    cities_data[city]["population_pred"].append(pred[2])

# Define a function to compute metrics given true and predicted values
def compute_metrics(y_true, y_pred):
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = math.sqrt(mse)
    return r2, mse, mae, rmse

# Helper function to compute metrics if data is available
def compute_metrics_for_target(gt_list, pred_list):
    if len(gt_list) == 0:
        return None
    y_true = np.array(gt_list)
    y_pred = np.array(pred_list)
    return compute_metrics(y_true, y_pred)

# Prepare a list to collect all rows for the final DataFrame.
# We will have one row per target per city (plus an overall "All" row).
rows = []

# Define the targets as tuples: (display_name, key used in our lists)
targets = [
    ("Total built-up surface area", "surface"),
    ("Total built-up volume", "volume"),
    ("Population", "population")
]

# First, compute overall metrics ("All") using the overall lists.
overall_data = {
    "surface": (overall_surface_gt, overall_surface_pred),
    "volume": (overall_volume_gt, overall_volume_pred),
    "population": (overall_population_gt, overall_population_pred)
}

for target_name, key in targets:
    gt_list, pred_list = overall_data[key]
    metrics = compute_metrics_for_target(gt_list, pred_list)
    if metrics is not None:
        rows.append({
            "Target": target_name,
            "City": "All",
            "N": len(gt_list),
            "R2": metrics[0],
            "RMSE": metrics[3],
            "MAE": metrics[2]
        })

# Now, compute metrics for each city
for city, data in cities_data.items():
    for target_name, key in targets:
        gt_list = data[f"{key}_gt"]
        pred_list = data[f"{key}_pred"]
        metrics = compute_metrics_for_target(gt_list, pred_list)
        if metrics is not None:
            rows.append({
                "Target": target_name,
                "City": city,
                "N": data["N"],
                "R2": metrics[0],
                "RMSE": metrics[3],
                "MAE": metrics[2]
            })

# Create a DataFrame from the collected rows.
df = pd.DataFrame(rows)

# (Optional) Sort the DataFrame by Target and City for clarity.
df = df.sort_values(by=["Target", "City"]).reset_index(drop=True)

# Save the DataFrame to a CSV file
output_csv_path = "/Users/wangzhuoyulucas/SMART /generatedImg/evaluation_metrics_by_city.csv"
df.to_csv(output_csv_path, index=False)

print(f"Evaluation metrics saved to {output_csv_path}")
print(df.head(10))


Evaluation metrics saved to /Users/wangzhuoyulucas/SMART /generatedImg/evaluation_metrics_by_city.csv
       Target       City    N        R2         RMSE          MAE
0  Population        All  897  0.458813  2246.810225  1128.489559
1  Population    chicago   87 -0.074850   661.527800   436.411361
2  Population   hongkong   78  0.377684  5399.925436  3628.771625
3  Population     kigali   84  0.571203   624.471638   368.343939
4  Population   kinshasa  102  0.586586  2638.008609  1830.260227
5  Population     mexico   90  0.414827   857.510764   676.250258
6  Population     munich   63  0.391949   631.853814   412.703545
7  Population    orlando  129  0.068959   220.264716   147.973980
8  Population   saopaulo   93 -0.116943  1192.303402   920.969016
9  Population  singapore   87 -2.123055  3656.419918  2627.721783


In [4]:
import pandas as pd

# Read the CSV
df = pd.read_csv("/Users/wangzhuoyulucas/SMART /generatedImg/evaluation_metrics_by_city.csv")

# Transpose the DataFrame
df_transposed = df.T

# Save the transposed DataFrame to a new CSV
df_transposed.to_csv("/Users/wangzhuoyulucas/SMART /generatedImg/evaluation_metrics_by_city_transposed.csv")

print(df_transposed.head())


                 0           1            2           3            4   \
Target   Population  Population   Population  Population   Population   
City            All     chicago     hongkong      kigali     kinshasa   
N               897          87           78          84          102   
R2         0.458813    -0.07485     0.377684    0.571203     0.586586   
RMSE    2246.810225    661.5278  5399.925436  624.471638  2638.008609   

                5           6           7            8            9   ...  \
Target  Population  Population  Population   Population   Population  ...   
City        mexico      munich     orlando     saopaulo    singapore  ...   
N               90          63         129           93           87  ...   
R2        0.414827    0.391949    0.068959    -0.116943    -2.123055  ...   
RMSE    857.510764  631.853814  220.264716  1192.303402  3656.419918  ...   

                           23                     24                     25  \
Target  Total built