In [None]:
"""step3_statistics.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1--a2JUkBlN3Z26g1gaT_EglNLNuZJ6ZY
"""

In [None]:
# Environment setup (Colab shell commands): refresh the apt package index,
# install the chromium-chromedriver package, and install the Selenium Python
# package that render_html_to_image() imports to take screenshots.
!apt-get update
!apt-get install -y chromium-chromedriver
!pip install selenium

0% [Working]            Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to cloud.r-project.or                                                                               Get:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
                                                                               Get:3 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:6 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/

In [None]:
import os
import json
import time
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
import torch.nn.functional as F
from tqdm import tqdm
import zipfile
from huggingface_hub import hf_hub_download
from google.colab import files
import io
import base64
from IPython.display import display, HTML
import subprocess

In [None]:
# Download dataset from Hugging Face
def download_dataset_from_hf(local_dir="./", repo_id="dragons666/ui2html_results",
                             filename="ui2html_results.zip"):
    """Download a zip archive from a Hugging Face dataset repo and extract it.

    Args:
        local_dir: Directory the archive is extracted into; created if missing.
        repo_id: Hub repository to download from (``username/repo-name``).
        filename: Name of the zip file inside that repository.

    Returns:
        bool: True when the archive was downloaded and extracted, False when
        the download or extraction raised (the error is printed, not raised).
    """
    print("Downloading dataset from Hugging Face...")

    # Ensure the extraction target exists before any network work starts.
    os.makedirs(local_dir, exist_ok=True)

    try:
        print(f"Will download from repository: {repo_id}")

        # hf_hub_download returns the local path of the downloaded file.
        zip_path = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type="dataset"
        )
        print(f"Dataset downloaded to: {zip_path}")

        print(f"Extracting dataset to: {local_dir}")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(local_dir)

        print("Dataset download and extraction complete")
        return True
    except Exception as e:
        # Best-effort: callers only get a success flag, never an exception
        # from the download/extract step.
        print(f"Error downloading dataset: {e}")
        return False

In [None]:
# Render HTML to image (Colab compatible version)
def render_html_to_image(html_str, output_path):
    """Render an HTML string to a PNG screenshot with headless Chrome.

    The HTML is written to a uniquely named temporary file, opened through a
    ``file://`` URL in a Selenium-driven Chrome instance, and captured with
    ``save_screenshot``.

    Args:
        html_str: HTML markup to render.
        output_path: Destination path of the screenshot. Its parent directory
            is created when the path has one.

    Returns:
        ``output_path`` on success, or ``None`` when starting the browser,
        loading the page, or taking the screenshot raised (the error is
        printed). The browser is always shut down and the temporary HTML file
        is always removed, on both the success and the failure path.
    """
    import tempfile

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    # os.makedirs("") raises, so only create a directory when there is one.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # mkstemp guarantees a fresh file name, unlike a timestamp-based name.
    fd, html_file = tempfile.mkstemp(prefix="temp_", suffix=".html")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(html_str)

        options = Options()
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--window-size=1280x1024')

        driver = None
        try:
            driver = webdriver.Chrome(options=options)
            driver.get("file://" + os.path.abspath(html_file))
            time.sleep(1)  # Wait for rendering
            driver.save_screenshot(output_path)
        except Exception as e:
            print(f"Error rendering HTML: {e}")
            return None
        finally:
            # Quit even when rendering failed, otherwise every failed sample
            # leaves a Chrome process running.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    print(f"Error closing browser: {e}")
    finally:
        # Clean up the temporary file; a failure to delete it is harmless.
        try:
            os.remove(html_file)
        except OSError:
            pass

    return output_path

# Load CLIP model for similarity calculation
def load_clip_model():
    """Load the pretrained CLIP model and its processor.

    Returns:
        tuple: ``(clip_model, clip_processor)``, both loaded from the
        "openai/clip-vit-base-patch32" checkpoint.
    """
    print("Loading CLIP model for image similarity calculation...")
    from transformers import CLIPModel, CLIPProcessor

    # Model and processor come from the same checkpoint.
    checkpoint = "openai/clip-vit-base-patch32"
    model = CLIPModel.from_pretrained(checkpoint)
    processor = CLIPProcessor.from_pretrained(checkpoint)
    return model, processor

# Get image embedding
def get_image_embedding(image_path, clip_model, clip_processor):
    """Return the L2-normalised CLIP image features for one image file.

    Args:
        image_path: Path of the image to embed; it is converted to RGB.
        clip_model: Model providing ``get_image_features``.
        clip_processor: Processor that turns the image into model inputs.

    Returns:
        The normalised feature tensor, or ``None`` if any step raised (the
        error is printed together with the image path).
    """
    try:
        rgb_image = Image.open(image_path)
        rgb_image = rgb_image.convert("RGB")
        model_inputs = clip_processor(images=rgb_image, return_tensors="pt")
        # Inference only: no gradients are needed for the embedding.
        with torch.no_grad():
            features = clip_model.get_image_features(**model_inputs)
        normalised = F.normalize(features, p=2, dim=-1)
        return normalised
    except Exception as e:
        print(f"Error getting embedding for image {image_path}: {e}")
        return None

# Calculate cosine similarity
def cosine_similarity(a, b):
    """Return the cosine similarity of two embedding tensors as a float.

    The vectors are compared along their last dimension and normalised
    internally, so the result is a true cosine similarity whether or not the
    inputs were L2-normalised beforehand.

    Args:
        a: Embedding tensor, or ``None``.
        b: Embedding tensor, or ``None``.

    Returns:
        float: The cosine similarity, or 0.0 when either input is ``None``.
    """
    if a is None or b is None:
        return 0.0
    return F.cosine_similarity(a, b, dim=-1).item()

In [None]:
# Calculate similarity metrics
def calculate_similarity_metrics():
    """Score base and fine-tuned HTML renderings against the original images.

    Reads ``test_results/base_outputs.json`` and
    ``test_results/ft_results.json``, renders each sample's base and
    fine-tuned HTML to PNG files under ``test_results/rendered``, and compares
    the CLIP embedding of each rendering with that of the sample's original
    image.

    Returns:
        dict | None: A mapping of column name to list with the keys
        ``sample_id``, ``original_path``, ``base_rendered_path``,
        ``ft_rendered_path``, ``base_similarity``, ``ft_similarity`` and
        ``improvement`` (one entry per processed sample). ``None`` when either
        input file is missing or no sample was processed successfully.
    """
    print("Calculating similarity metrics...")

    # Ensure render directory exists
    os.makedirs("test_results/rendered", exist_ok=True)

    # Load both inference outputs; a missing file aborts with a hint.
    inputs = (
        (
            "base",
            "test_results/base_outputs.json",
            "base_outputs.json not found. Please run the inference script or download the dataset first.",
        ),
        (
            "ft",
            "test_results/ft_results.json",
            "ft_results.json not found. Please run the inference script or download the dataset first.",
        ),
    )
    loaded = {}
    for key, json_path, missing_message in inputs:
        try:
            with open(json_path, "r") as handle:
                loaded[key] = json.load(handle)
        except FileNotFoundError:
            print(missing_message)
            return None
    base_data = loaded["base"]
    ft_data = loaded["ft"]

    # Index fine-tuned results by sample id for constant-time lookup.
    ft_dict = {}
    for ft_item in ft_data:
        ft_dict[ft_item['sample_id']] = ft_item

    clip_model, clip_processor = load_clip_model()

    # Column-oriented accumulator; the key order defines the column order.
    columns = (
        'sample_id',
        'original_path',
        'base_rendered_path',
        'ft_rendered_path',
        'base_similarity',
        'ft_similarity',
        'improvement',
    )
    results = {column: [] for column in columns}

    for item in tqdm(base_data, desc="Processing samples"):
        sample_id = item['sample_id']

        # Only samples present in both result files can be compared.
        if sample_id not in ft_dict:
            print(f"Sample {sample_id} has no corresponding fine-tuned result, skipping")
            continue

        original_path = item['original_path']
        base_html = item['base_html']
        ft_html = ft_dict[sample_id]['ft_html']

        # Render both HTML variants; each call returns None on failure.
        base_rendered_path = render_html_to_image(
            base_html, f"test_results/rendered/{sample_id}_base.png"
        )
        ft_rendered_path = render_html_to_image(
            ft_html, f"test_results/rendered/{sample_id}_ft.png"
        )
        if None in (base_rendered_path, ft_rendered_path):
            print(f"Rendering failed for sample {sample_id}, skipping")
            continue

        try:
            embeddings = [
                get_image_embedding(path, clip_model, clip_processor)
                for path in (original_path, base_rendered_path, ft_rendered_path)
            ]

            # A sample is dropped as soon as one of its three images has no embedding.
            if any(embedding is None for embedding in embeddings):
                print(f"Image embedding extraction failed for sample {sample_id}, skipping")
                continue
            e_orig, e_base, e_ft = embeddings

            base_similarity = cosine_similarity(e_orig, e_base)
            ft_similarity = cosine_similarity(e_orig, e_ft)
            improvement = ft_similarity - base_similarity

            row = (
                sample_id,
                original_path,
                base_rendered_path,
                ft_rendered_path,
                base_similarity,
                ft_similarity,
                improvement,
            )
            for column, value in zip(columns, row):
                results[column].append(value)

            print(f"\n  Base model similarity: {base_similarity:.4f}")
            print(f"  Fine-tuned model similarity: {ft_similarity:.4f}")
            print(f"  Improvement: {improvement:.4f}")
        except Exception as e:
            print(f"Error processing sample {sample_id}: {e}")
            continue

    # Nothing usable was produced.
    if not results['sample_id']:
        print("Warning: No samples were successfully processed, cannot generate results")
        return None

    return results

In [None]:
# Generate statistics and visualizations
def generate_stats_and_visualizations(results):
    """Summarise the similarity results and save a CSV, a text summary and two charts.

    Args:
        results: Column-name -> list mapping containing at least the
            ``base_similarity``, ``ft_similarity`` and ``improvement`` columns.

    Side effects:
        Writes ``similarity_metrics.csv``, ``summary_stats.txt``,
        ``similarity_comparison.png`` and ``improvement_histogram.png`` into
        ``test_results/`` (created if missing), prints the summary, and
        displays both figures.
    """
    print("Generating statistics and visualizations...")

    # Do not rely on an earlier step having created the output directory.
    os.makedirs("test_results", exist_ok=True)

    # Create results dataframe and persist the per-sample metrics.
    results_df = pd.DataFrame(results)
    results_df.to_csv("test_results/similarity_metrics.csv", index=False)

    # Calculate aggregate metrics
    improvement = results_df['improvement']
    avg_base_similarity = results_df['base_similarity'].mean()
    avg_ft_similarity = results_df['ft_similarity'].mean()
    avg_improvement = improvement.mean()
    median_improvement = improvement.median()
    improvement_percent = (improvement > 0).mean() * 100
    unchanged_percent = (improvement == 0).mean() * 100
    degraded_percent = (improvement < 0).mean() * 100

    # Build the summary once so the console output and the saved file can
    # never drift apart (the file used to lack the last two lines).
    summary_lines = [
        "=== Statistical Summary ===",
        f"Number of test samples: {len(results_df)}",
        f"Base model average similarity: {avg_base_similarity:.4f}",
        f"Fine-tuned model average similarity: {avg_ft_similarity:.4f}",
        f"Average improvement: {avg_improvement:.4f}",
        f"Median improvement: {median_improvement:.4f}",
        f"Percentage of improved samples: {improvement_percent:.2f}%",
        f"Percentage of unchanged samples: {unchanged_percent:.2f}%",
        f"Percentage of degraded samples: {degraded_percent:.2f}%",
    ]
    summary_text = "\n".join(summary_lines)

    print("\n" + summary_text)
    with open("test_results/summary_stats.txt", "w") as f:
        f.write(summary_text + "\n")

    # Scatter plot: points above the dashed diagonal are samples where the
    # fine-tuned model scored higher than the base model.
    plt.figure(figsize=(10, 6))
    plt.scatter(results_df['base_similarity'], results_df['ft_similarity'])
    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line
    plt.xlabel('Base Model Similarity')
    plt.ylabel('Fine-tuned Model Similarity')
    plt.title('Performance Comparison: Base vs Fine-tuned Model')
    plt.axis('equal')
    plt.grid(True)
    plt.savefig("test_results/similarity_comparison.png")
    display(plt.gcf())  # Display chart in Colab
    plt.close()

    # Histogram of per-sample improvement; the red line marks "no change".
    plt.figure(figsize=(10, 6))
    plt.hist(improvement, bins=20)
    plt.axvline(x=0, color='r', linestyle='--')
    plt.xlabel('Improvement (Fine-tuned - Base)')
    plt.ylabel('Frequency')
    plt.title('Improvement Histogram')
    plt.grid(True)
    plt.savefig("test_results/improvement_histogram.png")
    display(plt.gcf())  # Display chart in Colab
    plt.close()

    print("\nTest results saved to test_results/")
    print("Visual comparisons saved to test_results/rendered/")
    print("Metrics saved to test_results/similarity_metrics.csv")
    print("Charts saved to test_results/")

In [None]:
# Helper function: Display image comparison in Colab
def display_image_comparison(original_path, base_path, ft_path, sample_id):
    """Show the original image and both model renderings side by side.

    Each image is inlined as a base64 ``data:image/png`` URI inside an HTML
    fragment, which is then rendered in the notebook output.

    Args:
        original_path: Path of the original image.
        base_path: Path of the base model rendering.
        ft_path: Path of the fine-tuned model rendering.
        sample_id: Identifier shown in the heading above the three images.
    """
    # Encode up front, in the same order the panels appear.
    original_b64 = image_to_base64(original_path)
    base_b64 = image_to_base64(base_path)
    ft_b64 = image_to_base64(ft_path)

    markup = f"""
    <div style="display: flex; flex-direction: column; align-items: center; margin-bottom: 20px">
        <h3>Sample ID: {sample_id}</h3>
        <div style="display: flex; justify-content: center;">
            <div style="margin: 10px; text-align: center;">
                <img src="data:image/png;base64,{original_b64}" style="max-width: 300px;" />
                <p>Original Image</p>
            </div>
            <div style="margin: 10px; text-align: center;">
                <img src="data:image/png;base64,{base_b64}" style="max-width: 300px;" />
                <p>Base Model Rendering</p>
            </div>
            <div style="margin: 10px; text-align: center;">
                <img src="data:image/png;base64,{ft_b64}" style="max-width: 300px;" />
                <p>Fine-tuned Model Rendering</p>
            </div>
        </div>
    </div>
    """
    display(HTML(markup))

# Helper function: Convert image to base64
def image_to_base64(image_path):
    """Read a file and return its contents as a base64-encoded text string.

    Returns an empty string, after printing the error, when the file cannot
    be read or encoded.
    """
    try:
        with open(image_path, "rb") as image_file:
            raw_bytes = image_file.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode('utf-8')
    except Exception as e:
        print(f"Error converting image to base64: {e}")
        return ""

# Display comparison results
def display_top_improvements(results, n=2):
    """Print and display the ``n`` samples with the largest improvement.

    Args:
        results: Column-name -> list mapping of per-sample metrics, or
            ``None`` when no metrics are available.
        n: Number of top samples to show.
    """
    if results is None:
        print("No results available")
        return

    # Rank every sample by improvement, best first, and keep the first n.
    ranked = pd.DataFrame(results).sort_values('improvement', ascending=False)
    top_rows = ranked.head(n)

    print(f"\nDisplaying top {n} samples with the largest improvements:")
    for _, sample in top_rows.iterrows():
        sample_id = sample['sample_id']
        print(f"\nSample {sample_id}:")
        print(f"Base model similarity: {sample['base_similarity']:.4f}")
        print(f"Fine-tuned model similarity: {sample['ft_similarity']:.4f}")
        print(f"Improvement: {sample['improvement']:.4f}")

        # Side-by-side view of the original and both renderings.
        display_image_comparison(
            sample['original_path'],
            sample['base_rendered_path'],
            sample['ft_rendered_path'],
            sample_id,
        )

In [None]:
print("Starting UI2HTML statistics and visualization script (Colab compatible version)...")

# Fetch and unpack the inference outputs. The success flag is not checked
# here; a missing input file is reported by calculate_similarity_metrics().
download_dataset_from_hf()

# Render every sample and score it against its original image.
results = calculate_similarity_metrics()

if results is None:
    print("Unable to calculate similarity metrics, script terminated")
else:
    # Aggregate statistics and charts first, then the best individual samples.
    generate_stats_and_visualizations(results)
    display_top_improvements(results)
    print("Statistics and visualization script execution completed")