In [None]:
# Enable the autoreload extension; mode 2 picks up edits to imported modules live
%load_ext autoreload
%autoreload 2

# Exploring the strengths and weaknesses of Stable Diffusion XL 0.9
This notebook aims to uncover the strengths and weaknesses of the current Stable Diffusion XL 0.9 model.

**Note that it builds on [THIS NOTEBOOK](stable_diffusion_evaluation.ipynb), which generates the necessary data. Run that notebook first in order to follow along, or just use your own data.**

# Step 1: Loading the data

In [None]:
# Imports
import pandas as pd
import numpy as np
from sliceguard import SliceGuard
from renumics.spotlight import Image, Embedding

In [None]:
# Read the scored and embedded dataset from its JSON file into a DataFrame
df = pd.read_json(path_or_buf="sd_dataset_scored_embedded_parti.json")

In [None]:
# Stack the per-row embedding columns of the dataframe into 2-D numpy arrays
# (presumably one row per prompt/image pair -- verify against the dataset).
clip_text_embeddings = np.vstack(df["clip_text_embedding"].to_list())
clip_image_embeddings = np.vstack(df["clip_image_embedding"].to_list())

# Step 2: Category-based analysis
Check if any **categories** are giving worse results based on the pre-computed CLIP Score.

In [None]:
# Metric adapter: the score already exists per row, so it only needs averaging
def return_precomputed_metric(y, y_pred):
    """Return the mean of the precomputed scores in ``y`` along axis 0.

    ``y_pred`` is accepted only so the function has the two-argument
    ``(y, y_pred)`` metric signature; it is never read.
    """
    precomputed_scores = y
    return precomputed_scores.mean(0)

In [None]:
# Fresh SliceGuard instance for the category analysis
sg = SliceGuard()

# Visualize which drop and support levels could make sense for "category".
# The "clip_score" column is passed twice because return_precomputed_metric
# only averages its first argument and ignores the second.
sg.show(
    df,
    ["category"],
    "clip_score",
    "clip_score",
    return_precomputed_metric,
)

In [None]:
# Find category specific issues.
# A fresh SliceGuard instance is created here, as in the other find_issues
# cells of this notebook, so this cell does not depend on the instance left
# behind by the preceding sg.show(...) cell.
sg = SliceGuard()

# The "clip_score" column is passed twice because return_precomputed_metric
# only averages its first argument. min_support / min_drop are the thresholds
# for this run (presumably picked from the sg.show output -- confirm).
issues = sg.find_issues(
    df,
    ["category"],
    "clip_score",
    "clip_score",
    return_precomputed_metric,
    min_support=50,
    min_drop=0.5,
)

# Report the detected issues; the "image" column is declared as a Spotlight Image
sg.report(spotlight_dtype={"image": Image})

# Step 3: Challenge-based analysis
Check if there are any **challenges** in image generation that are problematic for Stable Diffusion.

In [None]:
# Fresh SliceGuard instance for the challenge analysis
sg = SliceGuard()

# Visualize which drop and support levels could make sense for "challenge"
sg.show(
    df,
    ["challenge"],
    "clip_score",
    "clip_score",
    return_precomputed_metric,
)

In [None]:
# Find challenge specific issues
sg = SliceGuard()

# Search the "challenge" feature for problematic slices using the
# thresholds below, then report what was found.
sg.find_issues(
    df,
    ["challenge"],
    "clip_score",
    "clip_score",
    return_precomputed_metric,
    min_drop=1,
    min_support=20,
)
sg.report(spotlight_dtype={"image": Image})

# Step 4: Challenge, Category interaction
Check if there are combinations of **categories and challenges** that are especially difficult.

In [None]:
# Fresh SliceGuard instance for the combined challenge/category analysis
sg = SliceGuard()

# Visualize which drop and support levels could make sense when
# "challenge" and "category" are considered together
sg.show(
    df,
    ["challenge", "category"],
    "clip_score",
    "clip_score",
    return_precomputed_metric,
)

In [None]:
# Find issues based on combinations of category and challenge.
sg = SliceGuard()

# Search both features jointly using the thresholds below, then report
sg.find_issues(
    df,
    ["challenge", "category"],
    "clip_score",
    "clip_score",
    return_precomputed_metric,
    min_drop=1,
    min_support=20,
)
sg.report(spotlight_dtype={"image": Image})

# Step 5: Analysis based on prompt embeddings
Check if there are clusters in the **embedding space** of CLIP text embeddings that are especially challenging.

In [None]:
# Fresh SliceGuard instance for the embedding-based analysis
sg = SliceGuard()

# Visualize which drop and support levels could make sense for the
# "clip_text_embedding" feature, supplying the already stacked embeddings
sg.show(
    df,
    ["clip_text_embedding"],
    "clip_score",
    "clip_score",
    return_precomputed_metric,
    precomputed_embeddings={"clip_text_embedding": clip_text_embeddings},
)

In [None]:
# Fresh SliceGuard instance for finding embedding-based issues
sg = SliceGuard()

# Search the "clip_text_embedding" feature for problematic slices, supplying
# the already stacked embeddings, then report what was found
issues = sg.find_issues(
    df,
    ["clip_text_embedding"],
    "clip_score",
    "clip_score",
    return_precomputed_metric,
    min_support=3,
    min_drop=6,
    precomputed_embeddings={"clip_text_embedding": clip_text_embeddings},
)
sg.report(spotlight_dtype={"image": Image})