# Run Test-Time Simulation with Single Reward Model

In [1]:
import numpy as np
import os 
import matplotlib.pyplot as plt
import yaml 
import json

In [2]:
'---- Load the computed reward scores of each prompt, each image and each timestep ---'
# Configuration: which precomputed reward files to read.
value_folder = 'values'
diffusion_model = 'flux' # flux or sdxl
reward_model = 'imr' # imr or pic or hps
num_prompt = 200 # number of prompts to use
num_image = 200 # number of images per prompt
num_steps = 20 # number of diffusion inference timesteps

location_base = f'{value_folder}/{diffusion_model}_{reward_model}_val_base'
location_naft = f'{value_folder}/{diffusion_model}_{reward_model}_val_naft'


def _load_step_rewards(step, primary_dir, fallback_dir=None):
    """Load the reward scores stored for one diffusion timestep.

    Args:
        step: Timestep index; it is formatted as two digits in the file name.
        primary_dir: Folder that is tried first.
        fallback_dir: Optional folder used when the file is missing from
            ``primary_dir``. With ``None`` no existence check is made and a
            missing file raises from ``open``.

    Returns:
        The values of the YAML mapping stacked along axis 0 in sorted-key
        order (expected shape: (num_prompt, num_image)).
    """
    file_name = f'p{num_prompt}_i{num_image}_s{step:02d}.yaml'
    path = f'{primary_dir}/{file_name}'
    if fallback_dir is not None and not os.path.exists(path):
        path = f'{fallback_dir}/{file_name}'
    # Use a context manager so the file handle is closed right after parsing.
    with open(path) as handle:
        r_dict = yaml.safe_load(handle)
    return np.stack([r_dict[k] for k in sorted(r_dict.keys())], axis=0)


# Base rewards: one file per timestep, stacked on a trailing step axis.
reward_base_all = np.stack(
    [_load_step_rewards(step, location_base) for step in range(num_steps)],
    axis=-1,
)  # shape (num_prompt, num_image, num_steps)
print(f'Loaded base reward scores with shape: {reward_base_all.shape}')

# NAFT rewards: some steps are not used for naft (e.g. steps too high or too
# low), so those steps fall back to the base values instead.
reward_naft_all = np.stack(
    [
        _load_step_rewards(step, location_naft, fallback_dir=location_base)
        for step in range(num_steps)
    ],
    axis=-1,
)  # shape (num_prompt, num_image, num_steps)

print(f'Loaded naft reward scores with shape: {reward_naft_all.shape}')

Loaded base reward scores with shape: (200, 200, 20)
Loaded naft reward scores with shape: (200, 200, 20)


In [14]:
'Compare TTSnap, TTSp and Best-of-N'
from ttsnap_sim import *

# Cost figures handed to Simulation as `x` and `y` (both in TFlops).
denoise_budget = 9.9 # TFlops
verifier_budget = 1.2 # TFlops

# Shared total budget for both methods, and the `iters` value given to each run.
budget, iters = 2000, 40

S = Simulation(x=denoise_budget, y=verifier_budget, max_step=num_steps)

# --- Best-of-N: only the rewards at the last timestep are passed in ---
final_step_rewards = reward_base_all[:, :, -1]
n = int(budget / S.bon_cost())  # truncate to a whole number of images
print('Number of images used Best-of-N:', n)
r_bon, _ = S.bon_run(final_step_rewards, image_num_use=n, iters=iters)

# --- TTSnap: the same hyperparameters feed both the cost and the run ---
alphas = [0.4, 0.4, 0.5] # hyperparameters
steps_use = [2, 6, 11] # hyperparameters
ttsp_kwargs = dict(alpha_s=alphas, steps_use=steps_use)
n = int(budget / S.ttsp_cost(**ttsp_kwargs))  # truncate to a whole number of images
print('Number of images used TTsnap:', n)
r_ttsp, _ = S.ttsp_run(reward_naft_all, image_num_use=n, iters=iters, **ttsp_kwargs)

# Report the two results in the same order as they were computed.
print('Averaged reward Best-of-N of all the prompts:', r_bon)
print('Averaged reward TTsnap of all the prompts:', r_ttsp)

Number of images used Best-of-N: 10
Number of images used TTsnap: 38
Averaged reward Best-of-N of all the prompts: 1.644121504655399
Averaged reward TTsnap of all the prompts: 1.7378503455044119
