In [None]:
# !pip install llm-comparator

In [None]:
# Run this if using a google3 Colab Kernel, such as with
# blaze run //third_party/javascript/llm_comparator/python/src/llm_comparator:kernel
# Otherwise, import modules using the following cell.
from llm_comparator import model_helper
from llm_comparator import llm_judge_runner
from llm_comparator import rationale_bullet_generator
from llm_comparator import rationale_cluster_generator
import vertexai

In [None]:
from llm_comparator import model_helper
from llm_comparator import llm_judge_runner
from llm_comparator import rationale_bullet_generator
from llm_comparator import rationale_cluster_generator
import vertexai

In [None]:
#@title Setup for using Vertex AI.
# This cell relies on `vertexai` and `model_helper` having been imported by one
# of the import cells above.
from google.colab import auth

# Authenticate the Colab user before any Google Cloud call is made below.
auth.authenticate_user()

# Colab form fields: edit these in the form UI (or inline) to target your own
# Google Cloud project and region.
PROJECT_ID = 'pair-experimental'  #@param {type: "string"}
REGION = 'us-central1'  #@param {type: "string"}

# Shell magic: point the gcloud CLI at the same project chosen above.
! gcloud config set project {PROJECT_ID}

# Initialize the Vertex AI SDK with the project and region chosen above.
vertexai.init(project=PROJECT_ID, location=REGION)

# Model helpers shared by later cells: `generator` is passed to the judge,
# bullet generator and clusterer; `embedder` is passed to the clusterer only.
generator = model_helper.VertexGenerationModelHelper()
embedder = model_helper.VertexEmbeddingModelHelper()

In [None]:
# Toy side-by-side evaluation set: each entry carries one prompt plus the two
# candidate responses (A and B) to compare.
llm_judge_inputs = [
    dict(
        prompt='how are you?',
        response_a='good',
        response_b='bad',
    ),
    dict(
        prompt='hello?',
        response_a='hello',
        response_b='hi',
    ),
    dict(
        prompt='what is the capital of korea?',
        response_a='Seoul',
        response_b='Vancouver',
    ),
]

In [None]:
# End-to-end pipeline: judge -> bulleted rationales -> rationale clusters.
# Each stage consumes the previous stage's output, and all three stages reuse
# the `generator` model helper created in the Vertex AI setup cell.

# Run LLM judge.
judge = llm_judge_runner.LLMJudgeRunner(generator)
# NOTE(review): the positional `4` is presumably the number of judge
# repetitions per example -- confirm against LLMJudgeRunner.run's signature.
judge_outputs = judge.run(llm_judge_inputs, 4)

# Generate bulleted summary of rationales.
bullet_generator = rationale_bullet_generator.RationaleBulletGenerator(
    generator)
bullet_generator_outputs = bullet_generator.run(judge_outputs)

# Cluster the bulleted summary of rationales.
# The clusterer is the only stage that also needs the embedding helper.
clusterer = rationale_cluster_generator.RationaleClusterGenerator(
    generator, embedder)
# `run` returns a pair; both halves are written into the JSON file built in
# the "Prepare JSON" cell below.
clusters, rationales_with_similarities = clusterer.run(
    bullet_generator_outputs, num_clusters=5
)

# TODO: Create a wrapper class that includes both LLM judge and rationale summary (not implemented yet).

In [None]:
#@title Prepare JSON for LLM Comparator
# TODO: Move to the pip package.
import json

# The three per-example sequences are joined positionally below. zip() stops at
# the shortest one, so a length mismatch would silently drop (or misalign)
# examples in the output file; fail loudly instead.
if not (
    len(llm_judge_inputs)
    == len(judge_outputs)
    == len(rationales_with_similarities)
):
  raise ValueError(
      'Per-example sequences differ in length: '
      f'{len(llm_judge_inputs)} inputs, '
      f'{len(judge_outputs)} judge outputs, '
      f'{len(rationales_with_similarities)} rationale lists.'
  )

# Assemble the single JSON document written below: file-level metadata, the
# two model names, one record per example, and the rationale clusters.
llm_comparator_data = {
    'metadata': {'custom_fields_schema': []},
    'models': [{'name': 'A'}, {'name': 'B'}],
    'examples': [
        {
            'input_text': judge_input['prompt'],
            'tags': [],
            'output_text_a': judge_input['response_a'],
            'output_text_b': judge_input['response_b'],
            'score': judge_output['score'],
            'individual_rater_scores': judge_output['individual_rater_scores'],
            'rationale_list': rationale_list,
        }
        # `judge_input` rather than `input`, to avoid shadowing the builtin.
        for judge_input, judge_output, rationale_list in zip(
            llm_judge_inputs, judge_outputs, rationales_with_similarities
        )
    ],
    'rationale_clusters': clusters,
}

# Written to the working directory; the display cell at the end of the
# notebook is called with this same relative path.
with open('json_for_llm_comparator.json', 'w') as f:
  json.dump(llm_comparator_data, f)

In [None]:
# Fetch the LLM Comparator repository into the working directory: the display
# cell below loads the viewer page from `llm-comparator/docs/`.
!git clone https://github.com/PAIR-code/llm-comparator

In [None]:
#@title For displaying LLM Comparator.
from IPython.display import Javascript

# TODO: Move to the pip package.
def show_llm_comparator(json_path, height=800, port=8888):
  """Displays the LLM Comparator viewer for `json_path` in an inline iframe.

  Starts `python3 -m http.server` in the background on `port`, then injects
  JavaScript that asks Colab for the proxied URL of that port and points an
  iframe at `llm-comparator/docs/` under it, passing the proxied URL of
  `json_path` through the `results_path` query parameter.

  Every call launches another background server process; none are stopped
  here.

  Args:
    json_path: Path of the results JSON file, relative to the directory the
      HTTP server serves (presumably the notebook's working directory, which
      is http.server's default -- confirm if the kernel's cwd was changed).
    height: Height of the iframe, formatted as an integer.
    port: Local port to serve on; must be convertible to `int`.

  Raises:
    ValueError or TypeError: If `port` cannot be converted to an integer.
  """
  import json  # Local import so this cell does not depend on an earlier one.

  # `port` is interpolated into a shell command line below, so force it to a
  # plain integer before use.
  port = int(port)
  get_ipython().system_raw(f'python3 -m http.server {port} &')

  # Encode the path as a JSON string literal (also a valid JavaScript string
  # literal) so quotes or backslashes in it cannot break out of the script.
  json_path_js = json.dumps(str(json_path))

  display(Javascript("""
  (async () => {
    const fm = document.createElement('iframe')
    fm.src = await google.colab.kernel.proxyPort(%d)
    const resultsPath = fm.src + %s
    fm.src += 'llm-comparator/docs/'
    fm.src += '?results_path=' + resultsPath
    fm.width = '100%%'
    fm.height = '%d'
    fm.frameBorder = 0
    document.body.append(fm)
  })();
  """ % (port, json_path_js, height)))

In [None]:
show_llm_comparator('json_for_llm_comparator.json')