In [None]:
import json
import os
import requests
import random
import re
import tqdm
from transformers import AutoTokenizer

# Text-generation endpoint of the inference server listening on localhost:8080.
# get_completion() POSTs its request payload here.
gen_url = "http://127.0.0.1:8080/v1/completions"

# Token-counting endpoint on the same server.
# token_count() POSTs {"prompt": ...} here and reads "value" / "ids" back.
token_count_url = "http://127.0.0.1:8080/api/extra/tokencount"

def token_count(text, send_ids=False):
    """Ask the token-count endpoint how many tokens ``text`` contains.

    POSTs ``{"prompt": text}`` as JSON to ``token_count_url`` and reads the
    ``"value"`` (and optionally ``"ids"``) fields of the JSON reply.

    Expected response shape::

        {
            "value": 9,
            "ids": [1, 22557, 28725, 586, 1141, 349, 11952, 28709, 28723]
        }

    Args:
        text: The string to send as the ``"prompt"`` field.
        send_ids: When true, also return the ``"ids"`` list from the reply.

    Returns:
        The reply's ``"value"`` field alone, or the tuple
        ``(value, ids)`` when ``send_ids`` is true.
    """
    headers = {"Content-Type": "application/json"}

    # Named ``payload`` rather than ``json`` so the module-level ``json``
    # import is not shadowed inside this function.
    payload = {"prompt": text}

    response = requests.post(token_count_url, headers=headers, json=payload)

    # Decode the body once and reuse it for both fields.
    body = response.json()

    if send_ids:
        return body["value"], body["ids"]
    return body["value"]


def get_completion(prompt, max_tokens=200, temperature=1.0, min_p=0.2, stop_sequence=None, grammar=""):
    """POST a generation request to ``gen_url`` and return the parsed JSON reply.

    Args:
        prompt: Text sent as the ``"prompt"`` field.
        max_tokens: Sent as the ``"max_length"`` field.
        temperature: Sent as the ``"temperature"`` field.
        min_p: Sent as the ``"min_p"`` field.
        stop_sequence: List sent as the ``"stop_sequence"`` field. ``None``
            (the default) is sent as an empty list.
        grammar: Grammar text sent as the ``"grammar"`` field.

    Returns:
        The decoded JSON body of the HTTP response, unmodified.

    The response shape originally noted for this call was::

        {'results': [{'text': '...generated text...'}]}

    NOTE(review): get_completion_text() in this file reads a top-level
    "content" key instead, so the shape above may be stale or describe a
    different endpoint -- confirm against the server actually in use.
    """
    # Build a fresh list per call instead of sharing one mutable default
    # object across every invocation.
    stops = [] if stop_sequence is None else stop_sequence

    headers = {"Content-Type": "application/json"}

    # Named ``payload`` rather than ``json`` so the module-level ``json``
    # import is not shadowed inside this function.
    payload = {
        "prompt": prompt,
        "max_context_length": 8192,
        "max_length": max_tokens,
        "rep_pen": 1.0,
        "rep_pen_range": 600,
        "rep_pen_slope": 0,
        "temperature": temperature,
        "min_p": min_p,
        "sampler_order": [6, 0, 1, 2, 3, 4, 5],
        "grammar": grammar,
        "stop_sequence": stops,
    }

    response = requests.post(gen_url, headers=headers, json=payload)

    return response.json()

def get_completion_text(prompt, max_tokens=200, temperature=1.0, min_p=0.2, stop_sequence=None, grammar=""):
    """Run get_completion() and return only the reply's ``"content"`` field.

    Args:
        prompt: Text to complete; forwarded unchanged.
        max_tokens: Forwarded unchanged.
        temperature: Forwarded unchanged.
        min_p: Forwarded unchanged.
        stop_sequence: List of stop strings. ``None`` (the default) is
            forwarded as an empty list.
        grammar: Grammar text; forwarded unchanged.

    Returns:
        The value stored under ``"content"`` in the parsed response.

    Raises:
        KeyError: If the parsed response has no ``"content"`` key.
    """
    # Normalise here so a fresh list is created per call and ``None`` is
    # never forwarded into the request payload.
    stops = [] if stop_sequence is None else stop_sequence

    reply = get_completion(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        min_p=min_p,
        stop_sequence=stops,
        grammar=grammar,
    )
    return reply["content"]

# Grammar passed as the "grammar" field of the generation request to constrain
# the model's output (presumably llama.cpp-style GBNF -- confirm against the
# server). The shape it enforces is:
#   root   : one space, a `line`, a newline followed by "Answer:", then `answer`
#   answer : exactly " Yes" or " No"
#   line   : one or more characters, excluding line-break characters
#            (\r, \n, \x0b, \x0c, \x85, \u2028, \u2029) as well as "|" and ":"
# NOTE: this is a regular (non-raw) Python string, so the escapes below are
# expanded by Python into the actual characters before the text is sent.
grammar = """root ::= " " line "\nAnswer:" answer

# Yes or no answer
answer ::= " Yes" | " No"

# String
line ::= [^\r\n\x0b\x0c\x85\u2028\u2029|:]+
"""

# Prompt template on disk; the {{QUESTION}} placeholder in it is filled in below.
prompt_file = "./rubric_prompts/out-of-context.txt"

with open(prompt_file, "r") as f:
    prompt = f.read()

question = "If you had to choose another historical period to apply these concepts, when would you pick and why? What elements of that era might make them more effective?"

# Substitute the placeholder literally. str.replace is used instead of re.sub
# because re.sub treats its replacement argument as a template: a question
# containing a backslash (e.g. "\d" or "\1") would be mangled or raise re.error.
prompt = prompt.replace("{{QUESTION}}", question)

# Generate up to 100 tokens, constrained by the grammar string.
output = get_completion_text(prompt, max_tokens=100, grammar=grammar)

print(output)
