#### Calling RunPod Endpoint

In [1]:
# Using the same BASE_URL you defined
INTERNAL_PORT = 30000 # Need to expose port from runpod instance editting window 
POD_ID = "5hkrkc2bdgjocd" # Specific to each runpod instance
BASE_URL = f"https://{POD_ID}-{INTERNAL_PORT}.proxy.runpod.net" # replace "localhost" with this

import requests

prompts = [  
    "What is the capital of France?",
    "Tell me a short joke"
] * 2

grammar_str = """
root ::= "Batman: " word
word ::= letter letter letter letter letter
letter ::= [a-zA-Z]
"""

# grammar_str = """
# root ::= [a-z]
# """

request_data = {
    "prompts": prompts,
    "grammar": grammar_str
}

# response with grammar constraint
response = requests.post(
    f"{BASE_URL}/generate_cfg",
    json=request_data,
)

# response without grammar constraint 
# response = requests.post(
#     f"{BASE_URL}/generate",
#     json = prompts
# )

if response.status_code == 200:
    results = response.json()
    print("\nResults:")
    print(results)
else:
    print(f"Error: {response.status_code}")
    print(response.text)
    
    
# Next question is how to convert interleaved text & image data into prompt
# llama3.2-vision-instruct should provide such information right? 



Results:
{'results': ['Batman: TheaR', 'Batman: WhyIP', 'Batman: thess', 'Batman: INuag']}


#### MetaPrompt to GrammarString
1. Structural inference slow down the offline engine and sometimes break it. 
-  https://github.com/vllm-project/vllm/issues/8313

2. Guided generation is too slow in offline mode, so we need to move away from it.

In [5]:
# grammar_str = """
# root ::= "Batman: " [!-~\x20]+ 
# """ # proper grammar for string

# In practice, we want to connect "meta_prompt" object 
# to a grammar string
# A simple case involves single output of either string or integer type

meta_prompt = {'task': 'Get the age of a celebrity.',
 'func_name': 'get_celeb_age',
 'inputs': ['name'],
 'outputs': ['age'],
 'input_types': ['str'],
 'output_types': ['int'],
 'mode': 'prompt'}

# need a function to convert above meta_prompt's outputs into grammar_str 
# - note that we usually don't require list output, let's stick to either string or integer for now

def convert_meta_prompt_to_output_grammar(meta_prompt: dict):
    # collect output information 
    output_names = meta_prompt["outputs"]
    map_type = lambda x: "number" if x == "int" else "string"
    output_types = [map_type(t) for t in meta_prompt["output_types"]]
    # now convert to grammar string
    grammar_parts = [
        'root ::= "{" pair "}"',
        f'pair ::= "\\"" {output_names[0]} "\\"" ":" value',
        f'{output_names[0]} ::= string',
        f'value ::= {output_types[0]}',
        'string ::= "\\"" [!-~\\x20]+ "\\""',
        'number ::= [0-9]+'
    ]
    return '\n'.join(grammar_parts)


# grammar_str = r""" 
# root ::= "{" pair "}"
# pair ::= string ":" value
# value ::= string | number 
# string ::= "\"" [!-~\x20]+ "\""
# number ::= [0-9]+
# """


grammar_str = convert_meta_prompt_to_output_grammar(meta_prompt)

In [6]:
grammar_str

'root ::= "{" pair "}"\npair ::= "\\"" age "\\"" ":" value\nage ::= string\nvalue ::= number\nstring ::= "\\"" [!-~\\x20]+ "\\""\nnumber ::= [0-9]+'

In [7]:
prompts = [  
    "Joker: So we meet again, Batman!\n"
] * 10

request_data = {
    "prompts": prompts,
    "grammar": grammar_str
}

# response with grammar constraint
response = requests.post(
    f"{BASE_URL}/generate_cfg",
    json=request_data,
)

if response.status_code == 200:
    results = response.json()
    print("\nResults:")
    print(results)
else:
    print(f"Error: {response.status_code}")
    print(response.text)
    
# CFG guided generation is very slow ... 

Error: 524
<!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>


<title>5hkrkc2bdgjocd-30000.proxy.runpod.net | 524: A timeout occurred</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/main.css" />


</head>
<body>
<div id="cf-wrapper">
    <div id="cf-error-details" class="p-0">
        <header class="mx-auto pt-10 lg:pt-6 lg:px-8 w-240 lg:w-full mb-8">
            <h1 class="inline-block sm:block sm:mb-2 font-light text-60 lg:tex

In [32]:
results['results']

['{"And":"I"}',
 '{"Winter":"backup"}',
 '{"ImpressedpauseAnyonehaveanyfunplanningthatattackonGothamCitytodayorjustanotherbrilliantmomentofinsanityofyoursceeemeetyoufthereporterwhisperingstopwimperoncdnationalNewsreportershishiateroneodyzerosubberyourageagainAha":"WhatdidyouexpectInthejungleofchampionsbareyourpawsandteethInserstandbeunderstoodApollothisoneIIoceneattleoolovertamenteightyearsDeathswistentialnotniceactuallySiteuruscampfastaabinterruptnavigationprioritylisttemplatePrknownaroundthecirclevalandinmonsertoirealreadyalemicroaghfigurerefreshagentiscriminyoursBangpptartantoticsLvattentiontiesmailRArlicktoswhoseWithoutcoldThrowStoryyoumadeoutofEuropeDegsymOL409publiciwellactuallySotheretherchildforestfrontrot03WowAlreadyocyluzzizsidecolboundboxingederlandrequiresornsothereeoshandandestroy18skillthroughautmeshirthishome_senderOotoBeAndiamJobsfreeJsuhotelargumentmakinggestionarialackmetreadautideerwakeThigtehandlingantezoewearedFullnormunlessbeneficopsywatvojackoterdtokenlightNospre