In [1]:
import os
import json
import tiktoken
import scenic
import random
# from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
from OutputParser import Scenic_output
from langchain_core.prompts.few_shot import FewShotPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_chroma import Chroma
from langchain.chains import LLMChain
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain.embeddings import LlamaCppEmbeddings, HuggingFaceEmbeddings
from langchain_core.documents.base import Document
from langchain_core.vectorstores import VectorStore
# from langchain_openai import OpenAIEmbeddings
# from scenic.simulators.carla import CarlaSimulator



In [None]:
# Basic Configurations for OpenAI
os.environ["OPENAI_API_KEY"] = "sk-gDa1bvL8Ian5Rkdq186bFbDeBf904447B4C707409e660dB6"
base_url = "https://apikeyplus.com/v1"
model_name = "gpt-3.5-turbo"
model = ChatOpenAI(
    model_name=model_name,
    max_tokens=2048,
    temperature=0,
    openai_api_key=os.environ["OPENAI_API_KEY"],
    openai_api_base= base_url
)
structured_model = model.with_structured_output(Scenic_output)

In [11]:
# Basic Configuration for Local LLaMA 2 7B Model
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
model = LlamaCpp(
    model_path="/home/xiaoyc/Downloads/hfd/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q4_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,
    n_ctx=5000
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /home/xiaoyc/Downloads/hfd/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:               

In [12]:
# Some useful functions
###############################################
# Calculate the number of tokens in a message.#
###############################################
def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301", include_final_response_prefix=True):
    """Returns the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        #print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if "gpt-3.5" in model:
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_name = -1  # if there's a name, the role is omitted                                                                                                                                                                                             
    elif "gpt-4" in model:
        tokens_per_message = 3
        tokens_per_name = 1
    else:
        raise NotImplementedError(f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md \
        for information on how messages are converted to tokens.""")
    
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    if include_final_response_prefix:
        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens

################################################
# Convert the example to chat messages         #
################################################
# Example are in json formats and messages is in the format of the chat
def example_to_chat_messages(example):
    messages = [
        {"role": "system", "name": "example_user", 
         "content": '""" Scenario description\n%s\n"""' % example['docstring']},
        {"role": "system", "name": "example_assistant", 
         "content": '```##Map and Model##\n%s\n##Constants##\n%s\n##Moniters##\n%s\n##Defining Agent Behaviors##\n%s\n##Spatial Relations##\n%s\n##Scenario Specifications##\n%s\n##Background Activities##\n%s\n```' \
             % (example['map_and_model'], example['constants'], example['monitors'], example['behaviors'], example['spatial_relations'], example['scenario'], example['background'])}
    ]
    return messages

################################################
# Get the number of tokens for the example     #
################################################
# We need to construct the chat messages from the example and then calculate the number of tokens
def get_example_num_tokens(example):
    return num_tokens_from_messages(example_to_chat_messages(example), model_name, 
                                    include_final_response_prefix=False)

################################################
# Convert the input to chat messages           #
################################################
# Input is a string or a dictionary with docstring
# If the input is a sole string, then we should directly add it to Scenario Description
# If the input is a dictionary, then we should extract the docstring from it
def input_to_chat_messages(example):
    docstring = example if (isinstance(example, str)) else example['docstring']
    messages = [
        {"role": "user", 
         "content": '""" Scenario description\n%s\n"""' % docstring}
    ]
    return messages

################################################
# Get the number of tokens for the input       #
################################################
# We need to construct the chat messages from the input and then calculate the number of tokens
def get_input_num_tokens(example, include_final_response_prefix=True):
    return num_tokens_from_messages(input_to_chat_messages(example), model_name, 
                                    include_final_response_prefix=include_final_response_prefix)

The structure a formatted request in LLM should contain the following fields:
- _System Message_
    - This field will determine the role of the message in the system. It will be used to determine the type of message that is being sent.
- _Example Message_
    - This field will contain the format of the message that is being sent.
- _Prompt Message/Input Message_
    - This field will contain the prompt message that is being sent to the model. Based on the prompt message, the model will modify some of the field values in the Example Message and generate new scenarios.
- _Response Message_
    - This field will contain the response message that is generated by the model. This message will be generated based on the Example Message and Prompt Message.
- _RAG Status_
    - This field will contain the RAG status of the message. This will be used to determine the quality of the response generated by the model.

In [5]:
# System message information
system_message_str="""You are a helpful agent that generates specifications for car driving scenarios in the Scenic language
Scenic is a domain-specific probabilistic programming language for modeling the environments of cyber-physical systems like robots and autonomous cars. A Scenic program defines a distribution over scenes,\
    configurations of physical objects and agents; sampling from this distribution yields concrete scenes which can be simulated to produce training or testing data. Scenic can also define (probabilistic)\
    policies for dynamic agents, allowing modeling scenarios where agents take actions over time in response to the state of the world.

Your task is to generate Scenic scenarios, each according to its corresponding description in English included as a docstring. Write each scenario in a separate code box. Follow the examples below:"""

system_role = "system"
system_message = {"role": system_role, "content": system_message_str}
# system_message_len = num_tokens_from_messages([system_message], model_name, include_final_response_prefix=False)
system_message_len = num_tokens_from_messages([system_message], include_final_response_prefix=False)
print(system_message_len)

155


In [6]:
# Example message information
import os
from JsonFormatter import convert_file

folder_path = '../data_scenic/NHTSA_Scenarios'
output_dir = '../data_json/formatted'

for _, dirs, _ in os.walk(folder_path):
    for dir in dirs:
        temp_path = folder_path + '/' + dir
        for file_name in os.listdir(temp_path):
            # Full path to the file
            file_path = os.path.join(temp_path, file_name)
            # Test if the file is a .scenic file
            if file_name.endswith('.scenic') and os.path.isfile(file_path):
                convert_file(file_path, output_dir)

examples = []

formatted_dir = '../data_json/formatted'

for file_name in os.listdir(formatted_dir):
    file_path = os.path.join(formatted_dir, file_name) 
    with open(file_path, "r") as f:
        scenic_doc = json.load(f) # Load the input from the JSON file
        del scenic_doc['has_docstring']
        del scenic_doc['name']
        for key in scenic_doc.keys():
            if scenic_doc[key] == None:
                scenic_doc[key] = ""
        examples.append(scenic_doc)


In [7]:
# Prompt message information/Input message information
input_message_str = "Vehicle is turning left/right at an \
intersection-related location, in a rural area at night, \
under clear weather conditions, with a posted speed limit \
of 25 mph; and then departs the edge of the road."

input_role = "user"
input_message = {"role": input_role, "docstring": input_message_str}
# input_message_len = num_tokens_from_messages([input_message], model_name, include_final_response_prefix=False)
input_message_len = num_tokens_from_messages([input_message], include_final_response_prefix=False)

We will use LangChain Few Shots Examples features to train our LLM.

In [None]:
examples_prompt = PromptTemplate(
    input_variables=[
        'docstring', 'map_and_model', 'constants', 'monitors', 'behaviors', 'spatial_relations', 'scenario'
    ],
    template="#Scenario description#\n{docstring}\n\n##Map and Model##\n{map_and_model}\n##Constants##\n{constants}\n##Moniters##\n{monitors}\n##Defining Agent Behaviors##\n{behaviors}\n##Spatial Relations##\n{spatial_relations}\n##Scenario Specifications##\n{scenario}\n"
)

to_vectorize = [" ".join(example.values()) for example in examples]
    
example_selector = SemanticSimilarityExampleSelector(
    vectorstore=Chroma.from_texts(
        texts=to_vectorize,
        embedding=OpenAIEmbeddings(
            base_url=base_url,
        ),
        metadatas=examples),
    example_keys=["docstring", "map_and_model", "constants", "monitors", "behaviors", "spatial_relations", "scenario"],
    input_keys=["docstring"],
    k = 5
)

few_shot_prompt = FewShotPromptTemplate(
    example_prompt=examples_prompt,
    example_selector=example_selector,
    prefix=system_message_str,
    suffix="Generate scenarios: {docstring}",
    input_variables=["docstring"],
)

# Final prompt message is composed of system message, example message and input message
final_prompt = few_shot_prompt

In [8]:
print(examples)
print(len(examples))
to_vectorize = [" ".join(example.values()) for example in examples]
print(len(to_vectorize))
print(to_vectorize)

vs = Chroma.from_texts(
    embedding=LlamaCppEmbeddings(
        model_path="/home/xiaoyc/Downloads/hfd/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q4_K_M.gguf",
        n_gpu_layers=-1
    ),
    texts=to_vectorize,
    metadatas=examples,
)
print(vs)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /home/xiaoyc/Downloads/hfd/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:               

[{'docstring': '"""\nTITLE: Intersection 05\nAUTHOR: Francis Indaheng, findaheng@berkeley.edu\nDESCRIPTION: Ego vehicle makes a right turn at 4-way intersection \nwhile adversary vehicle from opposite lane makes a left turn.\nSOURCE: NHSTA, #25\n"""', 'map_and_model': "\nparam map = localPath('../../../assets/maps/CARLA/Town05.xodr')\nparam carla_map = 'Town05'\nmodel scenic.simulators.carla.model\n\n", 'constants': "\nMODEL = 'vehicle.lincoln.mkz_2017'\n\nEGO_INIT_DIST = [20, 25]\nparam EGO_SPEED = VerifaiRange(7, 10)\nparam EGO_BRAKE = VerifaiRange(0.5, 1.0)\n\nADV_INIT_DIST = [5, 15]\nparam ADV_SPEED = VerifaiRange(7, 10)\n\nparam SAFETY_DIST = VerifaiRange(10, 20)\nCRASH_DIST = 5\nTERM_DIST = 70\n\n", 'monitors': '', 'behaviors': '\nbehavior EgoBehavior(trajectory):\n    try:\n        do FollowTrajectoryBehavior(target_speed=globalParameters.EGO_SPEED, trajectory=trajectory)\n    interrupt when withinDistanceToAnyObjs(self, globalParameters.SAFETY_DIST):\n        take SetBrakeActio

llm_load_tensors:        CPU buffer size =  3891.24 MiB
..................................................................................................
llama_new_context_with_model: n_ctx      = 512
llama_new_context_with_model: n_batch    = 512
llama_new_context_with_model: n_ubatch   = 512
llama_new_context_with_model: flash_attn = 0
llama_new_context_with_model: freq_base  = 10000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init:        CPU KV buffer size =   256.00 MiB
llama_new_context_with_model: KV self size  =  256.00 MiB, K (f16):  128.00 MiB, V (f16):  128.00 MiB
llama_new_context_with_model:        CPU  output buffer size =     0.14 MiB
llama_new_context_with_model:        CPU compute buffer size =    70.50 MiB
llama_new_context_with_model: graph nodes  = 1030
llama_new_context_with_model: graph splits = 1
AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F1

<langchain_chroma.vectorstores.Chroma object at 0x7f236ec22a60>


In [9]:

examples_prompt = PromptTemplate(
    input_variables=[
        'docstring', 'map_and_model', 'constants', 'monitors', 'behaviors', 'spatial_relations', 'scenario'
    ],
    template="#Scenario description#\n{docstring}\n\n##Map and Model##\n{map_and_model}\n##Constants##\n{constants}\n##Moniters##\n{monitors}\n##Defining Agent Behaviors##\n{behaviors}\n##Spatial Relations##\n{spatial_relations}\n##Scenario Specifications##\n{scenario}\n"
)

to_vectorize = [" ".join(example.values()) for example in examples]

# Examples are a list of dictionaries, each dictionary contains the following keys:
# docstring, map_and_model, constants, monitors, behaviors, spatial_relations, scenario
# documents = [Document(page_content=example) for example in examples]
    
example_selector = SemanticSimilarityExampleSelector(
    vectorstore=vs,
    example_keys=["docstring", "map_and_model", "constants", "monitors", "behaviors", "spatial_relations", "scenario"],
    input_keys=["docstring"],
    k = 5
)

few_shot_prompt = FewShotPromptTemplate(
    example_prompt=examples_prompt,
    example_selector=example_selector,
    prefix=system_message_str,
    suffix="Generate scenarios: {docstring}",
    input_variables=["docstring"],
)

# Final prompt message is composed of system message, example message and input message
final_prompt = few_shot_prompt

In [15]:
chain = LLMChain(llm=model, prompt=final_prompt)
result = chain.run(input_message_str)  


llama_print_timings:        load time =   23696.44 ms
llama_print_timings:      sample time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time =   43733.26 ms /    50 tokens (  874.67 ms per token,     1.14 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   44089.13 ms /    51 tokens
Llama.generate: prefix-match hit






















































































































































































































































































































































































































 գ
  





















 



   ,

     

 
   

  '





  


 


ens'


 


llama_print_timings:        load time =   16920.00 ms
llama_print_timings:      sample time =     190.80 ms /   472 runs   (    0.40 ms per token,  2473.81 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     0 tokens (    -nan ms per token,     -nan tokens per second)
llama_print_timings:        eval time =  181033.65 ms /   472 runs   (  383.55 ms per token,     2.61 tokens per second)
llama_print_timings:       total time =  182380.36 ms /   472 tokens


In [16]:

print(result)





















































































































































































































































































































































































































 գ
  





















 



   ,

     

 
   

  '





  


 


ens'


 


In [13]:
def output_to_string(output: Scenic_output):
    scenic_str = '##Map and Model##\n%s\n##Constants##\n%s\n##Moniters##\n%s\n##Defining Agent Behaviors##\n%s\n##Spatial Relations##\n%s\n##Scenario Specifications##\n%s\n##Background Activities##\n%s\n' \
             % (output.map_and_model, output.constants, output.monitors, output.behaviors, output.spatial_relations, output.scenario, output.background)
    return scenic_str

def compile_scenic(scenic_str):
    random.seed()
    scenario = scenic.scenarioFromString(scenic_str, model='scenic.simulators.carla', mode2D=True)
    scene, numIters = scenario.generate(maxIterations=1000)
    return scene, numIters

def simulate(scene, numIters):
    sim = CarlaSimulator()
    simulation = sim.simulate(scene, maxIterations=numIters)
    if simulation:
        result = simulation.result
        for i, state in enumerate(result.trajectory):
            egoPos, parkedCarPos = state
            print(f'Time step {i}: ego at {egoPos}; parked car at {parkedCarPos}')
        return True
    else:
        print('Simulation failed')
        return False