# Model Registry
This notebook keeps a log of model parameters for the conversations interface.

In [1]:
import json
import ast
import pandas as pd
import numpy as np
from datetime import datetime
from src.utils.helper_funcs import find_project_root, save_as_jsonl

PROJECT_ROOT = find_project_root()

In [2]:
def update_model_info(
    OUTPUT_PATH, df, provider, model_id, base_updates, selected_params_overrides=None
):
    """
    Updates the model information for a given model_id in the dataframe and logs the changes,
    including setting selected_params as a copy of default_params with optional overrides.

    The row is located by matching model_id against the 'long_name' column; when several
    rows match, only the first one is updated. The dataframe is modified in place and also
    returned. The caller's base_updates dictionary is left untouched.

    Parameters:
    OUTPUT_PATH (str): The path to save the logs.
    df (pd.DataFrame): The dataframe containing the models.
    provider (str): The provider name; only written to the log header.
    model_id (str): The model_id of the model to update.
    base_updates (dict): A dictionary mapping column names to their new values.
        'default_params' may be given as a dict or as a JSON string.
    selected_params_overrides (dict, optional): Dictionary of overrides for selected_params.
        Ignored when base_updates has no 'default_params' entry.

    Returns:
    pd.DataFrame: The updated dataframe.
    """
    # Work on a shallow copy so the caller's dictionary is never modified
    updates = dict(base_updates)

    # Serialize the 'default_params' dictionary to a JSON string if it's a dictionary
    if isinstance(updates.get("default_params"), dict):
        updates["default_params"] = json.dumps(updates["default_params"])

    # Initialize changes log
    changes_log = [f"#PROVIDER = {provider}\n#MODEL_ID = {model_id}"]

    # Retrieve the index labels of the rows that match the model_id
    matching_rows = df.index[df["long_name"] == model_id].tolist()
    if not matching_rows:
        changes_log.append(f"No entry found for model_id {model_id}. No updates made.")
        # Append the message to the log file and exit
        append_to_log(OUTPUT_PATH, changes_log)
        return df
    index = matching_rows[0]

    # Prepare selected_params by copying default_params if available
    if "default_params" in updates:
        selected_params = json.loads(updates["default_params"])

        # Apply any specified overrides to selected_params
        for key, value in (selected_params_overrides or {}).items():
            selected_params[key] = value
            changes_log.append(f"Overridden '{key}' in selected_params: {value}")

        # Serialize the selected_params again after modification
        updates["selected_params"] = json.dumps(selected_params)

    # Update the dataframe with the new values
    for column, new_value in updates.items():
        old_value = df.at[index, column]
        if old_value != new_value:
            changes_log.append(f"Updated '{column}' from {old_value} to {new_value}")
            # A column that was read as numeric (e.g. entirely empty in the CSV)
            # cannot hold a string without an implicit upcast, which pandas
            # deprecates; convert it to object dtype explicitly first.
            if isinstance(new_value, str) and pd.api.types.is_numeric_dtype(
                df[column]
            ):
                df[column] = df[column].astype(object)
            df.at[index, column] = new_value

    # Append the changes log to the log file
    append_to_log(OUTPUT_PATH, changes_log)

    print(f"Updates for model_id {model_id} appended to the log file.")

    return df


def append_to_log(OUTPUT_PATH, changes_log):
    """
    Appends one timestamped entry to the shared registry update log.

    The entry starts with a blank line and an 'Update timestamp' line
    (local time), followed by each item of changes_log on its own line.

    Parameters:
    OUTPUT_PATH (str): The directory that holds model_registry_update_log.txt.
    changes_log (list): The log lines (strings) to append to the file.
    """
    target = f"{OUTPUT_PATH}/model_registry_update_log.txt"
    with open(target, "a") as handle:
        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        handle.write(f"\nUpdate timestamp: {stamp}\n")
        handle.writelines(entry + "\n" for entry in changes_log)

In [3]:
def print_model_ids_by_provider(df, provider_name):
    """
    Lists the models registered for one provider and returns their model_ids.

    The provider is matched case-insensitively against the 'provider' column.
    One line per model is printed as '<short_name>:<long_name>, see: <link>'.

    Parameters:
    df (pd.DataFrame): The dataframe containing the models.
    provider_name (str): The name of the provider.

    Returns:
    list: The 'long_name' values (model_ids) of the matching rows.
    """
    # Keep only the rows belonging to the requested provider
    is_provider = df["provider"].str.lower() == provider_name.lower()
    matches = df[is_provider]

    # Pull out the three columns that are reported
    model_ids = matches["long_name"].tolist()
    short_names = matches["short_name"].tolist()
    links = matches["link"].tolist()

    print(f"<Model Name>:<Model ID> for provider {provider_name}:")
    for short_name, long_name, url in zip(short_names, model_ids, links):
        print(f"{short_name}:{long_name}, see: {url}")

    return model_ids


def print_model_summary_from_df_row(model_df, model_id_to_print):
    """
    Prints the row in the dataframe for a given model_id, printing every column that is not NA.

    The row is located by matching model_id_to_print against the 'long_name'
    column; when several rows match, the first one is printed. When no row
    matches, a "No entry found" line is printed instead of raising.

    Parameters:
    model_df (pd.DataFrame): The dataframe containing the models.
    model_id_to_print (str): The model_id of the model to print.
    """
    # Filter the dataframe for rows that match the model_id
    model_row = model_df[model_df["long_name"] == model_id_to_print]

    print(f"\n\n**Model summary for model_id {model_id_to_print}**\n")
    if model_row.empty:
        # Indexing the first value of an empty selection would raise IndexError;
        # report the missing entry instead, as update_model_info does in its log.
        print(f"No entry found for model_id {model_id_to_print}.")
    else:
        for col in model_row.columns:
            value = model_row[col].values[0]
            if not pd.isna(value):
                print(f"{col}: {value}")
    print("[end of summary]\n")

In [4]:
# Load the model registry table.
# PATH is also the directory handed to update_model_info in the cells below,
# so the update log is written next to models.csv.
PATH = f"{PROJECT_ROOT}/data/interim"
model_df = pd.read_csv(f"{PATH}/models.csv")
# Show the available columns as a reference for the column lists below
print(model_df.columns)

# Registry columns grouped by the kind of value they hold.
# Names must match the columns of models.csv exactly.
string_cols = [
    "long_name",
    "short_name",
    "model_family",
    "provider",
    "provider_type",
    "model_type",
    "header",
    "footer",
    "link",
    "in_yaml",
    "params_tested",
    "status",
]

float_cols = [
    "cost_per_token",
    "tokens_per_completion",
    "cost_per_completion",
    "cost_per_100_completions",
]

# Columns that store a JSON-encoded dictionary of generation parameters
json_cols = ["default_params", "selected_params"]

Index(['long_name', 'short_name', 'longcode', 'model_family', 'provider',
       'provider_type', 'model_type', 'default_params', 'selected_params',
       'header', 'footer', 'cost_per_token', 'tokens_per_completion',
       'cost_per_completion', 'cost_per_100_completions', 'link', 'in_yaml',
       'params_tested', 'endpoint_live', 'status'],
      dtype='object')


In [5]:
# List the distinct providers in the registry; each one gets its own
# section of update cells below.
print(model_df["provider"].unique())

['Anthropic' 'Cohere' 'HuggingFace-API' 'OpenAI' 'Aleph' 'Google']


## Shared Information Across Providers

Note:
```
prompt = f"{head_template} {prompt} {foot_template}"
```

In [6]:
# System-style header used for the chat-tuned models
BASE_HEADER = (
    "You are a conversational assistant. "
    "Limit your answers to around 50 words. "
    "Do not refer to your word limit."
)
# Header variant used for the instruct-tuned models: it additionally tells the
# model where the conversation history is and which message to answer
BASE_HEADER_INSTRUCT = (
    "You are a conversational assistant. "
    "The conversation history is in the input. "
    "Reply to the last user message. "
    "Limit your answers to around 50 words. "
    "Do not refer to your word limit."
)
# Nothing is appended after the prompt
BASE_FOOTER = ""
# max_tokens value written into selected_params by the provider cells
MAX_TOKENS = 256

## Provider: OpenAI

OpenAI provides default params here: https://platform.openai.com/docs/api-reference/chat/create. Sharing defaults across models.

6/11/2023: GPT-4-turbo announced (gpt-4-1106-preview) and added to registry.

The chat template is provided: https://platform.openai.com/docs/guides/text-generation/chat-completions-api

```
messages = [{"role": "user",
            "content": "<prompt>"},
            {"role": "assistant",
            "content": "<reply>"}
            ...]
```

In [7]:
# Select the provider and collect its model_ids for the update loop below
provider = "OpenAI"
model_ids = print_model_ids_by_provider(model_df, provider)

<Model Name>:<Model ID> for provider OpenAI:
gpt-3.5-turbo:gpt-3.5-turbo, see: https://platform.openai.com/docs/models/gpt-3-5 
gpt-4:gpt-4, see: https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
gpt-4-turbo:gpt-4-1106-preview, see: https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo


In [8]:
# Apply the same header, parameters and status to every OpenAI model
for model_id_to_update in model_ids:
    base_updates = {
        "header": BASE_HEADER,
        "footer": BASE_FOOTER,
        "default_params": {
            "temperature": 1.0,
            "top_p": 1.0,
            "presence_penalty": 0.0,
            "frequency_penalty": 0.0,
        },
        "in_yaml": "Yes",
        "params_tested": "Yes",
        "status": "Deployed Successfully",
    }

    selected_params_overrides = {
        "max_tokens": MAX_TOKENS,  # Not in default_params above; added to selected_params
    }

    # Writes the changes into model_df and appends them to the update log
    model_df = update_model_info(
        PATH,
        model_df,
        provider,
        model_id_to_update,
        base_updates,
        selected_params_overrides,
    )

    print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id gpt-3.5-turbo appended to the log file.


**Model summary for model_id gpt-3.5-turbo**

long_name: gpt-3.5-turbo
short_name: gpt-3.5-turbo
longcode: GP3*
model_family: OpenAI
provider: OpenAI
provider_type: Commerical
model_type: Chat
default_params: {"temperature": 1.0, "top_p": 1.0, "presence_penalty": 0.0, "frequency_penalty": 0.0}
selected_params: {"temperature": 1.0, "top_p": 1.0, "presence_penalty": 0.0, "frequency_penalty": 0.0, "max_tokens": 256}
header: You are a conversational assistant. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 1.75e-06
tokens_per_completion: 75
cost_per_completion: 0.00013125
cost_per_100_completions: 0.0013125
link: https://platform.openai.com/docs/models/gpt-3-5 
in_yaml: Yes
params_tested: Yes
endpoint_live: Yes
status: Deployed Successfully
[end of summary]

Updates for model_id gpt-4 appended to the log file.


**Model summary for model_id gpt-4**

long_name: gpt-4
short_name: 

  df.at[index, column] = new_value


## Anthropic

Anthropic has defaults for completions which can be found here: https://docs.anthropic.com/claude/reference/complete_post. Sharing defaults across all models. A temperature of 1.0 seems to produce very similar responses, even though it is the maximum of the permitted range. [edit: this actually only happens for prompts like "what's your favourite food/person/word"..., possibly an effect of RLHF?]

Chat format is provided: https://docs.anthropic.com/claude/reference/complete_post
```
final_prompt = (
    f"\n\nHuman: <prompt> \n\nAssistant:"
)
```

In [9]:
# Select the provider and collect its model_ids for the update loop below
provider = "Anthropic"
model_ids = print_model_ids_by_provider(model_df, provider)

<Model Name>:<Model ID> for provider Anthropic:
claude-2:claude-2, see: https://docs.anthropic.com/claude/docs/legacy-model-guide
claude-2.1:claude-2.1, see: https://docs.anthropic.com/claude/docs/legacy-model-guide
claude-instant-1:claude-instant-1, see: https://docs.anthropic.com/claude/docs/legacy-model-guide


In [10]:
# Apply the same header, parameters and status to every Anthropic model
for model_id_to_update in model_ids:
    base_updates = {
        "header": BASE_HEADER,
        "footer": BASE_FOOTER,
        "default_params": {
            "temperature": 1.0,
            "top_p": 0.7,
            "presence_penalty": 0.0,
            "frequency_penalty": 0.0,
            "max_tokens": 256,
            "top_k": 5,
        },
        "in_yaml": "Yes",
        "params_tested": "Yes",
        "status": "Deployed Successfully",
    }

    selected_params_overrides = {
        "max_tokens": MAX_TOKENS,  # Same value as the default above, so nothing changes
    }

    # Writes the changes into model_df and appends them to the update log
    model_df = update_model_info(
        PATH,
        model_df,
        provider,
        model_id_to_update,
        base_updates,
        selected_params_overrides,
    )

    print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id claude-2 appended to the log file.


**Model summary for model_id claude-2**

long_name: claude-2
short_name: claude-2
longcode: CL2
model_family: Anthropic
provider: Anthropic
provider_type: Commerical
model_type: Chat
default_params: {"temperature": 1.0, "top_p": 0.7, "presence_penalty": 0.0, "frequency_penalty": 0.0, "max_tokens": 256, "top_k": 5}
selected_params: {"temperature": 1.0, "top_p": 0.7, "presence_penalty": 0.0, "frequency_penalty": 0.0, "max_tokens": 256, "top_k": 5}
header: You are a conversational assistant. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 75
cost_per_completion: 0.0
cost_per_100_completions: 0.0
link: https://docs.anthropic.com/claude/docs/legacy-model-guide
in_yaml: Yes
params_tested: Yes
endpoint_live: Yes
status: Deployed Successfully
[end of summary]

Updates for model_id claude-2.1 appended to the log file.


**Model summary for model_id claude-2.1**



## Cohere

Cohere provides default params that can be found here: https://docs.cohere.com/reference/generate. Sharing defaults across models. Note you can actually only set temperature in the chat API? (https://docs.cohere.com/reference/chat)

Default of 0.3 in the chat API was too low. Increased to 1.0.

Format of chat history is provided: https://docs.cohere.com/reference/chat

```
chat_history=[
{"role": "USER", "message": "<prompt>"},
{"role": "CHATBOT", "message": "<reply>"}
],
```



In [11]:
# Select the provider and collect its model_ids for the update loop below
provider = "Cohere"
model_ids = print_model_ids_by_provider(model_df, provider)

<Model Name>:<Model ID> for provider Cohere:
command:command, see: https://docs.cohere.com/docs/models
command-light:command-light, see: https://docs.cohere.com/docs/models
command-nightly:command-nightly, see: https://docs.cohere.com/docs/models


In [12]:
# Apply the same header, parameters and status to every Cohere model
for model_id_to_update in model_ids:
    base_updates = {
        "header": BASE_HEADER,
        "footer": BASE_FOOTER,
        # Only temperature is recorded as a default; the generate-API defaults
        # are kept below for reference but are not written to the registry
        "default_params": {
            "temperature": 0.3,
            # "top_p": 0, # These are all defaults from generate not chat
            # "presence_penalty": 0.0,
            # "frequency_penalty": 0.0,
            # "max_tokens": None,
            # "top_k": 0,
        },
        "in_yaml": "Yes",
        "params_tested": "Yes",
        "status": "Deployed Successfully",
    }

    selected_params_overrides = {
        "max_tokens": MAX_TOKENS,  # Not in default_params above; added to selected_params
        "temperature": 1.0,  # Raised from the 0.3 default
        "top_k": 5,
        "top_p": 0.9,
    }

    # Writes the changes into model_df and appends them to the update log
    model_df = update_model_info(
        PATH,
        model_df,
        provider,
        model_id_to_update,
        base_updates,
        selected_params_overrides,
    )

    print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id command appended to the log file.


**Model summary for model_id command**

long_name: command
short_name: command
longcode: COM
model_family: Cohere
provider: Cohere
provider_type: Commerical
model_type: Instruct
default_params: {"temperature": 0.3}
selected_params: {"temperature": 1.0, "max_tokens": 256, "top_k": 5, "top_p": 0.9}
header: You are a conversational assistant. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 2e-06
tokens_per_completion: 75
cost_per_completion: 0.00015
cost_per_100_completions: 0.0015
link: https://docs.cohere.com/docs/models
in_yaml: Yes
params_tested: Yes
endpoint_live: Yes
status: Deployed Successfully
[end of summary]

Updates for model_id command-light appended to the log file.


**Model summary for model_id command-light**

long_name: command-light
short_name: command-light
longcode: COML
model_family: Cohere
provider: Cohere
provider_type: Commerical
model_type: Instruct
default_p

## Google

Google does not provide default params here: https://developers.generativeai.google/api/python/google/generativeai/generate_text.

There are some mentioned here: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text-chat, treating those as default parameters. Overriding temp to 1.0 because 0.0 is deterministic.

Chat format is provided:

```
[{
  "author": "user",
  "content": "user message"
}]
```

In [13]:
# Select the provider and collect its model_ids for the update loop below
provider = "Google"
model_ids = print_model_ids_by_provider(model_df, provider)

<Model Name>:<Model ID> for provider Google:
palm-2:models/chat-bison-001, see: https://ai.google.dev/palm_docs


In [14]:
# Apply the same header, parameters and status to every Google model
for model_id_to_update in model_ids:
    base_updates = {
        "header": BASE_HEADER,
        "footer": BASE_FOOTER,
        "default_params": {
            "temperature": 0.0,
            "top_p": 0.95,
            "max_tokens": 1024,
            "top_k": 40,
        },
        "in_yaml": "Yes",
        "params_tested": "Yes",
        "status": "Deployed Successfully",
    }

    selected_params_overrides = {
        "max_tokens": MAX_TOKENS,  # Override the max_tokens
        "temperature": 1.0,  # Override temp because the 0.0 default is deterministic
    }

    # Writes the changes into model_df and appends them to the update log
    model_df = update_model_info(
        PATH,
        model_df,
        provider,
        model_id_to_update,
        base_updates,
        selected_params_overrides,
    )

    print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id models/chat-bison-001 appended to the log file.


**Model summary for model_id models/chat-bison-001**

long_name: models/chat-bison-001
short_name: palm-2
longcode: PALM
model_family: Google
provider: Google
provider_type: Commerical
model_type: Chat
default_params: {"temperature": 0.0, "top_p": 0.95, "max_tokens": 1024, "top_k": 40}
selected_params: {"temperature": 1.0, "top_p": 0.95, "max_tokens": 256, "top_k": 40}
header: You are a conversational assistant. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 75
cost_per_completion: 0.0
cost_per_100_completions: 0.0
link: https://ai.google.dev/palm_docs
in_yaml: Yes
params_tested: Yes
endpoint_live: Yes
status: Deployed Successfully
[end of summary]



## Aleph Alpha

Aleph Alpha provides default parameters: https://aleph-alpha-client.readthedocs.io/en/latest/_modules/aleph_alpha_client/completion.html. Temperature = 0.0 is deterministic, so we increase it to 1.0.

Format is provided. Note that it is not explicitly chat-optimised: https://docs.aleph-alpha.com/docs/introduction/zero_shot_control/.

```
### Instruction:
INPUT YOUR INSTRUCTION HERE

### Input:
YOUR INPUT

### Response:
COMPLETION OF THE MODEL
```

Note we spoke to the team at Aleph Alpha and they helped us with some changes to encourage better conversational behaviour

```
prompt = f"""### Instruction \n{head_template}
\n###Input \nLast user message: {prompt} \n\n### Response: \nAssistant:"""
```

And recommended an update to the prompt header:
```
BASE_HEADER_INSTRUCT = "You are a conversational assistant. The conversation history is in the input. Reply to the last user message. Limit your answers to around 50 words. Do not refer to your word limit."
```

We subsequently decided to use this for all instruct-tuned but not chat-tuned models.




In [15]:
# Select the provider and collect its model_ids for the update loop below
provider = "Aleph"
model_ids = print_model_ids_by_provider(model_df, provider)

<Model Name>:<Model ID> for provider Aleph:
luminous-extended-control:luminous-extended-control, see: https://docs.aleph-alpha.com/docs/introduction/luminous/
luminous-supreme-control:luminous-supreme-control, see: https://docs.aleph-alpha.com/docs/introduction/luminous/


In [16]:
# Apply the same header, parameters and status to every Aleph Alpha model
for model_id_to_update in model_ids:
    base_updates = {
        # These models use the instruct variant of the header
        "header": BASE_HEADER_INSTRUCT,
        "footer": BASE_FOOTER,
        "default_params": {
            "temperature": 0.0,
            "top_p": 0.0,
            "max_tokens": 64,
            "top_k": 0,
            "presence_penalty": 0,
            "frequency_penalty": 0,
        },
        "in_yaml": "Yes",
        "params_tested": "Yes",
        "status": "Deployed Successfully",
    }

    selected_params_overrides = {
        "max_tokens": MAX_TOKENS,  # Override the max_tokens
        "temperature": 1.0,  # Override temp because the 0.0 default is deterministic
    }

    # Writes the changes into model_df and appends them to the update log
    model_df = update_model_info(
        PATH,
        model_df,
        provider,
        model_id_to_update,
        base_updates,
        selected_params_overrides,
    )

    print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id luminous-extended-control appended to the log file.


**Model summary for model_id luminous-extended-control**

long_name: luminous-extended-control
short_name: luminous-extended-control
longcode: LUMX
model_family: Aleph Alpha
provider: Aleph
provider_type: Commerical
model_type: Instruct
default_params: {"temperature": 0.0, "top_p": 0.0, "max_tokens": 64, "top_k": 0, "presence_penalty": 0, "frequency_penalty": 0}
selected_params: {"temperature": 1.0, "top_p": 0.0, "max_tokens": 256, "top_k": 0, "presence_penalty": 0, "frequency_penalty": 0}
header: You are a conversational assistant. The conversation history is in the input. Reply to the last user message. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 5.63e-05
tokens_per_completion: 75
cost_per_completion: 0.00421875
cost_per_100_completions: 0.0421875
link: https://docs.aleph-alpha.com/docs/introduction/luminous/
in_yaml: Yes
params_tested: Yes
endpoint_live: Ye

## HuggingFace

Huggingface (HF) provides our access to open-source models via two paths: (1) the HF API with text_generation end points, and (2) custom inference endpoints.

It provides a unique challenge because every model has slightly different setup.

Unless a model provides more specific information, we use the following defaults.

The default parameters are taken from here: https://huggingface.co/docs/transformers/main_classes/text_generation.

UPDATE: Note after testing (and getting lots of non-descript 422 errors), we found that top-p could not be at the strict upper bound.
We got in contact with HF, and they suggested: "All temperatures should always be set above 0.1, all top_p above 0 and below 1, and top_k > 1.". 
We update top_p to 0.9. 
We also are updating min_tokens because we noted a few models returning empty strings when min_tokens was not strictly greater than 0.


Base chat template: 
```
"Human: <prompt>\nAssistant: <reply>\nHuman: <prompt>\nAssistant: "
```


In [17]:
# Generic HuggingFace generation defaults, used when a model card gives none.
# The is_* flags choose a model-specific prompt template; all are off here.
hf_defaults = {
    "temperature": 1.0,
    "top_p": 1.0,
    "top_k": 50,
    "min_tokens": 0,  # min_new_tokens
    "max_tokens": 20,  # max_new_tokens
    **dict.fromkeys(
        (
            "is_llama",
            "is_vicuna",
            "is_falcon",
            "is_pythia",
            "is_guanaco",
            "is_zephyr",
        ),
        0,
    ),
}

# Values written into selected_params for the HF models:
# - max tokens counts NEW tokens only, unlike the other providers
UPDATED_MAX_TOKENS_EXC_PROMPT = 200
# - a non-zero minimum forces the model to produce a response
UPDATED_MIN_TOKENS = 10
# - top-p has to stay below its strict upper limit
UPDATED_TOP_P = 0.9

In [18]:
# Select the provider; the per-family cells below reuse `provider` but set
# their own model_ids, so this listing is for reference only
provider = "HuggingFace-API"
model_ids = print_model_ids_by_provider(model_df, provider)

<Model Name>:<Model ID> for provider HuggingFace-API:
flan-t5-xxl:google/flan-t5-xxl, see: https://huggingface.co/google/flan-t5-xxl
zephyr-7b-beta:HuggingFaceH4/zephyr-7b-beta, see: https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
llama-2-13b-chat:meta-llama/Llama-2-13b-chat-hf, see: https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
llama-2-70b-chat:meta-llama/Llama-2-70b-chat-hf, see: https://huggingface.co/meta-llama/Llama-2-70b-chat-hf
llama-2-7b-chat:meta-llama/Llama-2-7b-chat-hf, see: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
mistral-7b-instruct:mistralai/Mistral-7B-Instruct-v0.1, see: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
pythia-12b:OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5, see: https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5
falcon-7b-instruct:tiiuae/falcon-7b-instruct, see: https://huggingface.co/tiiuae/falcon-7b-instruct
guanaco-33b:timdettmers/guanaco-33b-merged, see: https://huggingface.co/timdettmers/guanaco

## HF API

### Llama-2-chat Models

We have three variants of Llama-2-chat: 7b, 13b, 70b.

They provide specific default params: https://github.com/facebookresearch/llama/blob/main/llama/generation.py. We find temp = 0.6 to be a bit low (only very minor word switches), increasing to 1.0.

The special llama dialog format can also be found on the same link, and is also explained here: https://gpus.llm-utils.org/llama-2-prompt-template/.

```
<s>[INST] <<SYS>>
{your_system_message}
<</SYS>>

{user_message_1} [/INST]
```

Also mentioned in https://github.com/huggingface/chat-ui/issues/382

```
{{#each history}}
<s>[INST] {{#if @first}}<<SYS>>{{{@root.preprompt}}}<</SYS>>

{{/if}}{{user}} [/INST]{{#unless @last}}{{assistant}} </s>{{/unless}}
{{/each}}
```





In [19]:
# The three Llama-2-chat variants share parameters; only the status differs
model_ids = [
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-chat-hf",
]
for model_id_to_update in model_ids:
    # The 13b variant gets its own status noting intermittent failures
    if "13b" in model_id_to_update:
        status = "Deployed Successfully (but fails intermittently)"
    else:
        status = "Deployed Successfully"
    base_updates = {
        "header": BASE_HEADER,
        "footer": BASE_FOOTER,
        "default_params": {
            "temperature": 0.6,
            "top_p": 0.9,
            "top_k": 50,  # Using HF default here
            "min_tokens": 0,  # min_new_tokens, using HF default here
            "max_tokens": None,  # max_new_tokens
            "is_llama": 1,
            "is_vicuna": 0,
            "is_falcon": 0,
            "is_pythia": 0,
            "is_guanaco": 0,
            "is_zephyr": 0,
        },
        "in_yaml": "Yes",
        "params_tested": "Yes",
        "status": status,
    }

    selected_params_overrides = {
        "max_tokens": UPDATED_MAX_TOKENS_EXC_PROMPT,  # Override the max_tokens (new tokens only)
        "min_tokens": UPDATED_MIN_TOKENS,  # Override the min_tokens to force a response
        "temperature": 1.0,  # Override the temperature; the 0.6 default was a bit low
    }

    # Writes the changes into model_df and appends them to the update log
    model_df = update_model_info(
        PATH,
        model_df,
        provider,
        model_id_to_update,
        base_updates,
        selected_params_overrides,
    )

    print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id meta-llama/Llama-2-7b-chat-hf appended to the log file.


**Model summary for model_id meta-llama/Llama-2-7b-chat-hf**

long_name: meta-llama/Llama-2-7b-chat-hf
short_name: llama-2-7b-chat
longcode: LL7
model_family: Meta
provider: HuggingFace-API
provider_type: Open Access
model_type: Chat
default_params: {"temperature": 0.6, "top_p": 0.9, "top_k": 50, "min_tokens": 0, "max_tokens": null, "is_llama": 1, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
selected_params: {"temperature": 1.0, "top_p": 0.9, "top_k": 50, "min_tokens": 10, "max_tokens": 200, "is_llama": 1, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
header: You are a conversational assistant. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 75
cost_per_completion: 0.0
cost_per_100_completions: 0.0
link: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
in_yaml

### Falcon Models

No clear default parameters provided on model card, using `hf_defaults`.


No explicit template given on model cards but this suggests there is a consistent format: https://github.com/huggingface/chat-ui/issues/382

```
{{system message}}

User: {{user message 1}}
Falcon: {{assistant message 1}}
User: {{user message 2}}
Falcon:
```
Note no space after Falcon:



In [20]:
# Falcon models to register; the 180B chat model is left out on purpose
model_ids = [
    "tiiuae/falcon-7b-instruct",
    # "tiiuae/falcon-180B-chat", (fails too often)
]
for model_id_to_update in model_ids:
    base_updates = {
        "header": BASE_HEADER_INSTRUCT,
        "footer": BASE_FOOTER,
        # update_model_info serialises this dict to JSON and applies the
        # overrides to a decoded copy, so hf_defaults itself stays unchanged
        "default_params": hf_defaults,
        "in_yaml": "Yes",
        "params_tested": "Yes",
        "status": "Deployed Successfully",
    }

    selected_params_overrides = {
        "max_tokens": UPDATED_MAX_TOKENS_EXC_PROMPT,
        "min_tokens": UPDATED_MIN_TOKENS,
        "top_p": UPDATED_TOP_P,
        "is_falcon": 1,  # Switch on the Falcon prompt template
    }

    # Writes the changes into model_df and appends them to the update log
    model_df = update_model_info(
        PATH,
        model_df,
        provider,
        model_id_to_update,
        base_updates,
        selected_params_overrides,
    )

    print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id tiiuae/falcon-7b-instruct appended to the log file.


**Model summary for model_id tiiuae/falcon-7b-instruct**

long_name: tiiuae/falcon-7b-instruct
short_name: falcon-7b-instruct
longcode: FAL7
model_family: Other OA
provider: HuggingFace-API
provider_type: Open Access
model_type: Instruct
default_params: {"temperature": 1.0, "top_p": 1.0, "top_k": 50, "min_tokens": 0, "max_tokens": 20, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
selected_params: {"temperature": 1.0, "top_p": 0.9, "top_k": 50, "min_tokens": 10, "max_tokens": 200, "is_llama": 0, "is_vicuna": 0, "is_falcon": 1, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
header: You are a conversational assistant. The conversation history is in the input. Reply to the last user message. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 75
cost_per_completion: 0.0
cost_per_100_completions

### Mistral-7B-Instruct
No clear default parameters provided on model card, using `hf_defaults`. However, we add the is_llama flag for the prompt template to mimic the llama template.

Mistral follows the chat template of llama: "In order to leverage instruction fine-tuning, your prompt should be surrounded by [INST] and [/INST] tokens." This is documented on the model card: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1

```
text = "<s>[INST] What is your favourite condiment? [/INST]"
"Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!</s> "
"[INST] Do you have mayonnaise recipes? [/INST]"
```

In [21]:
# Register Mistral-7B-Instruct with the generic HF defaults
model_id_to_update = "mistralai/Mistral-7B-Instruct-v0.1"
base_updates = {
    "header": BASE_HEADER_INSTRUCT,
    "footer": BASE_FOOTER,
    # update_model_info serialises this dict to JSON and applies the
    # overrides to a decoded copy, so hf_defaults itself stays unchanged
    "default_params": hf_defaults,
    "in_yaml": "Yes",
    "params_tested": "Yes",
    "status": "Deployed Successfully",
}

selected_params_overrides = {
    "max_tokens": UPDATED_MAX_TOKENS_EXC_PROMPT,
    "min_tokens": UPDATED_MIN_TOKENS,
    "top_p": UPDATED_TOP_P,
    "is_llama": 1,  # Mistral reuses the llama [INST] ... [/INST] prompt template
}

# Writes the changes into model_df and appends them to the update log
model_df = update_model_info(
    PATH,
    model_df,
    provider,
    model_id_to_update,
    base_updates,
    selected_params_overrides,
)

print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id mistralai/Mistral-7B-Instruct-v0.1 appended to the log file.


**Model summary for model_id mistralai/Mistral-7B-Instruct-v0.1**

long_name: mistralai/Mistral-7B-Instruct-v0.1
short_name: mistral-7b-instruct
longcode: MIST
model_family: Mistral
provider: HuggingFace-API
provider_type: Open Access
model_type: Instruct
default_params: {"temperature": 1.0, "top_p": 1.0, "top_k": 50, "min_tokens": 0, "max_tokens": 20, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
selected_params: {"temperature": 1.0, "top_p": 0.9, "top_k": 50, "min_tokens": 10, "max_tokens": 200, "is_llama": 1, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
header: You are a conversational assistant. The conversation history is in the input. Reply to the last user message. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 75
cost_per_completion: 0

### Pythia-12b

No clear default parameters provided on model card, using `hf_defaults`.

Chat format can be found on model card: https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5. Two special tokens are used to mark the beginning of user and assistant turns: <|prompter|> and <|assistant|>. Each turn ends with a <|endoftext|> token.

```
<|prompter|>What is a meme, and what's the history behind this word?<|endoftext|><|assistant|>
```

Also discussed here: https://github.com/huggingface/chat-ui/issues/382
```
{{{preprompt}}}
{{#each history}}
<|prompter|>{{user}}<|endoftext|><|assistant|>{{#unless @last}}{{assistant}}<|endoftext|>{{/unless}}
{{/each}}
```


In [22]:
# Register the OpenAssistant Pythia-12b model with the generic HF defaults
model_id_to_update = "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
base_updates = {
    "header": BASE_HEADER_INSTRUCT,
    "footer": BASE_FOOTER,
    # update_model_info serialises this dict to JSON and applies the
    # overrides to a decoded copy, so hf_defaults itself stays unchanged
    "default_params": hf_defaults,
    "in_yaml": "Yes",
    "params_tested": "Yes",
    "status": "Deployed Successfully",
}

selected_params_overrides = {
    "max_tokens": UPDATED_MAX_TOKENS_EXC_PROMPT,
    "min_tokens": UPDATED_MIN_TOKENS,
    "top_p": UPDATED_TOP_P,
    "is_pythia": 1,  # Switch on the Pythia prompt template
}

# Writes the changes into model_df and appends them to the update log
model_df = update_model_info(
    PATH,
    model_df,
    provider,
    model_id_to_update,
    base_updates,
    selected_params_overrides,
)

print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5 appended to the log file.


**Model summary for model_id OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5**

long_name: OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5
short_name: pythia-12b
longcode: PYTH
model_family: Other OA
provider: HuggingFace-API
provider_type: Open Access
model_type: Chat
default_params: {"temperature": 1.0, "top_p": 1.0, "top_k": 50, "min_tokens": 0, "max_tokens": 20, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
selected_params: {"temperature": 1.0, "top_p": 0.9, "top_k": 50, "min_tokens": 10, "max_tokens": 200, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 1, "is_guanaco": 0, "is_zephyr": 0}
header: You are a conversational assistant. The conversation history is in the input. Reply to the last user message. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 7

### Guanaco-33b

No clear default parameters provided on model card, using `hf_defaults`.


A chat format is suggested here: https://huggingface.co/timdettmers/guanaco-33b-merged/discussions/4, which is very similar to our base format.

```
{system string}
### Human: {input}
### Assistant: {output}
```




In [23]:
# Log the Guanaco-33b entry: HF defaults as default_params, plus the shared
# token/top_p overrides and the is_guanaco flag in selected_params.
model_id_to_update = "timdettmers/guanaco-33b-merged"

# Column values written onto the model's row
base_updates = {
    "header": BASE_HEADER_INSTRUCT,
    "footer": BASE_FOOTER,
    "default_params": hf_defaults,
    "in_yaml": "Yes",
    "params_tested": "Yes",
    "status": "Deployed Successfully",
}

# Applied on top of a copy of default_params to build selected_params
selected_params_overrides = {
    "max_tokens": UPDATED_MAX_TOKENS_EXC_PROMPT,
    "min_tokens": UPDATED_MIN_TOKENS,
    "top_p": UPDATED_TOP_P,
    "is_guanaco": 1,
}

model_df = update_model_info(
    OUTPUT_PATH=PATH,
    df=model_df,
    provider=provider,
    model_id=model_id_to_update,
    base_updates=base_updates,
    selected_params_overrides=selected_params_overrides,
)

print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id timdettmers/guanaco-33b-merged appended to the log file.


**Model summary for model_id timdettmers/guanaco-33b-merged**

long_name: timdettmers/guanaco-33b-merged
short_name: guanaco-33b
longcode: GUAN
model_family: Other OA
provider: HuggingFace-API
provider_type: Open Access
model_type: Instruct
default_params: {"temperature": 1.0, "top_p": 1.0, "top_k": 50, "min_tokens": 0, "max_tokens": 20, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
selected_params: {"temperature": 1.0, "top_p": 0.9, "top_k": 50, "min_tokens": 10, "max_tokens": 200, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 1, "is_zephyr": 0}
header: You are a conversational assistant. The conversation history is in the input. Reply to the last user message. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 75
cost_per_completion: 0.0
cost_per_100_com

#

### Zephyr-7b

No clear default parameters provided on model card, using `hf_defaults`.

Zephyr uses the tokenizer's chat template to format each message:
```
messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who always responds in the style of a pirate",
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
```

Underlying:
```
<|system|>
You are a friendly chatbot who always responds in the style of a pirate.</s>
<|user|>
How many helicopters can a human eat in one sitting?</s>
<|assistant|>
Ah, me hearty matey! But yer question be a puzzler! A human cannot eat a helicopter in one sitting, as helicopters are not edible. They be made of metal, plastic, and other materials, not food!
```

This looks a bit similar to OpenAssistant Pythia-12b, but with different end-of-turn tags and different "role" tags.


In [24]:
# Log the Zephyr-7b-beta entry: HF defaults as default_params, plus the shared
# token/top_p overrides and the is_zephyr flag in selected_params.
model_id_to_update = "HuggingFaceH4/zephyr-7b-beta"

# Column values written onto the model's row
# NOTE(review): the sibling HuggingFace cells record "Deployed Successfully" as the
# status; confirm whether plain "Deployed" is intentional for this model.
base_updates = {
    "header": BASE_HEADER_INSTRUCT,
    "footer": BASE_FOOTER,
    "default_params": hf_defaults,
    "in_yaml": "Yes",
    "params_tested": "Yes",
    "status": "Deployed",
}

# Applied on top of a copy of default_params to build selected_params
selected_params_overrides = {
    "max_tokens": UPDATED_MAX_TOKENS_EXC_PROMPT,
    "min_tokens": UPDATED_MIN_TOKENS,
    "top_p": UPDATED_TOP_P,
    "is_zephyr": 1,
}

model_df = update_model_info(
    OUTPUT_PATH=PATH,
    df=model_df,
    provider=provider,
    model_id=model_id_to_update,
    base_updates=base_updates,
    selected_params_overrides=selected_params_overrides,
)

print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id HuggingFaceH4/zephyr-7b-beta appended to the log file.


**Model summary for model_id HuggingFaceH4/zephyr-7b-beta**

long_name: HuggingFaceH4/zephyr-7b-beta
short_name: zephyr-7b-beta
longcode: ZEPH
model_family: HuggingFace
provider: HuggingFace-API
provider_type: Open Access
model_type: Chat
default_params: {"temperature": 1.0, "top_p": 1.0, "top_k": 50, "min_tokens": 0, "max_tokens": 20, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
selected_params: {"temperature": 1.0, "top_p": 0.9, "top_k": 50, "min_tokens": 10, "max_tokens": 200, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 1}
header: You are a conversational assistant. The conversation history is in the input. Reply to the last user message. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 75
cost_per_completion: 0.0
cost_per_100_complet

### Flan-T5-XXL

No clear default parameters provided on model card, using `hf_defaults`.

No clear template given, using base chat template:
```
"Human: <prompt>\nAssistant: <reply>\nHuman:<prompt>\nAssistant: "
```



In [25]:
# Log the Flan-T5-XXL entry: HF defaults as default_params, plus the shared
# token/top_p overrides in selected_params (no model-specific format flag is set).
model_id_to_update = "google/flan-t5-xxl"

# Column values written onto the model's row
base_updates = {
    "header": BASE_HEADER_INSTRUCT,
    "footer": BASE_FOOTER,
    "default_params": hf_defaults,
    "in_yaml": "Yes",
    "params_tested": "Yes",
    "status": "Deployed Successfully",
}

# Applied on top of a copy of default_params to build selected_params
selected_params_overrides = {
    "max_tokens": UPDATED_MAX_TOKENS_EXC_PROMPT,
    "min_tokens": UPDATED_MIN_TOKENS,
    "top_p": UPDATED_TOP_P,
}

model_df = update_model_info(
    OUTPUT_PATH=PATH,
    df=model_df,
    provider=provider,
    model_id=model_id_to_update,
    base_updates=base_updates,
    selected_params_overrides=selected_params_overrides,
)

print_model_summary_from_df_row(model_df, model_id_to_update)

Updates for model_id google/flan-t5-xxl appended to the log file.


**Model summary for model_id google/flan-t5-xxl**

long_name: google/flan-t5-xxl
short_name: flan-t5-xxl
longcode: FLAN
model_family: Google
provider: HuggingFace-API
provider_type: Open Access
model_type: Instruct
default_params: {"temperature": 1.0, "top_p": 1.0, "top_k": 50, "min_tokens": 0, "max_tokens": 20, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
selected_params: {"temperature": 1.0, "top_p": 0.9, "top_k": 50, "min_tokens": 10, "max_tokens": 200, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 0, "is_zephyr": 0}
header: You are a conversational assistant. The conversation history is in the input. Reply to the last user message. Limit your answers to around 50 words. Do not refer to your word limit.
footer: 
cost_per_token: 0.0
tokens_per_completion: 75
cost_per_completion: 0.0
cost_per_100_completions: 0.0
link: https://huggingfac

# Re-export Model Data as JSONL


In [26]:
# Preview the first three rows of the updated registry before it is cleaned for export
model_df.head(3)

Unnamed: 0,long_name,short_name,longcode,model_family,provider,provider_type,model_type,default_params,selected_params,header,footer,cost_per_token,tokens_per_completion,cost_per_completion,cost_per_100_completions,link,in_yaml,params_tested,endpoint_live,status
0,claude-2,claude-2,CL2,Anthropic,Anthropic,Commerical,Chat,"{""temperature"": 1.0, ""top_p"": 0.7, ""presence_p...","{""temperature"": 1.0, ""top_p"": 0.7, ""presence_p...",You are a conversational assistant. Limit your...,,0.0,75,0.0,0.0,https://docs.anthropic.com/claude/docs/legacy-...,Yes,Yes,Yes,Deployed Successfully
1,claude-2.1,claude-2.1,CL2*,Anthropic,Anthropic,Commerical,Chat,"{""temperature"": 1.0, ""top_p"": 0.7, ""presence_p...","{""temperature"": 1.0, ""top_p"": 0.7, ""presence_p...",You are a conversational assistant. Limit your...,,0.0,75,0.0,0.0,https://docs.anthropic.com/claude/docs/legacy-...,Yes,Yes,Yes,Deployed Successfully
2,claude-instant-1,claude-instant-1,CL1,Anthropic,Anthropic,Commerical,Chat,"{""temperature"": 1.0, ""top_p"": 0.7, ""presence_p...","{""temperature"": 1.0, ""top_p"": 0.7, ""presence_p...",You are a conversational assistant. Limit your...,,0.0,75,0.0,0.0,https://docs.anthropic.com/claude/docs/legacy-...,Yes,Yes,Yes,Deployed Successfully


In [27]:
# Spot-check one selected_params entry by row position; at this stage the value is
# still a JSON string (it is decoded with json.loads in the export-cleaning cell)
model_df["selected_params"].iloc[20]

'{"temperature": 1.0, "top_p": 0.9, "top_k": 50, "min_tokens": 10, "max_tokens": 200, "is_llama": 0, "is_vicuna": 0, "is_falcon": 0, "is_pythia": 0, "is_guanaco": 1, "is_zephyr": 0}'

In [28]:
# Clean data for export: remove the footer and cost-tracking columns, then
# prefix the two provider columns with "model_"
drop_columns = [
    "footer",
    "cost_per_token",
    "tokens_per_completion",
    "cost_per_completion",
    "cost_per_100_completions",
]
model_df.drop(drop_columns, axis="columns", inplace=True)
model_df = model_df.rename(
    columns={"provider": "model_provider", "provider_type": "model_provider_type"}
)


# Function to format the dictionary
def format_dict(d):
    """
    Converts selected numeric values of a parameter dictionary to strings, in place.

    Keys listed in the module-level `float_keys` are rendered with one decimal
    place; keys listed in the module-level `int_keys` are rendered with no decimal
    places. All other keys are left untouched.

    Parameters:
    d (dict): The parameter dictionary to format. It is modified in place.

    Returns:
    dict: The same dictionary object that was passed in.
    """
    for key in d:
        if key in float_keys:
            # fmt 1 decimal point to avoid precision change on reload
            # NOTE: values with more than one decimal place are rounded by ".1f"
            d[key] = f"{d[key]:.1f}"
        elif key in int_keys:
            try:
                # fmt as str int to avoid precision change on reload
                d[key] = f"{d[key]:.0f}"
            except (TypeError, ValueError):
                # The value does not accept a float format spec (e.g. a str or
                # None), so keep its plain string form instead
                d[key] = f"{d[key]}"
    return d


# Columns whose cells hold JSON-encoded parameter dictionaries
dictionary_columns = ["default_params", "selected_params"]

# Specify the formatting: keys rendered with one decimal place vs. as whole numbers
float_keys = ["temperature", "top_p", "presence_penalty", "frequency_penalty"]
int_keys = ["max_tokens", "min_tokens", "top_k"]
# NOTE(review): this scans the dataframe's column names, not the keys inside the
# parameter dictionaries, so flags such as "is_llama" are presumably never added
# here. Confirm the intent before changing it: stringifying the flags would alter
# the exported format.
int_keys.extend(c for c in model_df.columns if "is_" in c)

# Decode each JSON string and stringify its numeric values via format_dict
for column in dictionary_columns:
    model_df[column] = model_df[column].apply(
        lambda raw_json: format_dict(json.loads(raw_json))
    )

In [29]:
# Inspect the cleaned dataframe; the two parameter columns now hold dictionaries
# (decoded from JSON and passed through format_dict) rather than JSON strings
model_df

Unnamed: 0,long_name,short_name,longcode,model_family,model_provider,model_provider_type,model_type,default_params,selected_params,header,link,in_yaml,params_tested,endpoint_live,status
0,claude-2,claude-2,CL2,Anthropic,Anthropic,Commerical,Chat,"{'temperature': '1.0', 'top_p': '0.7', 'presen...","{'temperature': '1.0', 'top_p': '0.7', 'presen...",You are a conversational assistant. Limit your...,https://docs.anthropic.com/claude/docs/legacy-...,Yes,Yes,Yes,Deployed Successfully
1,claude-2.1,claude-2.1,CL2*,Anthropic,Anthropic,Commerical,Chat,"{'temperature': '1.0', 'top_p': '0.7', 'presen...","{'temperature': '1.0', 'top_p': '0.7', 'presen...",You are a conversational assistant. Limit your...,https://docs.anthropic.com/claude/docs/legacy-...,Yes,Yes,Yes,Deployed Successfully
2,claude-instant-1,claude-instant-1,CL1,Anthropic,Anthropic,Commerical,Chat,"{'temperature': '1.0', 'top_p': '0.7', 'presen...","{'temperature': '1.0', 'top_p': '0.7', 'presen...",You are a conversational assistant. Limit your...,https://docs.anthropic.com/claude/docs/legacy-...,Yes,Yes,Yes,Deployed Successfully
3,command,command,COM,Cohere,Cohere,Commerical,Instruct,{'temperature': '0.3'},"{'temperature': '1.0', 'max_tokens': '256', 't...",You are a conversational assistant. Limit your...,https://docs.cohere.com/docs/models,Yes,Yes,Yes,Deployed Successfully
4,command-light,command-light,COML,Cohere,Cohere,Commerical,Instruct,{'temperature': '0.3'},"{'temperature': '1.0', 'max_tokens': '256', 't...",You are a conversational assistant. Limit your...,https://docs.cohere.com/docs/models,Yes,Yes,Yes,Deployed Successfully
5,command-nightly,command-nightly,COMN,Cohere,Cohere,Commerical,Instruct,{'temperature': '0.3'},"{'temperature': '1.0', 'max_tokens': '256', 't...",You are a conversational assistant. Limit your...,https://docs.cohere.com/docs/models,Yes,Yes,Yes,Deployed Successfully
6,google/flan-t5-xxl,flan-t5-xxl,FLAN,Google,HuggingFace-API,Open Access,Instruct,"{'temperature': '1.0', 'top_p': '1.0', 'top_k'...","{'temperature': '1.0', 'top_p': '0.9', 'top_k'...",You are a conversational assistant. The conver...,https://huggingface.co/google/flan-t5-xxl,Yes,Yes,Yes,Deployed Successfully
7,gpt-3.5-turbo,gpt-3.5-turbo,GP3*,OpenAI,OpenAI,Commerical,Chat,"{'temperature': '1.0', 'top_p': '1.0', 'presen...","{'temperature': '1.0', 'top_p': '1.0', 'presen...",You are a conversational assistant. Limit your...,https://platform.openai.com/docs/models/gpt-3-5,Yes,Yes,Yes,Deployed Successfully
8,gpt-4,gpt-4,GPT4,OpenAI,OpenAI,Commerical,Chat,"{'temperature': '1.0', 'top_p': '1.0', 'presen...","{'temperature': '1.0', 'top_p': '1.0', 'presen...",You are a conversational assistant. Limit your...,https://platform.openai.com/docs/models/gpt-4-...,Yes,Yes,Yes,Deployed Successfully
9,gpt-4-1106-preview,gpt-4-turbo,GPT4*,OpenAI,OpenAI,Commerical,Chat,"{'temperature': '1.0', 'top_p': '1.0', 'presen...","{'temperature': '1.0', 'top_p': '1.0', 'presen...",You are a conversational assistant. Limit your...,https://platform.openai.com/docs/models/gpt-4-...,Yes,Yes,Yes,Deployed Successfully


In [30]:
# Export the cleaned registry to <PROJECT_ROOT>/data/models.jsonl.
# The dataframe itself is passed with is_already_records=False; presumably
# save_as_jsonl converts it to a list of records before writing — confirm in
# src.utils.helper_funcs.
OUTPUT_PATH = f"{PROJECT_ROOT}/data"
save_as_jsonl(model_df, f"{OUTPUT_PATH}/models.jsonl", is_already_records=False)

In [31]:
# Save a simple mapping too: only the identifying columns of each model
mapping_columns = ["long_name", "short_name", "longcode", "model_family"]
model_mapping = model_df[mapping_columns].copy()
# Save as csv, without the dataframe index
model_mapping.to_csv(f"{OUTPUT_PATH}/storage/mappings/model_mapping.csv", index=False)