In [1]:
import os
import openai
import tiktoken
import wandb
from pprint import pprint
from getpass import getpass
from wandb.integration.openai import autolog

# Set OpenAI API key 

To get a key, visit the [OpenAI API keys page](https://platform.openai.com/account/api-keys).

In [2]:
# Set OpenAI API key environment variable
# - the key is validated BEFORE it is stored, so a mistyped key is never left behind in
#   the environment and re-running this cell prompts again instead of failing forever
_api_key = os.getenv("OPENAI_API_KEY")
if not _api_key:  # unset OR empty -> ask the user
  if any('VSCODE' in name for name in os.environ):
    print('Please enter password in the VS Code prompt at the top of your VS Code window!')
  # strip() drops stray whitespace/newlines that are easily picked up when pasting
  _api_key = getpass("Paste your OpenAI key from: https://platform.openai.com/account/api-keys\n").strip()

assert _api_key.startswith("sk-"), "This doesn't look like a valid OpenAI API key"

# Only a key that passed the check is persisted
os.environ["OPENAI_API_KEY"] = _api_key
# Set the key on the client explicitly on every path, not only when the user was prompted
openai.api_key = _api_key
del _api_key  # do not keep the secret around as a notebook variable
print("OpenAI API key configured")

Please enter password in the VS Code prompt at the top of your VS Code window!
OpenAI API key configured


# Start W&B logging

`autolog` is a convenient function for logging OpenAI results to W&B.

In [4]:
# Turn on W&B autologging, passing the project name and job type as settings
autolog(dict(project="llmapps", job_type="introduction"))

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33md-oliver-cort[0m ([33mdoc93[0m). Use [1m`wandb login --relogin`[0m to force relogin


# Tokenisation

OpenAI provides the `tiktoken` library, which is used to tokenise text and to decode tokenised text back into a string.

In [5]:
# Tokenisation example: encode a string into token ids, then decode it back
# - the tokeniser is looked up for the text-davinci-003 model
# - a different model may need a different tokeniser
encoding = tiktoken.encoding_for_model("text-davinci-003")
enc = encoding.encode("Weights & Biases is awesome!")
# First line: the token ids; second line: the text recovered from them
print(enc, encoding.decode(enc), sep="\n")

[1135, 2337, 1222, 8436, 1386, 318, 7427, 0]
Weights & Biases is awesome!


In [6]:
# Decode the tokens one at a time
# - splitting on spaces would be the most natural way to cut text into tokens
# - to keep the vocabulary small, however, some words are split into sub-word units
for token_id in enc:
    # Each id is decoded on its own (as a one-element list) and shown next to the id
    print(token_id, encoding.decode([token_id]), sep="\t")

1135	We
2337	ights
1222	 &
8436	 Bi
1386	ases
318	 is
7427	 awesome
0	!


# Sampling

### Sampling with Temperature

Let's sample some text from the model by passing the **temperature** parameter. 

In [7]:
# Create a function that passes the temperature parameter to the OpenAI completion function (API)
def generate_with_temperature(
  temp,
  prompt="Say something about Weights & Biases",
  model="text-davinci-003",
  max_tokens=50,
):
  """Generate text with a given temperature; higher temperature means more randomness.

  Args:
    temp: Sampling temperature, forwarded to the API as `temperature`.
    prompt: Text prompt sent to the completion endpoint.
    model: Name of the completion model to query.
    max_tokens: Upper bound on the number of tokens to generate.

  Returns:
    The `text` of the first choice in the API response.
  """
  response = openai.Completion.create(
    model=model,
    prompt=prompt,
    max_tokens=max_tokens,
    temperature=temp,
  )
  # Only one completion is requested, so the first choice is the generated text
  return response.choices[0].text

In [None]:
# Sample one generation per temperature value
# - above a temperature of 1 the text turns into gibberish, because more
#   low-probability tokens get sampled
for temp in (0, 0.5, 1, 1.5, 2):
    pprint(f"TEMP: {temp}, GENERATION: {generate_with_temperature(temp)}")

### Top p sampling

You can also use the [`top_p` parameter](https://platform.openai.com/docs/api-reference/completions/create#completions/create-top_p) to control the diversity of the generated text. 
- This parameter sets a cumulative-probability cutoff for choosing the next token (nucleus sampling). 
- For example, if `top_p=0.9`, the model samples the next token only from the smallest set of most likely tokens whose probabilities add up to 90%. 
- The higher the `top_p`, the more low-probability tokens become eligible, so the generated text is more diverse. 
- Decreasing `top_p` results in higher probability text being generated.
- You should only use one of `temperature` or `top_p` at a given time.

In [9]:
def generate_with_topp(
  topp,
  prompt="Say something about Weights & Biases",
  model="text-davinci-003",
  max_tokens=50,
):
  """Generate text with a given top-p; higher top-p means more randomness.

  Args:
    topp: Nucleus-sampling threshold, forwarded to the API as `top_p`.
    prompt: Text prompt sent to the completion endpoint.
    model: Name of the completion model to query.
    max_tokens: Upper bound on the number of tokens to generate.

  Returns:
    The `text` of the first choice in the API response.
  """
  response = openai.Completion.create(
    model=model,
    prompt=prompt,
    max_tokens=max_tokens,
    top_p=topp,
  )
  # Only one completion is requested, so the first choice is the generated text
  return response.choices[0].text

In [None]:
# Sample one generation per top-p value, from very restrictive (0.01) to unrestricted (1)
for topp in (0.01, 0.1, 0.5, 1):
    pprint(f"TOP_P: {topp}, GENERATION: {generate_with_topp(topp)}")

# Chat API

- The Chat API (using `gpt-3.5-turbo`) looks a bit different from the Completion API above (used with the `text-davinci-003` model).
  
- The `gpt-3.5-turbo` model is faster and cheaper than `text-davinci-003`.
  
- Instead of a prompt, this API takes a list of messages.
  - Each message has a role (e.g. `system`, `user` or `assistant`) and the corresponding content
  - The `system` message gives some control over the model's response (below we steer the model to adhere to a certain behaviour)


- See [OpenAI ref](https://platform.openai.com/docs/guides/gpt)

In [None]:
MODEL = "gpt-3.5-turbo"
response = openai.ChatCompletion.create(
    model=MODEL,
    temperature=0,
    messages=[
        # The system message sets the assistant's behaviour; the user message is the request
        dict(role="system", content="You are a helpful assistant."),
        dict(role="user", content="Say something about Weights & Biases"),
    ],
)

# Display the full response: a JSON object carrying relevant information about the request.
response

In [None]:
# Print only the message content of the first choice in the chat response
pprint(response.choices[0].message.content)

Finish the W&B run (see the [Prompts docs](https://docs.wandb.ai/guides/prompts)).

In [None]:
# Finish the W&B run now that all the examples have been executed
wandb.finish()