<a href="https://colab.research.google.com/github/Deji01/LLMs/blob/main/LLM_Experiment_Tracking_with_Weights_%26_Biases.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Introduction

In [1]:
%pip install --upgrade openai==0.27.2 tiktoken wandb -qq

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m196.4/196.4 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.1/254.1 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.[0m[31m
[0m

In [2]:
import os
import openai
import tiktoken
import wandb
from pprint import pprint
from getpass import getpass
from wandb.integration.openai import autolog

In [4]:
if os.getenv("OPENAI_API_KEY") is None:
  if any(['VSCODE' in x for x in os.environ.keys()]):
    print('Please enter password in the VS Code prompt at the top of your VS Code window!')
  os.environ["OPENAI_API_KEY"] = getpass("Paste your OpenAI key from: https://platform.openai.com/account/api-keys\n")
  openai.api_key = os.getenv("OPENAI_API_KEY", "")

assert os.getenv("OPENAI_API_KEY", "").startswith("sk-"), "This doesn't look like a valid OpenAI API key"
print("OpenAI API key configured")

Paste your OpenAI key from: https://platform.openai.com/account/api-keys
··········
OpenAI API key configured


In [5]:
# start logging to W&B
autolog({"project":"llmapps", "job_type": "introduction"})

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


### Tokenization

In [6]:
encoding = tiktoken.encoding_for_model("text-davinci-003")
enc = encoding.encode("Weights & Biases is awesome!")
print(enc)
print(encoding.decode(enc))

[1135, 2337, 1222, 8436, 1386, 318, 7427, 0]
Weights & Biases is awesome!


In [7]:
for token_id in enc:
    print(f"{token_id}\t{encoding.decode([token_id])}")

1135	We
2337	ights
1222	 &
8436	 Bi
1386	ases
318	 is
7427	 awesome
0	!


### Sampling

In [12]:
def generate_with_temperature(temp):
  "Generate text with a given temperature, higher temperature means more randomness"
  response = openai.Completion.create(
    model="davinci-002",
    prompt="Say something about Weights & Biases",
    max_tokens=50,
    temperature=temp,
  )
  return response.choices[0].text

In [19]:
for temp in [0, 0.5, 1]:
  pprint(f'TEMP: {temp}, GENERATION: {generate_with_temperature(temp)}')

('TEMP: 0, GENERATION: \n'
 '\n'
 'We are a team of 30+ people building a platform to help data scientists and '
 'machine learning engineers build, share, and deploy their models. We are a '
 'remote-first company with team members in 10+ countries. We are backed by Y')
('TEMP: 0.5, GENERATION: \n'
 '\n'
 "I'm a co-founder of W&B and I'm a big fan of the project. I think it's a "
 'really powerful tool for data scientists and engineers to be able to track '
 "their experiments and share them with others. It's also a great")
('TEMP: 1, GENERATION:  here.\n'
 '\n'
 '— — —\n'
 '\n'
 'And thank you to Julie Fisher for editing!')


In [21]:
def generate_with_topp(topp):
  "Generate text with a given top-p, higher top-p means more randomness"
  response = openai.Completion.create(
    model="davinci-002",
    prompt="Say something about Weights & Biases",
    max_tokens=50,
    top_p=topp,
    )
  return response.choices[0].text

In [22]:
for topp in [0.1, 0.5, 1]:
  pprint(f'TOP_P: {topp}, GENERATION: {generate_with_topp(topp)}')

('TOP_P: 0.1, GENERATION: \n'
 '\n'
 'We are a team of 30+ people building a platform to help data scientists and '
 'machine learning engineers build, share, and deploy their models. We are a '
 'remote-first company with team members in 10+ countries. We are backed by Y')
('TOP_P: 0.5, GENERATION:  here. Say something about Weights & Biases here. '
 'Say something about Weights & Biases here. Say something about Weights & '
 'Biases here. Say something about Weights & Biases here. Say something about '
 'Weights & Biases')
('TOP_P: 1, GENERATION: \n'
 '\n'
 'Works with all platforms (including your own)\n'
 '\n'
 'Discover and demo-tialise deeper AI insights')


### Chat API

In [23]:
MODEL = "gpt-3.5-turbo"
response = openai.ChatCompletion.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say something about Weights & Biases"},
    ],
    temperature=0,
)

response

<OpenAIObject chat.completion id=chatcmpl-8hhkugXoRt9y75QWy4vpJVVVo6y4Z at 0x7aea9d2a8400> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": "Weights & Biases is a powerful tool for machine learning experimentation and collaboration. It provides a platform to track and visualize experiments, making it easier to understand and iterate on models. With features like hyperparameter sweeps and experiment comparison, it helps researchers and data scientists optimize their models and make informed decisions. Additionally, Weights & Biases offers integrations with popular machine learning frameworks, making it seamless to incorporate into existing workflows.",
        "role": "assistant"
      }
    }
  ],
  "created": 1705426992,
  "id": "chatcmpl-8hhkugXoRt9y75QWy4vpJVVVo6y4Z",
  "model": "gpt-3.5-turbo-0613",
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {
    "completion_toke

In [24]:
pprint(response.choices[0].message.content)

('Weights & Biases is a powerful tool for machine learning experimentation and '
 'collaboration. It provides a platform to track and visualize experiments, '
 'making it easier to understand and iterate on models. With features like '
 'hyperparameter sweeps and experiment comparison, it helps researchers and '
 'data scientists optimize their models and make informed decisions. '
 'Additionally, Weights & Biases offers integrations with popular machine '
 'learning frameworks, making it seamless to incorporate into existing '
 'workflows.')


In [25]:
wandb.finish()

VBox(children=(Label(value='0.039 MB of 0.039 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
usage/completion_tokens,▅▂▃▅▅▁▅▅▅▅▂▅▅▂█
usage/elapsed_time,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
usage/prompt_tokens,▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
usage/total_tokens,▄▁▂▄▄▁▄▄▄▄▂▄▄▂█

0,1
usage/completion_tokens,84.0
usage/elapsed_time,0.0
usage/prompt_tokens,25.0
usage/total_tokens,109.0


## Generation

In [26]:
%pip install -Uqqq rich openai==0.27.2 tiktoken wandb tenacity

In [27]:
import os
import random

import openai
import tiktoken

from pathlib import Path
from pprint import pprint
from getpass import getpass

from rich.markdown import Markdown
import pandas as pd
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential, # for exponential backoff
)
import wandb
from wandb.integration.openai import autolog

In [28]:
# Download files on colab
if not Path("examples.txt").exists():
    !wget https://raw.githubusercontent.com/wandb/edu/main/llm-apps-course/notebooks/{examples,prompt_template,system_template}.txt

--2024-01-16 17:55:04--  https://raw.githubusercontent.com/wandb/edu/main/llm-apps-course/notebooks/examples.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 40595 (40K) [text/plain]
Saving to: ‘examples.txt’


2024-01-16 17:55:04 (11.7 MB/s) - ‘examples.txt’ saved [40595/40595]

--2024-01-16 17:55:04--  https://raw.githubusercontent.com/wandb/edu/main/llm-apps-course/notebooks/prompt_template.txt
Reusing existing connection to raw.githubusercontent.com:443.
HTTP request sent, awaiting response... 200 OK
Length: 1054 (1.0K) [text/plain]
Saving to: ‘prompt_template.txt’


2024-01-16 17:55:04 (72.6 MB/s) - ‘prompt_template.txt’ saved [1054/1054]

--2024-01-16 17:55:04--  https://raw.githubusercontent.com/wandb/edu/main/llm-apps-course/notebooks/system_templ

In [29]:
# start logging to W&B
autolog({"project":"llmapps", "job_type": "generation"})

[34m[1mwandb[0m: Currently logged in as: [33mayodejiyou[0m. Use [1m`wandb login --relogin`[0m to force relogin


### Genrating synthetic support questions

In [30]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def completion_with_backoff(**kwargs):
    return openai.ChatCompletion.create(**kwargs)

In [31]:
MODEL_NAME = "gpt-3.5-turbo"

In [32]:
system_prompt = "You are a helpful assistant."
user_prompt = "Generate a support question from a W&B user"

def generate_and_print(system_prompt, user_prompt, n=5):
    messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
    responses = completion_with_backoff(
        model=MODEL_NAME,
        messages=messages,
        n = n,
        )
    for response in responses.choices:
        generation = response.message.content
        display(Markdown(generation))

generate_and_print(system_prompt, user_prompt)

### Fewshot

In [33]:
# Test if examples.txt is present, download if not
if not Path("examples.txt").exists():
    !wget https://raw.githubusercontent.com/wandb/edu/main/llm-apps-course/notebooks/examples.txt

In [34]:
delimiter = "\t" # tab separated queries
with open("examples.txt", "r") as file:
    data = file.read()
    real_queries = data.split(delimiter)

pprint(f"We have {len(real_queries)} real queries:")
Markdown(f"Sample one: \n\"{random.choice(real_queries)}\"")

'We have 228 real queries:'


In [35]:
def generate_few_shot_prompt(queries, n=3):
    prompt = "Generate a support question from a W&B user\n" +\
        "Below you will find a few examples of real user queries:\n"
    for _ in range(n):
        prompt += random.choice(queries) + "\n"
    prompt += "Let's start!"
    return prompt

generation_prompt = generate_few_shot_prompt(real_queries)
Markdown(generation_prompt)

In [36]:
generate_and_print(system_prompt, user_prompt=generation_prompt)

### Add Context & Response
Let's create a function to find all the markdown files in a directory and return it's content and path


In [37]:
# check if directory exists, if not, create it and download the files, e.g if running in colab
if not os.path.exists("../docs_sample/"):
  !git clone https://github.com/wandb/edu.git
  !cp -r edu/llm-apps-course/docs_sample ../

Cloning into 'edu'...
remote: Enumerating objects: 2739, done.[K
remote: Counting objects: 100% (808/808), done.[K
remote: Compressing objects: 100% (252/252), done.[K
remote: Total 2739 (delta 667), reused 578 (delta 554), pack-reused 1931[K
Receiving objects: 100% (2739/2739), 22.79 MiB | 15.25 MiB/s, done.
Resolving deltas: 100% (1573/1573), done.


In [38]:
def find_md_files(directory):
    "Find all markdown files in a directory and return their content and path"
    md_files = []
    for file in Path(directory).rglob("*.md"):
        with open(file, 'r', encoding='utf-8') as md_file:
            content = md_file.read()
        md_files.append((file.relative_to(directory), content))
    return md_files

documents = find_md_files('../docs_sample/')
len(documents)

11

In [40]:
tokenizer = tiktoken.encoding_for_model(MODEL_NAME)
tokens_per_document = [len(tokenizer.encode(document)) for _, document in documents]
pprint(tokens_per_document)

[4179, 2596, 803, 1644, 365, 956, 2529, 2093, 1206, 2940, 537]


In [41]:
# extract a random chunk from a document
def extract_random_chunk(document, max_tokens=512):
    tokens = tokenizer.encode(document)
    if len(tokens) <= max_tokens:
        return document
    start = random.randint(0, len(tokens) - max_tokens)
    end = start + max_tokens
    return tokenizer.decode(tokens[start:end])

In [42]:
def generate_context_prompt(chunk):
    prompt = "Generate a support question from a W&B user\n" +\
        "The question should be answerable by provided fragment of W&B documentation.\n" +\
        "Below you will find a fragment of W&B documentation:\n" +\
        chunk + "\n" +\
        "Let's start!"
    return prompt

chunk = extract_random_chunk(documents[0][1])
generation_prompt = generate_context_prompt(chunk)

In [43]:
Markdown(generation_prompt)

In [44]:
generate_and_print(system_prompt, generation_prompt, n=3)

In [45]:
# read system_template.txt file into an f-string
with open("system_template.txt", "r") as file:
    system_prompt = file.read()

In [46]:
Markdown(system_prompt)

In [47]:
# read prompt_template.txt file into an f-string
with open("prompt_template.txt", "r") as file:
    prompt_template = file.read()

In [48]:
Markdown(prompt_template)

In [49]:
def generate_context_prompt(chunk, n_questions=3):
    questions = '\n'.join(random.sample(real_queries, n_questions))
    user_prompt = prompt_template.format(QUESTIONS=questions, CHUNK=chunk)
    return user_prompt

user_prompt = generate_context_prompt(chunk)

In [51]:
Markdown(user_prompt)

In [52]:
def generate_questions(documents, n_questions=3, n_generations=5):
    questions = []
    for _, document in documents:
        chunk = extract_random_chunk(document)
        user_prompt = generate_context_prompt(chunk, n_questions)
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
        response = completion_with_backoff(
            model=MODEL_NAME,
            messages=messages,
            n = n_generations,
            )
        questions.extend([response.choices[i].message.content for i in range(n_generations)])
    return questions

In [53]:
# function to parse model generation and extract CONTEXT, QUESTION and ANSWER
def parse_generation(generation):
    lines = generation.split("\n")
    context = []
    question = []
    answer = []
    flag = None

    for line in lines:
        if "CONTEXT:" in line:
            flag = "context"
            line = line.replace("CONTEXT:", "").strip()
        elif "QUESTION:" in line:
            flag = "question"
            line = line.replace("QUESTION:", "").strip()
        elif "ANSWER:" in line:
            flag = "answer"
            line = line.replace("ANSWER:", "").strip()

        if flag == "context":
            context.append(line)
        elif flag == "question":
            question.append(line)
        elif flag == "answer":
            answer.append(line)

    context = "\n".join(context)
    question = "\n".join(question)
    answer = "\n".join(answer)
    return context, question, answer

In [54]:
generations = generate_questions([documents[0]], n_questions=3, n_generations=5)
parse_generation(generations[0])

('The user is working on a deep learning project and is using the PyTorch Lightning library along with Weights & Biases for experiment tracking and visualization. They have implemented a custom PyTorch Lightning module and want guidance on how to log metrics during the training and validation steps.\n',
 'How can I log the training loss and accuracy metrics during my PyTorch Lightning training step using Weights & Biases?\n',
 "To log the training loss and accuracy metrics during your PyTorch Lightning training step using Weights & Biases, you can use the `self.log()` method. Inside the `training_step()` method of your LightningModule, you can call `self.log('train_loss', loss)` to log the training loss and `self.log('train_accuracy', acc)` to log the training accuracy. These metrics will be automatically logged by Weights & Biases and visualized in your W&B dashboard.")

In [55]:
parsed_generations = []
generations = generate_questions(documents, n_questions=3, n_generations=5)
for generation in generations:
    context, question, answer = parse_generation(generation)
    parsed_generations.append({"context": context, "question": question, "answer": answer})

# let's convert parsed_generations to a pandas dataframe and save it locally
df = pd.DataFrame(parsed_generations)
df.to_csv('generated_examples.csv', index=False)

# log df as a table to W&B for interactive exploration
wandb.log({"generated_examples": wandb.Table(dataframe=df)})

# log csv file as an artifact to W&B for later use
artifact = wandb.Artifact("generated_examples", type="dataset")
artifact.add_file("generated_examples.csv")
wandb.log_artifact(artifact)

<Artifact generated_examples>

In [56]:
wandb.finish()

VBox(children=(Label(value='0.910 MB of 0.910 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
usage/completion_tokens,▁▁▂▆█▇▇█▆▇▆▄▆▅▆
usage/elapsed_time,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
usage/prompt_tokens,▁▂▄▆▇▆▆▆▅▆▅██▆▆
usage/total_tokens,▁▂▃▆█▇▇▇▆▇▆▇█▆▇

0,1
usage/completion_tokens,1138.0
usage/elapsed_time,0.0
usage/prompt_tokens,1064.0
usage/total_tokens,2202.0


## Retrieval

In [66]:
%pip install -Uqqq rich openai==0.27.2 tiktoken wandb langchain unstructured tabulate pdf2image chromadb

In [58]:
import os, random
from pathlib import Path
import tiktoken
from getpass import getpass
from rich.markdown import Markdown

In [59]:
# we need a single line of code to start tracing langchain with W&B
os.environ["LANGCHAIN_WANDB_TRACING"] = "true"

# wandb documentation to configure wandb using env variables
# https://docs.wandb.ai/guides/track/advanced/environment-variables
# here we are configuring the wandb project name
os.environ["WANDB_PROJECT"] = "llmapps"

### Parsing Documents

In [60]:
from langchain.document_loaders import DirectoryLoader

def find_md_files(directory):
    "Find all markdown files in a directory and return a LangChain Document"
    dl = DirectoryLoader(directory, "**/*.md")
    return dl.load()

documents = find_md_files('edu/llm-apps-course/docs_sample/')
len(documents)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


11

In [61]:
# We will need to count tokens in the documents, and for that we need the tokenizer
tokenizer = tiktoken.encoding_for_model(MODEL_NAME)

In [62]:
# function to count the number of tokens in each document
def count_tokens(documents):
    token_counts = [len(tokenizer.encode(document.page_content)) for document in documents]
    return token_counts

count_tokens(documents)

[2116, 1901, 626, 1023, 288, 650, 2209, 1499, 954, 2062, 342]

In [63]:
from langchain.text_splitter import MarkdownTextSplitter

md_text_splitter = MarkdownTextSplitter(chunk_size=1000)
document_sections = md_text_splitter.split_documents(documents)
len(document_sections), max(count_tokens(document_sections))

(88, 382)

In [64]:
Markdown(document_sections[0].page_content)

### Embeddings

In [79]:
!ls

edu	      generated_examples.csv  sample_data	   wandb
examples.txt  prompt_template.txt     system_template.txt


In [83]:
!pip install chromadb -q

In [86]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# We will use the OpenAIEmbeddings to embed the text, and Chroma to store the vectors
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(document_sections, embeddings)

In [None]:
retriever = db.as_retriever(search_kwargs=dict(k=3))

In [None]:
query = "How can I share my W&B report with my team members in a public W&B project?"
docs = retriever.get_relevant_documents(query)

In [None]:
# Let's see the results
for doc in docs:
    print(doc.metadata["source"])

In [None]:
from langchain.prompts import PromptTemplate

prompt_template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

context = "\n\n".join([doc.page_content for doc in docs])
prompt = PROMPT.format(context=context, question=query)

In [None]:
from langchain.llms import OpenAI

llm = OpenAI()
response = llm.predict(prompt)
Markdown(response)

In [None]:
from langchain.chains import RetrievalQA

qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever)
result = qa.run(query)

Markdown(result)

In [92]:
wandb.finish()