This Jupyter notebook runs various tests of the llm_demo sandbox.

In [1]:
import tests

In-context learning using device:  cpu
Fine-tuning using device:  cpu


In [None]:
# Drive the Test_ft_params test case by hand (no test runner): build the
# fixture with setUp(), then call its single test() method.
# NOTE(review): presumably checks which parameters each fine-tuning mode selects — confirm in tests.py
test_ftparams = tests.Test_ft_params()
test_ftparams.setUp()
test_ftparams.test()

In [None]:
# finetuning entire models

# test finetuing all parameters of bert-tiny with k=1
%python3 main.py --task run_ft --model bert-tiny --dataset amazon --k 1

# test finetuning multiple models with a range of k values, can take several hours on cpu
%python3 main.py --task run_ft --model bert-tiny,bert-med --dataset amazon --k 1,8,128

# plot finetuning results
%python3 main.py --task plot_ft --model bert-tiny,bert-med --dataset amazon --k 1,8,128

In [None]:
# Build the Test_ft fixture and run its fine-tuning-modes test.
# `ft_test` is kept as a notebook global: the gradient-tracking cell reuses it.
ft_test = tests.Test_ft()
ft_test.setUp()
ft_test.test_ft_modes()

In [None]:
# test a minimal training loop to check for gradient tracking
# Requires `ft_test` from the Test_ft cell to have been created and set up first.
# NOTE(review): "last" presumably selects fine-tuning of only the last layers — confirm in tests.py
mode = "last"
ft_test.test_gradient_tracking(mode)


In [None]:
# Build the in-context-learning test fixture and run the prompt-format check.
test_icl = tests.Test_icl()
test_icl.setUp()
# Optional checks, disabled by default; uncomment to run them as well.
#test_icl.test_customPrompt_format()
#test_icl.test_do_sample()
test_icl.test_allPrompts_format()

In [None]:
# evaluate in context learning on the bAbI dataset; gpt2-xl k=16 can take hours
# choose yes when asked if wish to run the custom code
%python3 main.py --task run_icl --model gpt2-med,gpt2-xl --dataset babi --k 0,1,16

# plot icl results
%python3 main.py --task plot_icl --model gpt2-med,gpt2-xl --dataset babi --k 0,1,16

In [None]:
# test in context learning on the XSum dataset; can take hours on cpu
%python3 main.py --task run_icl --model gpt2-med,gpt2-xl --dataset xsum --k 0,1,4 --prompt none,tldr,custom

# plot icl results
%python3 main.py --task plot_icl --model gpt2-med,gpt2-xl --dataset xsum --k 0,1,4 --prompt none,tldr,custom

Test Multimodal Llama 3.2 Models

In [2]:
import warnings
# Install a global "ignore" filter: all Python warnings are suppressed from here on.
warnings.filterwarnings('ignore')

from utilsLlama import load_env,llama32,llama31
# NOTE(review): presumably loads API credentials from a local env file — confirm in utilsLlama
load_env()

# Single-turn conversation: one user message in role/content form.
messages = [
  {"role": "user",
    "content": "Who wrote the book Charlotte's Web?"}
]

# NOTE(review): the second argument (90) presumably selects the 90B model size — confirm in utilsLlama.llama32
# The saved output of this cell shows the call failing with an "Invalid API key"
# error from api.together.xyz, so a valid key must be configured before running.
response_32 = llama32(messages, 90)
print(response_32)

Exception: {'message': 'Invalid API key provided. You can find your API key at https://api.together.xyz/settings/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}

In [None]:
import os
from getpass import getpass

# Ask for the Replicate API token with a hidden prompt (needed to run inference),
# keeping it both in the process environment and in a notebook variable.
os.environ["REPLICATE_API_TOKEN"] = REPLICATE_API_TOKEN = getpass("Enter REPLICATE API TOKEN: ")

In [None]:
import utilsLlama

# `tests` comes from the `import tests` cell at the top of the notebook.
# `testLlama` is reused by the prompt-engineering and chain-of-thought cells below.
testLlama = tests.Test_llama()

# Every example below follows the same pattern: build a prompt string, send it
# through testLlama.llama3_70b, and show the reply with utilsLlama.md.

# basic QA
prompt = "The typical color of a llama is: "
output = testLlama.llama3_70b(prompt)
utilsLlama.md(output)

# single turn chat
prompt_chat = "What is the average lifespan of a Llama? Answer the question in few words."
output = testLlama.llama3_70b(prompt_chat)
utilsLlama.md(output)

# multi turn chat, with storing previous context
# The earlier turns are written into the prompt text itself as User:/Assistant: lines.
prompt_chat = """
User: What is the average lifespan of a Llama?
Assistant: 15-20 years.
User: What animal family are they?
"""
output = testLlama.llama3_70b(prompt_chat)
utilsLlama.md(output)

# multi-turn chat, with storing previous context and more instructions
# Same transcript style, plus a trailing instruction constraining the answer format.
prompt_chat = """
User: What is the average lifespan of a Llama?
Assistant: Sure! The average lifespan of a llama is around 20-30 years.
User: What animal family are they?

Answer the question with one word.
"""
output = testLlama.llama3_70b(prompt_chat)
utilsLlama.md(output)

In [None]:
# Prompt engineering examples
# Requires `testLlama` and `utilsLlama` from the earlier Llama cell.
# Zero-shot example. To get positive/negative/neutral sentiment, we need to give examples in the prompt
prompt = '''
Classify: I saw a Gecko.
Sentiment: ?

Give one word response.
'''
output = testLlama.llama3_70b(prompt)
utilsLlama.md(output)

# Few-shot example: the same question preceded by two labelled examples.
# By giving examples to Llama, it understands the expected output format.
prompt = '''
Classify: I love Llamas!
Sentiment: Positive
Classify: I dont like Snakes.
Sentiment: Negative
Classify: I saw a Gecko.
Sentiment:

Give one word response.
'''
output = testLlama.llama3_70b(prompt)
utilsLlama.md(output)

In [None]:
# Chain of thought examples
# Requires `testLlama` and `utilsLlama` from the earlier Llama cell.
# Standard prompting: the trailing instruction asks for the answer only.
prompt = '''
Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls.
How many tennis balls does Llama have?

Answer in one word.
'''
output = testLlama.llama3_70b(prompt)
utilsLlama.md(output)

# Same problem without the one-word constraint.
# By default, Llama 3 models follow "Chain-Of-Thought" prompting
prompt = '''
Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls.
How many tennis balls does Llama have?
'''
output = testLlama.llama3_70b(prompt)
utilsLlama.md(output)

# Chain of thought prompting with word problem
# The prompt explicitly asks for a yes/no answer first, followed by the reasoning steps.
prompt = """
15 of us want to go to a restaurant.
Two of them have cars
Each car can seat 5 people.
Two of us have motorcycles.
Each motorcycle can fit 2 people.
Can we all get to the restaurant by car or motorcycle?
Think step by step.
Provide the answer as a single yes/no answer first.
Then explain each intermediate step.
"""
output = testLlama.llama3_70b(prompt)
utilsLlama.md(output)

In [None]:
# Retrieval Augmented Generation (RAG) using LangChain
%pip install langchain
%pip install langchain-community
%pip install sentence-transformers
%pip install faiss-cpu
%pip install bs4
%pip install langchain-groq

####  LangChain Q&A Retriever
* ConversationalRetrievalChain
* Query the Source documents

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Step 1: Load the document from a web url
# (fetches the page over the network, so this cell needs internet access)
loader = WebBaseLoader(["https://huggingface.co/blog/llama31"])
documents = loader.load()

# Step 2: Split the document into chunks with a specified chunk size
# chunk_overlap=50 makes neighbouring chunks share text at their boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(documents)

# Step 3: Store the document into a vector store with a specific embedding model
# `vectorstore` is used later as the retriever for the conversational chain.
vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))

First, sign in at [Groq](https://console.groq.com/login) with your GitHub or Gmail account, then create an API token to try Groq for free.

In [None]:
import os
from getpass import getpass

# Ask for the Groq API token with a hidden prompt; keep it in a notebook
# variable and expose it to the process under the GROQ_API_KEY variable name.
os.environ["GROQ_API_KEY"] = GROQ_API_TOKEN = getpass("Enter GROQ API TOKEN: ")

In [None]:
from langchain_groq import ChatGroq
# Chat model used by the retrieval chain below.
# NOTE(review): presumably authenticates via the GROQ_API_KEY environment variable set in the previous cell — confirm
# NOTE(review): confirm "llama3-8b-8192" is still a model id served by Groq
llm = ChatGroq(temperature=0, model_name="llama3-8b-8192")

In [None]:
from langchain.chains import ConversationalRetrievalChain

# Query against your own data
# Builds the chain from the Groq chat model (`llm`) and the FAISS index
# (`vectorstore`) created in earlier cells; return_source_documents=True asks
# the chain to include the retrieved chunks in its result.
chain = ConversationalRetrievalChain.from_llm(llm,vectorstore.as_retriever(),return_source_documents=True)

# no chat history passed
# First turn: the history list is empty. `result` is reused by the follow-up cell.
result = chain({"question": "What’s new with Llama 3?", "chat_history": []})
utilsLlama.md(result['answer'])


In [None]:
# This time your previous question and answer will be included as a chat history which will enable the ability
# to ask follow up questions.
# Each history entry is a (question, answer) pair, so the previous answer must be
# paired with the question that produced it (the chain echoes its inputs back,
# hence result["question"]) rather than with the new follow-up query.
# Run this cell right after the first query: `result` is overwritten below.
chat_history = [(result["question"], result["answer"])]
query = "What two sizes?"
result = chain({"question": query, "chat_history": chat_history})
utilsLlama.md(result['answer'])