Note: Responses from local models can be quite slow, especially with 8-bit quantization.

With 4-bit quantization, `HuggingFaceH4/zephyr-7b-beta` uses about 8GB of VRAM; system RAM spiked to 14GB while loading the model, then settled around 5GB. I used a T4 instance for this notebook.

In [1]:
!pip install llama-index transformers accelerate bitsandbytes

Collecting llama-index
  Downloading llama_index-0.8.53.post3-py3-none-any.whl (794 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m794.6/794.6 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m114.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.24.0-py3-none-any.whl (260 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Collecting aiostream<0.6.0,>=0.5.2 (from llama-index)
  Downloading aiostream-0.5.2-py3-none-any.whl (39 kB)
Collecting dataclasses-json<0.6.0,>=0.5.7 (from ll

## Setup

### Data

In [2]:
from llama_index.readers import BeautifulSoupWebReader

# Single source article (theverge.com) that every index in this notebook is built from.
url = "https://www.theverge.com/2023/9/29/23895675/ai-bot-social-network-openai-meta-chatbots"

# load_data is given a list of URLs; the result is bound to `documents`,
# which the index-building cells consume.
documents = BeautifulSoupWebReader().load_data([url])

### LLM

This should run on a T4 instance on the free tier

In [8]:
import torch
from transformers import BitsAndBytesConfig
from llama_index.prompts import PromptTemplate
from llama_index.llms import HuggingFaceLLM

# bitsandbytes quantization settings; handed to the model loader through
# HuggingFaceLLM's model_kwargs in the cell's `llm = ...` statement.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # load the weights 4-bit quantized
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
    bnb_4bit_quant_type="nf4",  # 4-bit quantization data type
    bnb_4bit_use_double_quant=True,  # enable nested ("double") quantization
)


def messages_to_prompt(messages):
    """Render chat messages into the Zephyr chat-prompt format.

    Every message whose role is ``system``, ``user`` or ``assistant`` becomes
    one turn: the ``<|role|>`` header on its own line, the content, then the
    ``</s>`` end marker and a newline. Messages with any other role are
    skipped. If the rendered conversation does not begin with a system turn,
    a blank one is prepended. The prompt always ends with an open
    ``<|assistant|>`` header for the model to complete.

    Args:
        messages: iterable of objects exposing ``role`` and ``content``.
            A ``content`` of ``None`` is rendered as an empty string instead
            of the literal text "None".

    Returns:
        The complete prompt as a single string.
    """
    role_headers = (
        ("system", "<|system|>\n"),
        ("user", "<|user|>\n"),
        ("assistant", "<|assistant|>\n"),
    )

    turns = []
    for message in messages:
        for role, header in role_headers:
            # Compare with == rather than using a dict lookup: the role is
            # presumably a str-valued enum, which compares equal to the plain
            # string but need not hash like it.
            if message.role == role:
                content = "" if message.content is None else message.content
                turns.append(f"{header}{content}</s>\n")
                break

    prompt = "".join(turns)

    # ensure we start with a system prompt, insert blank if needed
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    # add final assistant prompt
    return prompt + "<|assistant|>\n"


# Zephyr 7B beta loaded through llama-index's Hugging Face wrapper, using the
# 4-bit quantization settings defined above.
llm = HuggingFaceLLM(
    model_name="HuggingFaceH4/zephyr-7b-beta",
    tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
    # Plain completion queries get the same shape messages_to_prompt produces
    # for a single user message: blank system turn, user turn, open assistant turn.
    query_wrapper_prompt=PromptTemplate("<|system|>\n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"),
    context_window=3900,
    max_new_tokens=256,
    model_kwargs={"quantization_config": quantization_config},
    # do_sample=True is what makes temperature / top_k / top_p take effect;
    # without it transformers decodes greedily and ignores the sampling knobs.
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    messages_to_prompt=messages_to_prompt,
    device_map="auto",
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/643 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [9]:
from llama_index import ServiceContext

# Bundle the local LLM with an embedding model given by the "local:" spec
# string; this service_context is passed to the indexes and query engines below.
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en-v1.5")

Downloading (…)lve/main/config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/394 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


### Index Setup

In [10]:
from llama_index import VectorStoreIndex

# Vector index over the loaded article; used by the basic query engines and the "vector_search" tool.
vector_index = VectorStoreIndex.from_documents(documents, service_context=service_context)

In [11]:
from llama_index import SummaryIndex

# Summary index over the same documents; used by the "summary" tool.
summary_index = SummaryIndex.from_documents(documents, service_context=service_context)

### Helpful Imports / Logging

In [12]:
from llama_index.response.notebook_utils import display_response

In [13]:
import logging
import sys

# One stdout handler at INFO level. force=True first removes any handlers that
# are already attached to the root logger (which would otherwise make
# basicConfig a no-op), so each record is printed exactly once instead of
# being duplicated by a second, manually added stdout handler.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

## Basic Query Engine

### Compact (default)

In [14]:
# Query engine over the vector index using the "compact" response mode
# (labelled the default in this section's heading).
query_engine = vector_index.as_query_engine(response_mode="compact")

response = query_engine.query("How do OpenAI and Meta differ on AI tools?")

# Render the response in the notebook output.
display_response(response)



**`Final Response:`** In the given context, OpenAI and Meta differ in their approach to using AI tools. OpenAI tends to present its products as productivity tools, while Meta is building LLMs for entertainment purposes. OpenAI's latest updates for ChatGPT, such as the ability to interact via voice and upload images, make the tool more useful and powerful, but it still generates dry and sterile text. Meta, on the other hand, has revealed 28 personality-driven chatbots for its messaging apps, featuring celebrities like Charli D’Amelio, Dwyane Wade, and Paris Hilton. These characters come with brief descriptions and are intended to offer companionship, coaching, tutoring, or therapy. While both companies are using AI tools, OpenAI's focus is on productivity, while Meta's is on entertainment and companionship.

### Refine

In [15]:
# Same question as the "compact" cell, answered with the "refine" response
# mode so the two answers can be compared.
query_engine = vector_index.as_query_engine(response_mode="refine")

response = query_engine.query("How do OpenAI and Meta differ on AI tools?")

# Render the response in the notebook output.
display_response(response)

**`Final Response:`** In terms of their approach to AI tools, OpenAI and Meta differ in their focus and intended use cases. OpenAI's latest updates for ChatGPT, such as the addition of voice and image capabilities, are presented as ways to make the tool more useful and powerful for practical applications. In contrast, Meta is building LLMs for entertainment purposes, as evidenced by the 28 personality-driven chatbots it revealed for use in its messaging apps. While both companies are exploring the potential of LLMs, OpenAI's focus seems to be more on practical applications, while Meta is exploring the entertainment value of AI. This difference in focus is reflected in the way they present their AI tools - OpenAI as productivity tools, and Meta as a source of entertainment.

### Tree Summarize

In [26]:
# Same question again, answered with the "tree_summarize" response mode.
query_engine = vector_index.as_query_engine(response_mode="tree_summarize")

response = query_engine.query("How do OpenAI and Meta differ on AI tools?")

# Render the response in the notebook output.
display_response(response)



**`Final Response:`** OpenAI and Meta both are developing AI tools, but they differ in their approach and intended use cases. OpenAI presents its AI products as productivity tools, while Meta is focusing on entertainment and social networking applications. OpenAI's latest updates for ChatGPT, such as voice and image capabilities, are aimed at making the tool more useful and engaging, while Meta is building AI characters and chatbots for its messaging apps, including celebrities like Snoop Dogg and MrBeast. OpenAI's AI tools are designed to help users get things done, while Meta's AI characters aim to provide companionship, coaching, and entertainment. Both companies are exploring the potential of AI in social networking and feeds, but the nature and impact of these AI tools on users' experiences are yet to be fully understood.

## Router Query Engine

In [28]:
from llama_index.tools import QueryEngineTool, ToolMetadata

# Wrap each index's query engine as a named tool. The name and description are
# the tool's metadata; presumably the selector chooses between tools based on
# the description text — confirm against the router documentation.
vector_tool = QueryEngineTool(
    vector_index.as_query_engine(),
    metadata=ToolMetadata(
        name="vector_search",
        description="Useful for searching for specific facts."
    )
)

# The summary tool answers over the summary index with tree_summarize.
summary_tool = QueryEngineTool(
    summary_index.as_query_engine(response_mode="tree_summarize"),
    metadata=ToolMetadata(
        name="summary",
        description="Useful for summarizing an entire document."
    )
)

### Single Selector

In [29]:
from llama_index.query_engine import RouterQueryEngine

# Router over the two tools; select_multi=False is the "Single Selector"
# configuration named in this section's heading.
query_engine = RouterQueryEngine.from_defaults(
    [vector_tool, summary_tool],
    service_context=service_context,
    select_multi=False
)

response = query_engine.query("What was mentioned about Meta?")

# Render the response in the notebook output.
display_response(response)



**`Final Response:`** Meta, a tech company, is building LLMs (large language models) and has revealed the creation of 28 personality-driven chatbots based on the personalities of celebrities such as Charli D’Amelio, Dwyane Wade, Kendall Jenner, MrBeast, Snoop Dogg, Tom Brady, and Paris Hilton. These chatbots will have personalities, such as MrBeast's Zach being billed as "the big brother who will roast you — because he cares." Meta plans to place these AI characters on every major surface of its products, including Facebook pages and Instagram accounts, and feeds may become partially synthetic social networks. The article also mentions that the technology is new enough that celebrities are not yet entrusting their entire personas to Meta for safekeeping, and it is unclear how much novelty value these chatbots will have. Nonetheless, the potential for these chatbots is significant, and it is suggested that they may have more than passing novelty value.

### Multi Selector

In [30]:
from llama_index.query_engine import RouterQueryEngine

# Same router, but select_multi=True: the "Multi Selector" configuration
# named in this section's heading.
query_engine = RouterQueryEngine.from_defaults(
    [vector_tool, summary_tool],
    service_context=service_context,
    select_multi=True,
)

# The question combines a specific-fact part with a whole-document summary part.
response = query_engine.query("What was mentioned about Meta? Summarize with any other companies mentioned in the entire document.")

# Render the response in the notebook output.
display_response(response)

**`Final Response:`** In the given context, it is mentioned that Meta (formerly Facebook) is building LLMs (large language models) and has unveiled 28 personality-driven chatbots for its messaging apps, featuring celebrities such as Charli D’Amelio, Dwyane Wade, and Paris Hilton. This could potentially serve as an intermediate step towards fully synthetic versions of celebrities. Other companies mentioned in the article include OpenAI, which has updated its AI language model ChatGPT with voice and image capabilities, and YouTube, which may introduce an official way to create AI-generated content. Anthropic, a company that distributes song lyrics through AI, has been sued by Universal Music for AI-generated song lyrics. Google is also mentioned for its AI-powered voice assistants. Disney delayed the release of its live-action Snow White due to the dwarfs' CGI. The article suggests that AI is rapidly advancing and could significantly impact various industries, including entertainment, social networking, and finance.

## SubQuestion Query Engine

In [31]:
from llama_index.tools import QueryEngineTool, ToolMetadata

# NOTE(review): this re-defines vector_tool and summary_tool with exactly the
# same arguments as the Router Query Engine section; the cell is redundant
# when the notebook is run top to bottom.
vector_tool = QueryEngineTool(
    vector_index.as_query_engine(),
    metadata=ToolMetadata(
        name="vector_search",
        description="Useful for searching for specific facts."
    )
)

summary_tool = QueryEngineTool(
    summary_index.as_query_engine(response_mode="tree_summarize"),
    metadata=ToolMetadata(
        name="summary",
        description="Useful for summarizing an entire document."
    )
)

In [32]:
import nest_asyncio
# Presumably needed because the sub-question engine below runs async code
# inside the notebook's already-running event loop — confirm.
nest_asyncio.apply()

In [33]:
from llama_index.query_engine import SubQuestionQueryEngine

# Sub-question engine over the same two tools. With verbose=True the recorded
# output shows the generated sub-questions and each tool's answer.
query_engine = SubQuestionQueryEngine.from_defaults(
    [vector_tool, summary_tool],
    service_context=service_context,
    verbose=True,
)

response = query_engine.query("What was mentioned about Meta? How Does it differ from how OpenAI is talked about?")

# Render the response in the notebook output.
display_response(response)

Generated 5 sub questions.
[1;3;38;2;237;90;200m[vector_search] Q: What information is provided about Meta in the given document?
[0m[1;3;38;2;237;90;200m[vector_search] A: The given document provides information about Meta's efforts in developing artificial intelligence and voices. It mentions that Meta is building LLMs (large language models) and has revealed 28 personality-driven chatbots to be used in their messaging apps. Celebrities such as Charli D’Amelio, Dwyane Wade, Kendall Jenner, MrBeast, Snoop Dogg, Tom Brady, and Paris Hilton have lent their voices to these chatbots. The document also mentions that Meta is in the entertainment business and is using generative AI and voices for this purpose. However, the document suggests that these character bots may have passing novelty value and that celebrities are not yet entrusting their entire personas to Meta for safekeeping. The document also mentions that Meta plans to place its AI characters on every major surface of its prod

**`Final Response:`** In the given document, Meta is discussed in the context of its efforts to develop artificial intelligence and voices for entertainment purposes. The company is building large language models (LLMs) and has revealed 28 personality-driven chatbots, voiced by celebrities like Snoop Dogg and Charli D’Amelio, for use in their messaging apps. Meta is positioning itself as being in the entertainment business and using generative AI and voices for this purpose. The document suggests that these character bots may have passing novelty value, and celebrities are not yet entrusting their entire personas to Meta for safekeeping. The article also mentions that Meta plans to place its AI characters on every major surface of its products, including Facebook pages and Instagram accounts, which could change the nature of social feeds and engagement.

On the other hand, the discussion about OpenAI in the given document is focused on its latest updates for ChatGPT, including the addition of voice and image capabilities. OpenAI is presenting its products as productivity tools, and the article mentions that while LLMs have potential uses in entertainment, OpenAI's CEO, Sam Altman, has expressed concerns about the potential misuse of LLMs and

## SQL Query Engine

Here, we download and use a sample SQLite database with 11 tables, with various info about music, playlists, and customers. We will limit to a select few tables for this test.

In [34]:
import locale
# Monkey-patches locale.getpreferredencoding so it always reports "UTF-8".
# NOTE(review): presumably a workaround so the `!` shell cells below work in
# this runtime — confirm it is still required.
locale.getpreferredencoding = lambda: "UTF-8"

In [35]:
!curl https://www.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip -O /content/chinook.zip
!unzip /content/chinook.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  298k  100  298k    0     0  2290k      0 --:--:-- --:--:-- --:--:-- 2295k
curl: (3) URL using bad/illegal format or missing URL
Archive:  /content/chinook.zip
  inflating: chinook.db              


In [36]:
from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, column

# SQLite URL with four slashes: "sqlite:///" followed by the absolute path /content/chinook.db.
engine = create_engine("sqlite:////content/chinook.db")

In [38]:
# NOTE(review): `conn` is not referenced by any later cell visible in this
# notebook and is never closed — confirm whether this cell is needed.
conn = engine.connect()

In [37]:
from llama_index import SQLDatabase

# Wrap the SQLAlchemy engine for llama-index. No table filter is given here;
# the table subset is passed to the query engine instead.
sql_database = SQLDatabase(engine)

In [41]:
from llama_index.indices.struct_store import NLSQLTableQueryEngine

# Natural-language-to-SQL engine, told about three of the database's tables.
# NOTE(review): one of the recorded queries below joined `genres`, which is
# not in this list, so `tables` does not appear to stop the LLM from
# referencing other tables — confirm the intended behavior.
query_engine = NLSQLTableQueryEngine(
    sql_database=sql_database,
    tables=["albums", "tracks", "artists"],
    service_context=service_context
)

In [42]:
# Ask in natural language; the recorded answer is phrased as a summary of SQL
# query results and is cut off mid-item.
response = query_engine.query("What are some albums? Limit to 5.")

# Render the response in the notebook output.
display_response(response)



**`Final Response:`** Based on the SQL query results, some albums with their corresponding track names, durations, and prices are:

1. "Battlestar Galactica: The Story So Far" with a total duration of 2622250 milliseconds (approximately 43 minutes and 42 seconds) and a price of $1.99.
2. "Occupation / Precipice" with a total duration of 5286953 milliseconds (approximately 88 minutes and 23 seconds) and a price of $1.99.
3. "Exodus, Pt. 1" with a total duration of 2621708 milliseconds (approximately 43 minutes and 48 seconds) and a price of $1.99.
4. "Exodus, Pt. 2" with a total duration of 2618000 milliseconds (approximately 43 minutes and 48 seconds) and a price of $1.99.
5. "Collaborators" with a total duration of 26266

In [43]:
# NOTE(review): in the recorded run the generated SQL (printed by the next
# cell) counted tracks per genre, so the answer lists genres, not artists.
response = query_engine.query("What are some artists? Limit it to 5.")

# Render the response in the notebook output.
display_response(response)

**`Final Response:`** Based on the SQL query results, some popular artists across different genres include those in the Rock genre with 1297 tracks, followed by Latin with 579 tracks, Metal with 374 tracks, Alternative & Punk with 332 tracks, and Jazz with 130 tracks. These genres and their respective track counts provide insight into the current trends and preferences in the music industry, and offer a diverse range of artists to explore.

In [44]:
# Show the SQL statement stored in the response metadata for the previous question.
print(response.metadata['sql_query'])

SELECT genres.Name, COUNT(tracks.GenreId) AS track_count
FROM genres
JOIN tracks ON genres.GenreId = tracks.GenreId
GROUP BY genres.GenreId
ORDER BY track_count DESC
LIMIT 5;


This last query should be a more complex join

In [47]:
# Needs tracks -> albums -> artists joins to filter by artist name (see the
# SQL printed by the next cell).
response = query_engine.query("What are some tracks from the artist AC/DC? Limit it to 3 and output the result in bullet points.")

# Render the response in the notebook output.
display_response(response)

**`Final Response:`** - "For Those About To Rock (We Salute You)"
- "Put The Finger On You"
- "Let's Get It Up" (These are three popular tracks by the legendary rock band AC/DC, as retrieved from our database.)

In [48]:
# Show the SQL statement stored in the response metadata for the AC/DC question.
print(response.metadata['sql_query'])

SELECT tracks.Name FROM tracks JOIN albums ON tracks.AlbumId = albums.AlbumId JOIN artists ON albums.ArtistId = artists.ArtistId WHERE artists.Name = 'AC/DC' LIMIT 3;


## Programs

Depending on the LLM, you will have to test with either `OpenAIPydanticProgram` or `LLMTextCompletionProgram`.

In [49]:
from typing import List
from pydantic import BaseModel

from llama_index.program import OpenAIPydanticProgram, LLMTextCompletionProgram

# Item type for Album.songs.
# NOTE(review): the docstring is deliberately left unchanged — pydantic may
# surface it in the schema that the output parser shows to the LLM; confirm
# before rewording it.
class Song(BaseModel):
    """Data model for a song."""

    title: str  # song title
    length_seconds: int  # song length, as an integer number of seconds


# Target structure for the program below (passed to PydanticOutputParser).
# NOTE(review): the docstring is deliberately left unchanged — pydantic may
# surface it in the schema that the output parser shows to the LLM; confirm
# before rewording it.
class Album(BaseModel):
    """Data model for an album."""

    name: str  # album name
    artist: str  # artist name
    songs: List[Song]  # the album's songs, each a Song model

In [50]:
from llama_index.output_parsers import PydanticOutputParser

# The backslashes at the line ends inside the triple-quoted string are line
# continuations, so the template is a single line with no embedded newlines.
# {movie_name} is supplied when the program is called.
prompt_template_str = """\
Generate an example album, with an artist and a list of songs. \
Using the movie {movie_name} as inspiration.\
"""
# Text-completion program: the output parser is built from the Album model and
# the LLM is the `llm` object built in the LLM section.
program = LLMTextCompletionProgram.from_defaults(
    output_parser=PydanticOutputParser(Album),
    prompt_template_str=prompt_template_str,
    llm=llm,
    verbose=True,
)

In [51]:
# NOTE(review): in the recorded run this call raised a ValidationError (see
# the output below), so `output` was never assigned.
output = program(movie_name="The Shining")

ValidationError: ignored

In [None]:
# NOTE(review): this cell has no execution count, i.e. it was not run in the
# recorded session; it depends on `output` from the previous cell.
print(output)

## Data Agent

Similar to programs, OpenAI LLMs will use `OpenAIAgent`, while other LLMs will use `ReActAgent`.

In [53]:
from llama_index.agent import OpenAIAgent, ReActAgent

# ReAct agent over the vector_search and summary tools defined earlier.
# With verbose=True the recorded output shows the agent's Thought / Action /
# Observation trace.
agent = ReActAgent.from_tools(
    [vector_tool, summary_tool],
    llm=llm,
    verbose=True
)

Some tool inputs are hallucinated, which causes issues with the responses. A better system prompt or better tool descriptions would likely help.

In [58]:
# A request unrelated to the indexed article: in the recorded run the agent
# answered directly without calling either tool, and the reply is cut off.
response = agent.chat("draft a legal document for selling my house to other person. US government law should be followed while drafting the document.")
print(response)



[1;3;38;5;200mThought: (Implicit) I can answer without any more tools!
Response: [Your Name]
[Your Address]
[City, State ZIP Code]
[Email Address]
[Phone Number]

[Buyer’s Name]
[Buyer’s Address]
[City, State ZIP Code]
[Email Address]
[Phone Number]

SALE AGREEMENT

This Sale Agreement (the “Agreement”) is made and entered into as of the _____ day of ________, 20__, by and between [Your Name], a resident of the State of ________, with an address at [Your Address] (the “Seller”), and [Buyer’s Name], a resident of the State of ________, with an address at [Buyer’s Address] (the “Buyer”).

WHEREAS, the Seller owns a certain parcel of real property located at [Address of the Property] (the “Property”);

WHEREAS, the Buyer desires to purchase the Property from the Seller, and the Seller desires to sell the Property to the Buyer;

NOW, THEREFORE, in consideration of the mutual coven
[0m[Your Name]
[Your Address]
[City, State ZIP Code]
[Email Address]
[Phone Number]

[Buyer’s Name]
[Buyer’s

In [55]:
# A question about the indexed article: in the recorded run the agent chose
# the vector_search tool and passed the whole question as its input.
response = agent.chat("What was mentioned about Meta? How Does it differ from how OpenAI is talked about?")
print(response)

[1;3;38;5;200mThought: I need to use a tool to help me answer this question.
Action: vector_search
Action Input: {'text': 'What was mentioned about Meta? How Does it differ from how OpenAI is talked about?'}
[0m[1;3;34mObservation: In the given context, it is mentioned that both OpenAI and Meta are developing artificial intelligence (AI) and voices, but there is a difference in their approach. OpenAI tends to present its products as productivity tools, while Meta is building LLMs for entertainment purposes. Meta has revealed 28 personality-driven chatbots featuring celebrities like Charli D’Amelio, Dwyane Wade, and Snoop Dogg, while OpenAI's latest updates for ChatGPT include a voice feature and the ability to upload images and ask questions about them. However, the text also suggests that the technology is new enough that celebrities are not yet entrusting their entire personas to Meta, and it remains to be seen how popular these AI characters will be. Overall, both companies are e

In [61]:
# Direct completion call on the LLM (no index, router or agent involved);
# .text is the generated string, shown as the cell's value.
llm.complete('write hello world in c++').text



'```cpp\n#include <iostream>\n\nint main() {\n    std::cout << "Hello World!";\n    return 0;\n}\n```\n\nIn this program, we first include the `iostream` header file, which provides input/output streams. Then, we define the `main()` function, which is the entry point of the program. Inside the `main()` function, we use the `cout` object to print the string "Hello World!" to the standard output stream (i.e., the console). Finally, we return 0 to indicate successful program execution.'

In [62]:
# NOTE(review): the generated text is stored in `response` but never displayed;
# a later cell overwrites `response` before anything prints it.
response = llm.complete('write a python program to detect cycle in graph').text



In [69]:
# Second LLM instance for long-form generation; rebinds `llm`.
# Differences from the first instance: the wrapper prompt has no blank system
# turn, messages_to_prompt is not passed, and max_new_tokens is much larger.
# NOTE(review): max_new_tokens (4000) exceeds context_window (3900) — confirm
# this is intended before using this instance for anything beyond llm.complete.
# NOTE(review): this loads the checkpoint again (see the "Loading checkpoint
# shards" output); the service_context and agent built earlier were given the
# first instance and still reference it.
llm = HuggingFaceLLM(
    model_name="HuggingFaceH4/zephyr-7b-beta",
    tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
    query_wrapper_prompt=PromptTemplate("<|user|>\n{query_str}</s>\n<|assistant|>\n"),
    context_window=3900,
    max_new_tokens=4000,
    model_kwargs={"quantization_config": quantization_config},
    # do_sample=True is what makes temperature / top_k / top_p take effect;
    # without it transformers decodes greedily and ignores the sampling knobs.
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [73]:
# Same request that was given to the agent earlier, now sent straight to the
# re-created LLM; the generated text is kept in `response` for the next cell.
response = llm.complete('draft a legal document for selling my house to other person. US government law should be followed while drafting the document.').text



In [74]:
# Print the generated document text from the previous cell.
print(response)

SALE AGREEMENT

This Sale Agreement (the "Agreement") is made and entered into as of the _____ day of ______, 20__, by and between [Seller Name], with a mailing address of [Seller Address] ("Seller"), and [Buyer Name], with a mailing address of [Buyer Address] ("Buyer").

WHEREAS, Seller is the owner of a certain parcel of real property located at [Property Address] (the "Property");

WHEREAS, Buyer desires to purchase the Property from Seller, and Seller desires to sell the Property to Buyer, subject to the terms and conditions set forth herein.

NOW, THEREFORE, in consideration of the mutual covenants and agreements contained herein, the parties agree as follows:

1. Sale of Property. Subject to the terms and conditions set forth herein, Seller agrees to sell and convey to Buyer, and Buyer agrees to purchase from Seller, the Property, together with all improvements, fixtures, and personal property located on the Property, as of the date hereof.

2. Purchase Price. The purchase price 