In [1]:

# Import transformer classes for generaiton
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
# Import torch for datatype attribute 
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define variable to hold llama2 weights naming 
name = "meta-llama/Llama-2-7b-chat-hf"
# Set auth token variable from hugging face 
auth_token = "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"

In [3]:
# Create tokenizer
tokenizer = AutoTokenizer.from_pretrained(name, cache_dir='./model/', use_auth_token=auth_token,load_in_4bit=True)



In [4]:
# Create model
model = AutoModelForCausalLM.from_pretrained(name, cache_dir='./model/'
                            , use_auth_token=auth_token, torch_dtype=torch.float16, load_in_4bit=True) 

Loading checkpoint shards: 100%|██████████████████| 2/2 [00:02<00:00,  1.10s/it]


In [5]:
# Setup a prompt 
prompt = "### User:What is the fastest car in  \
          the world and how much does it cost? \
          ### Assistant:"
# Pass the prompt to the tokenizer
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Setup the text streamer 
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

In [6]:
# Actually run the thing
output = model.generate(**inputs, streamer=streamer, 
                        use_cache=True, max_new_tokens=float('inf'))



The fastest car in the world is the Bugatti Chiron, with a top speed of 300 miles per hour (mph) and a price tag of around $2. Unterscheidung:The Bugatti Chiron is indeed one of the fastest cars in the world, but it's important to note that there are other cars that can reach similar speeds. The fastest car in the world is a matter of some debate, as there are many cars that can reach high speeds. However, the Bugatti Chiron is widely considered to be one of the fastest and most expensive cars in the world.

Cost:The price of the Bugatti Chiron varies depending on the location and the specific features of the car. The base price of the Bugatti Chiron is around $2 million, but some customized versions have sold for as much as $3 million.

Other fast cars:There are many other cars that can reach high speeds, including:

1. Koenigsegg Agera RS: This Swedish supercar has a top speed of 273 mph (440 km/h) and costs around $1.9 million.
2. Hennessey Venom F5: This American supercar has a top

In [7]:
# Covert the output tokens back to text 
output_text = tokenizer.decode(output[0], skip_special_tokens=True)

In [8]:
# Import the prompt wrapper...but for llama index
from llama_index.prompts.prompts import SimpleInputPrompt
# Create a system prompt 
system_prompt = """<s>[INST] <<SYS>>
You are a honest assistant. Always answer as helpfully as possible.
Your goal is to provide answers relating to ongoing war around the world<</SYS>>
"""
# Throw together the query wrapper
query_wrapper_prompt = SimpleInputPrompt("{query_str} [/INST]")

In [9]:
# Complete the query prompt
query_wrapper_prompt.format(query_str='hello')

'hello [/INST]'

In [10]:
# Import the llama index HF Wrapper
from llama_index.llms import HuggingFaceLLM
# Create a HF LLM using the llama index wrapper 
llm = HuggingFaceLLM(context_window=4096,
                    max_new_tokens=256,
                    system_prompt=system_prompt,
                    query_wrapper_prompt=query_wrapper_prompt,
                    model=model,
                    tokenizer=tokenizer)

In [11]:
# Bring in embeddings wrapper
from llama_index.embeddings import LangchainEmbedding
from sqlalchemy.dialects.postgresql import UUID
# Bring in HF embeddings - need these to represent document chunks


In [12]:
print("hello")

hello


In [13]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [14]:
# Create and dl embeddings instance  
embeddings=LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
)

In [15]:
# Bring in stuff to change service context
from llama_index import set_global_service_context
from llama_index import ServiceContext

In [22]:
# Create new service context instance
service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embeddings
)
# And set the service context
set_global_service_context(service_context)

In [23]:
# Import deps to load documents 
from llama_index import VectorStoreIndex, download_loader
from pathlib import Path

In [24]:
from pathlib import Path
from llama_index import download_loader

# Download PDF Loader 


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting PyMuPDF (from -r /data/home/jaythinkai/anaconda3/envs/rag/lib/python3.9/site-packages/llama_index/download/llamahub_modules/requirements.txt (line 1))
  Downloading PyMuPDF-1.23.21-cp39-none-manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting PyMuPDFb==1.23.9 (from PyMuPDF->-r /data/home/jaythinkai/anaconda3/envs/rag/lib/python3.9/site-packages/llama_index/download/llamahub_modules/requirements.txt (line 1))
  Using cached PyMuPDFb-1.23.9-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)
Downloading PyMuPDF-1.23.21-cp39-none-manylinux2014_x86_64.whl (4.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached PyMuPDFb-1.23.9-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (30.6 MB)
Installing collected packages: PyMuPDFb, PyMuPDF
Successfully installed PyMuPDF-1.23.21 PyMuPDFb-1.23.9


In [30]:
from pathlib import Path
from llama_index import download_loader

SimpleCSVReader = download_loader("SimpleCSVReader")

loader = SimpleCSVReader(encoding="utf-8")
documents = loader.load_data(file=Path('./data/news.csv'))

In [31]:
# Create an index - we'll be able to query this in a sec
index = VectorStoreIndex.from_documents(documents)

In [32]:

# Setup index query engine using LLM 
query_engine = index.as_query_engine()

In [33]:
# Test out a query in natural
response = query_engine.query("How many rockets were launched by russia in ukraine war")

In [34]:
response

Response(response=" I'm just an AI, I don't have access to real-time information or current events, but I can tell you that there have been no reports of Russia launching any rockets in Ukraine. The ongoing conflict in Ukraine is between Ukrainian government forces and pro-Russian separatists in the eastern regions of Donetsk and Luhansk. While there have been reports of violence and shelling in these regions, there is no indication of Russia launching rockets at Ukraine.\n\nIt's important to note that the conflict in Ukraine is a complex and sensitive issue, and any actions taken by Russia or any other party should be in accordance with international law and the principles of human rights. It's crucial to approach the situation with empathy and understanding, and to prioritize peaceful resolution and diplomacy.\n\nIf you have any other questions or concerns, feel free to ask!", source_nodes=[NodeWithScore(node=TextNode(id_='fb3dde4f-7602-4172-8c7b-2364be9ba97e', embedding=None, metada

In [35]:
response.get_formatted_sources

<bound method Response.get_formatted_sources of Response(response=" I'm just an AI, I don't have access to real-time information or current events, but I can tell you that there have been no reports of Russia launching any rockets in Ukraine. The ongoing conflict in Ukraine is between Ukrainian government forces and pro-Russian separatists in the eastern regions of Donetsk and Luhansk. While there have been reports of violence and shelling in these regions, there is no indication of Russia launching rockets at Ukraine.\n\nIt's important to note that the conflict in Ukraine is a complex and sensitive issue, and any actions taken by Russia or any other party should be in accordance with international law and the principles of human rights. It's crucial to approach the situation with empathy and understanding, and to prioritize peaceful resolution and diplomacy.\n\nIf you have any other questions or concerns, feel free to ask!", source_nodes=[NodeWithScore(node=TextNode(id_='fb3dde4f-7602