In [None]:
# # source: https://gpt-index.readthedocs.io/en/stable/examples/vector_stores/SimpleIndexDemoLlama-Local.html
# !pip install llama-index ipywidgets
# !pip install scipy
# !pip install accelerate
# !pip install bitsandbytes
# !pip install torch transformers
# !pip install pypdf

: 

In [1]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from IPython.display import Markdown, display

In [2]:
!huggingface-cli login --token $HUGGING_FACE_TOKEN

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/greatmaster/.cache/huggingface/token
Login successful


In [3]:
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts import PromptTemplate

# Model names (make sure you have access on HF)
LLAMA2_7B = "meta-llama/Llama-2-7b-hf"
LLAMA2_7B_CHAT = "meta-llama/Llama-2-7b-chat-hf"
LLAMA2_13B = "meta-llama/Llama-2-13b-hf"
LLAMA2_13B_CHAT = "meta-llama/Llama-2-13b-chat-hf"
LLAMA2_70B = "meta-llama/Llama-2-70b-hf"
LLAMA2_70B_CHAT = "meta-llama/Llama-2-70b-chat-hf"

selected_model = LLAMA2_7B_CHAT

SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language typically used in business documents in North America.
- Never generate offensive or foul language.
"""

query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)

In [6]:
!pip install -i https://pypi.org/simple/ bitsandbytes

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
)

Looking in indexes: https://pypi.org/simple/


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device device because they were offloaded to the disk.


In [8]:
!mkdir data
!mv /content/gans_paper.pdf ./data

In [7]:
from llama_index import SimpleDirectoryReader

# load documents
documents = SimpleDirectoryReader(
    "./assets-resources/pdfs/"
).load_data()
documents

ImportError: cannot import name 'SimpleDirectoryReader' from 'llama_index' (unknown location)

In [10]:
from llama_index import VectorStoreIndex, ServiceContext, set_global_service_context

service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en")

set_global_service_context(service_context)

Downloading (…)lve/main/config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [11]:
index = VectorStoreIndex.from_documents(documents)

In [12]:
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()

In [13]:
response = query_engine.query("What is this paper about?")
display(Markdown(f"<b>{response}</b>"))



<b>The paper appears to be about Generative Adversarial Networks (GANs), a type of deep learning model used for generating new data that resembles existing data. The paper discusses the use of GANs for generating realistic images and the challenges of training GANs. Specifically, the paper describes how the authors trained two neural networks, D and G, to compete with each other in order to generate new data that is indistinguishable from the original data. The authors also discuss the use of a Gaussian Parzen window to estimate the probability of the test set data under the true data distribution.</b>

In [14]:
import time
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("What is the role of the generator in the generative adversarial network?")
start_time = time.time()
token_count = 0
for token in response.response_gen:
    print(token, end="")
    token_count += 1
time_elapsed = time.time() - start_time
tokens_per_second = token_count / time_elapsed
print(f"\n\nStreamed output at {tokens_per_second} tokens/s")

The generator in a generative adversarial network (GAN) plays a crucial role in the training process. The generator, also known as the "fake data" generator, is a neural network that takes a random noise vector as input and produces a synthetic data sample that is intended to resemble the real data distribution. The generator is trained in an adversarial manner, meaning that it is trained to produce samples that are indistinguishable from real data, while also being detected by the discriminator (the "real data" discriminator) with a low probability.

The generator's role in the GAN can be understood as follows:

1. Generating synthetic data: The generator takes a random noise vector as input and produces a synthetic data sample that is intended to resemble the real data distribution.
2. Training the discriminator: The generator is trained to produce samples that are difficult for the discriminator to distinguish from real data. This is achieved by minimizing the log-likelihood of the 