In [1]:
# pip install transformers
# pip install bitsandbytes
# pip install -qU langchain_community pypdf
# pip install -qU "langchain-chroma>=0.1.2"
# pip install -U sentence-transformers
# pip install sentence_transformers
# pip install langchain==0.0.174 -i https://pypi.doubanio.com/simple/ --trusted-host pypi.doubanio.com

from uuid import uuid4
import torch
from transformers import pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain_chroma import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

prompt = "What is GPT4 and how does it work?"

embeddings = HuggingFaceEmbeddings(
    model_name="Snowflake/snowflake-arctic-embed-m",
    model_kwargs={"device": "cuda"},
    show_progress=True,
)

loader = PyPDFLoader(
    "./data/gpt4.pdf",
)

document = loader.load()

vector_store = Chroma(
    collection_name="gpt4",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not neccesary
)

documents = []

num_pages = len(document)
for i in range(num_pages):
    documents.append(
        Document(
            page_content=document[i].page_content,
            id=i,
        )
    )

print(f"Adding {len(documents)} documents to the vector store")

uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

results = vector_store.similarity_search(
    prompt,
    k=1,
)
context = []
for res in results:
    context.append(res.page_content)

print("Context:")
print(len(context))

# Ensure prompt and context are properly formatted
prompt = prompt.replace("\n", " ").strip()
context = [c.replace("\n", " ").strip() for c in context]
# Make context a string from the list


messages = [
    {"role": "user", "content": prompt},
    {"role": "user", "content": f"Here is the context to the question, please answer the prompt based on the context: {context[0]}"},
]
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", load_in_4bit=True, device_map="auto")

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, do_sample=True, max_length=1024, pad_token_id=tokenizer.eos_token_id, top_k=50, top_p=0.95, temperature=0.5, num_return_sequences=1)
result = pipe(messages)[0]
for message in result['generated_text']:
    if message['role'] == 'assistant':
        print(message['content'])

  from .autonotebook import tqdm as notebook_tqdm

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.38it/s]
  embeddings = HuggingFaceEmbeddings(


Adding 100 documents to the vector store


Batches: 100%|██████████| 4/4 [00:01<00:00,  2.64it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 147.54it/s]


Context:
1


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.17s/it]


Based on the context, it appears that GPT-4 is a model in the GPT (Generative Pre-trained Transformer) family, specifically a successor to GPT-3.5.

GPT-4 is a large-scale language model trained on a massive dataset of text, which enables it to generate human-like language outputs. The model is designed to understand and respond to natural language inputs, such as sentences, paragraphs, or even entire documents.

The "Turbo" suffix in GPT3.5-Turbo suggests that it is a variant of GPT-3.5 with additional capabilities or improvements. The "launch 98" part is likely a version number or a specific iteration of the model.

Without more information, it's difficult to provide a detailed explanation of how GPT-4 works. However, here is a general overview of the GPT architecture:

1. **Pre-training**: GPT-4 is trained on a large corpus of text data, which allows it to learn patterns and relationships between words, phrases, and sentences.
2. **Transformer architecture**: GPT-4 uses a transforme