https://openai.com/sitemap.xml

# **1: Install All the Required Packages**

In [None]:
!pip -q install langchain
!pip -q install bitsandbytes accelerate transformers
!pip -q install datasets loralib sentencepiece
!pip -q install pypdf
!pip -q install sentence_transformers

In [None]:
!pip -q install unstructured

In [None]:
!pip install tokenizers

In [None]:
!pip install xformers

In [None]:
!pip install pinecone-client

# **2: Import All the Required Libraries**

In [None]:
import getpass
import os
import sys
import textwrap

import pinecone
import torch
from huggingface_hub import notebook_login
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

In [None]:
import nltk

# Fetch the NLTK data packages in order and echo the status of the last download.
# NOTE(review): presumably these are needed by `unstructured` when it partitions
# the fetched HTML — confirm.
NLTK_PACKAGES = ('punkt', 'averaged_perceptron_tagger')
*_, last_download_status = [nltk.download(package) for package in NLTK_PACKAGES]
last_download_status

# **3: Pass the URLs and Extract the Data from These URLs**

In [None]:
# Blog posts that make up the knowledge base (one article per open LLM).
URLs = [
    "https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb",
    "https://www.mosaicml.com/blog/mpt-7b",
    "https://stability.ai/blog/stability-ai-launches-the-first-of-its-stablelm-suite-of-language-models",
    "https://lmsys.org/blog/2023-03-30-vicuna/",
]

In [None]:
# Fetch every URL and parse the page into LangChain documents.
loaders = UnstructuredURLLoader(urls=URLs)
data = loaders.load()

# By default the loader logs and skips a page it cannot fetch or parse instead
# of raising, so report any URL that produced no document rather than silently
# building the knowledge base from fewer pages.
loaded_sources = {doc.metadata.get('source') for doc in data}
missing_urls = [url for url in URLs if url not in loaded_sources]
if missing_urls:
    print(f"WARNING: no content was loaded for: {missing_urls}")

In [None]:
data

In [None]:
len(data)

# **4: Split the Text into Chunks**

In [None]:
text_splitter=CharacterTextSplitter(separator='\n',
                                    chunk_size=1000,
                                    chunk_overlap=200)

In [None]:
text_chunks=text_splitter.split_documents(data)

In [None]:
len(text_chunks)

In [None]:
text_chunks

In [None]:
# Preview the first chunk (content plus metadata).
text_chunks[0]

In [None]:
# Preview the second chunk.
text_chunks[1]

In [None]:
# Preview the third chunk.
text_chunks[2]

# **5: Download the Hugging Face Embeddings**

In [None]:
# Name the embedding model explicitly instead of relying on the library default,
# so the vector size cannot change underneath the Pinecone index after a library
# upgrade. all-mpnet-base-v2 is the model HuggingFaceEmbeddings() picks by default.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-mpnet-base-v2'
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [None]:
embeddings

In [None]:
query_result = embeddings.embed_query("How are you")
len(query_result)

In [None]:
query_result

In [None]:
# query_result

# **6: Convert the Text Chunks into Embeddings and Create a Knowledge Base**

In [None]:
# Never commit credentials to the notebook: read the key from the environment,
# or ask for it with a hidden prompt when the variable is not set.
# SECURITY: a key that was previously hardcoded in this cell must be treated as
# leaked and revoked in the Pinecone console.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY') or getpass.getpass('Pinecone API key: ')

# The environment is not secret; allow an override but keep the original default.
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'gcp-starter')

In [None]:
# Configure the Pinecone client for this session with the key and environment set above.
pinecone.init(environment=PINECONE_API_ENV, api_key=PINECONE_API_KEY)

In [None]:
index_name='llama'

In [None]:
vectorstore=Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [None]:
vectorstore=Pinecone.from_documents(text_chunks, embeddings, index_name=index_name)

# **7: Create a Large Language Model (LLM) Wrapper**

In [None]:
# Log in to the Hugging Face Hub; the tokenizer and model below are requested
# with use_auth_token=True, which relies on the token stored by this login.
notebook_login()

In [None]:
model = "meta-llama/Llama-2-7b-chat-hf"

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model,
                                          use_auth_token=True,)


model = AutoModelForCausalLM.from_pretrained(model,
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             use_auth_token=True,
                                              load_in_8bit=True,
                                              #load_in_4bit=True
                                             )

In [None]:
# Wrap the already-loaded model in a text-generation pipeline.
# No torch_dtype / device_map here: precision and device placement were fixed
# when the model was loaded (float16, 8-bit, device_map='auto'), and passing a
# different dtype (bfloat16) for an instantiated model only contradicts that.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,       # upper bound on the length of each answer
    do_sample=True,           # sample instead of greedy decoding ...
    top_k=30,                 # ... from the 30 most likely next tokens
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

In [None]:
llm=HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature':0})

In [None]:
llm.predict("Please provide a concise summary of the Book Harry Potter")

# **8: Initialize the RetrievalQA Chain**

In [None]:
from langchain.chains import RetrievalQA

In [None]:
query = "How good is Vicuna?"

In [None]:
docs = vectorstore.similarity_search(query, k=3)

In [None]:
docs

In [None]:
# Build the question-answering chain: the retriever pulls relevant chunks from
# Pinecone and the "stuff" chain type places them all into a single prompt.
retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
)

In [None]:
query = "How good is Vicuna?"
qa.run(query)

In [None]:
query = "How does Llama 2 outperforms other models"
qa.run(query)

In [None]:
query = "What is is stableLM?"
qa.run(query)

In [None]:
# Interactive question loop: type a question, or 'exit' to stop.
while True:
  # Strip surrounding whitespace so blank or whitespace-only input is skipped
  # instead of being sent to the chain.
  user_input = input("Input Prompt: ").strip()
  if user_input == 'exit':
    print('Exiting')
    # Leave the loop with break: sys.exit() raises SystemExit inside the
    # notebook kernel, which surfaces as an error instead of ending the cell cleanly.
    break
  if not user_input:
    continue
  result = qa({'query': user_input})
  print(f"Answer: {result['result']}")