https://openai.com/sitemap.xml

#**1: Install All the Required Packages**

In [None]:
!pip -q install langchain langchain-community
!pip -q install pypdf
!pip -q install sentence_transformers
!pip install openai
!pip install tiktoken

In [None]:
!pip install tokenizers
!pip install faiss-cpu
!pip -q install unstructured

In [None]:
!pip install numpy==1.24.4
!pip install nltk==3.9.1

#**2: Import All the Required Libraries**

In [None]:
import getpass
import os
import sys
import textwrap

import torch
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [None]:
import nltk

# NLTK data needed for tokenising and tagging the scraped pages.
# NLTK 3.9 (the version pinned in the install cell) looks these models up under
# the new resource names `punkt_tab` and `averaged_perceptron_tagger_eng`; the
# legacy names are still fetched so older NLTK builds keep working.
for nltk_resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(nltk_resource)

#**Set the OpenAI API Key as an Environment Variable**

In [None]:
# Read the OpenAI key at run time instead of storing it in the notebook.
# A key that is already present in the environment is kept; otherwise the user
# is prompted for it and the typed value is not echoed to the screen.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key: ')

#**3: Pass the URLs and extract the data from these URLs**

In [None]:
# Web pages that are fetched and indexed as the knowledge base:
# one article each on Llama 2, MPT-7B, StableLM and Vicuna.
URLs = [
    "https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb",
    "https://www.mosaicml.com/blog/mpt-7b",
    "https://stability.ai/blog/stability-ai-launches-the-first-of-its-stablelm-suite-of-language-models",
    "https://lmsys.org/blog/2023-03-30-vicuna/",
]

In [None]:
# Fetch every URL and parse it into LangChain documents.
loaders = UnstructuredURLLoader(urls=URLs)
data = loaders.load()

# The loader skips URLs it fails to fetch (`continue_on_failure` defaults to
# True), so report any URL that produced no document rather than silently
# building an incomplete knowledge base.
# Assumes each document stores its URL under metadata["source"] - TODO confirm.
loaded_sources = {doc.metadata.get("source") for doc in data}
missing_urls = [url for url in URLs if url not in loaded_sources]
if missing_urls:
    print(f"Warning: no content was loaded for {len(missing_urls)} URL(s): {missing_urls}")

In [None]:
# Preview each loaded document (its source plus the first 300 characters)
# instead of dumping the full text of every page into the cell output.
[(doc.metadata.get("source"), doc.page_content[:300]) for doc in data]

In [None]:
# Number of documents returned by the loader.
len(data)

#**4: Split the Text into Chunks**

In [None]:
# Splitter configuration: break on newlines, with a chunk size of 1000 and an
# overlap of 200 between consecutive chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separator="\n",
)

In [None]:
# Apply the splitter to the loaded documents to obtain the chunks to embed.
text_chunks = text_splitter.split_documents(data)

In [None]:
# Number of chunks produced by the splitter.
len(text_chunks)

In [None]:
# Inspect the first chunk.
text_chunks[0]

In [None]:
# Inspect the second chunk.
text_chunks[1]

In [None]:
# Inspect the third chunk.
text_chunks[2]

#**5: Download the OpenAI Embeddings or Hugging Face Embeddings**

In [None]:
# Embedding model, constructed with default arguments.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment set
# earlier in the notebook - confirm. HuggingFaceEmbeddings is imported but not
# used here.
embeddings = OpenAIEmbeddings()

In [None]:
# Show the embedding object's configuration.
embeddings

In [None]:
# Sanity check: embed a short string and display the length of the result.
query_result = embeddings.embed_query("Hello world")
len(query_result)

#**6: Convert the Text Chunks into Embeddings and Create a Knowledge Base**

In [None]:
# Build the FAISS vector store from the text chunks and the embedding model.
vectorstore = FAISS.from_documents(text_chunks, embeddings)

#**7: Create a Large Language Model (LLM) Wrapper**

In [None]:
# Chat model wrapper, constructed with default arguments.
llm = ChatOpenAI()

In [None]:
# Show the chat model's configuration.
llm

In [None]:
# Smoke-test the chat model on its own with a prompt unrelated to the indexed pages.
# NOTE(review): newer LangChain releases reportedly deprecate `predict` in
# favour of `invoke` - confirm against the installed version.
llm.predict("Please provide a concise summary of the Book Harry Potter")

#**8: Initialize the Retrieval QA with Sources Chain**

In [None]:
# Combine the chat model with a retriever over the FAISS store into a
# question-answering chain.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
)

In [None]:
# Ask the chain about Vicuna, with return_only_outputs=True.
result = chain(
    {"question": "How good is Vicuna?"},
    return_only_outputs=True,
)

In [None]:
# Display the 'answer' entry of the chain result.
result['answer']

In [None]:
# Re-flow the answer so that no line exceeds 500 characters.
wrapped_text = textwrap.fill(
    result["answer"],
    width=500,
)

In [None]:
# Print the wrapped answer; a bare expression would display the string's repr
# (quotes and escaped "\n"), so the line breaks added by textwrap would not render.
print(wrapped_text)

In [None]:
# Ask the chain about Llama 2, with return_only_outputs=True.
# The question text is sent to the retriever and the model, so its grammar is
# corrected ("outperform", trailing question mark).
result = chain(
    {"question": "How does Llama 2 outperform other models?"},
    return_only_outputs=True,
)

In [None]:
# Display the 'answer' entry of the chain result.
result['answer']

In [None]:
# Re-flow the answer so that no line exceeds 500 characters.
wrapped_text = textwrap.fill(
    result["answer"],
    width=500,
)

In [None]:
# Print the wrapped answer; a bare expression would display the string's repr
# (quotes and escaped "\n"), so the line breaks added by textwrap would not render.
print(wrapped_text)

In [None]:
# Ask the chain about StableLM, with return_only_outputs=True.
# The duplicated word in the question ("is is") is removed because the text is
# what gets sent to the retriever and the model.
result = chain(
    {"question": "What is stableLM?"},
    return_only_outputs=True,
)


In [None]:
# Display the 'answer' entry of the chain result.
result['answer']

In [None]:
# Re-flow the answer so that no line exceeds 500 characters.
wrapped_text = textwrap.fill(
    result["answer"],
    width=500,
)

In [None]:
# Print the wrapped answer; a bare expression would display the string's repr
# (quotes and escaped "\n"), so the line breaks added by textwrap would not render.
print(wrapped_text)

In [None]:
# Ask the chain about MPT-7B, with return_only_outputs=True.
result = chain(
    {"question": "Can you please share some details about MPT-7b Model"},
    return_only_outputs=True,
)

In [None]:
# Display the 'answer' entry of the chain result.
result['answer']

In [None]:
# Re-flow the answer so that no line exceeds 100 characters.
wrapped_text = textwrap.fill(
    result["answer"],
    width=100,
)

In [None]:
# Print the wrapped answer; a bare expression would display the string's repr
# (quotes and escaped "\n"), so the line breaks added by textwrap would not render.
print(wrapped_text)

In [None]:
# Interactive question loop: read a prompt, send it to the chain, and print the
# answer. Typing 'exit' ends the loop.
while True:
    try:
        # Strip surrounding whitespace so ' exit ' still exits and
        # whitespace-only input is never sent to the chain.
        query = input("Prompt: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or a kernel interrupt stops the loop cleanly.
        print('Exiting')
        break
    if query == 'exit':
        print('Exiting')
        # `break` rather than sys.exit(): inside a notebook kernel sys.exit()
        # raises SystemExit and is reported as an error instead of simply
        # ending the cell.
        break
    if not query:
        continue
    result = chain({'question': query})
    print("Answer: " + result["answer"])
