In [3]:
import sys
import os
import torch
import textwrap
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings import HuggingFaceEmbeddings

In [4]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [5]:
from dotenv import load_dotenv
import os
from langchain.llms import OpenAI

In [6]:
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

In [7]:

URLs = [
    'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb',
    'https://www.mosaicml.com/blog/mpt-7b',
    'https://stability.ai/blog/stability-ai-launches-the-first-of-its-stablem-suite-of-language-models',
    'https://lmsys.org/blog/2023-03-30-vicuna/',
    'https://www.datacamp.com/blog/top-open-source-llms'
]

In [8]:
loaders = UnstructuredURLLoader(urls=URLs)
data = loaders.load()

In [9]:
data

[Document(metadata={'source': 'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb'}, page_content='Open in app\n\nSign up\n\nSign in\n\nWrite\n\nSign up\n\nSign in\n\nPaper Review\n\nPaper Review: Llama 2: Open Foundation and Fine-Tuned Chat Models\n\nLlama 2: one of the best open source models\n\nAndrew Lukyanenko\n\nFollow\n\nPublished in\n\nGoPenAI\n\n15 min read\n\nJul 20, 2023\n\n--\n\nProject link\n\nModel link\n\nPaper link\n\nThe authors of the work present Llama 2, an assortment of pretrained and fine-tuned large language models (LLMs) with sizes varying from 7 billion to 70 billion parameters. The fine-tuned versions, named Llama 2-Chat, are specifically designed for dialogue applications. These models surpass the performance of existing open-source chat models on most benchmarks, and according to human evaluations for usefulness and safety, they could potentially replace closed-source models. The authors also detail their ap

In [10]:
text_splitter=CharacterTextSplitter(separator='\n',chunk_size=1000,chunk_overlap=200)

In [11]:
text_chunks=text_splitter.split_documents(data)

In [13]:
text_chunks[0]

Document(metadata={'source': 'https://blog.gopenai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models-23e539522acb'}, page_content='Open in app\nSign up\nSign in\nWrite\nSign up\nSign in\nPaper Review\nPaper Review: Llama 2: Open Foundation and Fine-Tuned Chat Models\nLlama 2: one of the best open source models\nAndrew Lukyanenko\nFollow\nPublished in\nGoPenAI\n15 min read\nJul 20, 2023\n--\nProject link\nModel link\nPaper link\nThe authors of the work present Llama 2, an assortment of pretrained and fine-tuned large language models (LLMs) with sizes varying from 7 billion to 70 billion parameters. The fine-tuned versions, named Llama 2-Chat, are specifically designed for dialogue applications. These models surpass the performance of existing open-source chat models on most benchmarks, and according to human evaluations for usefulness and safety, they could potentially replace closed-source models. The authors also detail their approach to fine-tuning and safety enhanc

In [15]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large",openai_api_key=api_key)

In [16]:
query_result = embeddings.embed_query("Hello world")
len(query_result)

3072

In [17]:
query_result

[-0.008748789766086895,
 -0.010281014619745016,
 0.006202274590944085,
 0.03174942929053856,
 0.0082567506566706,
 -0.006379235649235594,
 -0.004193117144856979,
 0.07665441506979082,
 0.027416037744223347,
 0.029038902631127857,
 0.001998366677672231,
 -0.011627646452305495,
 -0.02013041526670046,
 -0.019198133481317287,
 -0.005019655639635284,
 0.03694604505714283,
 -0.012646251591040096,
 -0.00270405350221331,
 -0.007307203083030114,
 -0.018179527411260162,
 0.022202157120684286,
 0.003750714266297576,
 -0.017575268594412777,
 0.05510830947411324,
 0.0029802856252348175,
 0.024066724416740723,
 -0.014657567377074682,
 0.005943306859196106,
 -0.03604829112298419,
 -0.027933974139041827,
 0.0061461647372285395,
 0.0165135013133412,
 0.010919801287816937,
 0.01438996768252057,
 0.023462465599893335,
 0.005813823226152748,
 0.025223445616986077,
 0.01845575953428167,
 0.005205249126394187,
 -0.0028831729004522985,
 0.035461297163071594,
 0.02239206682167689,
 -0.017195449192427572,
 0.0

In [18]:
vectorstore=FAISS.from_documents(text_chunks, embeddings)

In [19]:
llm = OpenAI(openai_api_key=api_key, temperature=0.9)

  llm = OpenAI(openai_api_key=api_key, temperature=0.9)


In [20]:
llm.predict("Please provide a concise summary of the Book")

  llm.predict("Please provide a concise summary of the Book")


" of 1 Corinthians\n\nThe book of 1 Corinthians is a letter written by the Apostle Paul to the church in Corinth, a city in Greece. In this letter, Paul addresses various issues and challenges facing the Corinthian church, including divisions and conflicts among members, immorality, and confusion about spiritual gifts. He emphasizes the importance of unity among believers, living a life that glorifies God, and using one's gifts for the good of the church. Paul also addresses questions about marriage, idolatry, and the resurrection of the dead. He encourages the Corinthian believers to love one another, pursue righteousness, and keep Christ at the center of their lives. Ultimately, 1 Corinthians serves as a guide for how Christians can live out their faith in a practical and impactful way."

In [21]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm,
retriever=vectorstore.as_retriever())

In [23]:
result=chain({"question": "How good is Vicuna?"},
 return_only_outputs=True)

In [24]:
result['answer']

' Vicuna achieves competitive performance against proprietary models and is preferred by the GPT-4 evaluation. It outperforms other models in more than 90% of cases.\n'

In [26]:
wrapped_text = textwrap.fill(result['answer'], width=500)

In [27]:
wrapped_text

' Vicuna achieves competitive performance against proprietary models and is preferred by the GPT-4 evaluation. It outperforms other models in more than 90% of cases.'

In [25]:
print("The End")

The End
