In [1]:
import arxiv
import openai
import langchain
import pinecone
from langchain_community.document_loaders import ArxivLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI

  from tqdm.autonotebook import tqdm


In [2]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [16]:
openai_api_key = os.getenv('OPENAI_API_KEY')
pinecone_api_key = os.getenv('PINECONE_API_KEY')
environment = os.getenv('PINECONE_ENV')

## Reading the paper

Let's create a function which with help of `ArxivLoader` will load a paper based on its id.

In [4]:
def arxiv_loader(paper_id: str) -> list[Document]:
    docs = ArxivLoader(query=paper_id, load_max_docs=2).load()
    return docs

In [5]:
doc = arxiv_loader(paper_id='2402.17764')
print(len(doc), type(doc))

1 <class 'list'>


## Splitting document into chunks

Since the number of tokens for the LLM is limited we need to separate our document into chunks with a bit of overlap. Fir this purpose `RecursiveCharacterTextSplitter` will be used.

In [6]:
def chunk_data(docs, chunk_size=800, chunk_overlap=50) -> list:
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    split_docs = text_splitter.split_documents(docs)
    return split_docs

In [7]:
documents = chunk_data(docs=doc)

Here how `document` looks like after being separated into chunks.

In [30]:
# documents

In [9]:
type(documents)

list

## Initializing Embeddings

In order to load the document into vector search database (index) we need to convert our text into embeddings. I will use `openai` tools for that.

Let's test vectors with that embeddings and just convert any sentence into embeddings.

In [None]:
embeddings = OpenAIEmbeddings(api_key=openai_api_key)
embeddings

In [11]:
vectors = embeddings.embed_query("Good morning, how's your day?")

In [12]:
vectors

[-0.0023635016767517016,
 0.0046612636186404545,
 0.003153944514078891,
 -0.034961042460283254,
 -0.012897519910202862,
 0.046255765468743076,
 -0.019033233464465366,
 -0.003163335738465989,
 -0.028549849265375436,
 -0.011476288076927634,
 0.011470026795008262,
 0.010092622072523406,
 -0.009554181629781073,
 -0.014763277614042917,
 0.003274467438937849,
 0.005697448034294434,
 0.02947646683537609,
 -0.015990419952366372,
 0.027422881849826244,
 -0.011538897170830896,
 -0.003909951697833458,
 0.0042605639275433895,
 -0.004423348409882225,
 -0.0035061218314380127,
 0.014838408340462312,
 0.007369116708844023,
 0.006736762625246794,
 -0.010436973020313968,
 0.01015523116642667,
 -0.020936555134531198,
 0.010217840260329934,
 -0.01450031774326851,
 -0.020360550725563087,
 0.007594510285086122,
 0.0020066286307837047,
 -0.02028541906782108,
 -0.0012060120252448225,
 -0.010643584147781871,
 0.016829385163431507,
 -0.017668350374496638,
 0.018256877347303496,
 0.0067242409927306636,
 0.011513

Let's check the length, it will be used for the Pinecone index.

In [13]:
len(vectors)

1536

## Loading Data into Pinecone

On this stage I will load my document into Pinecone.

In [14]:
index_name = 'arxiv-summarizer'

In [18]:
pinecone.Pinecone(
    api_key=pinecone_api_key,
    enviornment=environment
)

<pinecone.control.pinecone.Pinecone at 0x204a8d12490>

In [19]:
index = Pinecone.from_documents(documents, embeddings, index_name=index_name)

## Cosine Similarity for Data Querying

In order to retrieve certain data from the query we will use cosine similarity for our Pinecone index.

In [20]:
from langchain.chains.question_answering import load_qa_chain
from langchain import OpenAI

In [21]:
openai.api_key = openai_api_key

In [22]:
def retrieve_query(query, k=2):
    matching_results = index.similarity_search(query, k=k)
    return matching_results

In [23]:
llm = OpenAI(model_name="gpt-3.5-turbo-0125", temperature=0.6, api_key=openai_api_key)



In [24]:
chain = load_qa_chain(llm, chain_type='stuff')

Now, let's retrieve queries.

In [25]:
def retrieve_answers(query):
    doc_search = retrieve_query(query)
    print(doc_search)
    response = chain.run(input_documents=doc_search, question=query)
    return response

In [27]:
custom_query = 'How to they reach the 1 bit?'

answer = retrieve_answers(custom_query)

print(answer)

[Document(page_content='LLMs are more friendly to CPU devices, which are the main processors used in edge and mobile\ndevices. This means that BitNet b1.58 can be efficiently executed on these devices, further improving\ntheir performance and capabilities.\nNew Hardware for 1-bit LLMs\nRecent work like Groq5 has demonstrated promising results and great potential for building specific\nhardware (e.g., LPUs) for LLMs. Going one step further, we envision and call for actions to design\nnew hardware and system specifically optimized for 1-bit LLMs, given the new computation paradigm\nenabled in BitNet [WMD+23].\nReferences\n[BZB+19] Yonatan Bisk, Rowan Zellers, Ronan Le Bras, Jianfeng Gao, and Yejin Choi. PIQA:\nreasoning about physical commonsense in natural language. CoRR, abs/1911.11641,\n2019.', metadata={'Authors': 'Shuming Ma, Hongyu Wang, Lingxiao Ma, Lei Wang, Wenhui Wang, Shaohan Huang, Li Dong, Ruiping Wang, Jilong Xue, Furu Wei', 'Published': '2024-02-27', 'Summary': 'Recent res

# Text Summarization

In this part of the notebook I will write code for text summarization for the same article. The method which will be used here is a MapReduce. What I will do is that the text is separated into chunks earlier. Each chunk is then sent over to the model. Model creates summary for every chunk. Then from those summaries the final summary gets created.

In [28]:
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveJsonSplitter

In [33]:
llm_summary = ChatOpenAI(temperature=0.3, model_name="gpt-3.5-turbo-0125")    

  warn_deprecated(


In [34]:
chain = load_summarize_chain(
    llm=llm_summary,
    chain_type='map_reduce',
    verbose=False
)

In [35]:
summary = chain.run(documents)

In [36]:
print(summary)

The paper introduces BitNet b1.58, a 1-bit Large Language Model that matches the performance of full-precision models while being more cost-effective in terms of latency, memory, throughput, and energy consumption. It introduces a new scaling law and training method for high-performance and cost-effective LLMs, allowing for improved efficiency in loading weights from DRAM. BitNet b1.58 outperforms existing models in terms of memory consumption, throughput, and latency, showing promise in reducing the cost of large language models while maintaining performance. The study compares BitNet b1.58 to LLaMA LLM models, demonstrating its superior performance in various tasks and its potential for addressing challenges related to memory consumption and energy efficiency. Further compression to 4 bits or lower is possible for future work, making 1.58-bit LLMs a cost-effective solution for edge and mobile devices.


Let's use custom prompt for the MapReduce type of summarization technique.

In [76]:
mapreduce_prompt = """
You are an expert in Data Science and Data Analytics. You can easilty understand Data Science scientific papers.
Please summarize the following text:
Text: `{documents}`
Summary:
"""

In [77]:
map_prompt_template = PromptTemplate(input_variables=['documents'],
                                     template=mapreduce_prompt
                                     )

In [78]:
final_comb_prompt = """
You are an expert in Data Science and Data Analytics. You can easilty understand Data Science scientific papers.
Now I want you to take a deep breath and provide a final summary of the entire text with these important points.
Add a Generic Motivation Title.
Start with comprehensive summary. Limit yourself with 250 word. In the end add key takeaways in up to 5 bullit points.
Text: `{documents}`
"""

In [79]:
final_comb_prompt_template = PromptTemplate(input_variables=['documents'],
                                            template=final_comb_prompt)

In [80]:
summary_chain = load_summarize_chain(
    llm=llm_summary,
    chain_type='map_reduce',
    map_prompt=map_prompt_template,
    combine_prompt=final_comb_prompt_template,
    verbose=False
)

ValidationError: 1 validation error for StuffDocumentsChain
__root__
  document_variable_name text was not found in llm_chain input_variables: ['documents'] (type=value_error)