In [1]:
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import faiss
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [2]:
# creating custom template to guide llm model
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

In [3]:
# extracting text from pdf
def get_pdf_text(docs):
    text=""
    for pdf in docs:
        pdf_reader=PdfReader(pdf)
        for page in pdf_reader.pages:
            text+=page.extract_text()
    return text

# converting text to chunks
def get_chunks(raw_text):
    text_splitter=CharacterTextSplitter(separator="\n",
                                        chunk_size=200,
                                        chunk_overlap=50,
                                        length_function=len)   
    chunks=text_splitter.split_text(raw_text)
    return chunks

In [4]:
# using all-MiniLm embeddings model and faiss to get vectorstore
def get_vectorstore(chunks):
    embeddings=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                     model_kwargs={'device':'cpu'})
    vectorstore=faiss.FAISS.from_texts(texts=chunks,embedding=embeddings)
    return vectorstore

In [5]:
# generating conversation chain  

from langchain_community.llms import VLLM
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

def get_conversationchain(vectorstore):
    llm = VLLM(
        model="baichuan-inc/Baichuan2-13B-Chat",
        trust_remote_code=True,  # mandatory for hf models
        max_new_tokens=512,
        top_k=20,
        top_p=0.8,
        temperature=0.8,
        dtype="float16",
        tensor_parallel_size=4
        )
    memory = ConversationBufferMemory(memory_key='chat_history', 
                                      return_messages=True,
                                      output_key='answer') # using conversation buffer memory to hold past information
    conversation_chain = ConversationalRetrievalChain.from_llm(
                                llm=llm,
                                retriever=vectorstore.as_retriever(),
                                condense_question_prompt=CUSTOM_QUESTION_PROMPT,
                                memory=memory)
    return conversation_chain

In [6]:
docs = ["document_sample/China_Lake_Energetics_Brochures_Proposal_4.pdf", "document_sample/Design-West_FAMS-S_Proposal_signed.pdf"]
raw_text=get_pdf_text(docs)

In [7]:
text_chunks=get_chunks(raw_text)
vectorstore=get_vectorstore(text_chunks)

Created a chunk of size 209, which is longer than the specified 200
Created a chunk of size 322, which is longer than the specified 200
Created a chunk of size 255, which is longer than the specified 200
Created a chunk of size 256, which is longer than the specified 200
Created a chunk of size 242, which is longer than the specified 200
Created a chunk of size 273, which is longer than the specified 200
Created a chunk of size 252, which is longer than the specified 200
Created a chunk of size 259, which is longer than the specified 200
Created a chunk of size 362, which is longer than the specified 200
Created a chunk of size 352, which is longer than the specified 200
Created a chunk of size 266, which is longer than the specified 200
Created a chunk of size 254, which is longer than the specified 200
  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [8]:
def handle_question(conversation_chain, question):
    response = conversation_chain({'question': question})
    chat_history = response["chat_history"]
    for i, msg in enumerate(chat_history):
        if i % 2 == 0:
            print(f"User: {msg.content}")
        else:
            print(f"Bot: {msg.content}")

In [9]:
conversation_chain = get_conversationchain(vectorstore)
handle_question(conversation_chain, "where will ARA travel to for CBOA 2024?")



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

INFO 08-01 20:50:37 config.py:623] Defaulting to use mp for distributed inference
INFO 08-01 20:50:37 llm_engine.py:161] Initializing an LLM engine (v0.5.0.post1) with config: model='baichuan-inc/Baichuan2-13B-Chat', speculative_config=None, tokenizer='baichuan-inc/Baichuan2-13B-Chat', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=4, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0, served_model_name=baichuan-inc/Baichuan2-13B-Chat)


DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "../aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch. device=, num_gpus=

CUDA call was originally invoked at:

  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/ubuntu/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/home/ubuntu/.local/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 205, in start
    self.asyncio_loop.run_forever()
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue
    await self.process_one()
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 534, in process_one
    await dispatch(*args)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell
    await result
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 362, in execute_request
    await super().execute_request(stream, ident, parent)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 778, in execute_request
    reply_content = await reply_content
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 449, in do_execute
    res = shell.run_cell(
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 549, in run_cell
    return super().run_cell(*args, **kwargs)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3075, in run_cell
    result = self._run_cell(
  File "/home/ubuntu/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3130, in _run_cell
    result = runner(coro)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
    coro.send(None)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3334, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/home/ubuntu/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3517, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "/home/ubuntu/.local/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3577, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_27318/2183081615.py", line 2, in <module>
    vectorstore=get_vectorstore(text_chunks)
  File "/tmp/ipykernel_27318/1057278163.py", line 3, in get_vectorstore
    embeddings=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/langchain_core/_api/deprecation.py", line 203, in warn_if_direct_instance
    return wrapped(self, *args, **kwargs)
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/langchain_community/embeddings/huggingface.py", line 71, in __init__
    import sentence_transformers
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/sentence_transformers/__init__.py", line 7, in <module>
    from sentence_transformers.cross_encoder.CrossEncoder import CrossEncoder
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 992, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/sentence_transformers/cross_encoder/__init__.py", line 1, in <module>
    from .CrossEncoder import CrossEncoder
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py", line 7, in <module>
    import torch
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/torch/__init__.py", line 1478, in <module>
    _C._initExtension(manager_path())
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 238, in <module>
    _lazy_call(_check_capability)
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/torch/cuda/__init__.py", line 235, in _lazy_call
    _queued_calls.append((callable, traceback.format_stack()))


[1;36m(VllmWorkerProcess pid=31361)[0;0m INFO 08-01 20:50:41 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=31361)[0;0m INFO 08-01 20:50:41 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=31362)[0;0m INFO 08-01 20:50:41 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=31362)[0;0m INFO 08-01 20:50:41 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=31360)[0;0m INFO 08-01 20:50:41 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=31360)[0;0m INFO 08-01 20:50:41 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=31361)[0;0m INFO 08-01 20:50:42 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
[1;36m(VllmWorkerProcess pid=31362)[0;0m INFO 08-01 20:50:42 multiproc_worker_utils.py:215] Worker ready; awaiting tasks
[1;36m(VllmWork

In [11]:
handle_question(conversation_chain, "The FFP includes hours to what?")

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  3.41it/s, est. speed input: 339.20 toks/s, output: 54.81 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s, est. speed input: 365.96 toks/s, output: 63.45 toks/s]

User: where will ARA travel to for CBOA 2024?
Bot: 
ARA will travel to Camp Lejeune, North Carolina for the CBOA 2024 event.
User: The FFP includes hours to what?
Bot: 
To answer this question, you would need to look at the FFP for the entire project, which includes both phases. The quote states that the total FFP is $14,400.





In [10]:
handle_question(conversation_chain, "what is the scope of work for video production for FAM-S?")

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  3.99it/s, est. speed input: 421.16 toks/s, output: 52.14 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 248.73 toks/s, output: 64.65 toks/s]

User: where will ARA travel to for CBOA 2024?
Bot: 
ARA will travel to Camp Lejeune, North Carolina for the CBOA 2024 event.
User: what is the scope of work for video production for FAM-S?
Bot:  The scope of video production for FAM-S is to create a quality training video that covers the basic operations of the system, including maintenance/cleaning, and troubleshooting. The video will be approximately 10 minutes in duration and will feature video shots, graphics, and audio elements.





[1;36m(VllmWorkerProcess pid=52092)[0;0m ERROR 07-09 19:11:16 multiproc_worker_utils.py:226] Exception in worker VllmWorkerProcess while processing method start_worker_execution_loop: [../third_party/gloo/gloo/transport/tcp/unbound_buffer.cc:81] Timed out waiting 1800000ms for recv operation to complete, Traceback (most recent call last):
[1;36m(VllmWorkerProcess pid=52092)[0;0m ERROR 07-09 19:11:16 multiproc_worker_utils.py:226]   File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/vllm/executor/multiproc_worker_utils.py", line 223, in _run_worker_process
[1;36m(VllmWorkerProcess pid=52092)[0;0m ERROR 07-09 19:11:16 multiproc_worker_utils.py:226]     output = executor(*args, **kwargs)
[1;36m(VllmWorkerProcess pid=52092)[0;0m ERROR 07-09 19:11:16 multiproc_worker_utils.py:226]   File "/home/ubuntu/agent_testing/.conda/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
[1;36m(VllmWorkerProcess pid=52092)[0;0m ERROR 07-09 19

In [13]:
handle_question(conversation_chain, "wat does each module of the FAM-S training video consist of?")

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

RuntimeError: [../third_party/gloo/gloo/transport/tcp/pair.cc:534] Connection closed by peer [172.30.3.201]:8087

In [None]:

def main():
    load_dotenv()
    print("Chat with multiple PDFs")

    pdf_paths = input("Enter paths to your PDFs (comma separated): ").split(',')
    question = input("Ask a question from your document: ")
    
    if pdf_paths and question:
        docs = [path.strip() for path in pdf_paths]
        
        # Get the PDF text
        raw_text = get_pdf_text(docs)
        
        # Get the text chunks
        text_chunks = get_chunks(raw_text)
        
        # Create vector store
        vectorstore = get_vectorstore(text_chunks)
        
        # Create conversation chain
        conversation_chain = get_conversationchain(vectorstore)
        
        # Handle the question
        handle_question(conversation_chain, question)

if __name__ == '__main__':
    main()

In [None]:
from langchain_community.llms import VLLM
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Step 1: Set up the LLM and Vector Store
llm = VLLM(
    model="baichuan-inc/Baichuan2-13B-Chat",
    trust_remote_code=True,
    max_new_tokens=512,
    top_k=20,
    top_p=0.8,
    temperature=0.8,
    dtype="float16",
    tensor_parallel_size=8
)

# Define the PromptTemplate for extracting claims
claim_prompt = PromptTemplate(
    input_variables=["text"],
    template="""
    Identify the main claims and conclusions from the following text:
    {text}
    """
)

# Define a function to extract claims using LLMChain
def extract_claims(paragraphs):
    chain = LLMChain(llm=llm, prompt=claim_prompt)
    claims = [chain.run({"text": para}) for para in paragraphs]
    return claims

# Step 2: Identify claims and conclusions in the text chunks
# Example chunks (replace with actual text chunks)
introduction_chunks = ["..."]  # Add introduction paragraphs here
related_work_chunks = ["..."]  # Add related work paragraphs here
conclusion_chunks = ["..."]  # Add conclusion paragraphs here

# Extract claims
intro_claims = extract_claims(introduction_chunks)
related_work_claims = extract_claims(related_work_chunks)
conclusion_claims = extract_claims(conclusion_chunks)

# Step 3: Summarize and Compile the Literature Survey
def summarize_claims(claims, section_name):
    summary_template = PromptTemplate(
        input_variables=["claims"],
        template="""
        Summarize the following claims from the {section_name} section in a concise manner:
        {claims}
        """
    )
    chain = LLMChain(llm=llm, prompt=summary_template)
    summary = chain.run({"claims": " ".join(claims), "section_name": section_name})
    return summary

intro_summary = summarize_claims(intro_claims, "Introduction")
related_work_summary = summarize_claims(related_work_claims, "Related Work")
conclusion_summary = summarize_claims(conclusion_claims, "Conclusion")

# Compile the final literature survey report
literature_survey = f"""
Literature Survey Report

Introduction:
{intro_summary}

Related Work:
{related_work_summary}

Conclusion:
{conclusion_summary}
"""

print(literature_survey)


In [1]:
from langchain_community.llms import VLLM
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

# Step 1: Set up the LLM
llm = VLLM(
    model="baichuan-inc/Baichuan2-13B-Chat",
    trust_remote_code=True,
    max_new_tokens=512,
    top_k=20,
    top_p=0.8,
    temperature=0.8,
    dtype="float16",
    tensor_parallel_size=8
)

# Define the PromptTemplate for extracting claims
claim_prompt = PromptTemplate(
    input_variables=["text"],
    template="""
    Identify the main claims and conclusions that are the most important from the following text. Do not use any of your own wording just simple extract the main claims and conclusions made or any opinion by the paper:
    {text}
    """
)

# Define the PromptTemplate for summarizing claims
summary_prompt = PromptTemplate(
    input_variables=["claims"],
    template="""
    Summarize the following claims in a concise manner:
    {claims}
    """
)

# Function to extract claims using LLMChain
def extract_claims(paragraph):
    chain = LLMChain(llm=llm, prompt=claim_prompt)
    return chain.run({"text": paragraph})

# Function to summarize claims using LLMChain
def summarize_claims(claims):
    chain = LLMChain(llm=llm, prompt=summary_prompt)
    return chain.run({"claims": " ".join(claims)})

# Step 1: Extract claims from each paragraph
def process_chunks(chunks):
    claims = []
    for paragraph in chunks:
        claims.append(extract_claims(paragraph))
    return claims

# Example chunks (replace with actual text chunks)
introduction_chunks = ["""Should we need references for the evaluation of image captions? After all, when humans assess the appropriateness of an image caption, we do so just
by looking at the image and reading the candidate’s text.1 See Elliott and Keller (2014) and Kilickaya et al. (2017)for thorough comparisons of caption generation metrics.
arXiv:2104.08718v3 [cs.CV] 23 Mar 2022A recent trend in machine translation serves asinspiration: there, a key hurdle for reference-free
evaluation (sometimes called quality estimation) has been estimating cross-lingual similarity between source+candidate pairs (Blatz et al., 2004;
Specia et al., 2010; Mehdad et al., 2012; Specia and Shah, 2018). But recent work (Lo, 2019; Yankovskaya et al., 2019; Zhao et al., 2020) has
improved correlation with human judgment not by gathering more monolingual references, but instead by utilizing cross-lingual representations learned
by large-scale, pre-trained, multilingual models e.g., LASER (Artetxe and Schwenk, 2019) or MBERT (Devlin et al., 2019).""", """We hypothesize that the relationships learned by
pretrained vision+language models (e.g., ALIGN (Jia et al., 2021) and CLIP (Radford et al., 2021)) could similarly support reference-free evaluation
in the image captioning case. Indeed, they can: we show that a relatively direct application of CLIP to (image, generated caption) pairs results in surprisingly high correlation with human judgments
on a suite of standard image description benchmarks (e.g., MSCOCO (Lin et al., 2014)). We call this process CLIPScore (abbreviated to CLIP-S).
Beyond direct correlation with human judgments, an information gain analysis reveals that CLIP-S is complementary both to commonly reported metrics
(like BLEU-4, SPICE, and CIDEr) and to newly proposed reference-base"""]  # Replace with actual paragraphs
related_work_chunks = ["""Reference-only image caption evaluation In general, image caption generation models are evaluated by a suite of 5 reference based metrics:
BLEU-4 (Papineni et al., 2002) (which measures a version of precision between a candidate and the references), ROUGE-L (Lin, 2004) (which measures a version of recall), METEOR (Banerjee and
Lavie, 2005) (which computes a word-level alignment), CIDEr (Vedantam et al., 2015) (which combines n-gram tf-idf weighting and stemming) and
SPICE (Anderson et al., 2016) (which applies a semantic parser to a set of references, and computes similarity using the predicted scene graph).3
Yi et al. (2020) give a method for re-weighting BERTScore (Zhang et al., 2020) specifically tuned to the image caption generation domain (we refer
to their method as BERT-S++)"""]  # Replace with actual paragraphs
conclusion_chunks = ["""For literal image description tasks, CLIPScore
achieves high correlation with human judgments
of caption quality without references when used in
an off-the-shelf fashion. Additional experiments
in divergent domains suggest that CLIP can also
reason about non-photographic clip-art, and serves
as a reasonable option for reference-free evaluation
in the alt-text case. Promising future work includes
exploring 1) CLIP-S as a reinforcement learning reward for literal caption generators; and 2) whether
a small amount of labelled human rating data could
help CLIP-S adapt to domains where it struggles,
e.g., engagingness prediction. We hope our work
can contribute to the ongoing discussion about the
role of pretrained models in generation evaluation.
""", """Reference-free evaluation runs some risks.
Much like BERTScore, model-based metrics like
CLIP-S reflect the biases of the pre-training data.
While we believe that using CLIP-S as an offline
evaluation metric for literal caption quality accords
with the recommendations of CLIP’s model card18
(Mitchell et al., 2019), Agarwal et al. (2021)’s
study demonstrates that CLIP can make disproportionate incorrect classifications of people, e.g.,
“male images were misclassified into classes related to crime.” Exploring potential social biases of
candidate generations (as in, e.g., Hendricks et al.
(2018)) remains paramount, particularly if a system
is to be deployed."""]  # Replace with actual paragraphs

intro_claims = process_chunks(introduction_chunks)
related_work_claims = process_chunks(related_work_chunks)
conclusion_claims = process_chunks(conclusion_chunks)

# Step 2: Summarize each set of claims
def summarize_chunks(claims):
    summaries = []
    for claim in claims:
        summaries.append(summarize_claims(claim))
    return summaries

intro_summaries = summarize_chunks(intro_claims)
related_work_summaries = summarize_chunks(related_work_claims)
conclusion_summaries = summarize_chunks(conclusion_claims)

# Step 3: Concatenate summaries to form the final literature survey
final_intro_summary = " ".join(intro_summaries)
final_related_work_summary = " ".join(related_work_summaries)
final_conclusion_summary = " ".join(conclusion_summaries)

# Compile the final literature survey report
literature_survey = f"""
Literature Survey Report

Introduction:
{final_intro_summary}

Related Work:
{final_related_work_summary}

Conclusion:
{final_conclusion_summary}
"""

print(literature_survey)



2024-07-21 21:44:09,252	INFO worker.py:1770 -- Started a local Ray instance.


INFO 07-21 21:44:10 config.py:623] Defaulting to use mp for distributed inference
INFO 07-21 21:44:10 llm_engine.py:161] Initializing an LLM engine (v0.5.0.post1) with config: model='baichuan-inc/Baichuan2-13B-Chat', speculative_config=None, tokenizer='baichuan-inc/Baichuan2-13B-Chat', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=8, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0, served_model_name=baichuan-inc/Baichuan2-13B-Chat)
INFO 07-21 21:44:11 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 07-21 21:44:11 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProce

Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyError: '/psm_cbb87664'
Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyError: '/psm_cbb87664'
Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyError: '/psm_cbb87664'
Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyError: '/psm_cbb87664'
Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyErr

[1;36m(VllmWorkerProcess pid=40139)[0;0m INFO 07-21 21:44:18 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=40139)[0;0m INFO 07-21 21:44:18 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=40140)[0;0m INFO 07-21 21:44:18 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=40140)[0;0m INFO 07-21 21:44:18 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=40145)[0;0m INFO 07-21 21:44:18 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=40145)[0;0m INFO 07-21 21:44:18 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=40141)[0;0m INFO 07-21 21:44:18 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=40141)[0;0m INFO 07-21 21:44:18 selector.py:51] Using XFormers backend.
[1;36m(VllmWork

In [3]:
conclusion_claims = process_chunks(conclusion_chunks)

  warn_deprecated(
  warn_deprecated(
Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.20s/it, est. speed input: 97.64 toks/s, output: 69.48 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:03<00:00,  3.77s/it, est. speed input: 59.71 toks/s, output: 70.85 toks/s]


In [4]:
conclusion_claims

['Main Claims and Conclusions:\n    1. CLIPScore achieves high correlation with human judgments of caption quality without references when used in an off-the-shelf fashion for literal image description tasks.\n    2. CLIP can also reason about non-photographic clip-art, making it a reasonable option for reference-free evaluation in the alt-text case.\n    3. Promising future work includes exploring the use of CLIP-S as a reinforcement learning reward for literal caption generators and whether a small amount of labelled human rating data could help CLIP-S adapt to domains where it struggles.\n    4. The authors hope their work can contribute to the ongoing discussion about the role of pretrained models in generation evaluation.',
 'We encourage researchers to continue exploring\nand comparing metrics for this purpose.\n    The authors acknowledge that CLIP-S may also be\nbiased, and we call for further research into the biases\nof CLIP-S and other metrics.\n\n    In conclusion, CLIP-S c

In [2]:
conclusion_chunks = ["""For literal image description tasks, CLIPScore
achieves high correlation with human judgments
of caption quality without references when used in
an off-the-shelf fashion. Additional experiments
in divergent domains suggest that CLIP can also
reason about non-photographic clip-art, and serves
as a reasonable option for reference-free evaluation
in the alt-text case. Promising future work includes
exploring 1) CLIP-S as a reinforcement learning reward for literal caption generators; and 2) whether
a small amount of labelled human rating data could
help CLIP-S adapt to domains where it struggles,
e.g., engagingness prediction. We hope our work
can contribute to the ongoing discussion about the
role of pretrained models in generation evaluation.
""", """Reference-free evaluation runs some risks.
Much like BERTScore, model-based metrics like
CLIP-S reflect the biases of the pre-training data.
While we believe that using CLIP-S as an offline
evaluation metric for literal caption quality accords
with the recommendations of CLIP’s model card18
(Mitchell et al., 2019), Agarwal et al. (2021)’s
study demonstrates that CLIP can make disproportionate incorrect classifications of people, e.g.,
“male images were misclassified into classes related to crime.” Exploring potential social biases of
candidate generations (as in, e.g., Hendricks et al.
(2018)) remains paramount, particularly if a system
is to be deployed."""]  

In [7]:
# Step 2: Summarize each set of claims
def summarize_chunks(claims):
    summaries = []
    for claim in claims:
        summaries.append(summarize_claims(claim))
    return summaries


conclusion_summaries = summarize_chunks(conclusion_claims)


final_conclusion_summary = " ".join(conclusion_summaries)

print(final_conclusion_summary)

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Processed prompts: 100%|██████████| 1/1 [00:07<00:00,  7.29s/it, est. speed input: 106.41 toks/s, output: 70.21 toks/s]
Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.41s/it, est. speed input: 888.20 toks/s, output: 64.82 toks/s]


    Summarize the following claims in a consise and small manner. do not add anything that might not be relevant for summarizing this into a literature survey:
    M a i n   C l a i m s   a n d   C o n c l u s i o n s : 
         1 .   C L I P S c o r e   a c h i e v e s   h i g h   c o r r e l a t i o n   w i t h   h u m a n   j u d g m e n t s   o f   c a p t i o n   q u a l i t y   w i t h o u t   r e f e r e n c e s   w h e n   u s e d   i n   a n   o f f - t h e - s h e l f   f a s h i o f i o n   f o r   l i t e r a l   i m a g e   d e s c r i p t i o n   t a s k s . 
         2 .   C L I P   c a n   a l s o   r e a s o n a b o u t   n o n - p h o t o g r a p h i c   c l i p - a r t ,   m a k i n g   i t   a   r e a s o n a b l e   o p t i o n   f o r   r e f e r e n c e - f r e e   e v a l u a t i o n   i n   t h e   a l t - t e x t   c a s e . 
         3 .   P r o m i s i n g   f u t u r e   w o r k   i n c l u d e s   e x p l o r i n g   t h e   u s e   o f   C L I P - S   a




In [6]:
summary_prompt = PromptTemplate(
    input_variables=["claims"],
    template="""
    Summarize the following claims in a consise and small manner. do not add anything that might not be relevant for summarizing this into a literature survey:
    {claims}
    """
)

# Function to summarize claims using LLMChain
def summarize_claims(claims):
    chain = LLMChain(llm=llm, prompt=summary_prompt)
    return chain.run({"claims": " ".join(claims)})

In [1]:
import os

def count_pdfs_in_folder(folder_path):
    # List all files in the specified folder
    files = os.listdir(folder_path)
    # Count the number of PDF files
    pdf_count = sum(1 for file in files if file.lower().endswith('.pdf'))
    return pdf_count

# Specify the path to your folder
folder_path = 'pdf_proposals'

# Get the count of PDF files
pdf_count = count_pdfs_in_folder(folder_path)
print(f'There are {pdf_count} PDF files in the folder.')


There are 415 PDF files in the folder.


In [1]:
from langchain_community.llms import VLLM
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate

def get_conversation_chain():
    llm = VLLM(
        model="baichuan-inc/Baichuan2-13B-Chat",
        trust_remote_code=True,  # mandatory for hf models
        max_new_tokens=4096,
        top_k=20,
        top_p=0.8,
        temperature=0.8,
        dtype="float16",
        tensor_parallel_size=8
    )
    
    # Define the prompt template
    prompt_template = PromptTemplate(
        input_variables=["question"],
        template="""
        Given the question, generate a conversational response.

        Question: {question}
        
        Response:
        """
    )
    
    # Create the LLMChain
    conversation_chain = LLMChain(
        llm=llm,
        prompt=prompt_template
    )
    
    return conversation_chain

# Example usage
# Initialize the conversation chain
chain = get_conversation_chain()

# Ask a simple question
question = "What is the significance of cake in bakery?"
response = chain.run(question=question)
print(response)




2024-08-01 22:46:54,798	INFO worker.py:1770 -- Started a local Ray instance.


INFO 08-01 22:46:55 config.py:623] Defaulting to use mp for distributed inference
INFO 08-01 22:46:55 llm_engine.py:161] Initializing an LLM engine (v0.5.0.post1) with config: model='baichuan-inc/Baichuan2-13B-Chat', speculative_config=None, tokenizer='baichuan-inc/Baichuan2-13B-Chat', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=8, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0, served_model_name=baichuan-inc/Baichuan2-13B-Chat)
INFO 08-01 22:46:57 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 08-01 22:46:57 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProce

Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyError: '/psm_e26434bd'
Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyError: '/psm_e26434bd'
Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyError: '/psm_e26434bd'
Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyError: '/psm_e26434bd'
Traceback (most recent call last):
  File "/home/ubuntu/agent_testing/.conda/lib/python3.10/multiprocessing/resource_tracker.py", line 209, in main
    cache[rtype].remove(name)
KeyErr

[1;36m(VllmWorkerProcess pid=318967)[0;0m INFO 08-01 22:47:03 selector.py:131] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
[1;36m(VllmWorkerProcess pid=318967)[0;0m INFO 08-01 22:47:03 selector.py:51] Using XFormers backend.
[1;36m(VllmWorkerProcess pid=318966)[0;0m INFO 08-01 22:47:04 weight_utils.py:218] Using model weights format ['*.bin']
[1;36m(VllmWorkerProcess pid=318965)[0;0m INFO 08-01 22:47:04 weight_utils.py:218] Using model weights format ['*.bin']
[1;36m(VllmWorkerProcess pid=318969)[0;0m INFO 08-01 22:47:04 weight_utils.py:218] Using model weights format ['*.bin']
[1;36m(VllmWorkerProcess pid=318970)[0;0m INFO 08-01 22:47:04 weight_utils.py:218] Using model weights format ['*.bin']
INFO 08-01 22:47:04 weight_utils.py:218] Using model weights format ['*.bin']
[1;36m(VllmWorkerProcess pid=318968)[0;0m INFO 08-01 22:47:04 weight_utils.py:218] Using model weights format ['*.bin']
[1;36m(VllmWorkerProcess pid=318971)[0;0m INFO 08-01 22:47:04 

  warn_deprecated(
  warn_deprecated(
Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.34s/it, est. speed input: 25.35 toks/s, output: 70.08 toks/s]

Cake is an essential part of any bakery because it is a delicious and popular dessert. It not only satisfies customers' sweet tooth but also adds variety to the bakery's menu. Cake can be customized to different flavors, textures, and decorations, allowing bakeries to cater to various preferences and occasions. Additionally, cake is a great way to showcase the baker's creativity and skill, making it a significant aspect of any bakery's success.





In [3]:
paragraph = "Automatic evaluation of natural language generation, for example in machine translation and caption\ngeneration, requires comparing candidate sentences to annotated references. The goal is to evaluate\nsemantic equivalence. However, commonly used methods rely on surface-form similarity only. For\nexample, BLEU (Papineni et al., 2002), the most common machine translation metric, simply counts\nn-gram overlap between the candidate and the reference. While this provides a simple and general\nmeasure, it fails to account for meaning-preserving lexical and compositional diversity."
paragraph = "Future research is needed to explore this further."
template="Evaluate the following paragraph to determine if it contains a conclusion, claim, or opinion that is substantiated and can contribute to a literature review discussing the pros and cons of this method. NOTE - Exclude any sections that are purely methodological or lack substantiation. Be extremely strict with these requirements. Use the examples provided as a guide. Answer with 'yes' only if confident in its usefulness, otherwise 'no'. Only respond with 'yes' or 'no':\n\n"+paragraph+"\n\nExamples of valid content: \n1. 'This method significantly improves accuracy compared to previous approaches.'\n2. 'The technique offers a novel perspective that challenges conventional theories.'\n\nExamples of invalid content:\n1. 'The methodology involved multiple regression analyses.'\n2. 'Future research is needed to explore this further.'\n\nAnswer:"
response = chain.run(question=template)
print(response)

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  6.07it/s, est. speed input: 1281.96 toks/s, output: 36.62 toks/s]


        'no'



