In [27]:
import numpy as np
import pandas as pd

from langchain_community.document_loaders.dataframe import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM,  BitsAndBytesConfig
import torch

import os
import dotenv
dotenv.load_dotenv()


True

In [2]:
# Read data
path = './data/acl_main_20102024.pkl'
data_acl = pd.read_pickle(path)

print(data_acl.shape)
data_acl.head()

(7509, 9)


Unnamed: 0,titel,header,abstract,main_body,references,text,year,pages,conference
0,Learning Disentangled Representations of Negat...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nNegation and uncertainty modeling ar...,"1 Introduction\nIn formal semantics, negation...",\nReferences\nHeike Adel and Hinrich Schütze. ...,Abstract\nNegation and uncertainty modeling a...,2022,18,acl
1,Explanation Graph Generation via Pre-trained L...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nPre-trained sequence-to-sequence lan...,1 Introduction\nPre-trained sequence-to-seque...,"\nReferences\nYonatan Bisk, Rowan Zellers, Jia...",Abstract\nPre-trained sequence-to-sequence la...,2022,19,acl
2,Updated Headline Generation Creating Updated S...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nWe propose the task of updated headl...,1 Introduction\nAutomatic text summarization ...,"reference, we also eval-\nuate the gold headl...",Abstract\nWe propose the task of updated head...,2022,24,acl
3,Conditional Bilingual Mutual Information Based...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nToken-level adaptive training approa...,1 Introduction\nNeural machine translation (N...,"\nReferences\nDzmitry Bahdanau, Kyunghyun Cho,...",Abstract\nToken-level adaptive training appro...,2022,13,acl
4,ReCLIP A Strong Zero-Shot Baseline for Referri...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nTraining a referring expression comp...,1 Introduction\nVisual referring expression c...,references the\ncolor in the text prompt to s...,Abstract\nTraining a referring expression com...,2022,18,acl


In [3]:
# Read data
path = './data/emnlp_main_20102024.pkl'
data_emnlp = pd.read_pickle(path)

print(data_emnlp.shape)
data_emnlp.head()

(7706, 9)


Unnamed: 0,titel,header,abstract,main_body,references,text,year,pages,conference
0,Generating Summaries with Controllable Readabi...,Proceedings of the 2023 Conference on Empirica...,Abstract\nReadability refers to how easily a r...,1 Introduction\nSummaries convey salient piec...,"reference summary; that way,\nthe model can l...",Abstract\nReadability refers to how easily a ...,2023,19,emnlp
1,"AMR Parsing is Far from Solved GrAPES, the Gra...",Proceedings of the 2023 Conference on Empirica...,Abstract\nWe present the Granular AMR Parsing ...,1 Introduction\nAbstract Meaning Representati...,references for each possible\nreading.\nEthic...,Abstract\nWe present the Granular AMR Parsing...,2023,25,emnlp
2,ViT-TTS Visual Text-to-Speech with Scalable Di...,Proceedings of the 2023 Conference on Empirica...,Abstract\nText-to-speech(TTS) has undergone re...,1 Introduction\nText-to-speech (TTS) (Ren et ...,reference samples in terms of\nsemantic meani...,Abstract\nText-to-speech(TTS) has undergone r...,2023,13,emnlp
3,XLM-V Overcoming the Vocabulary Bottleneck in ...,Proceedings of the 2023 Conference on Empirica...,Abstract\nLarge multilingual language models t...,1 Introduction\nWhile multilingual language m...,"\nReferences\nDavid Ifeoluwa Adelani, Jade Abb...",Abstract\nLarge multilingual language models ...,2023,11,emnlp
4,"FAME Flexible, Scalable Analogy Mappings Engine",Proceedings of the 2023 Conference on Empirica...,Abstract\nAnalogy is one of the core capacitie...,1 Introduction\nOne of the pinnacles of human...,\nReferences\nCarl Allen and Timothy Hospedale...,Abstract\nAnalogy is one of the core capaciti...,2023,17,emnlp


In [4]:
# Combine dataframes
data = pd.concat([data_acl, data_emnlp])
for col in data.columns:
    try:
        data[col] = data[col].str.encode('utf-8', errors='ignore').str.decode('utf-8')
    except:
        continue

data.shape

(15215, 9)

In [5]:
data.head()

Unnamed: 0,titel,header,abstract,main_body,references,text,year,pages,conference
0,Learning Disentangled Representations of Negat...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nNegation and uncertainty modeling ar...,"1 Introduction\nIn formal semantics, negation...",\nReferences\nHeike Adel and Hinrich Schütze. ...,Abstract\nNegation and uncertainty modeling a...,2022,18,acl
1,Explanation Graph Generation via Pre-trained L...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nPre-trained sequence-to-sequence lan...,1 Introduction\nPre-trained sequence-to-seque...,"\nReferences\nYonatan Bisk, Rowan Zellers, Jia...",Abstract\nPre-trained sequence-to-sequence la...,2022,19,acl
2,Updated Headline Generation Creating Updated S...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nWe propose the task of updated headl...,1 Introduction\nAutomatic text summarization ...,"reference, we also eval-\nuate the gold headl...",Abstract\nWe propose the task of updated head...,2022,24,acl
3,Conditional Bilingual Mutual Information Based...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nToken-level adaptive training approa...,1 Introduction\nNeural machine translation (N...,"\nReferences\nDzmitry Bahdanau, Kyunghyun Cho,...",Abstract\nToken-level adaptive training appro...,2022,13,acl
4,ReCLIP A Strong Zero-Shot Baseline for Referri...,Proceedings of the 60th Annual Meeting of the ...,Abstract\nTraining a referring expression comp...,1 Introduction\nVisual referring expression c...,references the\ncolor in the text prompt to s...,Abstract\nTraining a referring expression com...,2022,18,acl


In [6]:
loader = DataFrameLoader(data, page_content_column='text')
docs = loader.load()
len(docs)

15215

In [7]:
# Split text
splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=30)
chunked_docs = splitter.split_documents(docs)

In [4]:
# Embeddings
model_name = "BAAI/bge-base-en-v1.5"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    #model_kwargs=model_kwargs,
    #encode_kwargs=encode_kwargs,
    cache_folder = './hf'
)


In [None]:
db = FAISS.from_documents(chunked_docs, embeddings)
db.save_local("faiss_index")

In [5]:
db = FAISS.load_local("faiss_index", embeddings=embeddings, allow_dangerous_deserialization=True)
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [6]:
from transformers import TextStreamer

In [7]:
model_name = "meta-llama/Llama-3.1-8B-Instruct"
config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=config, device_map="auto", token=os.environ['hf_token'])
tokenizer = AutoTokenizer.from_pretrained(model_name, token=os.environ['hf_token'])

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [8]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline

In [9]:
pipe = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200, streamer=streamer)

llm = HuggingFacePipeline(model_id=model_name, pipeline=pipe)
llm = ChatHuggingFace(llm=llm)

Device set to use cuda:1


In [10]:
res = llm.invoke("hi how are you?")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


I'm just a language model, so I don't have feelings or emotions like humans do, but I'm functioning properly and ready to assist you with any questions or tasks you have. How can I help you today?


In [44]:
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")



In [11]:
from langchain.prompts import PromptTemplate
from transformers import pipeline, TextStreamer
from langchain_core.output_parsers import StrOutputParser

prompt_template = """
<|system|>
Answer the question based on the following context:

{context}

</s>
<|user|>
{question}
</s>
<|assistant|>

 """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

llm_chain = prompt | llm | StrOutputParser()


In [12]:
from langchain_core.runnables import RunnablePassthrough

rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain

In [16]:
question = "Who invented the Chain of Thought method?"
res = rag_chain.invoke(question)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


The Chain of Thought (CoT) method was introduced by researchers at the University of California, Berkeley, and has since been developed and improved by various researchers.


In [15]:
question = "What are Task Oriented Dialoge systems?"
res = rag_chain.invoke(question)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Task-oriented dialogue systems are designed to assist users in achieving specific goals or tasks through conversations. These systems aim to understand the user's intent, provide relevant information, and guide the user through the task completion process. They are often used in various domains such as customer service, education, and healthcare.

In a task-oriented dialogue system, the user typically initiates a conversation by stating their goal or task, and the system responds with a series of questions or prompts to clarify the user's intent and gather necessary information. The system then uses this information to provide relevant guidance, instructions, or answers to help the user complete the task.

Task-oriented dialogue systems can be categorized into two main types:

1. **Goal-oriented dialogue systems**: These systems are designed to achieve a specific goal or task, such as booking a flight, making a reservation, or answering a question.
2. **Procedural dialogue systems**: T

In [19]:
from langchain.chains import RetrievalQA
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="refine", retriever=retriever, return_source_documents=True)

In [21]:
question = "What is Chain of Thought?"
result = qa.invoke({"query": question})

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Chain-of-Thought (CoT) is an approach to artificial intelligence, specifically in the context of Large Language Models (LLMs), that encourages the model to articulate a step-by-step reasoning process leading to the final answer. This approach is supported by in-context demonstrations, which means that the model is provided with a prompt or context that guides its reasoning and helps it to arrive at a solution.

In essence, CoT is a way of training LLMs to think like humans, by having them explain their thought process and reasoning behind their answers. This is in contrast to more traditional approaches that simply provide a final answer without explaining the reasoning behind it.

CoT is often used in conjunction with other approaches, such as Logic-LM, Tree-of-Thought (ToT), Cumulative Reasoning (CR), and DetermLR, to improve the performance and efficiency of the model.


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Chain-of-Thought (CoT) is an approach to artificial intelligence that encourages a Large Language Model (LLM) to articulate a step-by-step reasoning process leading to the final answer. This approach is supported by in-context demonstrations, where the model is guided by a prompt or context to arrive at a solution.

CoT is a method of reasoning that breaks down complex problems into manageable steps, creating a chain of thoughts that links these steps together, ensuring that no important conditions are overlooked. This approach provides an observable reasoning process, allowing users to understand the model's decision-making trajectory and increasing the trustworthiness and interpretability of the final answer.

The benefits of CoT prompting have led to its widespread attention in both academia and industry, evolving into a distinct research branch within the field of prompt engineering. It is also a crucial component in the landscape of AI autonomous agents. However, existing studies 

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Chain-of-Thought (CoT) is a linear problem-solving approach that involves a step-by-step reasoning process, where each step builds upon the previous one. This approach has been applied to multi-step reasoning tasks, and various methods have been proposed to automate CoT, such as Auto-CoT, which constructs demonstrations by sampling diverse questions and generating reasoning chains.

CoT has been combined with other techniques, including:

1. **PS Prompt**: breaks tasks into subtasks
2. **ToT**: expands on the reasoning process by considering multiple paths of reasoning and self-evaluating choices
3. **effective GoT**: frames thoughts as graphs
4. **Natural Program**: improves deductive reasoning tasks
5. **Re-reading prompt**: revisits the question information embedded within input prompts

Additionally, multi-agent discussion among LLMs has been explored as a method to improve reasoning abilities. This involves multiple LLMs discussing and reasoning given problems in an interactive wa

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Chain-of-Thought (CoT) is an approach to multi-hop question answering (QA) that involves breaking down questions into a sequence of reasoning steps before arriving at a final answer. However, traditional CoT approaches discard the intermediate steps and aggregate the final answers through a voting mechanism, which can lead to several shortcomings.

To address these limitations, we introduce Multi-Chain Reasoning (MCR), an approach that prompts large language models to meta-reason over multiple chains of thought, rather than aggregating their answers. MCR examines different reasoning chains, mixes information between them, and selects the most relevant facts to generate an explanation and predict the answer.

MCR outperforms strong baselines on 7 multi-hop QA datasets, and our analysis reveals that MCR explanations exhibit high quality, enabling humans to verify its answers.


In [26]:
for doc in result['source_documents']:
    print(doc.metadata['titel'])

Faithful Logical Reasoning via Symbolic Chain-of-Thought
Navigate through Enigmatic Labyrinth A Survey of Chain of Thought Reasoning Advances, Frontiers and Future
Rethinking the Bounds of LLM Reasoning Are Multi-Agent Discussions the Key
Answering Questions by Meta-Reasoning over Multiple Chains of Thought


In [None]:
from dotenv import load_dotenv

load_dotenv()
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


from typing import Any, Dict, List

from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

from consts import INDEX_NAME


def run_llm(query: str, chat_history: List[Dict[str, Any]] = []):
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
    chat = ChatOpenAI(verbose=True, temperature=0)

    rephrase_prompt = hub.pull("langchain-ai/chat-langchain-rephrase")

    retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
    stuff_documents_chain = create_stuff_documents_chain(chat, retrieval_qa_chat_prompt)

    history_aware_retriever = create_history_aware_retriever(
        llm=chat, retriever=docsearch.as_retriever(), prompt=rephrase_prompt
    )
    qa = create_retrieval_chain(
        retriever=history_aware_retriever, combine_docs_chain=stuff_documents_chain
    )

    result = qa.invoke(input={"input": query, "chat_history": chat_history})
    return result


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


def run_llm2(query: str, chat_history: List[Dict[str, Any]] = []):
    embeddings = OpenAIEmbeddings()
    docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
    chat = ChatOpenAI(model_name="gpt-4o", verbose=True, temperature=0)

    rephrase_prompt = hub.pull("langchain-ai/chat-langchain-rephrase")

    retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

    rag_chain = (
        {
            "context": docsearch.as_retriever() | format_docs,
            "input": RunnablePassthrough(),
        }
        | retrieval_qa_chat_prompt
        | chat
        | StrOutputParser()
    )

    retrieve_docs_chain = (lambda x: x["input"]) | docsearch.as_retriever()

    chain = RunnablePassthrough.assign(context=retrieve_docs_chain).assign(
        answer=rag_chain
    )

    result = chain.invoke({"input": query, "chat_history": chat_history})
    return result