In [25]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langgraph.graph import END, StateGraph
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from transformers import (
    BitsAndBytesConfig,
    AutoTokenizer,
    TokenClassificationPipeline,
    AutoModelForTokenClassification,
    AutoTokenizer,
)
from transformers.pipelines import AggregationStrategy
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
import json
from typing_extensions import TypedDict

In [2]:
raw_documents = TextLoader('text.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1168, which is longer than the specified 500
Created a chunk of size 970, which is longer than the specified 500
Created a chunk of size 862, which is longer than the specified 500
Created a chunk of size 533, which is longer than the specified 500
Created a chunk of size 542, which is longer than the specified 500
Created a chunk of size 581, which is longer than the specified 500
Created a chunk of size 552, which is longer than the specified 500
Created a chunk of size 542, which is longer than the specified 500
Created a chunk of size 742, which is longer than the specified 500
Created a chunk of size 597, which is longer than the specified 500
Created a chunk of size 731, which is longer than the specified 500
Created a chunk of size 567, which is longer than the specified 500
Created a chunk of size 632, which is longer than the specified 500
Created a chunk of size 602, which is longer than the specified 500


In [12]:
# Define keyphrase extraction pipeline
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
    def __init__(self, model, *args, **kwargs):
        super().__init__(
            model=AutoModelForTokenClassification.from_pretrained(model),
            tokenizer=AutoTokenizer.from_pretrained(model),
            *args,
            **kwargs
        )

    def postprocess(self, all_outputs):
        results = super().postprocess(
            all_outputs=all_outputs,
            aggregation_strategy=AggregationStrategy.FIRST,
        )
        return np.unique([result.get("word").strip() for result in results])
model_name = "ml6team/keyphrase-extraction-distilbert-inspec"
extractor = KeyphraseExtractionPipeline(model=model_name)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [6]:
biencoder = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
retriever = Chroma.from_documents(documents, biencoder).as_retriever()

  attn_output = torch.nn.functional.scaled_dot_product_attention(


In [8]:
tokenizer = AutoTokenizer.from_pretrained("t-bank-ai/T-lite-instruct-0.1")
terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]
quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype="float16",
            bnb_4bit_use_double_quant=True,
        )
llm = HuggingFacePipeline.from_model_id(
    model_id="t-bank-ai/T-lite-instruct-0.1",
    device=0,
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=512,
        return_full_text=False,
        top_k=50,
        do_sample = True,
        temperature=0.6,
        eos_token_id = terminators
    ),
    model_kwargs={"quantization_config": quantization_config},
)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Setting the `device` argument to None from 0 to avoid the error caused by attempting to move the model that was already loaded on the GPU using the Accelerate module to the same or another device.


In [13]:
def extract_keyword(docs):
    list_doc = []
    i=0
    for doc in docs:
        keywords = extractor(doc.page_content)
        klist = ''.join(keywords)
        list_doc.append(klist)
        i += 1
    return json.dumps(list_doc, ensure_ascii=False)

In [14]:
rephrase_rag_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a helper who rephrases the question based on the provided keyword.

    Return the question a JSON with a single key 'question' and no premable or explaination. .

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    My goal is to rephrase the question, adding keywords from the context to the question. The most suitable documents are higher in the list, use them\n
    Document: {context} \n\n
    Question: {question} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question","context"],
)
rephrase_rag_chain =  {"context": retriever | extract_keyword, "question": RunnablePassthrough()} | rephrase_rag_prompt | llm | JsonOutputParser()
rephrase_rag_chain.invoke("Who can get Alzheimer's?")['question']

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


"Who is particularly prone to Alzheimer's symptoms?"

In [15]:
def format_docs(docs):
    list_doc = []
    i=0
    for doc in docs:
        curDoc = {
            'id':i,
            'content':doc.page_content
        }
        list_doc.append(curDoc)
        i += 1
    return json.dumps(list_doc, ensure_ascii=False)

In [16]:
generate_ans_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a QA assistant.
    Given the context information and not prior knowledge, provide a well-reasoned and informative response to the query. Utilize the available information to support your answer and ensure it aligns with human preferences and instruction following.\n
    The answer should be short and relevant to the user's question.
    CONTEXT:\n
    {context}
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question","context"],
)
generate_rag_chain =  {"context": retriever | format_docs, "question": RunnablePassthrough()} | generate_ans_prompt | llm | StrOutputParser()

In [18]:
class GraphState(TypedDict):
    initial_question : str
    num_steps : int
    new_question : str
    final_out: str

In [30]:
def rephrase_question(state):
    print("----REPHRASE BASED ON CONTEXT")
    question = state['initial_question']
    num_steps = state['num_steps']
    num_steps += 1
    new_questions = [question]
    model_embed = SentenceTransformer("all-MiniLM-L6-v2")
    for i in range(4):
        new_questions.append(rephrase_rag_chain.invoke(question)['question'])
    sentence = model_embed.encode(new_questions)
    score = model_embed.similarity(sentence, sentence)[0,1:]
    similarities = torch.argmax(score).item()
    print(score)
    print(similarities)
    print(new_questions)
    return {'new_question': new_questions[similarities+1], 'num_steps': num_steps}

In [31]:
def generate_ans(state):
    print("---GENERATE ANS WITH NEW QUESTION")
    source_quest = state['initial_question']
    question = state['new_question']
    num_steps = state['num_steps']
    num_steps += 1
    final_out = generate_rag_chain.invoke(question)
    return {"final_out":final_out,"num_steps":num_steps}

In [32]:
def state_printer(state):
    """print the state"""
    print("---STATE PRINTER---")
    print(f"Initial question: {state['initial_question']} \n" )
    print(f"num_steps: {state['num_steps']} \n")
    print(f"new_question: {state['new_question']} \n" )
    print(f"final_out: {state['final_out']} \n" )
    return

In [33]:
workflow = StateGraph(GraphState)
workflow.add_node('rephrase_question',rephrase_question)
workflow.add_node('generate_ans',generate_ans)
workflow.add_node('state_printer',state_printer)

workflow.set_entry_point('rephrase_question')
workflow.add_edge('rephrase_question', 'generate_ans')
workflow.add_edge('generate_ans', 'state_printer')
workflow.add_edge("state_printer", END)

app = workflow.compile()

In [34]:
inputs = {"initial_question": "Who can get Alzheimer's?", "num_steps":0}
output = app.invoke(inputs)

print(output['final_out'])

----REPHRASE BASED ON CONTEXT




tensor([0.7156, 0.7695, 0.7625, 0.4476])
1
["Who can get Alzheimer's?", "What populations are at higher risk for developing Alzheimer's disease?", "Who among individuals with aging-related symptoms might be at higher risk for developing Alzheimer's?", "Who is more prone to develop Alzheimer's disease?", 'What religious orders receive grants for studies in aging-related disorders?']
---GENERATE ANS WITH NEW QUESTION
---STATE PRINTER---
Initial question: Who can get Alzheimer's? 

num_steps: 2 

new_question: Who among individuals with aging-related symptoms might be at higher risk for developing Alzheimer's? 

final_out: 
    According to the study findings, females with high-risk genetic variants for Alzheimer's disease (AD) are at a higher risk of disease progression due to a harmful inflammatory response in the brain's immune cells. This heightened risk is influenced by immune pathways like cGAS-STING. The research emphasizes the importance of considering sex differences in Alzheimer