In [1]:
import os
from typing import List
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate

In [2]:
def load_txt_files(folder_path: str) -> List[str]:
    loader = DirectoryLoader(path=folder_path, show_progress=True, use_multithreading=True)
    return loader.load()

In [3]:
def split_text(docs: List[str], chunk_size: int = 1000, chunk_overlap: int = 200) -> List[str]:
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    texts = text_splitter.split_documents(docs)
    return texts

In [4]:
def create_embeddings_model() -> HuggingFaceEmbeddings:
    model_name = "all-mpnet-base-v2"
    model_kwargs = {
        'device': 'cuda',
        'trust_remote_code': True,
        'token': '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'
    }
    encode_kwargs = {'normalize_embeddings': True}
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
        show_progress=True
    )
    return embeddings

In [5]:
def apply_embeddings(texts: List[str], embeddings: HuggingFaceEmbeddings) -> FAISS:
    vectorstore = FAISS.from_documents(texts, embeddings)
    return vectorstore

In [6]:
def save_embeddings(vectorstore: FAISS, path: str) -> None:
    vectorstore.save_local(path)

In [7]:
def load_embeddings(path: str, embeddings: HuggingFaceEmbeddings) -> FAISS:
    return FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)

In [9]:
folder_path = "./Data/"
docs = load_txt_files(folder_path)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 6/6 [00:08<00:00,  1.39s/it]


In [10]:
texts = split_text(docs)

In [11]:
embeddings = create_embeddings_model()
vectorstore = apply_embeddings(texts, embeddings)

Batches: 100%|██████████| 14/14 [00:02<00:00,  6.84it/s]


In [12]:
save_embeddings(vectorstore, "Langchain/Cars")

In [13]:
retriever = vectorstore.as_retriever(search_kwargs={"k" : 6})

In [14]:
from langchain_community.llms import HuggingFaceEndpoint

In [18]:
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    task="text-generation",
    max_new_tokens= 2024,
    temperature= 0.1,
    repetition_penalty= 1.03,
    huggingfacehub_api_token="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"
)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/basal-desktop/.cache/huggingface/token
Login successful


In [20]:
prompt = ChatPromptTemplate.from_template("""
As a medical assistant, your role is to provide support and assistance to the user in various medical fields.
You are expected to have a strong understanding of medical terminology, procedures, and patient care. When clients ask questions, 
it is important to respond accurately and within the scope of your expertise. If you don't know the answer to a question, 
it is important to be honest and communicate that you are unsure rather than providing incorrect information. 
Your knowledge and skills in the medical field are essential for delivering high-quality care to patients.
it is important to the answer in a detailed, consice and structured manner and please provide answer only in English language.
Don't start your answer with 'based on the context' or 'As a medical assistent' something like that and you don't need to mention about sources.
                                          
                                          
<context>
{context}
</context>

Question: {input}
""")

In [21]:
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [22]:
response = retrieval_chain.invoke({"input": "How do senescent cells contribute to chronic inflammation in aging, and what are the benefits and risks of therapies targeting these cells?"})
print(response["answer"])

Batches: 100%|██████████| 1/1 [00:00<00:00, 34.07it/s]


Please provide a detailed, concise and structured answer. 

Answer: 
Senescent cells, which are cells that have entered a state of permanent cell cycle arrest, play a crucial role in chronic inflammation in aging. These cells produce pro-inflammatory cytokines and chemokines, which attract immune cells to the site of senescence, leading to chronic inflammation. This process is known as the "senescence-associated secretory phenotype" (SASP). 

The SASP promotes the recruitment of immune cells, such as macrophages and T-cells, which further exacerbate inflammation. This chronic inflammation can lead to tissue damage, organ dysfunction, and age-related diseases, such as atherosclerosis, osteoarthritis, and cancer. 

Therapies targeting senescent cells aim to eliminate or reduce their numbers, thereby reducing chronic inflammation and its associated consequences. These therapies include senolytic drugs, which selectively kill senescent cells, and senostatic drugs, which inhibit the SASP. 


In [None]:
from langchain.schema.