# TEST ES

In [21]:
import langchain
import dotenv
import os
from langchain_openai import OpenAIEmbeddings   
import getpass

In [22]:
# Optional value of the `API_KEY` environment variable (None when it is unset).
OPENAI_API = os.environ.get('API_KEY')

In [23]:
# Ask for the OpenAI key only when the environment does not already provide
# one, so a full re-run does not stop at a prompt unnecessarily. The prompt is
# labelled because this notebook also asks for a Google key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [24]:
# Embedding client used by the Elasticsearch store created in the next cell.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

In [None]:
from langchain_elasticsearch import ElasticsearchStore

# Connection settings for the local Elasticsearch test index.
es_settings = {
    "index_name": "stress-index",
    "embedding": embeddings,
    "es_url": "http://localhost:9200",
}
vector_store = ElasticsearchStore(**es_settings)

In [40]:
from uuid import uuid4

from langchain_core.documents import Document

# Demo corpus as (text, source) pairs. Order matters: the document_N names and
# the generated ids both follow list position.
_DEMO_POSTS = [
    (
        "I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
        "tweet",
    ),
    (
        "The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
        "news",
    ),
    (
        "Building an exciting new project with LangChain - come check it out!",
        "tweet",
    ),
    (
        "Robbers broke into the city bank and stole $1 million in cash.",
        "news",
    ),
    (
        "Wow! That was an amazing movie. I can't wait to see it again.",
        "tweet",
    ),
    (
        "Is the new iPhone worth the price? Read this review to find out.",
        "website",
    ),
    (
        "The top 10 soccer players in the world right now.",
        "website",
    ),
    (
        "LangGraph is the best framework for building stateful, agentic applications!",
        "tweet",
    ),
    (
        "The stock market is down 500 points today due to fears of a recession.",
        "news",
    ),
    (
        "I have a bad feeling I am going to get deleted :(",
        "tweet",
    ),
]

documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in _DEMO_POSTS
]

# Keep the individual names bound for any cell that refers to a single document.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

# One random id per document, in the same order as `documents`.
uuids = [str(uuid4()) for _ in documents]

vector_store.add_documents(documents=documents, ids=uuids)

['929c34c8-35eb-4ca9-bfc1-81bdbdc4ca05',
 'fc600c92-7f6d-4660-8a1c-8ad25b3b23ed',
 'f5bdd498-3499-4cf1-9c31-69250eec9850',
 'a0d58f71-6f65-4ac1-89df-ba8b6ced1663',
 'd686adaf-4544-4d08-affd-8cb46a7c1389',
 'ff31fbbe-312c-4440-9b51-2a852ed6e5e9',
 '016a3b07-bc7a-4081-a9d5-b82f1e7c0777',
 'fe50808e-871a-4828-acc3-5324ffd5efb1',
 'e8e338bc-9b39-4d57-b271-48ceb8c79b0d',
 '73622aee-92c1-4b23-af81-19243a77de82']

In [41]:
# Remove the most recently generated id from the index.
last_id = uuids[-1]
vector_store.delete(ids=[last_id])

True

In [42]:
# Restrict the search to documents whose metadata source is exactly "tweet".
tweet_only = [{"term": {"metadata.source.keyword": "tweet"}}]

results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter=tweet_only,
)

# One bullet per hit: text followed by its metadata.
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]
* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]


In [44]:
# Retriever that filters hits by a score threshold of 0.3.
threshold_kwargs = {"score_threshold": 0.3}
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=threshold_kwargs,
)
retriever.invoke("Stealing from the bank is a crime")

[Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(metadata={'source': 'tweet'}, page_content='I had chocalate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!')]

# TEST TOOLS

In [4]:
from langchain_community.utilities.arxiv import ArxivAPIWrapper
# Settings for the arXiv wrapper, gathered in one place so they are easy to tune.
arxiv_options = {
    "top_k_results": 3,
    "ARXIV_MAX_QUERY_LENGTH": 300,
    "load_max_docs": 3,
    "load_all_available_meta": False,
    "doc_content_chars_max": 40000,
}
arxiv = ArxivAPIWrapper(**arxiv_options)
arxiv.run("treatment for stress")


'Published: 2020-07-13\nTitle: Basis functions for residual stresses\nAuthors: Sankalp Tiwari, Anindya Chatterjee\nSummary: We consider arbitrary preexisting residual stress states in arbitrarily\nshaped, unloaded bodies. These stresses must be self-equilibrating and traction\nfree. Common treatments of the topic tend to focus on either the mechanical\norigins of the stress, or methods of stress measurement at certain locations.\nHere we take the stress field as given and consider the problem of\napproximating any such stress field, in a given body, as a linear combination\nof predetermined fields which can serve as a basis. We consider planar stress\nstates in detail, and introduce an extremization problem that leads to a linear\neigenvalue problem. Eigenfunctions of that problem form an orthonormal basis\nfor all possible residual stress states of sufficient smoothness. In numerical\nexamples, convergence of the approximating stress fields is demonstrated in the\n$L^2$ norm for conti

In [52]:
from langchain_community.tools.pubmed.tool import PubmedQueryRun
from langchain_community.utilities.pubmed import PubMedAPIWrapper
# PubMed wrapper settings. The contact email is read from the PUBMED_EMAIL
# environment variable so that no personal address is stored in the notebook;
# when the variable is unset, the wrapper's own default is used.
pubmed_options = {
    "top_k_results": 5,
    "MAX_QUERY_LENGTH": 300,  # default
    "doc_content_chars_max": 10000,  # default 2000
    "max_retry": 5,  # default
}
pubmed_email = os.environ.get("PUBMED_EMAIL")
if pubmed_email:
    pubmed_options["email"] = pubmed_email

wrapper = PubMedAPIWrapper(**pubmed_options)
tool = PubmedQueryRun(api_wrapper=wrapper)

In [11]:
# Run the query through the PubMed tool; the result is stored in `a` and not displayed.
a =tool.invoke("treatment for stress?")

In [53]:
# Print what the wrapper's `run` returns for the query.
print(wrapper.run("treatment for stress?"))

Published: 2024-09-14
Title: A Canadian national study of provincial and territorial correctional workers' suicidal ideation, plans, and attempts.
Copyright Information: © 2024 The Author(s). Stress and Health published by John Wiley & Sons Ltd.
Summary::
Correctional workers (CWs) endure several operational stressors (e.g., exposures to potentially psychologically traumatic events) and organisational stressors (e.g., shift work, staff shortages), which are associated with positive screens for mental disorders and self-reports of suicidal behaviours and thus urgently warrant further inquiry. The Canadian Provincial and Territorial Correctional Worker Mental Health and Well-Being Study (CWMH) used an online survey to collect data from Canadian correctional service organisations across all 13 provinces and territories. This national Canadian study investigates suicidal behaviours among CWs across diverse occupational roles and provincial and territorial jurisdictions (n = 3740, 50.1% fem

In [49]:
# Print what the wrapper's `load` returns for the same query.
print(wrapper.load("treatment for stress?"))

[{'uid': '39276313', 'Title': "A Canadian national study of provincial and territorial correctional workers' suicidal ideation, plans, and attempts.", 'Published': '2024-09-14', 'Copyright Information': '© 2024 The Author(s). Stress and Health published by John Wiley & Sons Ltd.', 'Summary': 'Correctional workers (CWs) endure several operational stressors (e.g., exposures to potentially psychologically traumatic events) and organisational stressors (e.g., shift work, staff shortages), which are associated with positive screens for mental disorders and self-reports of suicidal behaviours and thus urgently warrant further inquiry. The Canadian Provincial and Territorial Correctional Worker Mental Health and Well-Being Study (CWMH) used an online survey to collect data from Canadian correctional service organisations across all 13 provinces and territories. This national Canadian study investigates suicidal behaviours among CWs across diverse occupational roles and provincial and territor

In [54]:
# Fetch one article by uid, passing an empty webenv.
wrapper.retrieve_article(uid ='39276313', webenv ='')

{'uid': '39276313',
 'Title': "A Canadian national study of provincial and territorial correctional workers' suicidal ideation, plans, and attempts.",
 'Published': '2024-09-14',
 'Copyright Information': '© 2024 The Author(s). Stress and Health published by John Wiley & Sons Ltd.',
 'Summary': 'Correctional workers (CWs) endure several operational stressors (e.g., exposures to potentially psychologically traumatic events) and organisational stressors (e.g., shift work, staff shortages), which are associated with positive screens for mental disorders and self-reports of suicidal behaviours and thus urgently warrant further inquiry. The Canadian Provincial and Territorial Correctional Worker Mental Health and Well-Being Study (CWMH) used an online survey to collect data from Canadian correctional service organisations across all 13 provinces and territories. This national Canadian study investigates suicidal behaviours among CWs across diverse occupational roles and provincial and terri

In [45]:
# Keep the `load_docs` result for the query in `docs` for the inspection cells below.
docs = wrapper.load_docs('treatment for stress?')

In [48]:
# Number of entries in `docs`.
len(docs)

5

In [46]:
# Show the text of the first entry.
print(docs[0].page_content)

Correctional workers (CWs) endure several operational stressors (e.g., exposures to potentially psychologically traumatic events) and organisational stressors (e.g., shift work, staff shortages), which are associated with positive screens for mental disorders and self-reports of suicidal behaviours and thus urgently warrant further inquiry. The Canadian Provincial and Territorial Correctional Worker Mental Health and Well-Being Study (CWMH) used an online survey to collect data from Canadian correctional service organisations across all 13 provinces and territories. This national Canadian study investigates suicidal behaviours among CWs across diverse occupational roles and provincial and territorial jurisdictions (n = 3740, 50.1% female). The results estimated prevalence proportions for self-reported past-year and lifetime suicidal thoughts, planning, and attempts across the 13 Canadian provincial and territorial correctional systems, with the exceptions of past-year suicidal planning

In [47]:
# Show the text of the fifth entry (index 4).
print(docs[4].page_content)

BACKGROUND: In Indonesia, the diagnosis of a serious illness is often mediated through the patient's family, reflecting the cultural importance of family involvement in the patient's care and collective decision-making.
AIM: To use a case study to show the difficulty that healthcare professionals face telling the patient the truth about their condition in Indonesia.
METHOD: The Kagawa-Singer and Blackhall ABCDE framework was used during truth-telling dilemmas to assess patients' and families' attitudes (A), beliefs (B), contexts (C), decision-making styles (D) and environments (E).
FINDINGS: Studies have shown that family involvement in health-related communications can alleviate the stress associated with the disclosure of a serious illness. Palliative care nurses must acknowledge the importance of family in the patient's cultural context, by involving them in the disclosure of a diagnosis and disease trajectory by integrating every element of the ABCDE model in palliative care.


In [55]:
# Ask for the Google key only when the environment does not already provide
# one, so a full re-run does not stop at a prompt unnecessarily. The prompt is
# labelled because this notebook also asks for an OpenAI key.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")

In [76]:
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
# Gemini embedding client, exercised once with a sample string.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    task_type='retrieval_document',
)
vector = embeddings.embed_query("hello, world!")

# Chat model shared by the tool loader and the agent.
llm = ChatGoogleGenerativeAI(model="gemini-pro")
tools = load_tools(['wikipedia'], llm=llm)

# Zero-shot ReAct agent over the loaded tools, with verbose tracing enabled.
agent_chain = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [71]:
# `Chain.run` is deprecated; `invoke` returns a dict, so select the "output"
# entry to keep displaying just the agent's final answer.
agent_chain.invoke({"input": "how can I good at coding?"})["output"]

Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


[32;1m[1;3mAction: wikipedia
Action Input: How to become a better programmer[0m
Observation: [36;1m[1;3mPage: Tomorrow, and Tomorrow, and Tomorrow
Summary: Tomorrow, and Tomorrow, and Tomorrow is a 2022 novel by Gabrielle Zevin. The novel follows the relationship between two friends who begin a successful video game company together. It is Zevin's fifth novel for adults and tenth novel overall.



Page: Paul Graham (programmer)
Summary: Paul Graham (; born November 13, 1964) is an English-American computer scientist, writer, entrepreneur and investor. His work has included the programming language Arc, the startup Viaweb (later renamed Yahoo! Store), co-founding the startup accelerator and seed capital firm Y Combinator, his essays, and Hacker News. 
He is the author of the computer programming books On Lisp, ANSI Common Lisp, and Hackers & Painters. Technology journalist Steven Levy has described Graham as a "hacker philosopher".
Graham was born in England, where he and his famil

'To become a better coder, you should practice regularly, learn from experienced coders, focus on improving your problem-solving skills, and stay updated with the latest technologies. Additionally, it is important to develop good habits such as writing clean and well-documented code, testing your code thoroughly, and collaborating effectively with others.'

# TEST CHAIN

# TEST LOAD DOCS

In [1]:
from langchain_community.document_loaders import PyPDFLoader

# PDF used for the loading experiments below.
pdf_path = "/home/tin/Downloads/hoa-than.pdf"
loader = PyPDFLoader(file_path=pdf_path, extract_images=True)


In [32]:
# Run the loader and display the documents it returns.
loader.load()



[Document(metadata={'source': '/home/tin/Downloads/hoa-than.pdf', 'page': 0}, page_content=''),
 Document(metadata={'source': '/home/tin/Downloads/hoa-than.pdf', 'page': 1}, page_content='Table\tof\tContents\nThông\ttin\tebook\nCHƯƠNG\tI\nCHƯƠNG\tII\nCHƯƠNG\tIII'),
 Document(metadata={'source': '/home/tin/Downloads/hoa-than.pdf', 'page': 2}, page_content='Thông\ttin\tebook\n\t\nTên\tsách:\t\nHóa\tthân\nTác\tgiả:\tFranz\tKafka\nDịch\tgiả:\t\nĐức\tTài\nNguyên\ttác:\t\nDie\tVerwandlung\nNhà\txuất\tbản:\tVăn\tHọc\nKhổ:\t13x20,5\tcm\nThực\thiện\tebook:\tVăn\thọc\tcổ\tđiển\nNgày\thoàn\tthành:\t0\n7\n/0\n4\n/2013\nEbook\tmiễn\tphí\ttại\t:\t\nwww.Sachvui.Com\n\t'),
 Document(metadata={'source': '/home/tin/Downloads/hoa-than.pdf', 'page': 3}, page_content='CHƯƠNG\tI\n\t\nMột\tsáng\ttỉnh\tgiấc\tbăn\tkhoăn,\tGregor\tSamsa\tnằm\ttrên\tgiường\tthấy\tmình\nbiến\tthành\tmột\tcôn\ttrùng\tkhổng\tlồ.\tLưng\tanh\trắn\tnhư\tthể\tđược\tbọc\tkín\tbằng\ngiáp\tsát,\tanh\tnằm\tngửa\tdợm\tnhấc\tđầu\tlên\tvà\tnh

In [37]:
from langchain_text_splitters.base import TextSplitter

In [3]:

# Load and split the PDF; the result is kept in `pages`.
pages = loader.load_and_split()



In [8]:
# Show the fourth chunk with tab characters replaced by spaces.
page_text = pages[3].page_content
print(page_text.replace('\t', ' '))

phắt lại ngay vi sự tiếp xúc ấy khiến toàn thân anh rùng mình ớn lạnh.
Anh lại tụt về vị trí ban đầu. “Dậy sớm như thế này làm cho người ta đâm
ra đần độn, - anh nghĩ thầm. - Con người cần phải ngủ đẫy giấc chứ. Mấy tay
chào hàng ở hãng khác thì sống như cung tần mĩ nữ. Chẳng hạn mình chạy
suốt sáng, quay lại khách sạn để ghi sổ các đơn đặtấy chúng mới ngồi vào
bàn điểm tâm. Mình mà thử giở cái cung cách như chúng xem? Lão chủ sẽ
tống cổ mình ra khỏi hãng ngay lập tức! Nhưng biết đâu điều đó lại tốt cho
mình thì sao? Nếu không vì bố mẹ mà chịu nhịn nhục thì mình đã bỏ việc
luôn từ lâu rồi: đã đi thẳng đến lão chủ, nói toạc vào mặt cho lão biết mình
nghĩ gì về lão. Thế thì lão ắt ngã lộn đầu từ trên bàn xuống đất! A, còn cái lối
lão ngồi thượng trên bàn giấy mà phán lệnh xuống cho nhân viên cũng thật
là dị hợm quá chừng, nhất là khi các nhân viên phải xán lại thật gần lão để
trả lời bởi vì ông chủ này lãng tai! Hừ, dù sao cũng còn hi vọng; một khi
mình đã dành dụm đủ trả hết món tiền bố

# CREATE PIPELINE

In [21]:
import langchain
import dotenv
import os
from langchain_openai import OpenAIEmbeddings
import getpass
from langchain_elasticsearch import ElasticsearchStore
from uuid import uuid4
from langchain_core.documents import Document
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate

In [10]:
# Ask for the Google key only when the environment does not already provide
# one, so a full re-run of the pipeline does not stop at a prompt unnecessarily.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")

In [11]:
def load_pdf(path: str) -> list[Document]:
    """Load a PDF and split it into chunks.

    Args:
        path: Location of the PDF file.

    Returns:
        The list of documents produced by the loader's `load_and_split()`
        (a list, not a single `Document`).
    """
    loader = PyPDFLoader(
        file_path=path,
        extract_images=True,
    )
    return loader.load_and_split()

def init_es(index_name="hoathan-index", es_url="http://localhost:9200"):
    """Create the Elasticsearch vector store backed by Gemini embeddings.

    Args:
        index_name: Elasticsearch index to use. Defaults to the index this
            notebook was written against.
        es_url: Elasticsearch endpoint. Defaults to a local instance.

    Returns:
        The configured `ElasticsearchStore`.
    """
    # NOTE(review): task_type is fixed to 'retrieval_document' on the client;
    # confirm whether queries sent through this store should instead be
    # embedded with a query task type.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type='retrieval_document',
    )
    return ElasticsearchStore(
        index_name=index_name,
        embedding=embeddings,
        es_url=es_url,
    )


from uuid import NAMESPACE_URL, uuid5

PDF_PATH = '/home/tin/Downloads/hoa-than.pdf'

documents = load_pdf(PDF_PATH)
vector_store = init_es()

# Derive each id from the source path and the chunk position instead of a
# random uuid4, so re-running this cell reuses the same ids rather than adding
# a second copy of every chunk to the index.
# NOTE(review): if the chunking changes and yields fewer chunks, entries with
# higher positions from an earlier run remain in the index.
uuids = [
    str(uuid5(NAMESPACE_URL, f"{PDF_PATH}#chunk-{position}"))
    for position in range(len(documents))
]
vector_store.add_documents(documents=documents, ids=uuids)



  prompt = loads(json.dumps(prompt_object.manifest))


In [22]:
# RAG prompt (Vietnamese). The length instruction asks for AT MOST five
# sentences ("tối đa"); the earlier wording "tối thiểu" (at least) contradicted
# the request on the same line to keep the answer short.
template="""
Bạn là trợ lý cho các nhiệm vụ trả lời câu hỏi.
Sử dụng các phần ngữ cảnh sau đây để trả lời câu hỏi.
Nếu bạn không biết câu trả lời, chỉ cần nói rằng bạn không biết.
Sử dụng tối đa năm câu và giữ cho câu trả lời ngắn gọn.
Câu hỏi: {question}
Bối cảnh: {context}
Câu trả lời:
"""


In [28]:
# Sanity-check retrieval for one question before wiring up the full chain.
query = "Gregor Samsa biến thành một con côn trùng khổng lồ khi nào?"
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.9},
)
context = retriever.invoke(query)

# Text of every retrieved chunk; display the first one.
context1 = [doc.page_content for doc in context]
context1[0]


'CHƯƠNG\tI\n\t\nMột\tsáng\ttỉnh\tgiấc\tbăn\tkhoăn,\tGregor\tSamsa\tnằm\ttrên\tgiường\tthấy\tmình\nbiến\tthành\tmột\tcôn\ttrùng\tkhổng\tlồ.\tLưng\tanh\trắn\tnhư\tthể\tđược\tbọc\tkín\tbằng\ngiáp\tsát,\tanh\tnằm\tngửa\tdợm\tnhấc\tđầu\tlên\tvà\tnhìn\tthấy\tbụng\tmình\tkhum\ntròn,\tnâu\tbóng,\tphân\tchia\tlàm\tnhiều\tđốt\tcong\tcứng\tđờ;\ttấm\tchăn\tbông\tđắp\ntrên\tbụng\tđã\tbị\txô\tlệch,\tgần\ttuột\thẳn.\tChân\tanh\tnhiều\tra,\tmảnh\tkhảnh\tđến\nthảm\thại\tso\tvới\tphần\tcòn\tlại\tcủa\tthân\thình\tto\tđùng,\tvung\tvẩy\tbất\tlực\ttrước\nmắt\tanh.\n“Mình\tlàm\tsao\tthế\tnày?\t-\tanh\tnghĩ\tthầm.\t-\tĐây\tđâu\tphải\tchiêm\tbao.\tVẫn\nlà\tgian\tphòng\tyên\ttĩnh\tcủa\tanh\tgiữa\tbốn\tbức\ttường\tquen\tthuộc;\tmột\tphòng\nngủ\tbình\tthường\ttrong\tthế\tgiới\tloài\tngười,\tchỉ\tcó\tđiều\thơi\tnhỏ\thẹp\tmà\tthôi.\nBên\ttrên\tchiếc\tbàn\tbày\tđầy\tnhững\tmẩu\tvải\tvóc\tquảng\tcáo\tđủ\tloại\t-\tSamsa\nlàm\tnghề\tchào\thàng\t-\tvẫn\tcòn\ttreo\tbức\ttranh\tmà\tgần\tđây\tanh\tđã\tcắt\tra\tt\tmột\ntờ\t

In [33]:
def main(query):
    """Answer a question from the indexed book with a retrieval-augmented chain.

    Args:
        query: The question to answer.

    Returns:
        The model's answer as a string.
    """

    def _join_chunks(docs):
        # Pass plain text to the prompt instead of the repr of a Document
        # list (which would include metadata). The PDF text uses tabs between
        # words, so they are replaced with spaces.
        return "\n\n".join(doc.page_content.replace("\t", " ") for doc in docs)

    llm = ChatGoogleGenerativeAI(model="gemini-pro")
    prompt = ChatPromptTemplate.from_template(template)
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.9},
    )
    # The same input string is used as the retrieval query and as {question}.
    rag_chain = (
        {"context": retriever | _join_chunks, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain.invoke(query)

In [37]:
# Ask the pipeline one question and display the answer.
query="Gregor Samsa thay gi khi đưa mắt nhìn qua vuông cửa sô?"
main(query=query)

'Gregor Samsa nhìn qua vuông cửa sổ và thấy những giọt mưa đập vào máng xối.'