In [135]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv

load_dotenv()

llm = ChatGroq(groq_api_key = os.getenv('groq_api'), model_name="Gemma2-9b-It")

llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001FBC4D8E990>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001FBC4D98810>, model_name='Gemma2-9b-It', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [29]:
from langchain_community.document_loaders import PyPDFLoader

file_path = (
    "../dataset/final_data/10.1159@000493088.pdf"
)

loader = PyPDFLoader(file_path)
pages = loader.load_and_split()

pages[0]

Document(metadata={'source': '../dataset/final_data/10.1159@000493088.pdf', 'page': 0}, page_content='Case Rep Oncol 2019;12:91 –97 \nDOI: 10.1159/000493088  \nPublished online: January 21, 2019  © 2019 The Author(s)  \nPublished by S. Karger AG, Basel  \nwww.karger.com/cro  \nThis article is licensed under the Creative Commons Attribution -NonCommercial 4.0 \nInternational License (CC BY -NC) (http://www.karger.com/Services/OpenAccessLicense). \nUsage and distribution for commercial purposes requires written permission.  \n \n \n           \n  Nobuhiko Seki, MD, PhD  \nDivision of Medical Oncology, Department of Internal Medicine  \nTeikyo University School of Medicine, 2 –11–1, Kaga  \nTokyo 173 –8606 (Japan)  \nE-Mail nseki@med.teikyo -u.ac.jp  \n \n  \nCase Report  \n \nPromising Combination Therapy \nwith Bevacizumab and Erlotinib in \nan EGFR -Mutated NSCLC Patient \nwith MET Amplification Who \nShowed Intrinsic Resistance to Initial \nEGFR -TKI Therapy  \nNobuhiko  Seki    Maika

In [52]:
from langchain_experimental.graph_transformers import LLMGraphTransformer

llm_transformer = LLMGraphTransformer(llm=llm)

graph_docs = llm_transformer.convert_to_graph_documents(pages)

graph_docs

Failed to write data to connection ResolvedIPv4Address(('34.126.114.186', 7687)) (ResolvedIPv4Address(('34.126.114.186', 7687)))
Failed to write data to connection IPv4Address(('c43e8dee.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.126.114.186', 7687)))


[GraphDocument(nodes=[Node(id='Nobuhiko Seki', type='Person', properties={}), Node(id='Maika Natsume', type='Person', properties={}), Node(id='Ryosuke Ochiai', type='Person', properties={}), Node(id='Terunobu Haruyama', type='Person', properties={}), Node(id='Masashi Ishihara', type='Person', properties={}), Node(id='Yoko Fukasawa', type='Person', properties={}), Node(id='Takahiko Sakamoto', type='Person', properties={}), Node(id='Shigeru Tanzawa', type='Person', properties={}), Node(id='Ryo Usui', type='Person', properties={}), Node(id='Takeshi Honda', type='Person', properties={}), Node(id='Shuji Ota', type='Person', properties={}), Node(id='Yasuko Ichikawa', type='Person', properties={}), Node(id='Kiyotaka Watanabe', type='Person', properties={}), Node(id='Division Of Medical Oncology, Department Of Internal Medicine, Teikyo University School Of Medicine', type='Organization', properties={}), Node(id='Egfr', type='Gene', properties={}), Node(id='Met', type='Gene', properties={}), No

In [43]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")



In [136]:
from langchain.graphs import Neo4jGraph

graph = Neo4jGraph()

graph.add_graph_documents(
    graph_docs,
    baseEntityLabel=True,
    include_source=True
)

In [137]:
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget

default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"
def showGraph(cypher: str = default_cypher):
    driver = GraphDatabase.driver(
        uri = os.environ["NEO4J_URI"],
        auth = (os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"])
    )
    session = driver.session()
    widget = GraphWidget(graph = session.run(cypher).graph())
    widget.node_label_mapping = 'id'
    return widget

showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [138]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain.chains import GraphCypherQAChain

CYPHER_GENERATION_TEMPLATE = """Task: Generate Cypher statement to query a graph database.
...
"""
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers.
...
"""
CYPHER_QA_PROMPT = PromptTemplate(
    input_variables=["context", "question"], template=CYPHER_QA_TEMPLATE
)

graph_chain = GraphCypherQAChain.from_llm(
    llm,
    graph = graph,
    cypher_prompt = CYPHER_GENERATION_PROMPT,
    qa_prompt=CYPHER_QA_PROMPT,
    verbose=True,
    allow_dangerous_requests = True
)

In [141]:
store = Neo4jVector.from_existing_graph(
    embeddings_model,
    search_type = 'hybrid',
    node_label = 'Document',
    text_node_properties=['text'],
    embedding_node_property='embedding'
)


In [142]:
def retriever(question: str):
    print(f"Search query: {question}")
    structured_data = graph_chain.run(question)
    unstructured_data = [el.page_content for el in store.similarity_search(question)]
    final_data = f"""Structured data:
{structured_data}
Unstructured data:
{"#Document ". join(unstructured_data)}
    """
    print(f"Final context: {final_data}")
    return final_data

In [143]:
from langchain_core.runnables import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate
from langchain.output_parsers import StructuredOutputParser
from langchain.retrievers.multi_vector import MultiVectorRetriever


template = """Answer the question based only on the following context:

{context}

Question: {query}
"""

prompt = ChatPromptTemplate.from_template(template)

retrieval_chain = (
    retriever 
    | RunnablePassthrough() 
    | prompt 
    | StructuredOutputParser 
)

In [146]:
retrieval_chain.invoke("What affects John Doe ?")

Error in StdOutCallbackHandler.on_chain_start callback: AttributeError("'NoneType' object has no attribute 'get'")


Search query: What affects John Doe ?
Generated Cypher:
[32;1m[1;3mcypher
MATCH (p:Person {name: "Tom Hanks"})-[:ACTED_IN]->(m:Movie)
RETURN m.title
[0m




Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m




Final context: Structured data:
I understand! I'm here to help you craft clear, concise, and friendly responses. 

What can I help you with today? 😊  Do you have a question you need answered in a nice way, or a piece of text you want me to make more human-friendly? 

Unstructured data:

text: treatment due to his poor PS. Furthermore, although crizotinib was known as potential MET 
inhibitor as well as anaplastic lymphoma kinase (ALK) inhibitor, combination therapy with 
crizotinib and EGFR -TKI was considered to lack the evidences about safety. Therefore,#Document 
text: Case Rep Oncol 2019;12:91 –97 
DOI: 10.1159/000493088  © 2019 The Author(s). Published by S. Karger AG, Basel  
www.karger.com/cro  
Seki et al.: Promising Combination Therapy with Bevacizumab and Erlotinib in an EGFR -
Mutated NSCLC Patient with MET Amplification Who Showed Intrinsic Resistance to 
Initial EGFR -TKI Therapy  
 
 
 
 
93 
combination therapy with bevacizumab and erlotinib was selected on the basis of 

TypeError: Expected mapping type as input to ChatPromptTemplate. Received <class 'str'>.