In [1]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv

# load_dotenv() runs before the key lookup below — presumably so that a local
# .env file can supply the 'groq_api' variable (confirm against the project setup).
load_dotenv()

# The Groq key is read from the 'groq_api' environment variable; the lookup
# yields None when that variable is not set.
llm = ChatGroq(
    model_name="Gemma2-9b-It",
    groq_api_key=os.environ.get('groq_api'),
)

llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001E9F2AF6950>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001E9F2BF4B90>, model_name='Gemma2-9b-It', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [2]:
from langchain_community.document_loaders import PyPDFLoader

# Relative path to the source PDF; it resolves against the kernel's working directory.
file_path = "../dataset/final_data/10.1159@000493088.pdf"

# `pages` is the list of Document objects returned by load_and_split();
# it is the input to the graph-extraction step further down.
loader = PyPDFLoader(file_path)
pages = loader.load_and_split()

# Peek at the first Document only.
pages[0]

Document(metadata={'source': '../dataset/final_data/10.1159@000493088.pdf', 'page': 0}, page_content='Case Rep Oncol 2019;12:91 –97 \nDOI: 10.1159/000493088  \nPublished online: January 21, 2019  © 2019 The Author(s)  \nPublished by S. Karger AG, Basel  \nwww.karger.com/cro  \nThis article is licensed under the Creative Commons Attribution -NonCommercial 4.0 \nInternational License (CC BY -NC) (http://www.karger.com/Services/OpenAccessLicense). \nUsage and distribution for commercial purposes requires written permission.  \n \n \n           \n  Nobuhiko Seki, MD, PhD  \nDivision of Medical Oncology, Department of Internal Medicine  \nTeikyo University School of Medicine, 2 –11–1, Kaga  \nTokyo 173 –8606 (Japan)  \nE-Mail nseki@med.teikyo -u.ac.jp  \n \n  \nCase Report  \n \nPromising Combination Therapy \nwith Bevacizumab and Erlotinib in \nan EGFR -Mutated NSCLC Patient \nwith MET Amplification Who \nShowed Intrinsic Resistance to Initial \nEGFR -TKI Therapy  \nNobuhiko  Seki    Maika

In [3]:
from langchain_experimental.graph_transformers import LLMGraphTransformer

# Transformer that uses the chat model to turn documents into graph documents.
llm_transformer = LLMGraphTransformer(llm=llm)

# Convert every loaded PDF chunk; each result carries the nodes extracted from it.
graph_docs = llm_transformer.convert_to_graph_documents(documents=pages)

graph_docs

[GraphDocument(nodes=[Node(id='Nobuhiko Seki', type='Person', properties={}), Node(id='Maika Natsume', type='Person', properties={}), Node(id='Ryosuke Ochiai', type='Person', properties={}), Node(id='Terunobu Haruyama', type='Person', properties={}), Node(id='Masashi Ishihara', type='Person', properties={}), Node(id='Yoko Fukasawa', type='Person', properties={}), Node(id='Takahiko Sakamoto', type='Person', properties={}), Node(id='Shigeru Tanzawa', type='Person', properties={}), Node(id='Ryo Usui', type='Person', properties={}), Node(id='Takeshi Honda', type='Person', properties={}), Node(id='Shuji Ota', type='Person', properties={}), Node(id='Yasuko Ichikawa', type='Person', properties={}), Node(id='Kiyotaka Watanabe', type='Person', properties={}), Node(id='Division Of Medical Oncology', type='Department', properties={}), Node(id='Department Of Internal Medicine', type='Department', properties={}), Node(id='Teikyo University School Of Medicine', type='Institution', properties={}), No

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-embedding model handed to the Neo4j vector store later in the notebook.
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

  from tqdm.autonotebook import tqdm, trange


In [5]:
from langchain.graphs import Neo4jGraph

# No connection arguments are passed here — presumably Neo4jGraph picks up the
# NEO4J_* settings from the environment (TODO confirm).
graph = Neo4jGraph()

# Write the extracted graph documents into the database.
# NOTE(review): include_source=True looks like what links each source chunk to
# its entities (the later cells query `Document` nodes and skip MENTIONS
# relationships) — confirm against the Neo4jGraph documentation.
graph.add_graph_documents(
    graph_documents=graph_docs,
    include_source=True,
    baseEntityLabel=True,
)

In [6]:
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget

# Default query: at most 50 relationships whose type is not MENTIONS,
# returned together with their start and end nodes.
default_cypher = "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t LIMIT 50"


def showGraph(cypher: str = default_cypher):
    """Run a Cypher query against Neo4j and return the result as a graph widget.

    Connection settings are read from the NEO4J_URI, NEO4J_USERNAME and
    NEO4J_PASSWORD environment variables.

    Args:
        cypher: Cypher query to execute. Defaults to ``default_cypher``.

    Returns:
        A ``GraphWidget`` built from the query's graph result, with node
        labels mapped to the ``id`` property.

    Raises:
        KeyError: if any of the three NEO4J_* environment variables is unset.
    """
    # Both the driver and the session are context managers: closing them here
    # releases the connection instead of leaking one on every call.
    with GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
    ) as driver:
        with driver.session() as session:
            # Build the widget while the session is still open so the result
            # is fully consumed before the connection is released.
            widget = GraphWidget(graph=session.run(cypher).graph())
    widget.node_label_mapping = 'id'
    return widget


showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [8]:
from langchain_community.vectorstores import Neo4jVector

# Build a vector store on top of the graph already in Neo4j: nodes labelled
# 'Document' are embedded from their 'text' property and the vectors are kept
# in the 'embedding' property. Search mode is 'hybrid'.
store = Neo4jVector.from_existing_graph(
    embedding=embeddings_model,
    node_label='Document',
    text_node_properties=['text'],
    embedding_node_property='embedding',
    search_type='hybrid',
)


In [9]:
from langchain.chains import RetrievalQA

# Question-answering chain: the Neo4j vector store supplies the retriever and
# the Groq chat model produces the answer ('stuff' chain type).
vector_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=store.as_retriever(),
)

In [11]:
# Ask the chain a single question; the answer text is under the "result" key.
r = vector_qa.invoke({"query": "What happend to the patient after 2 weeks ?"})
print(r["result"])



The patient's symptoms gradually improved and the tumor size decreased.  

