CONNECT TO GALILEO

In [None]:
import os

from langchain_community.document_loaders import JSONLoader
from langchain_openai import ChatOpenAI

# Gateway credentials are taken from the environment so real keys never have
# to be pasted into (and saved with) the notebook. The variable names
# AWS_KEY / AZURE_KEY simply mirror the Python names used here; when they are
# unset, the values fall back to the blank placeholders.
aws_key = os.environ.get("AWS_KEY", "")
azure_key = os.environ.get("AZURE_KEY", "")

# Chat model reached through the Galileo gateway (EU endpoint), authenticated
# with the Azure key. temperature=0.0 requests the least random output.
llm = ChatOpenAI(
    model="GPT-4o-2024-05-13",
    base_url="https://eu.aigw.galileo.roche.com/v1",
    api_key=azure_key,
    temperature=0.0,
)

TRANSFORM TEXT TO GRAPH

In [None]:
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    RecursiveJsonSplitter,
)
import json
from pathlib import Path

# Gateway credentials come from the environment (AWS_KEY / AZURE_KEY) rather
# than being typed into the notebook; blank placeholders are the fallback.
aws_key = os.environ.get("AWS_KEY", "")
azure_key = os.environ.get("AZURE_KEY", "")

# LLM used for graph extraction: a Mistral 7B instruct model served through
# the Galileo gateway (EU endpoint), with temperature=0.0.
llm_grapher = ChatOpenAI(
    model="mistral.mistral-7b-instruct-v0:2",
    base_url="https://eu.aigw.galileo.roche.com/v1",
    api_key=azure_key,
    temperature=0.0,
)

# Graph transformer with default settings and tool usage disabled.
# NOTE(review): the conversion step further down builds its own
# `transformer`; this instance is not referenced in the visible cells.
graph_transformer = LLMGraphTransformer(llm=llm_grapher, ignore_tool_usage=True)


# Path of the JSON file to ingest (blank placeholder; set before running).
file_path = ""

# Alternative JSONLoader-based ingestion, kept for reference:
# loader = JSONLoader(file_path,
#     jq_schema='.RecordList[].content',
#     text_content=False)
# pages = loader.load_and_split()

# Read the file as UTF-8 text and parse it into plain Python objects.
pages = json.loads(Path(file_path).read_text(encoding="utf8"))

# Split the parsed JSON into chunks (max_chunk_size=300) wrapped as documents.
text_splitter = RecursiveJsonSplitter(max_chunk_size=300)
docs = text_splitter.create_documents(texts=[pages])

# Re-wrap every chunk: newlines are removed from the text and the source
# file path is recorded as metadata.
lc_docs = [
    Document(
        page_content=chunk.page_content.replace("\n", ""),
        metadata={"source": file_path},
    )
    for chunk in docs
]

# Clear the graph database (disabled)
# cypher = """
#   MATCH (n)
#   DETACH DELETE n;
# """
# graph.query(cypher)

# Candidate schema restrictions. They are NOT applied at the moment: the
# matching transformer arguments below are commented out.
allowed_nodes = ["Paper", "Skillset", "Owner"]
allowed_relationships = ["CREATED", "HAS_SKILLSET", "OWNS"]

# Transformer used for the actual conversion: no node or relationship
# properties are requested and tool usage is disabled.
transformer = LLMGraphTransformer(
    llm=llm_grapher,
    # allowed_nodes=allowed_nodes,
    # allowed_relationships=allowed_relationships,
    node_properties=False,
    relationship_properties=False,
    ignore_tool_usage=True,
)

# Turn every prepared document into a graph document via the LLM.
graph_documents = transformer.convert_to_graph_documents(lc_docs)

# Preview the first result. The list can be empty (e.g. when there were no
# input chunks), in which case indexing [0] would raise IndexError.
if graph_documents:
    print(graph_documents[0])
else:
    print("No graph documents were produced; check the input file and chunking.")
# graph.add_graph_documents(graph_documents, include_source=True)

UPLOAD DATA TO NEO4J


In [None]:
from langchain_community.graphs import Neo4jGraph
import os

# Export the Neo4j connection settings as environment variables before the
# client is created. Username and password are blank placeholders.
os.environ.update(
    {
        "NEO4J_URI": "bolt://rkalvinpypoc.kau.roche.com:7687",
        "NEO4J_USERNAME": "",
        "NEO4J_PASSWORD": "",
    }
)

# No arguments are passed here; presumably the wrapper picks the NEO4J_*
# variables set above up from the environment (confirm against its docs).
graph = Neo4jGraph()

# Write the graph documents produced by the transformation step to Neo4j.
graph.add_graph_documents(graph_documents)

ASK

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import GraphCypherQAChain
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Gateway credentials come from the environment (AWS_KEY / AZURE_KEY) rather
# than being typed into the notebook; blank placeholders are the fallback.
aws_key = os.environ.get("AWS_KEY", "")
azure_key = os.environ.get("AZURE_KEY", "")

# LLM used for question answering: Claude 3.7 Sonnet through the Galileo
# gateway (US endpoint), authenticated with the AWS key. Note that this
# rebinds the notebook-level name `llm`.
llm = ChatOpenAI(
    model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    base_url="https://us.aigw.galileo.roche.com/v1",
    api_key=aws_key,
    temperature=0.0,
)


# Question that will be sent to the chain. `queries` holds a two-step variant
# that is not referenced by the visible cells.
# question = "search the persons who have know Bioinformatics and the Papers they Authored and return a comprehensive list with the name of the Author and the title of the paper next to it "
question = "DAN | South East Asian Thoracic Society (SEATS) Annual Meeting has been created by who"
queries = ["give me persons who know Bioinformatics", "now list their Papers"]

# Graph schema as exposed by the Neo4j wrapper.
schema = graph.get_schema

# Prompt that instructs the model to answer with a Cypher statement only.
# It has two placeholders: {schema} and {question}.
template = """
Instructions:
Use only relationship types and properties provided in schema.
Do not use other relationship types or properties that are not provided.
Use only data as your answer.


schema:
{schema}

Note:
Do not include explanations or apologies in your answers.
Do not answer questions that ask anything other than creating Cypher statements.
Do not include any text other than generated Cypher statements.


Question: {question}

Task: Generate a Cypher statement to query the graph database.
"""
question_prompt = PromptTemplate(
    input_variables=["schema", "question"],
    template=template,
)
# Build the Cypher QA chain over `graph`, using `question_prompt` for query
# generation. Intermediate steps are returned so the query context can be
# read back below.
# NOTE(review): `output_parser` and `runnable` are passed through as extra
# keyword arguments; confirm that the chain actually makes use of them.
qa = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    cypher_prompt=question_prompt,
    return_intermediate_steps=True,
    allow_dangerous_requests=True,
    verbose=True,
    output_parser=StrOutputParser(),
    runnable=RunnablePassthrough(),
)

# Run the chain, then pull the "context" entry out of the second
# intermediate step; it is what gets inserted into the answer prompt as the
# query result.
qa_output = qa.invoke(question)
intermediate_steps = qa_output["intermediate_steps"]
res = intermediate_steps[1]["context"]

# Prompt that asks the model to restate the raw query result as a bullet
# list. Its only placeholder is {res}.
answer_template = """
Instructions:
Use only the Query Result as the basis for your answer
Start your answer with the following line: "The answer to your question is:"

Query Result:
{res}

Note:
Do not include explanations or apologies in your answers.

Task: Provide the Query Result as a human readible bulletpoint list.
"""

prompt = PromptTemplate.from_template(template=answer_template)
# Only `res` is supplied because the template contains no {question} slot.
prompt_formatted_str: str = prompt.format(res=res)
result = llm.invoke(prompt_formatted_str)
# pretty_print() writes the message to stdout itself and returns None, so it
# must not be wrapped in print(), which would add a stray "None" line.
result.pretty_print()