In [1]:
from langchain_community.vectorstores import Neo4jVector
from langchain_community.embeddings import HuggingFaceEmbeddings

In [2]:
biobert = HuggingFaceEmbeddings(model_name="dmis-lab/biobert-base-cased-v1.1")

  from .autonotebook import tqdm as notebook_tqdm
No sentence-transformers model found with name dmis-lab/biobert-base-cased-v1.1. Creating a new one with MEAN pooling.


In [3]:
# username = "tester"
# password  = "tester"
username = "tester"
password = "password"
url = "bolt://localhost:7687"
database="ctgov"

In [4]:
from neo4j import GraphDatabase
driver = GraphDatabase.driver(url, auth=(username, password), encrypted=False)
driver.verify_connectivity()

In [71]:
adverse_event = Neo4jVector.from_existing_graph(
    embedding=biobert,
    node_label= "AdverseEvent", 
    embedding_node_property="biobert_emb", 
    text_node_properties=["term","organ_system"],
    url=url,
    index_name="adverse_event",
    keyword_index_name= "adverse_event_kw",
    search_type="vector",
    username=username, 
    password=password, 
    database=database,
    )

In [79]:
# Replace retrieval_query so it includes ID
#adverse_event.retrieval_query = adverse_event.retrieval_query.replace("id: Null", "`trial2vec_emb` : Null")
adverse_event.retrieval_query = adverse_event.retrieval_query.replace("id: Null", "").replace(",,",",").replace(", ,",",")

In [80]:
adverse_event.retrieval_query

"RETURN reduce(str='', k IN ['term', 'organ_system'] | str + '\\n' + k + ': ' + coalesce(node[k], '')) AS text, node {.*, `biobert_emb`: Null, `term`: Null, `organ_system`: Null} AS metadata, score"

In [81]:
test = adverse_event.similarity_search_with_score("Anaemia", k=3)
print(test)

[(Document(page_content='\nterm: Anaemia\norgan_system: Blood and lymphatic system disorders', metadata={'id': 'NCT02441218_AE_1', 'serious_event': False, 'stats': 'groupId: EG000 numAffected: 6 numAtRisk: 3232 numEvents: 6 |groupId: EG001 numAffected: 8 numAtRisk: 3260 numEvents: 8 ', 'source_vocabulary': 'MedDRA 7.0', 'trial2vec_emb': [0.0698281974, -0.0756936893, -0.0134816468, 0.054254964, -0.2799727619, -0.2646048665, 0.1587825418, -0.1641837955, 0.3493466079, 0.1573281884, -0.2796331048, 0.157405749, -0.2436094284, -0.0498914272, -0.1190751046, 0.0810322389, -0.4767526388, 0.160757184, 0.1610260606, -0.0559347048, -0.1453883648, -0.1346461773, 0.2558683157, -0.0262321085, -0.25312078, -0.3629468679, -0.0332389772, -0.4343235195, 0.0394110829, 0.0024562553, -0.1401403844, 0.2678958774, -0.2164738774, 0.2199445367, -0.3520880938, 0.1596906185, 0.0774481893, -0.1150921136, 0.3182322085, -0.0918498337, -0.016867511, -0.5537743568, -0.0142824352, 0.3192598224, 0.1418001205, 0.12551304

In [63]:
test[0][0].page_content.lstrip("\n").replace("\n", " | ")

'term: Anaemia | organ_system: Blood and lymphatic system disorders'

In [82]:
test[0][0].metadata["id"]

'NCT02441218_AE_1'

In [8]:
condition = Neo4jVector.from_existing_graph(
    embedding=biobert,
    node_label= "Condition", 
    embedding_node_property="biobert_emb", 
    text_node_properties=["id",],
    url=url,
    index_name="condition",
    keyword_index_name="condition_kw",
    search_type="hybrid",
    username=username, 
    password=password, 
    database=database,
    )

In [9]:
condition.similarity_search_with_score("Cancer", k=3)

[(Document(page_content='\nid: Inflammation', metadata={'trial2vec_emb': [0.1132248119, -0.1819897443, 0.2681306601, 0.0780908614, -0.1140159145, -0.2055858076, -0.0752699822, -0.0816997588, 0.2548806369, -0.0966108888, -0.4184099436, 0.5372205377, -0.0760122389, 0.2467414737, -0.2250434607, -0.1274100095, -0.5770201087, -0.2224581838, -0.1091044843, 0.1551926136, 0.018511489, -0.0198757723, 0.1780452728, 0.117195867, -0.0592266023, -0.0068507865, -0.0499388874, -0.2310013622, -0.2320647836, 0.0441021696, 0.2948380709, 0.1325530112, 0.1720799208, 0.0444509089, -0.1038254499, 0.0555843823, 0.3767074943, 0.2755174637, -0.2422550023, -0.2165592462, -0.3289942145, -0.4210926294, -0.0573851764, 0.1759999543, 0.1012713313, 0.2746407986, 0.2750133872, -0.2590749264, 0.1876943558, -0.16891101, -0.0492266268, 0.2175938785, -0.038926214, 0.1931704432, 0.3016637564, -0.0184962675, -0.2295925021, 0.0604169145, -0.1203362271, -0.4071564674, -0.0210923553, 0.3209297359, 0.0803034902, 0.0552078784, -

In [None]:
intervention = Neo4jVector.from_existing_graph(
    embedding=biobert,
    node_label= "Intervention", 
    embedding_node_property="biobert_emb", 
    text_node_properties=["id", "type"],
    url=url,
    index_name="intervention",
    keyword_index_name="intervention_kw",
    username=username, 
    password=password, 
    database=database,
    search_type="hybrid")

In [None]:
intervention.similarity_search_with_score("electrocardiogram", k=3)