### Imports 

In [None]:
from langchain_neo4j import Neo4jVector
from langchain_openai import OpenAIEmbeddings
import os 
from dotenv import load_dotenv
import json 
from CypherGenerator.cypher_generator import CypherGenerator, CypherUtils
from CypherGenerator.models.data_model import DocumentData, ReasoningStep, QA

# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups in later cells (e.g. OPENAI_API_KEY) can resolve.
load_dotenv()

ModuleNotFoundError: No module named 'langchain_community'

### Cypher splitter and executor function

In [None]:
def ingest_to_neo4j(cypher_script):
    """Run every statement of a ';'-separated Cypher script in one session.

    The script is split on ';', each fragment is stripped of surrounding
    whitespace, and empty fragments are skipped. Statements are executed one
    at a time, in order, through ``session.run``.

    Args:
        cypher_script: Text containing one or more Cypher statements
            separated by ';'.

    Note:
        Relies on a module-level ``driver`` object exposing ``session()``;
        it is not defined in this function and must exist before the call.
        The split is purely textual, so a ';' inside a string literal would
        also be treated as a statement boundary.
    """
    with driver.session() as session:
        # Lazily strip each fragment; filter(None, ...) drops the empty ones.
        fragments = (piece.strip() for piece in cypher_script.split(';'))
        for statement in filter(None, fragments):
            session.run(statement)

### Ingest into Neo4j Database

In [None]:
# Load the raw documents; each entry is unpacked into DocumentData below.
with open('data/train.json', encoding='utf-8') as f:
    raw_data = json.load(f)

# Failures are collected here instead of aborting the run. Initialising the
# list up front avoids the `'error_list' not in locals()` check and guarantees
# the final print works even when no document fails.
error_list = []

for doc in raw_data:
    document_data = DocumentData(**doc)

    # Create the generator instance correctly with DocumentData
    cypher_generator = CypherGenerator(document_data)

    # Generate Cypher statements
    try:
        cypher_statements = cypher_generator.generate()
    except Exception as e:
        error_list.append({"doc_id": document_data.id, "error": str(e)})
        # Nothing to ingest for this document; move on to the next one.
        continue

    # Ingest separately so database failures are labelled as such.
    try:
        ingest_to_neo4j(cypher_statements)
    except Exception as neo4j_exc:
        error_list.append({"doc_id": document_data.id, "error": f"Neo4j error: {neo4j_exc}"})

# Output statements: report every failure once, after all documents ran.
print(error_list)
    





### Vectorise relevant nodes

In [None]:
# Create the vectorstore for our existing graph.
# Targets nodes labelled "Cell", using their "value" property as text, the
# "cell_embedding" property for the vector and the "vinx_cellval" index.
# Connection settings are read from the environment (NEO4J_URI,
# NEO4J_USERNAME, NEO4J_PASSWORD) so credentials need not live in the
# notebook; the fallbacks keep the previous local-development values.
cell_graph = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")),
    url=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "pass@123"),
    index_name="vinx_cellval",
    node_label="Cell",
    text_node_properties=["value"],
    embedding_node_property="cell_embedding",
)

In [23]:
# Create the vectorstore for our existing graph.
# Targets nodes labelled "Sentence", using their "text" property as text and
# the "vinx_sentval" index.
# Connection settings are read from the environment (NEO4J_URI,
# NEO4J_USERNAME, NEO4J_PASSWORD) so credentials need not live in the
# notebook; the fallbacks keep the previous local-development values.
# NOTE(review): the embedding property is named "cell_embedding" even though
# these are Sentence nodes; left unchanged because existing data may already
# be stored under that property name - confirm before renaming.
Sent_graph = Neo4jVector.from_existing_graph(
    embedding=OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY")),
    url=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "pass@123"),
    index_name="vinx_sentval",
    node_label="Sentence",
    text_node_properties=["text"],
    embedding_node_property="cell_embedding",
)

In [21]:
from pprint import pprint

# Similarity search against the Cell vector store. The previous name
# `paper_graph` is not defined in this notebook; the store built above for
# Cell nodes is `cell_graph`.
result = cell_graph.similarity_search("repurchase of shares")
pprint(result)

[Document(metadata={'row_id': 'Single_CB/2010/page_114.pdf-4_table_row_5', 'col': 0, 'row_index': 5}, page_content='\nvalue: repurchase of shares'),
 Document(metadata={'row_id': 'Single_SLB/2010/page_58.pdf-2_table_row_4', 'col': 0, 'row_index': 4}, page_content='\nvalue: stock repurchase program'),
 Document(metadata={'row_id': 'Single_SLB/2010/page_58.pdf-2_table_row_10', 'col': 0, 'row_index': 10}, page_content='\nvalue: stock repurchase program'),
 Document(metadata={'row_id': 'Single_SLB/2010/page_58.pdf-2_table_row_15', 'col': 0, 'row_index': 15}, page_content='\nvalue: stock repurchase program')]


In [24]:
# Run the same query text against the Sentence vector store and pretty-print
# the returned documents.
result = Sent_graph.similarity_search(query="repurchase of shares")
pprint(result)

[Document(metadata={'type': 'post'}, page_content='\ntext: share repurchases .'),
 Document(metadata={'type': 'post'}, page_content='\ntext: share repurchases .'),
 Document(metadata={'type': 'post'}, page_content='\ntext: share repurchases .'),
 Document(metadata={'type': 'post'}, page_content='\ntext: share repurchases .')]


In [18]:
    # Build the OpenAI embeddings client, taking the API key from the
    # OPENAI_API_KEY environment variable.
    openai_embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

In [30]:
# Bind the embeddings client to a name instead of constructing and discarding
# it, so this cell does not depend on an earlier cell having been run.
openai_embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Embed a sample query and print the raw vector for inspection.
embedding = openai_embeddings.embed_query("net cash from operating activities 2008,2009 ")
print(embedding)

[-0.028200525790452957, -0.031039323657751083, 0.002073863288387656, -0.014635885134339333, -0.012004639022052288, 0.0036054090596735477, -0.004613048862665892, -0.018237946555018425, -0.018345071002840996, 0.001842876197770238, 0.014461807906627655, 0.04301048815250397, -0.025013571605086327, 0.017059577628970146, 0.005751246586441994, 0.027477433905005455, 0.00464317761361599, -0.021344557404518127, -0.00805442314594984, -0.008161547593772411, -0.03149460256099701, 0.03441374748945236, -0.02135794796049595, -0.009734938852488995, 0.0038430914282798767, 0.008094594813883305, 0.025616146624088287, -0.00909888744354248, 0.01837185211479664, -0.00958764273673296, -0.001918198075145483, 0.0067187147215008736, -0.01018352247774601, 0.0008603436290286481, -0.019041379913687706, 0.022991595789790154, -0.0026295718271285295, 0.014287730678915977, 0.015452709048986435, 0.007840174250304699, 0.029673486948013306, 0.01610884629189968, -0.011582836508750916, 0.018358461558818817, -0.0067957104183

In [None]:

// Score every Cell node by cosine similarity between the $Query parameter
// and the node's cell_embedding property, then return the best match.
// NOTE(review): $Query is presumably the query embedding vector - confirm.
MATCH (cell:Cell)
WITH cell, vector.similarity.cosine($Query, cell.cell_embedding) AS score
RETURN cell AS node, score
ORDER BY score DESC
LIMIT 1;

