# Graph Operations

In [1]:
import pandas as pd
from langchain_community.graphs.neo4j_graph import Neo4jGraph
from langchain_community.chat_models import ChatOllama
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.documents import Document
from langchain_groq import ChatGroq
from neo4j import GraphDatabase
from neo4j.exceptions import ClientError
import os

In [3]:
df_departments = pd.read_csv('../data/silver/cleaned_departments.csv')
df_jobs = pd.read_csv('../data/silver/cleaned_jobs.csv')
df_hired_employees = pd.read_csv('../data/silver/cleaned_hired_employees.csv')

In [4]:
df_hired_employees.isnull().sum()

employee_id       0
name              0
datetime         14
department_id     0
job_id            0
year              0
quarter          14
dtype: int64

In [5]:
fecha_dummy = pd.to_datetime('2000-01-01')

In [6]:
df_hired_employees_c = df_hired_employees.fillna(fecha_dummy)

In [7]:
df_hired_employees_c.isnull().sum()

employee_id      0
name             0
datetime         0
department_id    0
job_id           0
year             0
quarter          0
dtype: int64

In [8]:
def extract_hired_employee(tx, employee_id, name, datetime, department_id, job_id, year, quarter):
    try:
        query = """
                MERGE (p:Person {id: $employee_id, name: $name, datetime: $datetime, year: $year, quarter: $quarter})
                MERGE (d:Department {id: $department_id})
                MERGE (j:Job {id: $job_id})
                MERGE (p)-[:WORKS_AT]->(d)
                MERGE (p)-[:HOLDS]->(j)
                RETURN p.name AS name
            """
        result = tx.run(query, 
                        employee_id=employee_id, 
                        name=name, 
                        datetime=datetime,
                        department_id=department_id,
                        job_id=job_id,
                        year=year,
                        quarter=quarter)
        return result
    except ClientError as e:
        print(f"Error: {e}")

In [9]:
def extract_department(tx, department_id, department_name):
    try:
        query = """
                MERGE (d:Department {id: $department_id, name: $department_name})
                RETURN d.name AS department_name
            """
        result = tx.run(query, 
                        department_id=department_id, 
                        department_name=department_name)
        return result
    except ClientError as e:
        print(f"Error: {e}")

In [10]:
def extract_job(tx, job_id, job_name):
    try:
        query = """
                MERGE (j:Job {id: $job_id, name: $job_name})
                RETURN j.name AS job_name
            """
        result = tx.run(query, 
                        job_id=job_id, 
                        job_name=job_name)
        return result
    except ClientError as e:
        print(f"Error: {e}")

In [2]:
def graph_session():
    URI = os.environ["NEO4J_URI"]
    AUTH = ("neo4j", os.environ["NEO4J_PASS"])

    with GraphDatabase.driver(URI, auth=AUTH) as driver:    
        return driver
                
driver = graph_session()
driver = graph_session()
session = driver.session(database="neo4j")

  session = driver.session(database="neo4j")


In [3]:
driver = graph_session()
session = driver.session(database="neo4j")


  session = driver.session(database="neo4j")


In [46]:

 def graph_session():
        URI = "neo4j+s://a575d5a9.databases.neo4j.io:7687"
        AUTH = ("neo4j", "yIjVz15ZVDbIFAyjLdA8tjacG-Re7_Cd8G_rY5YesTo")

        with GraphDatabase.driver(URI, auth=AUTH) as driver:    
            return driver


    driver = graph_session()
    session = driver.session(database="neo4j")


    def query_graph(query, parameters=None):    
        with driver.session() as session:
            result = session.run(query, parameters)
            return [record for record in result]

    nodes = []
    edges = []

    relations_and_nodes = """

    MATCH (p:Person)-[h:HOLDS]->(j:Job) RETURN p,h,j

    """
    results = query_graph(relations_and_nodes)

    

    for _, result in enumerate(results):
        id = results[_].data()["p"]['id']
        name = results[_].data()["p"]['name']
        relation =results[_].data()["h"][1]
        target = results[_].data()["j"]["name"]
        nodes.append(Node(id=id, label=name, size=25))
        edges.append(Edge(source=name, label=relation, target=target,))


    config = Config(width=750,
                        height=950,
                        directed=True, 
                        physics=True, 
                        hierarchical=False,
                        # **kwargs
                        ) 

    return_value = agraph(nodes=nodes[:3], 
                            edges=edges[:3], 
                            config=config)



In [47]:
def query_graph(query, parameters=None):    
    with driver.session() as session:
        result = session.run(query, parameters)
        return [record for record in result]


In [48]:
results = query_graph(relations_and_nodes)

  with driver.session() as session:


In [49]:
results

[<Record p=<Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:1513' labels=frozenset({'Person'}) properties={'datetime': '2021-06-23 22:38:42+00:00', 'year': 2021, 'name': 'Nerta Castro', 'id': 1163, 'quarter': '2021Q2'}> h=<Relationship element_id='5:73106e34-1242-420e-abf7-397cef8aa1bd:1152922604118476265' nodes=(<Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:1513' labels=frozenset({'Person'}) properties={'datetime': '2021-06-23 22:38:42+00:00', 'year': 2021, 'name': 'Nerta Castro', 'id': 1163, 'quarter': '2021Q2'}>, <Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:11' labels=frozenset({'Job'}) properties={'name': 'Marketing Assistant', 'id': 1}>) type='HOLDS' properties={}> j=<Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:11' labels=frozenset({'Job'}) properties={'name': 'Marketing Assistant', 'id': 1}>>,
 <Record p=<Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:266' labels=frozenset({'Person'}) properties={'year': 2021, 'name': 'Matthus S

In [13]:
df_departments

Unnamed: 0,department_id,department_name
0,1,Product Management
1,2,Sales
2,3,Research and Development
3,4,Business Development
4,5,Engineering
5,6,Human Resources
6,7,Services
7,8,Support
8,9,Marketing
9,10,Training


In [14]:
df_departments.columns

Index(['department_id', 'department_name'], dtype='object')

In [15]:
df_departments[["department_id","department_name"]].apply(
    lambda
        row: session.execute_write(extract_department,
            row["department_id"],
            row["department_name"],
        ),
    axis=1
)

0     <neo4j._sync.work.result.Result object at 0x00...
1     <neo4j._sync.work.result.Result object at 0x00...
2     <neo4j._sync.work.result.Result object at 0x00...
3     <neo4j._sync.work.result.Result object at 0x00...
4     <neo4j._sync.work.result.Result object at 0x00...
5     <neo4j._sync.work.result.Result object at 0x00...
6     <neo4j._sync.work.result.Result object at 0x00...
7     <neo4j._sync.work.result.Result object at 0x00...
8     <neo4j._sync.work.result.Result object at 0x00...
9     <neo4j._sync.work.result.Result object at 0x00...
10    <neo4j._sync.work.result.Result object at 0x00...
11    <neo4j._sync.work.result.Result object at 0x00...
dtype: object

In [16]:
df_jobs.apply(
    lambda row: session.execute_write(
        extract_job, 
        row["job_id"], 
        row["job_name"]
    ), 
    axis=1
)

0      <neo4j._sync.work.result.Result object at 0x00...
1      <neo4j._sync.work.result.Result object at 0x00...
2      <neo4j._sync.work.result.Result object at 0x00...
3      <neo4j._sync.work.result.Result object at 0x00...
4      <neo4j._sync.work.result.Result object at 0x00...
                             ...                        
178    <neo4j._sync.work.result.Result object at 0x00...
179    <neo4j._sync.work.result.Result object at 0x00...
180    <neo4j._sync.work.result.Result object at 0x00...
181    <neo4j._sync.work.result.Result object at 0x00...
182    <neo4j._sync.work.result.Result object at 0x00...
Length: 183, dtype: object

In [17]:
df_hired_employees_c.apply(
    lambda row: session.execute_write(
        extract_hired_employee, 
        row["employee_id"], 
        row["name"], 
        row["datetime"],
        row["department_id"],
        row["job_id"],
        row["year"],
        row["quarter"]
    ), 
    axis=1
)

0       <neo4j._sync.work.result.Result object at 0x00...
1       <neo4j._sync.work.result.Result object at 0x00...
2       <neo4j._sync.work.result.Result object at 0x00...
3       <neo4j._sync.work.result.Result object at 0x00...
4       <neo4j._sync.work.result.Result object at 0x00...
                              ...                        
1994    <neo4j._sync.work.result.Result object at 0x00...
1995    <neo4j._sync.work.result.Result object at 0x00...
1996    <neo4j._sync.work.result.Result object at 0x00...
1997    <neo4j._sync.work.result.Result object at 0x00...
1998    <neo4j._sync.work.result.Result object at 0x00...
Length: 1999, dtype: object

In [24]:
def query_graph(query, parameters=None):
    with driver.session() as session:
        result = session.run(query, parameters)
        return [record for record in result]

In [25]:
cypher_query = """
MATCH (n)-[r]->(m)
RETURN n, r, m
"""

In [29]:
results = query_graph(query=cypher_query)
#<Record n=<Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:194' labels=frozenset({'Person'}) properties={'year': 2021, 'name': 'Harold Vogt', 'id': 1, 'hire_date': '2021-11-07 02:48:42+00:00', 'quarter': '2021Q4'}> r=<Relationship element_id='5:73106e34-1242-420e-abf7-397cef8aa1bd:1152921504606847170' nodes=(<Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:194' labels=frozenset({'Person'}) properties={'year': 2021, 'name': 'Harold Vogt', 'id': 1, 'hire_date': '2021-11-07 02:48:42+00:00', 'quarter': '2021Q4'}>, <Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:0' labels=frozenset({'Department'}) properties={'name': 'Sales', 'id': 2}>) type='WORKS_AT' properties={}> m=<Node element_id='4:73106e34-1242-420e-abf7-397cef8aa1bd:0' labels=frozenset({'Department'}) properties={'name': 'Sales', 'id': 2}>>,


  with driver.session() as session:


In [30]:
from streamlit_agraph import agraph, Node, Edge, Config

In [31]:
def populate_graph_from_neo4j(results):
    nodes = []
    edges = []
    
    # Procesamos los resultados del query (asumiendo que devuelve nodos y relaciones)
    for record in results:
        # Obtenemos información del nodo
        if 'n' in record:  # 'n' sería el alias en el query de Cypher
            node_id = record['n']['id']
            label = record['n']['label']
            size = record['n'].get('size', 25)  # Default a 25 si no existe
            
            # Creamos el nodo
            nodes.append(Node(id=node_id, label=label, size=size))
        
        # Obtenemos información de la relación si existe
        if 'r' in record:  # 'r' sería el alias de la relación en el query
            source = record['r'].start_node['id']
            target = record['r'].end_node['id']
            label = record['r'].type
            
            # Creamos el edge (arista)
            edges.append(Edge(source=source, target=target, label=label))
    
    
    
    # Configuración de visualización
    config = Config(width=750,
                    height=950,
                    directed=True, 
                    physics=True, 
                    hierarchical=False)
    
    # Retornamos el grafo con nodos y edges
    return agraph(nodes=nodes, edges=edges, config=config)

return_value = populate_graph_from_neo4j(results)