In [40]:
import os
import tqdm
# Switch to the project directory. Later cells import project-local modules
# (e.g. `helpers.chat`), which presumably relies on this working directory.
# Set ZAVMO_PROJECT_DIR to run the notebook from a different checkout; the
# original path remains the default.
os.chdir(os.getenv("ZAVMO_PROJECT_DIR", "/Users/adityachhabra/Github/zavmo/zavmo-api/zavmo"))

In [41]:
import pandas as pd
from dotenv import load_dotenv
from typing import List, Dict, Any

# Read settings from a .env file into the process environment.
load_dotenv()

# Allow DataFrames to render up to 500 columns before truncating.
pd.options.display.max_columns = 500

In [42]:
from helpers.chat import get_openai_embedding

### Connect to the graph database (Neo4j)

In [43]:
# Configure neomodel
from neomodel import config, db

# Fail fast when a connection setting is missing: os.getenv() would otherwise
# return None and the URL would silently become "bolt://None:None@None".
_neo4j_env = {
    name: os.getenv(name)
    for name in ("NEO4J_USERNAME", "NEO4J_PASSWORD", "NEO4J_URI")
}
_missing_env = [name for name, value in _neo4j_env.items() if not value]
if _missing_env:
    raise RuntimeError(
        f"Missing Neo4j environment variable(s): {', '.join(_missing_env)}"
    )

# NEO4J_URI is appended after "bolt://user:password@", so it is expected to
# contain only host:port (e.g. localhost:7687), without a scheme.
DATABASE_URL = (
    f'bolt://{_neo4j_env["NEO4J_USERNAME"]}:{_neo4j_env["NEO4J_PASSWORD"]}'
    f'@{_neo4j_env["NEO4J_URI"]}'
)
config.DATABASE_URL = DATABASE_URL

## Retrieval

In [44]:
def retrieve_nos_from_neo4j(query, index_name='nos_vector_index', top_k=5):
    """Retrieve the NOS nodes most similar to a free-text query.

    The query is embedded with ``get_openai_embedding`` and the embedding is
    matched against a Neo4j vector index via ``db.index.vector.queryNodes``.

    Args:
        query: Text to embed and search for.
        index_name: Name of the Neo4j vector index to query.
        top_k: Maximum number of matches to return.

    Returns:
        A list of at most ``top_k`` dicts ordered by descending ``score``,
        each with the keys ``nos_id``, ``title``, ``performance_criteria``,
        ``knowledge_understanding`` and ``score``.
    """
    query_embedding = get_openai_embedding(query)

    # The index name is bound as a query parameter rather than formatted into
    # the Cypher text, so a name containing quotes cannot break or alter the
    # statement.
    cypher_query = """
        CALL db.index.vector.queryNodes($index_name, $top_k, $query_embedding)
            YIELD node, score
            RETURN
                node.nos_id AS nos_id,
                node.title AS title,
                node.performance_criteria AS performance_criteria,
                node.knowledge_understanding AS knowledge_understanding,
                score
            ORDER BY score DESC
        """
    params = {
        "index_name": index_name,
        "top_k": top_k,
        "query_embedding": query_embedding,
    }

    result, columns = db.cypher_query(cypher_query, params)

    # One dict per row, keyed by the RETURN aliases.
    formatted_results = [dict(zip(columns, row)) for row in result]

    # The procedure is already asked for top_k neighbours; the slice is kept
    # as a cheap safeguard on the returned length.
    return formatted_results[:top_k]

In [45]:
# Original role description, kept for reference under its own name. It used to
# be assigned to `query_text` and was immediately overwritten, so it never
# reached the search.
role_description = """The Ethics & Compliance function provides assurance that Centrica operates in a manner consistent with its legal and regulatory obligations. 
The Energy Compliance team is responsible for establishing and maintaining a robust compliance framework for energy and ensuring the governance structure within which the framework sits is effective."""

# Condensed version of the description; this is the text actually searched for.
query_text = "Ethics & Compliance professional in the energy sector, with a focus on establishing and maintaining compliance frameworks, regulatory compliance, and governance structures."

# Embedding of the query, kept for inspection. retrieve_nos_from_neo4j embeds
# the text itself and does not use this variable.
query_embedding = get_openai_embedding(query_text)

In [46]:
# Vector search with the function defaults (index 'nos_vector_index', top_k=5).
nos_results = retrieve_nos_from_neo4j(query_text)

In [47]:
import random

In [48]:
# Pick one retrieved NOS to inspect. A dedicated seeded RNG makes the pick
# (and everything derived from it in later cells) repeatable for a given
# result list; change NOS_SAMPLE_SEED to look at a different one.
assert nos_results, "Vector search returned no NOS results to sample from"
NOS_SAMPLE_SEED = 42
nos_res = random.Random(NOS_SAMPLE_SEED).choice(nos_results)
nos_res

{'nos_id': 'CCSAPLE12',
 'title': 'Ensure arts projects and live events are legally, ethically and socially compliant',
 'performance_criteria': '- Monitor legal, regulatory, ethical, and social impacts on work continually.  \n- Develop and maintain compliance policies and procedures with relevant legislation and ethical standards.  \n- Communicate policies, procedures, and values to stakeholders effectively.  \n- Utilize reliable information to monitor compliance and identify shortcomings.  \n- Provide recommendations for improvements in policies and procedures.  \n- Regularly review and amend policies and procedures as necessary.  \n- Prepare comprehensive reports on non-compliance for stakeholders.',
 'knowledge_understanding': '- Importance of ethical governance and its practical application.  \n- Impact of projects on local community quality of life.  \n- Responsibilities towards staff, customers, investors, and communities.  \n- Relevant legal obligations and ethical standards sp

In [53]:
import json

In [78]:

def retrieve_ofquals_from_neo4j(nos_id: str) -> List[Dict[str, Any]]:
    """Get the OFQUAL units mapped to a NOS.

    Follows ``MAPS_TO`` relationships from the ``NOSNode`` with the given
    ``nos_id`` to ``OFQUALUnit`` nodes.

    Args:
        nos_id: Identifier of the NOS node to start from.

    Returns:
        One dict per mapped unit, keyed by the RETURN aliases of the query.
        ``markscheme`` is decoded from its stored form (a JSON array whose
        items are themselves JSON documents) into a list of decoded items,
        or left as ``None`` when the unit has no markscheme.
    """
    query = """
    MATCH (n:NOSNode {nos_id: $nos_id})-[:MAPS_TO]->(o:OFQUALUnit)
    RETURN o.unit_id AS unit_id, 
           o.unit_uid AS unit_uid,
           o.unit_title AS unit_title, 
           o.overview AS overview, 
           o.qualification_type AS qualification_type, 
           o.qualification_level AS qualification_level, 
           o.awarding_organisation AS awarding_organisation, 
           o.total_credits AS total_credits, 
           o.guided_learning_hours AS guided_learning_hours, 
           o.total_qualification_time AS total_qualification_time, 
           o.unit_learning_outcomes AS learning_outcomes, 
           o.assessment_methods AS assessment_methods,
           o.markscheme AS markscheme
    """
    # Columns stored as doubly JSON-encoded text.
    json_columns = ['markscheme']

    results, meta = db.cypher_query(query, {'nos_id': nos_id})

    ofqual_units = []
    for row in results:
        # `meta` holds the column names in the same order as the row values.
        unit = dict(zip(meta, row))
        for col_name in json_columns:
            raw = unit.get(col_name)
            if raw is None:
                # Property missing on this node: keep None rather than
                # failing inside json.loads.
                continue
            unit[col_name] = [json.loads(item) for item in json.loads(raw)]
        ofqual_units.append(unit)

    return ofqual_units

In [79]:
# Fetch the OFQUAL units linked via MAPS_TO to the sampled NOS.
ofqual_results = retrieve_ofquals_from_neo4j(nos_res['nos_id'])

In [81]:
# The sampled NOS may have no mapped OFQUAL unit; fail with a clear message
# instead of a bare IndexError.
assert ofqual_results, f"No OFQUAL units mapped to NOS {nos_res['nos_id']!r}"
markscheme = ofqual_results[0]['markscheme']

In [None]:
# Count the nodes that have a non-null `embedding` property.
# NOTE(review): this matches the label `NOS`, whereas retrieve_ofquals_from_neo4j
# matches `NOSNode` — confirm which label actually carries the embeddings.
db.cypher_query("MATCH (n:NOS) WHERE n.embedding IS NOT NULL RETURN COUNT(n) AS indexed_documents;")