In [None]:
import json
import os
import json
import pandas as pd
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.graphs import OntotextGraphDBGraph
from langchain.chains import OntotextGraphDBQAChain
from typing import Dict, TypedDict
from langgraph.graph import StateGraph


os.environ["GOOGLE_API_KEY"] = "AIzaSyB1j50tKq4yMm4vOQrqWb9yZ1p-G8ZP07Q"

# Initialize LLM
# llm = ChatOpenAI(model_name="gpt-4", temperature=0)
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Connect to Ontotext GraphDB
graph = OntotextGraphDBGraph(
    query_endpoint="http://LP148:7200/repositories/genai_graph_DB",
    query_ontology="CONSTRUCT {?s ?p ?o} WHERE {?s ?p ?o}"
)

# Define prompt template for query generation
template = """Convert this question into an optimized GraphDB query with appropriate filters: {query}"""
prompt = PromptTemplate(input_variables=["query"], template=template)

# Initialize query chain
chain = OntotextGraphDBQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True,
    result_limit=100 
)


def define_workflow(question):
    workflow = StateGraph(GraphState)
    workflow.add_node("process_query", lambda x: {"result": chain.invoke(question)})
    # Define edges
    workflow.set_entry_point("process_query")
    workflow.set_finish_point("process_query")
    
    # Compile the graph
    app = workflow.compile()
    
    return app

def execute_query(question):
    """Executes a query on Ontotext GraphDB and handles large result sets."""
    try:
        workflow_app = define_workflow(question)
        result = workflow_app.invoke({"query": question})
        
        # Extract the query and the result from the response
        query = result.get("query", "")
        query_result = result.get("result", {}).get("result", "")
        
        if len(query_result) > 4000:
            return json.dumps({
                "query": query,
                "truncated_result": query_result[:4000] + "...",
                "note": "Result was truncated due to size. Please refine your query."
            }, indent=2)
        
        # Return the query along with the result
        return json.dumps({
            "query": query,
            "result": query_result
        }, indent=2)
    
    except Exception as e:
        return f"Error processing request: {str(e)}"



# Example Usage
if __name__ == "__main__":
    question = "count of transformers"
    result = execute_query(question)
    print(result)




[1m> Entering new OntotextGraphDBQAChain chain...[0m
Generated SPARQL:
[32;1m[1;3mPREFIX : <http://proton.semanticweb.org/protonsys#>
PREFIX default1: <http://iec.ch/TC57/CIM101#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT (COUNT(?transformer) AS ?count)
WHERE {
  ?transformer rdf:type default1:PowerTransformer .
}[0m

[1m> Finished chain.[0m
{
  "query": "count of transformers",
  "result": "The count of transformers is 1."
}


In [33]:
# !pip install sweetviz

In [None]:
import psycopg2
import pandas as pd
import sweetviz as sv

def get_schema_info(conn):
    """Fetch schema details for all schemas in the database."""
    cur = conn.cursor()
    query = """
    SELECT table_schema, table_name, column_name, data_type
    FROM information_schema.columns;
    """
    cur.execute(query)
    schema_info = cur.fetchall()
    organized_schema = {}
    
    for schema, table, column, dtype in schema_info:
        if schema not in organized_schema:
            organized_schema[schema] = {}
        if table not in organized_schema[schema]:
            organized_schema[schema][table] = {}
        
        organized_schema[schema][table][column] = dtype
    
    cur.close()
    return organized_schema

def generate_sql_query(prompt, schema_info):
    """Ask LLM to generate SQL based on schema and user question."""
    # Create a structured schema string to pass to the LLM
    schema_text = ""
    
    for schema, tables in schema_info.items():
        schema_text += f"Schema: {schema}\n"
        for table, columns in tables.items():
            columns_text = ", ".join([f"{col}: {dtype}" for col, dtype in columns.items()])
            schema_text += f"Table: {table}\nColumns: {{{columns_text}}}\n"
    
    # Construct the prompt for the LLM
    full_prompt = f"Database Schema Information:\n{schema_text}\n\nUser Query: {prompt}\nGenerate an SQL query:"
    
    # Replace with your LLM code (e.g., OpenAI)
    response = "Generated SQL Query: SELECT * FROM customers;"  # Replace with actual LLM API call
    return response  # Here, you would return the LLM's response

def execute_query(conn, query):
    """Run the generated SQL query on PostgreSQL."""
    cur = conn.cursor()
    cur.execute(query)
    results = cur.fetchall()
    cur.close()
    return results

def visualize_data(data):
    """Visualize the data using sweetviz"""
    df = pd.DataFrame(data)
    report = sv.analyze(df)
    report.show_html('sweetviz_report.html')

# Connect to PostgreSQL
conn = psycopg2.connect(host="0.0.0.0", port=5000, dbname="mydatabase", user="myuser", password="mypassword")
# for this you can take credentials from main.py

# Get schema info
schema_info = get_schema_info(conn)

# Generate SQL using LLM
user_prompt = "Get all customers from the public schema."
generated_sql = generate_sql_query(user_prompt, schema_info)

# Execute SQL
query_result = execute_query(conn, generated_sql)

# Visualize data
visualize_data(query_result)

conn.close()
    
# In this script, we first connect to a PostgreSQL database using the  psycopg2  library. We then define a function  get_schema_info  to fetch schema details for all schemas in the database. Next, we define a function  generate_sql_query  to ask the large language model (LLM) to generate SQL based on the schema and user question. 
# The  execute_query  function runs the generated SQL query on PostgreSQL. Finally, we define a function  visualize_data  to visualize the data using the  sweetviz  library. 
# We then connect to the PostgreSQL database, get the schema information, generate SQL using the LLM, execute the SQL query, and visualize the data. 


OperationalError: connection to server at "0.0.0.0", port 5000 failed: Cannot assign requested address (0x00002741/10049)
	Is the server running on that host and accepting TCP/IP connections?
