In [1]:
import time
from datetime import datetime
from dotenv import load_dotenv
from neo4j import GraphDatabase
from whyhow import WhyHow
import os

# Load environment variables from .env file
load_dotenv()

# Environment variables list
required_env_vars = {
    'WHYHOW_API_KEY': os.getenv('WHYHOW_API_KEY'),
    'NEO4J_URL': os.getenv('NEO4J_URL'),
    'NEO4J_USER': os.getenv('NEO4J_USER'),
    'NEO4J_PASSWORD': os.getenv('NEO4J_PASSWORD'),
    'OPENAI_API_KEY': os.getenv('OPENAI_API_KEY'),
    'PINECONE_API_KEY': os.getenv('PINECONE_API_KEY')
}

# Check if all necessary environment variables are set
missing_vars = [var for var, value in required_env_vars.items() if not value]
if missing_vars:
    raise Exception(f"Missing environment variables: {', '.join(missing_vars)}")

# If all environment variables are present, continue with initialization
print("All required environment variables are set. Continuing with application initialization...")

All required environment variables are set. Continuing with application initialization...


In [3]:
"""
Utility functions
"""

def current_time():
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

In [4]:
"""
Convert Text files into PDFs START
"""
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def text_to_pdf(text_filename, pdf_filename):
    # Create a canvas to write to PDF
    c = canvas.Canvas(pdf_filename, pagesize=letter)
    width, height = letter  # Get dimensions of the letter size

    # Open text file and read lines
    with open(text_filename, 'r') as file:
        lines = file.readlines()

    # Start writing from the top (1 inch margin)
    y = height - 72
    for line in lines:
        # Draw the line and move to next line position
        c.drawString(72, y, line.strip())
        y -= 15  # Decrease Y coordinate to move to the next line

    c.save()

# Specify directory paths and filenames
base_dir = '../data'
text_files = ['paper1.txt', 'paper2.txt']

# Loop through each text file and convert it to PDF
for text_file in text_files:
    text_path = os.path.join(base_dir, text_file)
    pdf_path = os.path.join(base_dir, text_file.replace('.txt', '.pdf'))
    text_to_pdf(text_path, pdf_path)
    print(f"Converted {text_path} to {pdf_path}")

Converted ../data\paper1.txt to ../data\paper1.pdf
Converted ../data\paper2.txt to ../data\paper2.pdf


In [13]:
def main():
    try:
        print(f"{current_time()} - Starting the WhyHow client initialization...")
        start_time = time.time()
        client = WhyHow(
            api_key = os.environ.get("WHYHOW_API_KEY"),
            openai_api_key="sk-proj-dOdUoVNk3UKCjJSZ5j3PT3BlbkFJRf2VjqW187CaZWq8EHTi",
            pinecone_api_key=os.getenv("PINECONE_API_KEY"),
            neo4j_url=os.getenv("NEO4J_URI"),
            neo4j_user=os.getenv("NEO4J_USERNAME"),
            neo4j_password=os.getenv("NEO4J_PASSWORD"),
        )

        initialization_time = time.time() - start_time
        print(f"{current_time()} - WhyHow client initialized in {initialization_time:.2f} seconds.")

        # Define namespace and specify documents
        namespace = "scientific-research-test2"
        documents = [
            # "../data/paper1.pdf",
            # "../data/paper2.pdf"
            "../data/test.pdf"
        ]

        print(f"{current_time()} - Uploading documents to namespace '{namespace}'...")
        start_time = time.time()
        documents_response = client.graph.add_documents(namespace, documents)
        upload_time = time.time() - start_time
        print(f"{current_time()} - Documents uploaded in {upload_time:.2f} seconds. Response: {documents_response}")

        # Define schema for creating the graph
        questions = ["For each paragraph, what is the stated hypothesis?",
                     "Which ideas and their corresponding hypotheses have the most supporting results?",
                     "What are the results of each section, and do they support or contradict the hypothesis?",
                     "Which sections contain results that contradict their initial hypothesis?",
                     "Are there any results that neither support nor contradict their stated hypotheses but offer new insights?"
                     ]
        print(f"{current_time()} - Creating graph from questions...")
        start_time = time.time()
        
        extracted_graph = client.graph.create_graph(namespace, questions)
        graph_creation_time = time.time() - start_time
        print(f"{current_time()} - Graph created in {graph_creation_time:.2f} seconds. Extracted Graph: {extracted_graph}")

        sleeb_time = 20
        print(f"{current_time()} - Waiting for {sleeb_time} seconds for the graph to be created...")
        time.sleep(sleeb_time)

        # Query the graph
        query_list = ["What",
                      "What is philosophy of language meant for?",
                      "Why is the philosophy of language important?",
                      "What is the philosophy of language?"]
        print(f"{current_time()} - Querying the graph...")
        start_time = time.time()

        # loop through query_list and query the graph
        for query in query_list:
            query_response = client.graph.query_graph(namespace, query)
            time.sleep(5)
            query_time = time.time() - start_time
            print(f"{current_time()} - Graph queried in {query_time:.2f} seconds. Query Response: {query_response}")

    except Exception as e:
        print(f"{current_time()} - Error occurred: {str(e)}")


if __name__ == "__main__":
    main()

2024-05-08 23:07:57 - Starting the WhyHow client initialization...
2024-05-08 23:07:57 - WhyHow client initialized in 0.41 seconds.
2024-05-08 23:07:57 - Uploading documents to namespace 'scientific-research-test2'...
2024-05-08 23:07:59 - Documents uploaded in 1.58 seconds. Response: Your documents are being added in the background.
2024-05-08 23:07:59 - Creating graph from questions...
2024-05-08 23:07:59 - Graph created in 0.66 seconds. Extracted Graph: Your graph creation has started.
2024-05-08 23:07:59 - Waiting for 3 seconds for the graph to be created...
2024-05-08 23:08:19 - Querying the graph...
2024-05-08 23:08:29 - Graph queried in 9.40 seconds. Query Response: answer='No context provided.  Please add more specific information to the graph, or ask a more specific question based on the entities and relations.'
2024-05-08 23:08:38 - Graph queried in 18.39 seconds. Query Response: answer='Philosophy of language is meant for studying the nature of language, its use, and its rel

In [16]:
client = WhyHow(
            api_key = os.environ.get("WHYHOW_API_KEY"),
            openai_api_key="sk-proj-dOdUoVNk3UKCjJSZ5j3PT3BlbkFJRf2VjqW187CaZWq8EHTi",
            pinecone_api_key=os.getenv("PINECONE_API_KEY"),
            neo4j_url=os.getenv("NEO4J_URI"),
            neo4j_user=os.getenv("NEO4J_USERNAME"),
            neo4j_password=os.getenv("NEO4J_PASSWORD"),
        )

namespace = "scientific-research-test2"
# query = "What is De Re and De Dicto Necessity?"
query = "What is a vivid designator in context to the philosophy of language? Please provide supporting quotations."
query_response = client.graph.query_graph(namespace, query)
print(query_response)


answer='A vivid designator, according to the philosophy of language, is a term that evokes a clear mental image or idea. This concept is discussed by philosopher Saul Kripke in his work "Naming and Necessity," where he argues that some names have a direct, vivid connection to their referents.'


In [11]:
client = WhyHow(
            api_key = os.environ.get("WHYHOW_API_KEY"),
            openai_api_key="sk-proj-dOdUoVNk3UKCjJSZ5j3PT3BlbkFJRf2VjqW187CaZWq8EHTi",
            pinecone_api_key=os.getenv("PINECONE_API_KEY"),
            neo4j_url=os.getenv("NEO4J_URI"),
            neo4j_user=os.getenv("NEO4J_USERNAME"),
            neo4j_password=os.getenv("NEO4J_PASSWORD"),
        )

namespace = "scientific-research-test"
query = "What are the results?"
query_response = client.graph.query_graph(namespace, query)
print(query_response)


answer='No context provided.  Please add more specific information to the graph, or ask a more specific question based on the entities and relations.'
