In [1255]:
import os
from datetime import datetime, timedelta
from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
from langchain_openai import ChatOpenAI
from neo4j import GraphDatabase
from yfiles_jupyter_graphs import GraphWidget
     

load_dotenv()

True

In [1256]:
url=os.getenv("NEO4J_URI")
username=os.getenv("NEO4J_USERNAME")
password=os.getenv("NEO4J_PASSWORD")

In [1257]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API")

In [1258]:
llm = ChatOpenAI(model="gpt-3.5-turbo")

In [1259]:
# llm = OllamaLLM(model="llama3.2")
# response = llm.invoke("Who is latest president of united states you know...")
# print(response)

In [1260]:
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password,
)

In [1261]:
driver = GraphDatabase.driver(uri=url, auth=(username, password))

In [1262]:
default_cypher = "MATCH (s)-[r]->(t) RETURN s, r, t LIMIT 50"

In [1263]:
def show_graph(cypher: str = default_cypher):
    driver = GraphDatabase.driver(
        uri = url,
        auth = (username,
                password)
    )
    session = driver.session()
    widget = GraphWidget(graph = session.run(cypher).graph())
    widget.node_label_mapping = 'name'
    display(widget)
    return widget

In [1264]:
def extract_entities(query):
    entities = {}

    # Extract user name (assuming it's provided beforehand)
    entities["user_name"] = "Adrian Putra Pratama Badjideh"  # Default or derived user name

    # Extract specific date (e.g., "October 15, 2024")
    date_match = re.search(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{1,2}, \d{4}\b', query)
    if date_match:
        date_str = date_match.group(0)
        try:
            specific_date = datetime.strptime(date_str, '%B %d, %Y')
            entities['date'] = specific_date.strftime('%Y-%m-%d')
        except ValueError:
            pass

    # Extract month (e.g., "June 2024")
    month_match = re.search(r'\b(January|February|March|April|May|June|July|August|September|October|November|December) \d{4}\b', query)
    if month_match:
        month_str = month_match.group(0)
        try:
            specific_month = datetime.strptime(month_str, '%B %Y')
            entities['month'] = specific_month.strftime('%Y-%m')
        except ValueError:
            pass

    # Extract activity type
    if "steps" in query or "physical activity" in query:
        entities['type'] = "physical_activity"
    elif "heart rate" in query or "blood pressure" in query:
        entities['type'] = "physiological_parameter"
    elif "sleep" in query:
        entities['type'] = "sleep_duration"
    elif "food" in query or "calories" in query:
        entities['type'] = "food"

    return entities



In [1265]:
def retrieve_context(entities):
    try:
        context = []
        user_name = entities.get('user_name')
        if not user_name:
            raise ValueError("User name is required to retrieve context.")

        with driver.session() as session:
            if entities.get('type') == "physical_activity":
                print("Running query for User-Activity relationships...")
                time_condition = ""
                if 'date' in entities:
                    specific_date = datetime.strptime(entities['date'], '%Y-%m-%d')
                    epoch_time = int(specific_date.timestamp())
                    time_condition = f" AND a.timestamp >= {epoch_time} AND a.timestamp < {epoch_time + 86400}"
                elif 'month' in entities:
                    specific_month = datetime.strptime(entities['month'], '%Y-%m')
                    start_epoch = int(specific_month.timestamp())
                    next_month = specific_month.replace(day=28) + timedelta(days=4)  
                    next_month = next_month.replace(day=1)  
                    end_epoch = int(next_month.timestamp())
                    time_condition = f" AND a.timestamp >= {start_epoch} AND a.timestamp < {end_epoch}"

                activity_query = f"""
                MATCH (u:User)-[:HAS_DONE]->(a:PhysicalActivity)
                WHERE u.name = '{user_name}' {time_condition}
                RETURN u, a
                """
                print(f"Generated query: {activity_query}")

                result = session.run(activity_query)

                print("Raw data from User-Activity query:")
                found_data = False
                for record in result:
                    # print(record) 
                    found_data = True
                    timestamp_date = datetime.utcfromtimestamp(record['a']['timestamp']).strftime('%Y-%m-%d')
                    context.append(f"User {record['u']['name']} did {record['a']['daily_steps']} steps on {timestamp_date}.")

                if not found_data:
                    print("No data found for the query.")

            elif entities.get('type') == "physiological_parameter":
                print("Running query for User-PhysiologicalParameter relationships...")
                time_condition = ""
                if 'date' in entities:
                    specific_date = datetime.strptime(entities['date'], '%Y-%m-%d')
                    epoch_time = int(specific_date.timestamp())
                    time_condition = f" AND p.timestamp >= {epoch_time} AND p.timestamp < {epoch_time + 86400}"
                elif 'month' in entities:
                    specific_month = datetime.strptime(entities['month'], '%Y-%m')
                    start_epoch = int(specific_month.timestamp())
                    next_month = specific_month.replace(day=28) + timedelta(days=4)
                    next_month = next_month.replace(day=1)
                    end_epoch = int(next_month.timestamp())
                    time_condition = f" AND p.timestamp >= {start_epoch} AND p.timestamp < {end_epoch}"

                parameter_query = f"""
                MATCH (u:User)-[:HAS_MONITORED]->(p:PhysiologicalParameter)
                WHERE u.name = '{user_name}' {time_condition}
                RETURN u, p
                """
                print(f"Generated query: {parameter_query}")

                result = session.run(parameter_query)

                print("Raw data from User-PhysiologicalParameter query:")
                found_data = False
                for record in result:
                    # print(record)  
                    found_data = True
                    timestamp_date = datetime.utcfromtimestamp(record['p']['timestamp']).strftime('%Y-%m-%d')
                    context.append(f"User {record['u']['name']} had a heart rate of {record['p']['heart_rate']} bpm on {timestamp_date}.")

                if not found_data:
                    print("No data found for the query.")

            elif entities.get('type') == "sleep_duration":
                print("Running query for User-Sleep relationships...")
                time_condition = ""
                if 'date' in entities:
                    specific_date = datetime.strptime(entities['date'], '%Y-%m-%d')
                    epoch_time = int(specific_date.timestamp())
                    time_condition = f" AND s.timestamp >= {epoch_time} AND s.timestamp < {epoch_time + 86400}"
                elif 'month' in entities:
                    specific_month = datetime.strptime(entities['month'], '%Y-%m')
                    start_epoch = int(specific_month.timestamp())
                    next_month = specific_month.replace(day=28) + timedelta(days=4)
                    next_month = next_month.replace(day=1)
                    end_epoch = int(next_month.timestamp())
                    time_condition = f" AND s.timestamp >= {start_epoch} AND s.timestamp < {end_epoch}"

                sleep_query = f"""
                MATCH (u:User)-[:HAS_SLEEP]->(s:SleepDuration)
                WHERE u.name = '{user_name}' {time_condition}
                RETURN u, s
                """
                print(f"Generated query: {sleep_query}")

                result = session.run(sleep_query)

                print("Raw data from User-Sleep query:")
                found_data = False
                for record in result:
                    # print(record)  
                    found_data = True
                    timestamp_date = datetime.utcfromtimestamp(record['s']['timestamp']).strftime('%Y-%m-%d')
                    context.append(f"User {record['u']['name']} slept for {record['s']['hours']} hours and {record['s']['minutes']} minutes on {timestamp_date}.")

                if not found_data:
                    print("No data found for the query.")

            elif entities.get('type') == "food":
                print("Running query for User-Food relationships...")
                time_condition = ""
                if 'date' in entities:
                    specific_date = datetime.strptime(entities['date'], '%Y-%m-%d')
                    epoch_time = int(specific_date.timestamp())
                    time_condition = f" AND f.timestamp >= {epoch_time} AND f.timestamp < {epoch_time + 86400}"
                elif 'month' in entities:
                    specific_month = datetime.strptime(entities['month'], '%Y-%m')
                    start_epoch = int(specific_month.timestamp())
                    next_month = specific_month.replace(day=28) + timedelta(days=4)
                    next_month = next_month.replace(day=1)
                    end_epoch = int(next_month.timestamp())
                    time_condition = f" AND f.timestamp >= {start_epoch} AND f.timestamp < {end_epoch}"

                food_query = f"""
                MATCH (u:User)-[:HAS_CONSUMED]->(f:Food)
                WHERE u.name = '{user_name}' {time_condition}
                RETURN u, f
                """
                print(f"Generated query: {food_query}")

                result = session.run(food_query)

                print("Raw data from User-Food query:")
                found_data = False
                for record in result:
                    # print(record)  
                    found_data = True
                    timestamp_date = datetime.utcfromtimestamp(record['f']['timestamp']).strftime('%Y-%m-%d')
                    context.append(f"User {record['u']['name']} consumed {record['f']['name']} with {record['f']['calories']} calories on {timestamp_date}.")

                if not found_data:
                    print("No data found for the query.")

        return " ".join(context)

    except Exception as e:
        print(f"Error retrieving user context: {e}")
        return "An error occurred while retrieving user context."


In [1266]:
# Initialize an empty list to store conversation history
conversation_history = []

def generate_response(query):
    # Extract entities to determine which relationships to query
    entities = extract_entities(query)

    # Retrieve context from the knowledge graph
    context = retrieve_context(entities)

    if not context or context == "An error occurred while retrieving user context.":
        context = "No specific data was found in the knowledge graph."

    # Few-shot examples to guide the LLM
    few_shot_examples = """
    Example 1:
    Data Available:
    User Adrian consumed 200 calories from Rice on April 1, 2024.
    User Adrian walked 5000 steps on April 1, 2024.
    Question: How many calories did Adrian consume on April 1, 2024?
    Answer: On April 1, 2024, Adrian consumed 200 calories.
    
    Example 2:
    Data Available:
    User Alex did 7000 steps on April 3, 2024.
    User Alex consumed 150 calories from Pasta on April 3, 2024.
    Question: How many steps did Alex take on April 3, 2024?
    Answer: On April 3, 2024, Alex did 7000 steps.
    """

    # Construct the conversation history prompt
    history_prompt = "\n".join([f"User: {q}\nAssistant: {r}" for q, r in conversation_history])

    # Combine few-shot examples with conversation history and the current context and user query
    prompt = f"""
    You are an assistant with access to user data. Use the following examples as a guide.

    {few_shot_examples}

    Conversation History:
    {history_prompt}

    Data Available:
    {context}

    User's Current Question: '{query}'
    """

    # Generate the response using the LLM
    response = llm(prompt)

    # Append the current query and response to conversation history
    conversation_history.append((query, response.content))

    return response.content


In [1267]:
if __name__ == "__main__":
    print("Welcome to the Health Assistant Chatbot!")
    print("Ask me questions about your health data (e.g., calories, steps, sleep).")
    print("Type 'exit' to end the conversation.")

    while True:
        user_input = input("You: ")

        if user_input.lower() == 'exit':
            print("Goodbye!")
            break

        # Generate and print the assistant's response
        response = generate_response(user_input)
        print("Assistant:", response)

Welcome to the Health Assistant Chatbot!
Ask me questions about your health data (e.g., calories, steps, sleep).
Type 'exit' to end the conversation.


In [None]:
if __name__ == "__main__":
    query = "What did I eat on April 13, 2024, and how many calories were consumed?"
    response = generate_response(query)
    
    print("Raw Response:", response)
    print("Response Type:", type(response))

    if hasattr(response, 'content'):
        content = response.content
        print("Query Result Content:", content)
    elif isinstance(response, dict):
        content = response.get('content', None)
        if content:
            print("Query Result Content:", content)
        else:
            print("No content found in response.")
    elif isinstance(response, str):
        print("Query Result Content:", response)
    else:
        print("Response is not a known type for content extraction.")

Running query for User-Food relationships...
Generated query: 
                MATCH (u:User)-[:HAS_CONSUMED]->(f:Food)
                WHERE u.name = 'Adrian Putra Pratama Badjideh'  AND f.timestamp >= 1712941200 AND f.timestamp < 1713027600
                RETURN u, f
                
Raw data from User-Food query:


  timestamp_date = datetime.utcfromtimestamp(record['f']['timestamp']).strftime('%Y-%m-%d')


Raw Response: content='Answer: On April 13, 2024, Adrian Putra Pratama Badjideh consumed the following:\n- Rice with 200.0 calories\n- Fried Tempeh with 226.0 calories\n- Stir-fried Bean Sprouts with 150.0 calories\n- Fried Chicken Wings with 84.0 calories\n- Meatball Soup with 97.0 calories\n- Fried Vegetable Fritters with 46.0 calories\n- Canned Sardines with 120.0 calories\n- Bread with 240.0 calories\n- Chocolate Jam with 80.0 calories\n\nIn total, Adrian consumed 1143.0 calories on April 13, 2024.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 147, 'prompt_tokens': 606, 'total_tokens': 753, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-ea3b4637-160d-4afd-831f-8d3c48

In [None]:
if __name__ == "__main__":
    query = "How many steps i take for my activity on April 13, 2024?"
    response = generate_response(query)
    
    # Print the raw response to understand its type and structure
    print("Raw Response:", response)
    print("Response Type:", type(response))

    # Extract the content from the response
    if hasattr(response, 'content'):
        content = response.content
        print("Query Result Content:", content)
    elif isinstance(response, dict):
        content = response.get('content', None)
        if content:
            print("Query Result Content:", content)
        else:
            print("No content found in response.")
    elif isinstance(response, str):
        # If the response is a string, just print it directly
        print("Query Result Content:", response)
    else:
        print("Response is not a known type for content extraction.")

Running query for User-Activity relationships...
Generated query: 
                MATCH (u:User)-[:HAS_DONE]->(a:PhysicalActivity)
                WHERE u.name = 'Adrian Putra Pratama Badjideh'  AND a.timestamp >= 1712941200 AND a.timestamp < 1713027600
                RETURN u, a
                
Raw data from User-Activity query:


  timestamp_date = datetime.utcfromtimestamp(record['a']['timestamp']).strftime('%Y-%m-%d')


Raw Response: content='Answer: On April 13, 2024, Adrian Putra Pratama Badjideh did 4776 steps for their activity.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 327, 'total_tokens': 357, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-4e0dd9c1-1175-4297-881b-addf1f141971-0' usage_metadata={'input_tokens': 327, 'output_tokens': 30, 'total_tokens': 357, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}
Response Type: <class 'langchain_core.messages.ai.AIMessage'>
Query Result Content: Answer: On April 13, 2024, Adrian Putra Pratama Badjideh did 4776 steps for their activity.
