In [2]:
import json
from pathlib import Path

import nest_asyncio

# Patch asyncio right away so nested event loops work inside the notebook kernel.
nest_asyncio.apply()

import pandas as pd

from llama_index.llms.openai import OpenAI
from llama_index.agent.openai import OpenAIAgent
from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.tools import QueryEngineTool, ToolMetadata, FunctionTool
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.node_parser import JSONNodeParser
from llama_index.readers.file import UnstructuredReader
from llama_index.readers.json import JSONReader

# LLM

In [3]:
# Chat model handed to the agent further down; the model name is kept in its
# own variable so it is easy to spot and change.
llm_model_name = "gpt-3.5-turbo"
llm = OpenAI(model=llm_model_name)

# Data For UserProfile

## Content

In [4]:
# Load the content catalogue CSV and turn every row into one Document:
# the row's description becomes the document text, the remaining columns of
# interest are attached as metadata under English key names.
data_content_filepath = "./data/data_content/df_content_description_programid.csv"
data_content = pd.read_csv(data_content_filepath)

# Metadata key on the Document -> column name in the CSV.
content_metadata_columns = {
    'ID': 'id',
    'Type': 'tipo',
    'ProgramID': 'program_id',
    'Author': 'autor',
    'Title': 'title',
    'Section': 'seccion',
    'Category': 'categoria',
    'URL': 'video_url',
}

data_content_documents = []
for _, content_row in data_content.iterrows():
    content_text = content_row['description']
    content_metadata = {
        metadata_key: content_row[column_name]
        for metadata_key, column_name in content_metadata_columns.items()
    }
    data_content_documents.append(
        Document(text=content_text, metadata=content_metadata)
    )

## Membership

In [46]:
# Parse the membership JSON file into documents with a JSONReader.
membership_json_path = "./data/data_membership/dict_membership.json"
reader = JSONReader()
data_membership_documents = reader.load_data(input_file=membership_json_path)

## Quiz

In [45]:
# Parse the quiz JSON file into documents with a JSONReader.
quiz_json_path = "./data/data_quiz/quiz_dict.json"
reader = JSONReader()
data_quiz_documents = reader.load_data(input_file=quiz_json_path)

## Program

In [40]:
# Read the program articles from their directory, then cut the documents into
# nodes of chunk size 256 with an overlap of 20 between neighbouring chunks.
program_articles_dir = "./data/data_articles/"
reader = SimpleDirectoryReader(input_dir=program_articles_dir)
data_program_documents = reader.load_data()

splitter = SentenceSplitter(chunk_size=256, chunk_overlap=20)
data_program_nodes = splitter.get_nodes_from_documents(data_program_documents)

## Website

In [41]:
# Collect every .html file in data/data_website. The paths are sorted because
# iterdir() yields entries in arbitrary, filesystem-dependent order; sorting
# keeps the document order (and the node order derived from it) reproducible.
data_website_dir = Path("./data/data_website/")
html_page_paths = sorted(
    path for path in data_website_dir.iterdir() if path.suffix == ".html"
)
# Page names are the file names without the .html extension.
html_page_names = [path.stem for path in html_page_paths]

loader = UnstructuredReader()
html_page_doc_set = {}   # page name -> documents loaded from that page
html_page_all_docs = []  # documents of all pages, in page order
for html_page_path in html_page_paths:
    html_page = html_page_path.stem
    # Reuse the path found above instead of rebuilding it from the page name.
    html_page_docs = loader.load_data(file=html_page_path, split_documents=False)

    # Tag each document with the page it came from.
    # NOTE: this assignment replaces any metadata the loader attached.
    for d in html_page_docs:
        d.metadata = {"html_page": html_page}
    html_page_doc_set[html_page] = html_page_docs
    html_page_all_docs.extend(html_page_docs)

# Chunk the website documents with the same splitter settings as the program
# articles (chunk size 256, overlap 20).
splitter = SentenceSplitter(chunk_size=256, chunk_overlap=20)
data_website_nodes = splitter.get_nodes_from_documents(html_page_all_docs)

In [None]:
# Sanity check: how many nodes the splitter produced from the website pages.
len(data_website_nodes)

29

# Indexing

## Content

In [42]:
# Build a vector index over the content documents and expose it two ways:
# a query engine (used by the agent tool) and a top-5 similarity retriever.
data_content_index = VectorStoreIndex.from_documents(data_content_documents)
data_content_query_engine = data_content_index.as_query_engine()
data_content_query_retriever = data_content_index.as_retriever(similarity_top_k=5)
# Backward-compatible alias: the retriever was originally published under this
# misspelled name ("retiever"); keep it so existing references still resolve.
data_content_query_retiever = data_content_query_retriever

## Membership

In [43]:
# Build a vector index over the membership documents and expose it two ways:
# a query engine (used by the agent tool) and a top-5 similarity retriever.
data_membership_index = VectorStoreIndex.from_documents(data_membership_documents)
data_membership_query_engine = data_membership_index.as_query_engine()
data_membership_query_retriever = data_membership_index.as_retriever(similarity_top_k=5)
# Backward-compatible alias: the retriever was originally published under this
# misspelled name ("retiever"); keep it so existing references still resolve.
data_membership_query_retiever = data_membership_query_retriever

## Quiz

In [47]:
# Build a vector index over the quiz documents and expose it two ways:
# a query engine (used by the agent tool) and a top-5 similarity retriever.
data_quiz_index = VectorStoreIndex.from_documents(data_quiz_documents)
data_quiz_query_engine = data_quiz_index.as_query_engine()
data_quiz_query_retriever = data_quiz_index.as_retriever(similarity_top_k=5)
# Backward-compatible alias: the retriever was originally published under this
# misspelled name ("retiever"); keep it so existing references still resolve.
data_quiz_query_retiever = data_quiz_query_retriever

## Program

In [48]:
# The program articles were already split into nodes, so the index is built
# from those nodes directly; a query engine and a top-5 retriever sit on top.
data_program_index = VectorStoreIndex(nodes=data_program_nodes)
data_program_query_engine = data_program_index.as_query_engine()
data_program_query_retriever = data_program_index.as_retriever(
    similarity_top_k=5,
)

## Website

In [49]:
# Index the website nodes that were chunked with the explicit splitter
# (chunk size 256, overlap 20), the same way the program index is built from
# its nodes. Indexing the raw documents instead would ignore that chunking and
# leave data_website_nodes unused.
data_website_index = VectorStoreIndex(data_website_nodes)
data_website_query_engine = data_website_index.as_query_engine()
data_website_query_retriever = data_website_index.as_retriever(similarity_top_k=5)

# Tools

In [50]:
def _make_query_engine_tool(query_engine, name, description):
    """Wrap a query engine in a QueryEngineTool.

    Args:
        query_engine: The query engine the tool delegates to.
        name: Tool name stored in the tool metadata.
        description: Tool description stored in the tool metadata.

    Returns:
        A QueryEngineTool whose metadata carries ``name`` and ``description``.
    """
    return QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(name=name, description=description),
    )


# One tool per data source; each tool's name and description go into its metadata.
data_content_query_engine_tool = _make_query_engine_tool(
    data_content_query_engine,
    "data_content_query_engine",
    "A RAG engine with information about video for mental health made by specialists.",
)

data_membership_query_engine_tool = _make_query_engine_tool(
    data_membership_query_engine,
    "data_membership_query_engine",
    "A RAG engine with information about membership for mental health made by specialists.",
)

data_quiz_query_engine_tool = _make_query_engine_tool(
    data_quiz_query_engine,
    "data_quiz_query_engine",
    "A RAG engine with information about quiz for mental health made by specialists.",
)

data_program_query_engine_tool = _make_query_engine_tool(
    data_program_query_engine,
    "data_program_query_engine",
    "A RAG engine with information about programs for mental health made by specialists.",
)

data_website_query_engine_tool = _make_query_engine_tool(
    data_website_query_engine,
    "data_website_query_engine",
    "A RAG engine with information about LetsCope.org website that offer services to user for helping them to cope with mental health issues.",
)

In [51]:
# All query-engine tools handed to the agent, one per data source.
tools = [
    data_content_query_engine_tool,
    data_membership_query_engine_tool,
    data_quiz_query_engine_tool,
    data_program_query_engine_tool,
    data_website_query_engine_tool,
]

In [59]:
# System prompt given to the agent; kept in a variable so the agent
# construction below stays short and readable.
agent_system_prompt = "You are helpful assistant that of the website LetsCope.org. You are asked to provide a user profile from a given user information dictionary and all tools in your possession. You are also asked to provide a response to a user query from a given user query and all tools in your possession."

# Agent wired to the shared LLM and every query-engine tool; verbose=True is
# passed through unchanged.
openai_agent = OpenAIAgent.from_tools(
    tools,
    llm=llm,
    verbose=True,
    system_prompt=agent_system_prompt,
)

In [60]:
# Read the user-information dictionary for one user from its JSON file.
# The encoding is passed explicitly so the file is decoded the same way on
# every platform instead of falling back to the locale's default encoding.
# (json is imported with the other modules at the top of the notebook.)
user_info_path = "./data/data_user/r3h5KzAFj1N0LDqW8aJciUuDRBH2/dict_user.json"
with open(user_info_path, "r", encoding="utf-8") as f:
    user_info = json.load(f)

In [61]:
# Display the loaded dictionary for inspection.
# NOTE(review): the rendered output includes the user's UID and membership
# details -- consider clearing this output before sharing the notebook.
user_info

{'user_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2',
 'quiz_submitted': [{'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2',
   'dev_cope_quiz_submit_quiz_id': 1,
   'dev_cope_quiz_submit_data': '[{"value": "1", "question_id": 1}, {"value": "0", "question_id": 2}, {"value": "2", "question_id": 3}, {"value": "2", "question_id": 4}, {"value": "2", "question_id": 5}, {"value": "0", "question_id": 6}, {"value": "2", "question_id": 7}, {"value": "2", "question_id": 8}, {"value": "1", "question_id": 9}]',
   'dev_cope_quiz_submit_score': 12.0,
   'dev_cope_quiz_submit_date': '2024-07-20 22:03:13'}],
 'content_watched': [{'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2',
   'dev_cope_content_id': 339.0},
  {'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2',
   'dev_cope_content_id': 129.0},
  {'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2',
   'dev_cope_content_id': 299.0}],
 'membership': [{'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2',
   'dev_cope_membership_i

In [62]:
# The agent receives the user dictionary as text: the dict is converted with
# str() and sent as a single chat message.
user_info_message = str(user_info)
response = openai_agent.chat(user_info_message)

Added user message to memory: {'user_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2', 'quiz_submitted': [{'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2', 'dev_cope_quiz_submit_quiz_id': 1, 'dev_cope_quiz_submit_data': '[{"value": "1", "question_id": 1}, {"value": "0", "question_id": 2}, {"value": "2", "question_id": 3}, {"value": "2", "question_id": 4}, {"value": "2", "question_id": 5}, {"value": "0", "question_id": 6}, {"value": "2", "question_id": 7}, {"value": "2", "question_id": 8}, {"value": "1", "question_id": 9}]', 'dev_cope_quiz_submit_score': 12.0, 'dev_cope_quiz_submit_date': '2024-07-20 22:03:13'}], 'content_watched': [{'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2', 'dev_cope_content_id': 339.0}, {'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2', 'dev_cope_content_id': 129.0}, {'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2', 'dev_cope_content_id': 299.0}], 'membership': [{'dev_cope_customer_uid': 'r3h5KzAFj1N0LDqW8aJciUuDRBH2', 'dev_cope_membership_id

In [64]:
# Print the text of the agent's reply (print keeps its line breaks readable).
agent_reply_text = response.response
print(agent_reply_text)

### User Profile:
- **User ID:** r3h5KzAFj1N0LDqW8aJciUuDRBH2
- **Quiz Submitted:**
  - **Quiz ID:** 1
  - **Score:** 12.0
  - **Date Submitted:** 2024-07-20 22:03:13
  - **Quiz Data:**
    - Question 1: 1
    - Question 2: 0
    - Question 3: 2
    - Question 4: 2
    - Question 5: 2
    - Question 6: 0
    - Question 7: 2
    - Question 8: 2
    - Question 9: 1
- **Content Watched:**
  - Content ID 339.0
  - Content ID 129.0
  - Content ID 299.0
- **Membership:**
  - **Membership ID:** 1
  - **Subscription Name:** Main subscription
  - **Subscription Cost:** $9
  - **Subscription Period:** Monthly
  - **Free Trial Days:** 14
  - **Subscription Start Date:** 2024-07-20 21:24:36
  - **Next Payment Date:** 2024-08-10 09:27:12
  - **Subscription Description:** 14 days free trial, and then $9 a month
  - **Stripe URL:** [Link](https://buy.stripe.com/5kAdSg3Sh2yqcAU144)
  - **Stripe Button ID:** buy_btn_1NnOUoHR2YjQ1Zme78d8pZwC

### How can I assist you today?
