# Import

## Libraries

In [None]:
# Warning control
import warnings
import os
import yaml
from pathlib import Path
from dotenv import load_dotenv
from pydantic import BaseModel, Field, PositiveInt
from typing import List, Optional
from IPython.display import display, Markdown

from crewai import Agent, Task, Crew
from crewai_tools import FileReadTool, DirectoryReadTool

# Load variables from a local .env file before any crewAI objects are created.
load_dotenv()  # take environment variables from .env
# Warning control: suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# os.environ["OPENAI_MODEL_NAME"] = "gpt-4o"

## Conversation Transcript

Output from `pdf_to_md.ipynb`:

In [None]:
# Read the transcript explicitly as UTF-8. Without `encoding=`, read_text()
# uses the locale's preferred encoding (e.g. cp1252 on Windows), which can
# raise a 'charmap' UnicodeDecodeError on non-ASCII bytes.
transcript_content = Path("data/conversation_01.txt").read_text(encoding="utf-8")
# Preview only the first 500 characters to keep the cell output short.
print(transcript_content[:500])

## Tasks & Agents YAML files

In [None]:
# Define file paths for YAML configurations
files = {
    "agents": "src/recsys/config/agents.yaml",
    "tasks": "src/recsys/config/tasks.yaml",
}

# Load configurations from YAML files.
# The encoding is passed explicitly: a bare open() uses the platform's locale
# encoding (e.g. cp1252 on Windows), which can mis-decode or reject non-ASCII
# text in the YAML files.
configs = {}
for config_type, file_path in files.items():
    with open(file_path, "r", encoding="utf-8") as file:
        # safe_load only constructs plain Python objects (dicts, lists, scalars).
        configs[config_type] = yaml.safe_load(file)

# Assign loaded configurations to specific variables
agents_config = configs["agents"]
tasks_config = configs["tasks"]

In [None]:
# Sanity check: list the top-level keys found in each YAML configuration.
agent_keys = ", ".join(agents_config.keys())
task_keys = ", ".join(tasks_config.keys())
print("agents_config contains:", agent_keys, "\ntasks_config contains:", task_keys)

## Tools (Agent Usage)

Got `FileReadTool()` to work, thanks to this [article](https://lablab.ai/t/crewai-multi-agent-system)!

In [None]:
# Tool instances handed to both agents further down.
# The directory tool is pinned to the ./data folder.
directory_read_tool = DirectoryReadTool(directory="./data")
# The file tool is created without a fixed file_path (unlike the commented-out
# per-file variants below); presumably the agent supplies the path when it
# calls the tool — confirm against the crewai_tools docs.
file_read_tool = FileReadTool()


# read_conversation = FileReadTool(file_path="./data/conversation_01.txt")
# read_income = FileReadTool(file_path="./data/income_travel_insurance_2024.txt")
# read_ge = FileReadTool(file_path="data/ge_travel_insurance_2024.txt")

Certain policies (even in Markdown) will give this error: `Error: 'charmap' codec can't decode byte 0x81 in position 1886: character maps to <undefined>`  
This occurs because the file is being decoded with a character encoding that does not match the encoding it was saved in. Specifically:

- The `'charmap'` codec in the message means the file was opened with a legacy single-byte code page (often cp1252, the default on Windows) rather than UTF-8; byte `0x81` has no character assigned in that code page.
- The text file itself is most likely encoded in a different character set (e.g., UTF-8, ISO-8859-1, or Shift-JIS), so it contains bytes that the default codec cannot map to characters.

**My solution: Fix the source code to use UTF-8.**  
To specify the encoding when reading the file in the `FileReadTool` class, the `_run` method was modified to pass `encoding="utf-8"` in its `open()` call. Note that this edits the installed `crewai_tools` package, so the change has to be reapplied after reinstalling or upgrading it.  
See this [link](https://chatgpt.com/share/673eeca7-0654-8010-aa31-31a1c5996d27) for details.

# Pydantic Classes

In [None]:
# Structured details for a single customer; CustomerInfoList holds a list of
# these. Field descriptions are part of the pydantic schema.
class CustomerInfo(BaseModel):
    # Required (no default); PositiveInt rejects zero and negative values.
    age: PositiveInt = Field(..., description="Age of the customer")
    # Optional: may be None, and defaults to an empty list when omitted.
    medical_conditions: Optional[List[str]] = Field(
        default_factory=list, description="List of pre-existing medical conditions"
    )
    # Required (no default).
    destination: str = Field(..., description="Travel destination")
    # The three list fields below each default to a fresh empty list.
    coverage_factors: List[str] = Field(
        default_factory=list, description="Factors affecting coverage"
    )
    policy_customization_factors: List[str] = Field(
        default_factory=list, description="Factors for policy customization"
    )
    coverage_priority: List[str] = Field(
        default_factory=list,
        description="Customer's priorities for coverage, e.g., destination coverage, PA benefits",
    )


# Wrapper model holding zero or more CustomerInfo records; it is passed as
# `output_pydantic` to the transcript-processing task.
class CustomerInfoList(BaseModel):
    # Defaults to an empty list when no customers are provided.
    customers: List[CustomerInfo] = Field(default_factory=list)

# Create Crew, Agents and Tasks

[Agent attributes](https://docs.crewai.com/concepts/agents) for reference.  
Tools can be assigned when specifying Agent:

In [None]:
# Agent settings come from the "conversation_transcript_agent" entry of
# agents.yaml. Both agents get the same two tools and verbose output.
conversation_transcript_agent = Agent(
    config=agents_config["conversation_transcript_agent"],
    tools=[directory_read_tool, file_read_tool],
    verbose=True,
)

# Agent settings come from the "underwriting_report_agent" entry of
# agents.yaml; this agent is assigned to the report-generation task.
underwriting_report_agent = Agent(
    config=agents_config["underwriting_report_agent"],
    tools=[directory_read_tool, file_read_tool],
    verbose=True,
)

[Task attributes](https://docs.crewai.com/concepts/tasks) for reference.

In [None]:
# First task: configured from the "process_transcript_task" entry of
# tasks.yaml and run by the transcript agent. `output_pydantic` asks crewAI to
# return the result as a CustomerInfoList (see the crewAI Task docs).
process_transcript_task = Task(
    config=tasks_config["process_transcript_task"],
    agent=conversation_transcript_agent,
    output_pydantic=CustomerInfoList,
)

# Second task: configured from the "generate_underwriting_report_task" entry
# of tasks.yaml and run by the underwriting agent. `context` lists the
# transcript task so its output is available to this one.
# NOTE(review): `async_execution=True` is set on the last task in the crew's
# task list, so no later task is visible here for it to overlap with —
# confirm this is intended.
generate_underwriting_report_task = Task(
    config=tasks_config["generate_underwriting_report_task"],
    context=[process_transcript_task],
    agent=underwriting_report_agent,
    async_execution=True,
)

[Crew attributes](https://docs.crewai.com/concepts/crews) for reference.  

The `process` parameter controls how tasks are executed by the crew. The options include:

* Sequential (Default): Tasks are performed one after the other in a specific order.
* Hierarchical: One agent acts as a manager, delegating tasks to other agents while maintaining an overarching memory of the tasks.
* Parallel: There is no parallel `process` value; to run tasks concurrently, set `async_execution=True` on the individual tasks instead (as done for `generate_underwriting_report_task` above).


In [None]:
# Assemble the crew from the two agents and two tasks defined above.
recsys_crew = Crew(
    agents=[conversation_transcript_agent, underwriting_report_agent],
    # Transcript processing is listed before report generation.
    tasks=[
        process_transcript_task,
        generate_underwriting_report_task,
    ],
    verbose=True,
    # Passed as a plain string; "sequential" is the default process according
    # to the notes in this notebook.
    process="sequential",
)

In [None]:
# Run the crew and keep the returned output object in `result`.
result = recsys_crew.kickoff()
# Render the raw output text as Markdown in the notebook.
display(Markdown(result.raw))

# Notes

(21 Nov 2024)  
- ~~More explicit referencing of insurance policy. Defined in agent or task?~~
- ~~Tasked to read Income but can't open. Reads GE instead~~
- Latest GPT-4o (gpt-4o-2024-11-20) unable to use tools.
- Table comparison format acceptable?

In [None]:
# Print the crew's usage metrics (presumably token/request counts from the
# kickoff above — confirm against the crewAI Crew docs).
print(recsys_crew.usage_metrics)