In [1]:
# Warning control
import warnings
warnings.filterwarnings('ignore')

In [2]:
from crewai import Agent, Crew, Task

In [3]:
from dotenv import load_dotenv
import os

load_dotenv()  # Load environment variables from .env

openai_api_key = os.getenv("OPENAI_API_KEY")
if openai_api_key is None:
    raise ValueError("OPENAI_API_KEY is not set. Please add it to your .env file.")

os.environ["OPENAI_API_KEY"] = openai_api_key  # Ensuring CrewAI uses the correct key
os.environ["OPENAI_MODEL_NAME"] = 'gpt-4o'

In [4]:
# Step 1: Requirement Extraction Agent - Processes Call Transcripts
transcript_analyst = Agent(
    role="Call Transcript Analyst",
    goal="Extract and structure travel insurance customer requirements from call transcripts into a validated JSON object.",
    backstory=(
        "A seasoned customer service analyst specializing in extracting travel insurance requirements from call transcripts. "
        "This agent listens to conversations between customers and service staff, identifies key requirements, and formats "
        "the insights into a structured JSON output that conforms to the TravelInsuranceRequirement model for accurate policy matching."
    ),
    allow_delegation=False,
    verbose=True
)

In [5]:
from pydantic import BaseModel, Field
from typing import List, Optional
from datetime import date
class TravelInsuranceRequirement(BaseModel):
    requirement_id: str = Field(..., description="Unique identifier for tracking the insurance requirement.")
    requirement_summary: str = Field(..., description="Concise summary of the customer's insurance needs.")
    detailed_description: str = Field(..., description="Detailed narrative extracted from the transcript.")
    
    travel_destination: Optional[str] = Field(None, description="Country or region the customer is traveling to.")
    travel_duration: Optional[str] = Field(None, description="Duration of the trip (e.g., '7 days', '1 month').")
    travel_start_date: Optional[date] = Field(None, description="Start date of the travel.")
    travel_end_date: Optional[date] = Field(None, description="End date of the travel.")
    
    insurance_coverage_type: Optional[List[str]] = Field(
        None, description="Types of insurance coverage requested (e.g., ['Medical', 'Trip Cancellation'])."
    )
    pre_existing_conditions: Optional[List[str]] = Field(
        None, description="Any pre-existing conditions mentioned that might affect coverage."
    )
    age_group: Optional[str] = Field(None, description="Age bracket of the travelers (e.g., '26-40').")
    travelers_count: Optional[int] = Field(None, ge=1, description="Number of travelers to be insured (must be at least 1).")
    
    budget_range: Optional[str] = Field(None, description="Budget constraints (e.g., '$100-$200').")
    preferred_insurance_provider: Optional[str] = Field(None, description="Preferred insurance provider, if any.")
    
    additional_requests: Optional[str] = Field(
        None, description="Any special requests or concerns noted by the customer."
    )
    keywords: Optional[List[str]] = Field(
        None, description="Important keywords or terms extracted from the transcript for further analysis."
    )

In [6]:
# Define the agent task
transcript_analyst_task = Task(
    description="""Analyze the travel insurance call transcript below and extract key customer requirements.
Step 1: Read the transcript carefully and extract all relevant details. For each field in the schema, provide a brief annotation or reference to the specific portion(s) of the transcript where the detail was found.
Step 2: Review your annotations to verify that every extracted detail directly matches the transcript. Resolve any discrepancies or conflicts in the data.
Step 3: Produce a final, validated JSON object that adheres exactly to the TravelInsuranceRequirement schema with the following fields:
- requirement_id (str): A unique identifier.
- requirement_summary (str): A concise summary of the customer's insurance needs.
- detailed_description (str): A detailed narrative extracted from the transcript.
- travel_destination (Optional[str]): The destination (country or region) mentioned.
- travel_duration (Optional[str]): Duration of the trip (e.g., "7 days", "1 month").
- travel_start_date (Optional[date]): The travel start date.
- travel_end_date (Optional[date]): The travel end date.
- insurance_coverage_type (Optional[List[str]]): The types of insurance coverage requested (e.g., ["Medical", "Trip Cancellation"]).
- pre_existing_conditions (Optional[List[str]]): Any pre-existing conditions mentioned.
- age_group (Optional[str]): Age bracket of the travelers (e.g., "26-40").
- travelers_count (Optional[int]): Number of travelers.
- budget_range (Optional[str]): Budget constraints (e.g., "$100-$200").
- preferred_insurance_provider (Optional[str]): Preferred insurance provider, if any.
- additional_requests (Optional[str]): Any special requests or concerns.
- keywords (Optional[List[str]]): Important keywords or terms for further analysis.

If a field is not mentioned in the transcript, use null.

Transcript:
{parsed_transcripts}
""",
    expected_output="A JSON object that matches the TravelInsuranceRequirement model.",
    agent=transcript_analyst,
    output_json=TravelInsuranceRequirement,
    output_file="insurance_requirement.json"
)

In [7]:
# Define the crew with agents and tasks
insurance_recommendation_crew = Crew(
    agents=[transcript_analyst],
    tasks=[transcript_analyst_task],
    verbose=True
)

In [9]:
import json
import uuid
from datetime import datetime

current_dir = os.getcwd()
project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))

# Update the path to point to the JSON transcript
sample_transcript = os.path.join(project_root, "data", "processed_transcript", "parsed_transcript_05.json")

# Read and parse the JSON transcript
with open(sample_transcript, 'r') as f:
    transcript_data = json.load(f)

# Convert the transcript data to a string format for the agent
formatted_transcript = "\n".join([f"{msg['speaker']}: {msg['dialogue']}" for msg in transcript_data])

input_transcript = {
    'parsed_transcripts': formatted_transcript
}

In [10]:
result = insurance_recommendation_crew.kickoff(inputs=input_transcript)

[1m[95m [DEBUG]: == Working Agent: Call Transcript Analyst[00m
[1m[95m [INFO]: == Starting Task: Analyze the travel insurance call transcript below and extract key customer requirements.
Step 1: Read the transcript carefully and extract all relevant details. For each field in the schema, provide a brief annotation or reference to the specific portion(s) of the transcript where the detail was found.
Step 2: Review your annotations to verify that every extracted detail directly matches the transcript. Resolve any discrepancies or conflicts in the data.
Step 3: Produce a final, validated JSON object that adheres exactly to the TravelInsuranceRequirement schema with the following fields:
- requirement_id (str): A unique identifier.
- requirement_summary (str): A concise summary of the customer's insurance needs.
- detailed_description (str): A detailed narrative extracted from the transcript.
- travel_destination (Optional[str]): The destination (country or region) mentioned.
- travel

In [11]:
from pprint import pformat
from IPython.display import Markdown, display

pretty_dict = pformat(result, sort_dicts=False)
display(Markdown(f"```\n{pretty_dict}\n```"))

```
('{\n'
 '  "requirement_id": "12345",\n'
 '  "requirement_summary": "Travel insurance for a couple travelling to '
 'Thailand for 10 days, requiring medical and lost luggage coverage.",\n'
 '  "detailed_description": "The customer, a 45-year-old male, and his '
 '42-year-old wife, both Singaporean citizens, are planning a trip to Thailand '
 'from the 15th of November to the 25th of November. They require travel '
 'insurance that includes medical emergency coverage and lost luggage '
 'coverage, with a focus on getting the best value for money.",\n'
 '  "travel_destination": "Thailand",\n'
 '  "travel_duration": "10 days",\n'
 '  "travel_start_date": "2023-11-15",\n'
 '  "travel_end_date": "2023-11-25",\n'
 '  "insurance_coverage_type": [\n'
 '    "Medical",\n'
 '    "Lost Luggage"\n'
 '  ],\n'
 '  "pre_existing_conditions": null,\n'
 '  "age_group": "41-50",\n'
 '  "travelers_count": 2,\n'
 '  "budget_range": null,\n'
 '  "preferred_insurance_provider": null,\n'
 '  "additional_requests": "Looking for the best possible price and '
 'value-for-money option.",\n'
 '  "keywords": [\n'
 '    "Thailand",\n'
 '    "Medical",\n'
 '    "Lost Luggage",\n'
 '    "Value-for-money"\n'
 '  ]\n'
 '}')
```