In [1]:
import os
from openai import OpenAI
from pydantic import BaseModel
import json

In [2]:
# Shared OpenAI client used by every extraction call in this notebook.
# The key is read from the OPEN_AI_API environment variable; if the variable
# is unset, None is passed through to the constructor.
client = OpenAI(api_key=os.environ.get('OPEN_AI_API'))

In [3]:
# Breakfast venue for one day: business name and address only.
# The extraction prompt asks for "-" in both fields when none is recommended.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class breakfast(BaseModel):
    name: str
    address: str

# One attraction visited in the morning session: business name and address.
# OneDay holds a list of these; the extraction prompt asks for an empty list
# when no morning attraction is recommended.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class morning_attraction(BaseModel):
    name: str
    address: str

# Lunch venue for one day: business name and address only.
# The extraction prompt asks for "-" in both fields when none is recommended.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class lunch(BaseModel):
    name: str
    address: str

# One attraction visited in the afternoon session: business name and address.
# OneDay holds a list of these; the extraction prompt asks for an empty list
# when no afternoon attraction is recommended.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class afternoon_attraction(BaseModel):
    name: str
    address: str

# Dinner venue for one day: business name and address only.
# The extraction prompt asks for "-" in both fields when none is recommended.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class dinner(BaseModel):
    name: str
    address: str

# One attraction visited in the night session: business name and address.
# OneDay holds a list of these; the extraction prompt asks for an empty list
# when no night attraction is recommended.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class night_attraction(BaseModel):
    name: str
    address: str

# Accommodation for one day: business name and address only.
# The extraction prompt asks for "-" in both fields when none is recommended;
# a later post-processing cell compares against exactly that placeholder.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class accommodation(BaseModel):
    name: str
    address: str

# Structured record of a single day of the itinerary.
# Meals and accommodation are single entries; each attraction session is a
# list so that any number of attractions (including none) can be recorded.
# Field names reuse the lowercase model class names on purpose so that the
# JSON keys match the example given in the extraction prompt.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class OneDay(BaseModel):
    days: str  # day label as a string, e.g. "1"
    breakfast: breakfast
    morning_attractions: list[morning_attraction]
    lunch: lunch
    afternoon_attractions: list[afternoon_attraction]
    dinner: dinner
    night_attractions: list[night_attraction]
    accommodation: accommodation

# Top-level schema for an extracted travel plan; passed as `response_format`
# to the OpenAI structured-output call so the reply is parsed into this model.
# (Kept as comments: a class docstring would be exported into the JSON schema.)
class WholePlan(BaseModel):
    itinerary: list[OneDay]  # one OneDay entry per day of the plan

In [4]:
# Read the extraction instructions; later cells use this text as `system_prompt`.
with open('Utils/Plan_extraction_task1.txt', 'r') as prompt_file:
    system_prompt = prompt_file.read()

In [5]:
# Display the loaded prompt text so its content can be checked by eye.
system_prompt

'Extract the travel itinerary and parse the businesses\' information into the JSON format as below. Be faithful and concise. Correctly document the right number of the attractions. Only write down the name and address of the businesses. If certain recommendations (like meals or accommodations) are not provided, replace the information with "-" for name and address. If recommendations for a session of attraction is not provided, replace the information as an empty array. \n\n----- Example Starts -----\n{\n    "itinerary":[\n        {   \n            "days": "x",\n            "breakfast": {\n                "name": "xxx",\n                "address": "xxx"\n            },\n            "morning_attractions": [\n                {\n                    "name": "xxx",\n                    "address": "xxx"\n                },\n                {\n                    "name": "xxx",\n                    "address": "xxx"\n                }\n            ],\n            "lunch": {\n                "n

In [None]:
def extract_plan(user_prompt: str, system_prompt: str) -> dict:
    """Extract a structured itinerary from a free-text travel plan.

    Note the argument order: the plan text comes first, the instructions
    second. Prefer keyword arguments at call sites to avoid swapping them.

    Parameters
    ----------
    user_prompt : str
        The travel plan text; sent as the user message.
    system_prompt : str
        The extraction instructions; sent as the system message.

    Returns
    -------
    dict
        JSON-compatible dictionary following the ``WholePlan`` schema.

    Raises
    ------
    ValueError
        If the response carries no parsed payload (for example when the
        model refuses), instead of failing later with an AttributeError.
    """
    chat_completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        temperature=0,  # keep the extraction as deterministic as possible
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        response_format=WholePlan,
    )

    message = chat_completion.choices[0].message
    if message.parsed is None:
        # `parsed` is None when the model did not return schema-conforming
        # content; surface the refusal text (if any) to the caller.
        refusal = getattr(message, "refusal", None)
        raise ValueError(f"Model returned no parsed plan (refusal: {refusal!r})")

    # Round-trip through JSON text so the caller gets plain dicts/lists/strings.
    return json.loads(message.parsed.json())

In [None]:
# Run the extraction on every generated plan and store the result as JSON.
for filename in os.listdir('Outputs/plans'):
    # Drop the last 4 characters (extension) and the first 5 (prefix) to get
    # the plan index; presumably names look like `Plan_<index>.txt` — confirm.
    index = filename[:-4][5:]

    with open(f'Outputs/plans/{filename}', 'r') as file:
        user_prompt = file.read()

    # Keyword arguments on purpose: extract_plan takes (user_prompt,
    # system_prompt), and passing them positionally in the opposite order
    # sends the plan as the system message and the instructions as user text.
    plan_eval = extract_plan(user_prompt=user_prompt, system_prompt=system_prompt)

    with open(f'Outputs/evals/Plan_Eval_{index}.json', 'w') as f:
        json.dump(plan_eval, f, indent=4)

In [37]:
# Post-processing of each extracted plan evaluation.
#
# A day whose accommodation is the "-" placeholder inherits the most recent
# accommodation named on an earlier day of the same plan. Days before the
# first named accommodation keep the placeholder. Files are rewritten in place.
for filename in os.listdir('Outputs/evals'):
    eval_path = f'Outputs/evals/{filename}'

    with open(eval_path, 'r') as file:
        plan_eval = json.load(file)

    no_hotel = {"name":"-", "address":"-"}
    current_hotel = no_hotel

    for day_entry in plan_eval['itinerary']:
        # Remember the latest explicitly named accommodation ...
        if day_entry['accommodation'] != no_hotel:
            current_hotel = day_entry['accommodation']
        # ... and apply it to this day (a no-op when the day named its own).
        day_entry['accommodation'] = current_hotel

    with open(eval_path, 'w') as f:
        json.dump(plan_eval, f, indent=4)

Test extraction from a plan with missing information

In [35]:
# Plan text for the missing-information test case.
with open('Outputs/Task1/Task1_1_missingInfo.txt', 'r') as file:
    user_prompt = file.read()

# NOTE: this rebinds the notebook-level `system_prompt` to the Task1 prompt.
with open('Prompts/Task1_extract/Task1_extraction_prompt.txt', 'r') as file:
    system_prompt = file.read()

# Reuse the shared helper rather than repeating the API call inline, so the
# model, temperature and schema stay in one place.
result = extract_plan(user_prompt=user_prompt, system_prompt=system_prompt)
result

{'itinerary': [{'days': '1',
   'breakfast': {'name': '-', 'address': '-'},
   'morning_attractions': [{'name': 'Independence Hall',
     'address': '520 Chestnut St, Philadelphia, PA 19106'}],
   'lunch': {'name': "Campo's Philly Cheesesteaks",
    'address': '214 Market St, Philadelphia, PA 19106'},
   'afternoon_attractions': [],
   'dinner': {'name': 'Amada',
    'address': '217 Chestnut St, Philadelphia, PA 19106'},
   'night_attractions': [{'name': 'Spruce Street Harbor Park',
     'address': '121 N Columbus Blvd, Philadelphia, PA 19106'}],
   'accommodation': {'name': '-', 'address': '-'}},
  {'days': '2',
   'breakfast': {'name': 'Green Eggs Cafe',
    'address': '1306 Dickinson St, Philadelphia, PA 19146'},
   'morning_attractions': [{'name': 'Philadelphia Museum of Art',
     'address': '2600 Benjamin Franklin Pkwy, Philadelphia, PA 19130'}],
   'lunch': {'name': 'Barbuzzo',
    'address': '110 S 13th St, Philadelphia, PA 19107'},
   'afternoon_attractions': [{'name': "Philad

In [36]:
# Save the extraction result for the missing-information case as indented JSON.
with open('Outputs/Task1_json/Task1_json_1_missingInfo.json', 'w') as out_file:
    json.dump(result, out_file, indent=4)

Test extraction from a plan with out-of-pool businesses and missing information

In [37]:
# Plan text for the out-of-pool test case.
with open('Outputs/Task1/Task1_1_outOfPool.txt', 'r') as file:
    user_prompt = file.read()

# NOTE: this rebinds the notebook-level `system_prompt` to the Task1 prompt.
with open('Prompts/Task1_extract/Task1_extraction_prompt.txt', 'r') as file:
    system_prompt = file.read()

# Reuse the shared helper rather than repeating the API call inline, so the
# model, temperature and schema stay in one place.
result = extract_plan(user_prompt=user_prompt, system_prompt=system_prompt)
result

{'itinerary': [{'days': '1',
   'breakfast': {'name': '-', 'address': '-'},
   'morning_attractions': [{'name': 'Independence Hall',
     'address': '520 Chestnut St, Philadelphia, PA 19106'}],
   'lunch': {'name': 'Some Restaurant',
    'address': '123 Road, Philadelphia, PA 19106'},
   'afternoon_attractions': [],
   'dinner': {'name': 'Amada',
    'address': '217 Chestnut St, Philadelphia, PA 19106'},
   'night_attractions': [{'name': 'Spruce Street Harbor Park',
     'address': '121 N Columbus Blvd, Philadelphia, PA 19106'}],
   'accommodation': {'name': '-', 'address': '-'}},
  {'days': '2',
   'breakfast': {'name': 'Green Eggs Cafe',
    'address': '1306 Dickinson St, Philadelphia, PA 19146'},
   'morning_attractions': [{'name': 'Philadelphia Museum of Art',
     'address': '2600 Benjamin Franklin Pkwy, Philadelphia, PA 19130'}],
   'lunch': {'name': 'Barbuzzo',
    'address': '110 S 13th St, Philadelphia, PA 19107'},
   'afternoon_attractions': [{'name': "Ethan's shop",
     'ad

In [38]:
# Save the extraction result for the out-of-pool case as indented JSON.
with open('Outputs/Task1_json/Task1_json_1_outOfPool.json', 'w') as out_file:
    json.dump(result, out_file, indent=4)