In [1]:
# description = "You are a helpful hotel assistant, your job is to help users in whatever queries they may have."

# intent_list = {
#     "book_room": "The user wants to book a room in the hotel",
#     "cancel_booking": "The user wants to cancel an existing booking",
#     "general_enquiries": "The user wants to ask general questions about the hotel",
#     "chit_chat": "Queries outside of the other intents specified. Apart from greetings and hellos, the response for this one should be 'Sorry, I can only help you with hotel queries.'"
# }

# slots_to_fill = {
#     "dateFrom": ("book_room"),
#     "dateTo": ("book_room"),
#     "bookingID": ("cancel_booking")
# }

# action_slot_pair = {
#     "makeBooking": ("dateFrom", "dateTo"),
#     "lookUpBooking": ("bookingID"),
#     "cancellation": ("bookingID")
# }

# description = "You are a helpful restaurant assistant. Your job is to help users in whatever queries they may have."

# # Intents the restaurant chatbot can handle
# intent_list = {
#     "book_table": "The user wants to book a table at the restaurant",
#     "cancel_reservation": "The user wants to cancel an existing reservation",
#     # "menu_enquiry": "The user wants information about the menu or specials",
#     # "opening_hours": "The user wants to know the restaurant's opening hours",
#     "order_takeout": "The user wants to place a takeout order",
#     "chit_chat": "Queries outside of the other intents specified. Apart from greetings and hellos, the response for this one should be 'Sorry, I can only help you with restaurant queries.'"
# }

# # Slots to be filled by the chatbot for specific intents
# slots_to_fill = {
#     "date": ("book_table"),
#     "time": ("book_table"),
#     "party_size": ("book_table"),
#     "reservationID": ("cancel_reservation"),
#     "order_items": ("order_takeout"),
#     "pickup_time": ("order_takeout")
# }

# # Mapping actions to required slots
# action_slot_pair = {
#     "makeReservation": ("date", "time", "party_size"),
#     "lookUpReservation": ("reservationID"),
#     "cancelReservation": ("reservationID"),
#     "placeTakeoutOrder": ("order_items", "pickup_time")
# }


# Persona sentence for the assistant. It must be the complete string: a
# subscript after the literal would silently reduce it to a single character.
description = "You are a helpful airline assistant. Your job is to help users with flight bookings, changes, check-in, baggage, and travel support."

# Intents the airline chatbot can handle
intent_list = {
    "book_flight": "The user wants to search and book a flight",
    "cancel_booking": "The user wants to cancel an existing flight booking",
    # "flight_status": "The user wants real-time status for a flight",
    # "manage_booking": "The user wants to change seats, add bags, or modify itinerary",
    "check_in": "The user wants to check in for an upcoming flight",
    "chit_chat": "Queries outside of the other intents specified. Apart from greetings and hellos, the response for this one should be 'Sorry, I can only help with airline queries.'"
}

# Slots to be filled, keyed by slot name, with the intent(s) each slot belongs to.
# NOTE: a parenthesised single string such as ("book_flight") is a plain str,
# not a one-element tuple; only the two-intent entries are real tuples.
slots_to_fill = {
    "origin": ("book_flight"),
    "destination": ("book_flight"),
    "depart_date": ("book_flight"),
    "return_date": ("book_flight"),
    "passenger_count": ("book_flight"),
    "booking_reference": ("cancel_booking", "check_in"),
    "last_name": ("cancel_booking", "check_in"),
    "seat_pref": ("check_in")
}

# Mapping actions to required slots
action_slot_pair = {
    "searchAndBookFlight": ("origin", "destination", "depart_date", "return_date", "passenger_count"),
    "lookUpBooking": ("booking_reference", "last_name"),
    "cancelBooking": ("booking_reference", "last_name"),
    "startCheckIn": ("booking_reference", "last_name", "seat_pref")
}

In [4]:
# Preview the string form of the action -> slots mapping.
str(action_slot_pair)

"{'searchAndBookFlight': ('origin', 'destination', 'depart_date', 'return_date', 'passenger_count'), 'lookUpBooking': ('booking_reference', 'last_name'), 'cancelBooking': ('booking_reference', 'last_name'), 'startCheckIn': ('booking_reference', 'last_name', 'seat_pref')}"

In [5]:
from functools import wraps
from langchain.prompts import PromptTemplate
from langchain.schema import LLMResult
from langchain_aws import ChatBedrock
from numpy.random import choice
from tqdm import tqdm
import time

def retry(max_retries=3, retry_delay=5):
    """Decorator factory that re-invokes a callable when it raises.

    The wrapped callable is attempted up to ``max_retries`` times. Between
    failed attempts the wrapper sleeps ``retry_delay`` seconds; it does not
    sleep after the final failure, since nothing follows it.

    :param max_retries: The maximum number of attempts. With a value <= 0 the
        callable is never invoked and the wrapper raises immediately.
    :param retry_delay: The delay in seconds between attempts.
    :return: A decorator that wraps the function.
    :raises Exception: When every attempt failed. The last underlying
        exception is attached as ``__cause__`` so the root error stays visible.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_error = None
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    last_error = e
                    if attempt < max_retries:
                        print(f"Retrying {func.__name__} due to exception: {e}")
                        time.sleep(retry_delay)
            # Same exception type and message as before, now chained to the
            # real failure instead of discarding it.
            raise Exception(
                f"Maximum retries ({max_retries}) exceeded for {func.__name__}"
            ) from last_error

        return wrapper

    return decorator


import boto3
from botocore.config import Config


@retry(max_retries=3, retry_delay=5)
def get_claude_response(
    prompt_string,
    prompt_params,
    model_id="anthropic.claude-3-haiku-20240307-v1:0",
    model_kwargs={
        "max_tokens": 500000,
        "temperature": 1.0,
        # "top_k": 1,
        "top_p": 1,
        "stop_sequences": ['User:', '</assistant>'],
    },
):
    """Render a prompt template and send it to a Bedrock-hosted chat model.

    A ``bedrock-runtime`` client is created in ``us-east-1`` with a 300 second
    read timeout and botocore "standard" retries (max 3 attempts), wrapped in
    ``ChatBedrock``, and invoked through a ``PromptTemplate | model`` chain.

    :param prompt_string: Template text passed to ``PromptTemplate.from_template``.
    :param prompt_params: Mapping of template variables passed to ``chain.invoke``.
    :param model_id: Bedrock model identifier.
    :param model_kwargs: Inference parameters forwarded to ``ChatBedrock``.
        NOTE(review): the default ``max_tokens`` of 500000 looks far above
        typical model output limits - confirm the service accepts it.
    :return: Whatever ``chain.invoke`` returns.
    :raises Exception: Raised by the ``@retry`` wrapper once all attempts fail.
    """
    import copy

    # Configure timeout settings
    client_config = Config(
        read_timeout=300,  # Read timeout in seconds (5 minutes)
        retries={
            'max_attempts': 3,
            'mode': 'standard'
        }
    )

    # Create a custom bedrock client with the config
    bedrock_client = boto3.client(
        service_name="bedrock-runtime",
        region_name="us-east-1",
        config=client_config
    )

    model = ChatBedrock(
        region_name="us-east-1",
        model_id=model_id,
        # The default dict is created once at definition time and shared by
        # every call; hand the model a private copy so it can never be mutated
        # through the model object.
        model_kwargs=copy.deepcopy(model_kwargs),
        client=bedrock_client  # Pass the custom client
    )

    prompt = PromptTemplate.from_template(prompt_string)
    chain = prompt | model  # | SimpleJsonOutputParser() # LCEL
    return chain.invoke(prompt_params)

In [6]:
from prompts import GEN_PROMPT, FINETUNE_PROMPT
import secrets
import random

# Values passed to the generation prompt as 'generated_samples' and 'num_samples'.
generated_samples = 0
num_samples_per_intent = 1


def mix_it_up():
    """Return an extra "be wacky" prompt instruction, or an empty string.

    One uniform random draw is taken; a draw of 0.75 or above yields the
    instruction text, anything lower yields ''.
    """
    if random.random() < 0.75:
        return ''
    return "Be creative with your user generation. Come up with something wild and wacky!! The conversation shouldn't be normal. The agent should still reply like a normal agent however. Just have the user utterances be weird"


# Single generation request, used to eyeball the raw model reply before
# running the full loop.
example = get_claude_response(
    prompt_string=GEN_PROMPT,
    prompt_params=dict(
        intent_list=str(intent_list),
        slot_list=str(slots_to_fill),
        action_list=str(action_slot_pair),
        generated_samples=generated_samples,
        salt_tag=secrets.token_hex(4),
        num_samples=num_samples_per_intent,
        previous_example_hashes=[],
        mix_it_up=mix_it_up(),
    ),
    model_id='anthropic.claude-3-5-sonnet-20240620-v1:0',
)
example.content

'[{"dialogue_id":"book_flight_0_a1b2c3d4","goal":"Book a flight to a city made entirely of cheese","turns":[{"utterance":"I need to fly to Cheddarville, the city made of cheese. Can you help me book that?","system_response":"I apologize, but I\'m not familiar with a city called Cheddarville. Could you please provide a valid origin and destination for your flight?","dialogue_acts":{"intent":"book_flight","action":""},"belief_state":{}},{"utterance":"Oh, sorry! I meant I want to fly from New York to Paris. But can we stop at the Moon Cheese Factory on the way?","system_response":"I understand you\'d like to book a flight from New York to Paris. Unfortunately, I can\'t book stops at the Moon Cheese Factory. Can you please provide your preferred departure date?","dialogue_acts":{"intent":"book_flight","action":""},"belief_state":{"origin":"New York","destination":"Paris"}},{"utterance":"I want to leave on the next blue moon, is that possible?","system_response":"I\'m sorry, but I can\'t bo

In [8]:
import ast
import json
# Parse the reply text as a Python literal and show the turns of its first dialogue.
ast.literal_eval(example.content)[0]['turns']

[{'utterance': 'I need to fly to Cheddarville, the city made of cheese. Can you help me book that?',
  'system_response': "I apologize, but I'm not familiar with a city called Cheddarville. Could you please provide a valid origin and destination for your flight?",
  'dialogue_acts': {'intent': 'book_flight', 'action': ''},
  'belief_state': {}},
 {'utterance': 'Oh, sorry! I meant I want to fly from New York to Paris. But can we stop at the Moon Cheese Factory on the way?',
  'system_response': "I understand you'd like to book a flight from New York to Paris. Unfortunately, I can't book stops at the Moon Cheese Factory. Can you please provide your preferred departure date?",
  'dialogue_acts': {'intent': 'book_flight', 'action': ''},
  'belief_state': {'origin': 'New York', 'destination': 'Paris'}},
 {'utterance': 'I want to leave on the next blue moon, is that possible?',
  'system_response': "I'm sorry, but I can't book flights based on lunar events. Could you please provide a specifi

In [10]:
import ast
import secrets
from tqdm import tqdm

def _parse_generated_dialogues(raw_text):
    """Parse one model reply into a list of dialogue dicts.

    JSON is tried first because ``ast.literal_eval`` rejects the JSON tokens
    ``true``/``false``/``null``. The Python-literal parser is kept as a
    fallback so replies it could already parse still succeed.

    :raises ValueError: If the parsed payload is not a list.
    """
    import json

    try:
        parsed = json.loads(raw_text)
    except ValueError:
        parsed = ast.literal_eval(raw_text)
    if not isinstance(parsed, list):
        raise ValueError(
            f"expected a list of dialogues, got {type(parsed).__name__}"
        )
    return parsed


conv_list = []
previous_example_hashes = []


generated_samples = 0
for i in tqdm(range(100)):
    secret = secrets.token_hex(4)
    example = get_claude_response(prompt_string=GEN_PROMPT,
                              prompt_params={
                                  'intent_list': str(intent_list),
                                  'slot_list': str(slots_to_fill),
                                  'action_list': str(action_slot_pair),
                                  'generated_samples': generated_samples,
                                  'salt_tag': secret,
                                  'num_samples': num_samples_per_intent,
                                  'previous_example_hashes': previous_example_hashes,
                                  'mix_it_up': mix_it_up()
                              },
                              model_id='anthropic.claude-3-5-sonnet-20240620-v1:0')
    try:
        gen_list = _parse_generated_dialogues(example.content)
    except Exception as err:
        # Best effort: a malformed reply is reported and skipped so one bad
        # sample does not abort the whole run.
        print(f"Error printed: {err}, thing than caused it {example.content}")
        continue
    conv_list.extend(gen_list)
    previous_example_hashes.append(secret)
    # Count what was actually produced rather than assuming a fixed number
    # of dialogues per reply.
    generated_samples += len(gen_list)
    

# print(conv_list)

 30%|███       | 30/100 [20:28<1:00:54, 52.20s/it]

Error printed: unterminated string literal (detected at line 1) (<unknown>, line 1), thing than caused it [{"dialogue_id":"book_flight_59_a7b3e9c1","goal":"The user wants to search and book a flight to the moon","turns":[{"utterance":"I need to book a flight to the moon, pronto! My pet rock is homesick.","system_response":"I apologize, but I can only assist with booking flights to Earth destinations. Perhaps I can help you find a flight to a place that might remind your pet rock of the moon?","dialogue_acts":{"intent":"book_flight","action":""},"belief_state":{}},{"utterance":"Fine, let's go to the driest desert on Earth then. My rock likes it crispy.","system_response":"I understand. You're looking for a flight to a very dry desert. The Atacama Desert in Chile is often considered the driest on Earth. Shall I search for flights to Chile for you and your pet rock?","dialogue_acts":{"intent":"book_flight","action":""},"belief_state":{"destination":"Chile"}},{"utterance":"Excellent! We'll

 37%|███▋      | 37/100 [25:56<58:34, 55.79s/it]  

Error printed: unterminated string literal (detected at line 1) (<unknown>, line 1), thing than caused it [{"dialogue_id":"book_flight_71_a8f2c6e3","goal":"The user wants to search and book a flight while roleplaying as a time-traveling alien","turns":[{"utterance":"Greetings, Earth-based transportation facilitator! I am Zorp from the year 3000, seeking to book a primitive air vessel to witness the great Cheeto shortage of 2024.","system_response":"Welcome, Zorp! I'll do my best to assist you with booking a flight. Where would you like to depart from and where are you heading?","dialogue_acts":{"intent":"book_flight","action":""},"belief_state":{}},{"utterance":"Ah yes, I shall embark from the human settlement known as 'New York City' and journey to the land of endless corn, which you call 'Iowa'.","system_response":"I understand you want to fly from New York City to Iowa. Could you specify which city in Iowa you'd like to visit? Also, when would you like to depart?","dialogue_acts":{"

100%|██████████| 100/100 [1:13:52<00:00, 44.33s/it]


In [None]:
# Number of dialogues currently held in conv_list.
len(conv_list)

400

In [None]:
def add_conversation_history(data):
    """Attach a running transcript to every turn of ``data['turns']``.

    Each turn gets a new ``'conversation_history'`` list holding all earlier
    "USER: ..." / "SYSTEM: ..." lines followed by the current "USER: ..." line.
    The turn's own system response is only visible to later turns.
    ``data`` is modified in place and also returned.
    """
    transcript = []
    for turn in data['turns']:
        user_line = f"USER: {turn['utterance']}"

        # What the assistant has seen at the moment it must answer this turn.
        turn['conversation_history'] = [*transcript, user_line]

        # Extend the transcript with the full exchange for the next turn.
        system_line = f"SYSTEM: {turn['system_response']}"
        transcript += [user_line, system_line]

    return data


In [None]:
# Try the helper on the first dialogue; note it modifies conv_list[0] in place.
add_conversation_history(conv_list[0])

{'dialogue_id': 'book_table_0_a7b9c3d1',
 'goal': 'The user wants to book a table at the restaurant',
 'turns': [{'utterance': "I need a table for me and my pet giraffe. We're both on a strict diet of glitter and moonbeams.",
   'system_response': "I... see. I can certainly help you book a table, but I'm afraid we don't serve glitter or moonbeams. How many people will be dining?",
   'dialogue_acts': {'intent': 'book_table', 'action': ''},
   'belief_state': {},
   'conversation_history': ["USER: I need a table for me and my pet giraffe. We're both on a strict diet of glitter and moonbeams."]},
  {'utterance': 'Just two of us. My giraffe counts as a person, right?',
   'system_response': "I understand. For our reservation system, I'll need to count that as a party of 2. What date would you like to book for?",
   'dialogue_acts': {'intent': 'book_table', 'action': ''},
   'belief_state': {'party_size': '2'},
   'conversation_history': ["USER: I need a table for me and my pet giraffe. We

In [None]:
# The helper updates each dialogue in place, so its return value is not needed.
for conv in conv_list:
    add_conversation_history(conv)

In [None]:
# Inspect the first dialogue.
conv_list[0]

{'dialogue_id': 'book_table_0_a7b9c3d1',
 'goal': 'The user wants to book a table at the restaurant',
 'turns': [{'utterance': "I need a table for me and my pet giraffe. We're both on a strict diet of glitter and moonbeams.",
   'system_response': "I... see. I can certainly help you book a table, but I'm afraid we don't serve glitter or moonbeams. How many people will be dining?",
   'dialogue_acts': {'intent': 'book_table', 'action': ''},
   'belief_state': {},
   'conversation_history': ["USER: I need a table for me and my pet giraffe. We're both on a strict diet of glitter and moonbeams."]},
  {'utterance': 'Just two of us. My giraffe counts as a person, right?',
   'system_response': "I understand. For our reservation system, I'll need to count that as a party of 2. What date would you like to book for?",
   'dialogue_acts': {'intent': 'book_table', 'action': ''},
   'belief_state': {'party_size': '2'},
   'conversation_history': ["USER: I need a table for me and my pet giraffe. We

In [None]:
# Everything except the conversation history is identical for every turn, so
# those placeholders are filled once. The history is substituted last so that
# placeholder-looking text inside a user utterance is never expanded.
base_prompt = (
    FINETUNE_PROMPT
    .replace('{description}', description)
    .replace('{intent_list}', str(intent_list))
    .replace('{slot_list}', str(slots_to_fill))
    .replace('{action_list}', str(action_slot_pair))
)

# Maps conversation index -> list of
# (dialogue_id, user utterance, prompt, prompt + expected output) tuples.
training_list = {}
for i, conv in enumerate(conv_list):
    training_list[i] = []
    dialogue_id = conv["dialogue_id"]
    for turn in conv['turns']:
        x_prompt = base_prompt.replace(
            '{conv_history}', '\n'.join(turn['conversation_history'])
        )
        # The expected output is the str() form of a dict appended on a new line.
        y_prompt = x_prompt + '\n' + str({
            'system_response': turn['system_response'],
            'dialogue_acts': turn['dialogue_acts'],
            'belief_state': turn['belief_state']
        })
        training_list[i].append((dialogue_id, turn["utterance"], x_prompt, y_prompt))

In [None]:
# Inspect the training tuples built for the first conversation.
training_list[0]

[('book_table_0_a7b9c3d1',
  "I need a table for me and my pet giraffe. We're both on a strict diet of glitter and moonbeams.",
  '<|start_header_id|>system<|end_header_id|>\nYou are a conversational agent with the following persona:\nYou are a helpful restaurant assistant. Your job is to help users in whatever queries they may have.\n\nALLOWED INTENTS:\n{\'book_table\': \'The user wants to book a table at the restaurant\', \'cancel_reservation\': \'The user wants to cancel an existing reservation\', \'order_takeout\': \'The user wants to place a takeout order\', \'chit_chat\': "Queries outside of the other intents specified. Apart from greetings and hellos, the response for this one should be \'Sorry, I can only help you with restaurant queries.\'"}\n\nALLOWED SLOTS (must match exactly):\n{\'date\': \'book_table\', \'time\': \'book_table\', \'party_size\': \'book_table\', \'reservationID\': \'cancel_reservation\', \'order_items\': \'order_takeout\', \'pickup_time\': \'order_takeout\'}

In [None]:
def extract_llm_response(text: str) -> str:
    """Return the text after the last assistant "Dialogue State" header.

    If the header does not occur, the whole input is returned. In both cases
    surrounding whitespace is stripped.
    """
    marker = "<|start_header_id|>assistant<|end_header_id|>\nDialogue State:\n\n"
    _, _, tail = text.rpartition(marker)
    return tail.strip()

In [None]:
# Element [1] of a training tuple is the user utterance, so this shows the
# utterance of the third conversation's first turn with whitespace stripped.
extract_llm_response(list(training_list.items())[2][1][0][1])

"I'd like to order some takeout for my alien friends. They've just landed and they're famished!"

In [None]:
import pandas as pd
import random

# Fixed seed so the SFT / GRPO split is the same on every run.
SPLIT_SEED = 42

# One record per turn; tuple layout is (dialogue_id, utterance, x_prompt, y_prompt).
flattened_training_list = []
for conv_id, turns in training_list.items():
    for turn_data in turns:
        # Extract once and reuse for both the raw text and the parsed dict.
        ground_truth_text = extract_llm_response(turn_data[3])
        ground_truth = ast.literal_eval(ground_truth_text)
        flattened_training_list.append({
            'conv_id': turn_data[0],
            'user_utterance': turn_data[1],
            'system_utterance': ground_truth['system_response'],
            'input': turn_data[2],
            'output': turn_data[3],
            'ground_truth': ground_truth_text
        })

# Explicit columns keep the frame usable (and 'conv_id' present) even when
# there are no records.
pd_df = pd.DataFrame(
    flattened_training_list,
    columns=['conv_id', 'user_utterance', 'system_utterance',
             'input', 'output', 'ground_truth'],
)

# --- Split by conv_id ---
# Whole conversations go to one side so turns of a dialogue never straddle
# the two sets.
unique_convs = pd_df['conv_id'].unique().tolist()
random.Random(SPLIT_SEED).shuffle(unique_convs)  # seeded shuffle to randomize split

split_idx = int(len(unique_convs) * 0.5)  # 50% SFT, 50% GRPO
sft_convs = set(unique_convs[:split_idx])
grpo_convs = set(unique_convs[split_idx:])

sft_train = pd_df[pd_df['conv_id'].isin(sft_convs)].reset_index(drop=True)
grpo_train = pd_df[pd_df['conv_id'].isin(grpo_convs)].reset_index(drop=True)

In [None]:
# Write the two splits as comma-separated files without the index column.
sft_train.to_csv('sft_train2000.csv', index=False, sep=',')
grpo_train.to_csv('grpo_train2000.csv', index=False, sep=',')

In [None]:
# Also write the full, unsplit frame.
pd_df.to_csv('tester.csv', index=False, sep=',')

In [None]:
# Show the raw fine-tuning template with its placeholders.
print(FINETUNE_PROMPT)

In [None]:
# Recover the expected-output dict for the first turn of the first conversation.
# Training tuples are (dialogue_id, utterance, x_prompt, y_prompt): index 3 is
# the text ending in the serialized dict, while index 1 is the user utterance,
# which is not a Python literal.
ast.literal_eval(extract_llm_response(training_list[0][0][3]))