# Do not execute this code

![Chat exceeded](./iteration_3.png)

Code provided:

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
import random
from collections import Counter, defaultdict
import re
import string
import time

# Intents the assistant can recognise, grouped by dialogue domain.
# The key order here is also the canonical domain order.
INTENTS = dict(
	restaurant=['book_table', 'menu_info', 'opening_hours', 'location', 'price_range'],
	hotel=['book_room', 'check_availability', 'amenities', 'location', 'price_range'],
	flight=['book_flight', 'check_schedule', 'baggage_info', 'check_in', 'flight_status'],
	general=['greeting', 'thanks', 'goodbye', 'help', 'affirm', 'deny'],
)

# Dialogue domains, in the same order as the INTENTS keys.
DOMAINS = list(INTENTS)

# Entity types that can be recognized and substituted into responses, per domain.
# The general domain uses no entities.
ENTITIES = dict(
	restaurant=['cuisine', 'people', 'date', 'time', 'location'],
	hotel=['room_type', 'people', 'check_in', 'check_out', 'location'],
	flight=['origin', 'destination', 'date', 'passengers', 'class'],
	general=[],
)

# Example database of responses based on intents.
# Layout: RESPONSES[domain][intent] -> list of three template strings.
# Templates may contain str.format placeholders such as {people} or {class};
# the placeholder names used below all appear in ENTITIES for the same domain.
RESPONSES = {
	# Restaurant domain: booking, menu, hours, location and price templates.
	'restaurant': {
		'book_table': [
			"I can book a table for {people} on {date} at {time}. Would that work for you?",
			"Let me book that {cuisine} restaurant for {people} people at {time} on {date}.",
			"I'll make a reservation at the {location} restaurant for {people} at {time} on {date}."
		],
		'menu_info': [
			"The {cuisine} restaurant has a wide variety of dishes. Would you like to see the menu?",
			"They serve traditional {cuisine} cuisine. Their specialties include...",
			"The menu changes seasonally, but they always have {cuisine} options."
		],
		'opening_hours': [
			"The restaurant is open from 11am to 10pm daily.",
			"They operate from noon to midnight on weekends, and 11am to 10pm on weekdays.",
			"The {cuisine} restaurant in {location} is open from 10am to 11pm."
		],
		'location': [
			"The restaurant is located at the heart of {location}.",
			"You can find it on Main Street in {location}.",
			"It's near the central square in {location}."
		],
		'price_range': [
			"The {cuisine} restaurant is in the moderate price range.",
			"It's a bit expensive, averaging about $30 per person.",
			"It's quite affordable, with meals around $15-20 per person."
		]
	},
	# Hotel domain: room booking, availability, amenities, location and price templates.
	'hotel': {
		'book_room': [
			"I've booked a {room_type} room for {people} from {check_in} to {check_out}.",
			"Your {room_type} is confirmed for {people} people from {check_in} to {check_out}.",
			"Booking complete: {room_type} for {people} guests, {check_in} to {check_out}."
		],
		'check_availability': [
			"Let me check... Yes, we have {room_type} rooms available for those dates.",
			"There are {room_type} rooms available from {check_in} to {check_out}.",
			"I'm afraid we're fully booked for {room_type} rooms on those dates."
		],
		'amenities': [
			"The hotel offers free WiFi, a pool, and a fitness center.",
			"Amenities include breakfast, parking, and an airport shuttle.",
			"The {room_type} rooms come with a mini-bar, safe, and room service."
		],
		'location': [
			"The hotel is located in downtown {location}, close to all attractions.",
			"It's in a quiet area of {location}, about 2 miles from the city center.",
			"The address is 123 Main Street, {location}."
		],
		'price_range': [
			"The {room_type} room costs $150 per night.",
			"Rates for {room_type} rooms range from $120-200 depending on the season.",
			"For {people} people in a {room_type} room, that would be $180 per night."
		]
	},
	# Flight domain: booking, schedule, baggage, check-in and status templates.
	'flight': {
		'book_flight': [
			"I've booked your flight from {origin} to {destination} on {date} for {passengers} passengers in {class} class.",
			"Your flight is confirmed: {origin} to {destination}, {date}, {passengers} travelers, {class} class.",
			"Booking complete for {passengers} people from {origin} to {destination} on {date}."
		],
		'check_schedule': [
			"There are flights from {origin} to {destination} at 9am, 1pm, and 5pm on {date}.",
			"The first flight from {origin} to {destination} on {date} departs at 7am.",
			"There are no direct flights from {origin} to {destination} on that date."
		],
		'baggage_info': [
			"In {class} class, you can bring one carry-on and one checked bag.",
			"Each passenger can check up to 2 bags of 23kg each.",
			"Baggage fees start at $30 for the first checked bag."
		],
		'check_in': [
			"You can check in online 24 hours before your flight departs.",
			"Check-in opens 3 hours before departure at the airport.",
			"For international flights, please arrive at least 3 hours early for check-in."
		],
		'flight_status': [
			"Your flight from {origin} to {destination} is on time.",
			"I'm afraid the flight from {origin} to {destination} is delayed by 1 hour.",
			"The flight from {origin} to {destination} has been cancelled due to weather."
		]
	},
	# General domain: small-talk templates; none of them contain placeholders.
	'general': {
		'greeting': [
			"Hello! How can I help you today?",
			"Hi there! What can I assist you with?",
			"Good day! How may I be of service?"
		],
		'thanks': [
			"You're welcome! Is there anything else you need?",
			"No problem at all. Happy to help!",
			"My pleasure. Don't hesitate to ask if you need more assistance."
		],
		'goodbye': [
			"Goodbye! Have a nice day!",
			"Thanks for chatting with me. Goodbye!",
			"Farewell! Feel free to come back anytime."
		],
		'help': [
			"I can help with restaurant bookings, hotel reservations, or flight information. What do you need?",
			"How can I assist you today? I handle restaurants, hotels, and flights.",
			"I'm your virtual assistant for travel and dining. What would you like to do?"
		],
		'affirm': [
			"Great! Let's proceed.",
			"Excellent! I'll continue with that.",
			"Perfect! Moving forward."
		],
		'deny': [
			"I understand. Let's try something else then.",
			"No problem. What would you prefer instead?",
			"Got it. Let's explore other options."
		]
	}
}

# Example user queries mapped to intents and entities.
# Layout mirrors RESPONSES: EXAMPLE_QUERIES[domain][intent] -> list of three
# sample user utterances for that intent.
# NOTE(review): nothing in the visible part of this file reads this table;
# presumably the user simulator defined further down samples from it — confirm.
EXAMPLE_QUERIES = {
	# Sample restaurant utterances.
	'restaurant': {
		'book_table': [
			"I want to book a table for 4 people tomorrow at 7pm",
			"Can you reserve a spot at an Italian restaurant for Friday night?",
			"Make a dinner reservation for 2 at 8pm on Saturday"
		],
		'menu_info': [
			"What kind of food do they serve?",
			"Tell me about the menu options",
			"Do they have vegetarian dishes?"
		],
		'opening_hours': [
			"What time does the restaurant open?",
			"Until when are they serving dinner?",
			"Are they open on Sundays?"
		],
		'location': [
			"Where is the restaurant located?",
			"How do I get to the restaurant?",
			"Is it close to downtown?"
		],
		'price_range': [
			"How expensive is this place?",
			"Is it a budget-friendly restaurant?",
			"What's the average cost per person?"
		]
	},
	# Sample hotel utterances.
	'hotel': {
		'book_room': [
			"I need to book a room for 2 people from May 10 to May 15",
			"Can you reserve a suite for next weekend?",
			"Book a deluxe room for 3 nights starting tomorrow"
		],
		'check_availability': [
			"Are there any rooms available for next week?",
			"Do you have any suites for December 24-26?",
			"Check if you have a double room for tomorrow night"
		],
		'amenities': [
			"Does the hotel have a swimming pool?",
			"What amenities are included?",
			"Is breakfast included in the stay?"
		],
		'location': [
			"Where exactly is the hotel located?",
			"How far is it from the airport?",
			"Is it in a safe neighborhood?"
		],
		'price_range': [
			"How much does a room cost per night?",
			"What's the rate for a suite?",
			"Are there any discounts available?"
		]
	},
	# Sample flight utterances.
	'flight': {
		'book_flight': [
			"I want to book a flight from New York to London on July 15",
			"Book tickets for 2 people from Chicago to Miami next Friday",
			"I need a business class flight to Tokyo next month"
		],
		'check_schedule': [
			"When's the next flight to Denver?",
			"What time are flights from LA to San Francisco tomorrow?",
			"Show me the flight schedule for next week"
		],
		'baggage_info': [
			"How many bags can I check in?",
			"What's the weight limit for luggage?",
			"How much does an extra bag cost?"
		],
		'check_in': [
			"When can I check in for my flight?",
			"Is online check-in available?",
			"How early should I arrive at the airport?"
		],
		'flight_status': [
			"Is my flight on time?",
			"Has the flight from Boston been delayed?",
			"Check the status of flight AC123"
		]
	},
	# Sample small-talk utterances.
	'general': {
		'greeting': [
			"Hello there",
			"Hi",
			"Good morning"
		],
		'thanks': [
			"Thank you",
			"Thanks for your help",
			"I appreciate that"
		],
		'goodbye': [
			"Goodbye",
			"Bye for now",
			"See you later"
		],
		'help': [
			"I need some help",
			"What can you do?",
			"How does this work?"
		],
		'affirm': [
			"Yes",
			"That's correct",
			"Sure"
		],
		'deny': [
			"No",
			"I don't think so",
			"Not really"
		]
	}
}

# Simple keyword matching for NLU (Natural Language Understanding)
def extract_intent_and_entities(user_input):
	"""Classify an utterance into (domain, intent) and extract its entities.

	This is a rule-based stand-in for a trained classifier. Keywords are
	matched at the start of a word (so 'bag' matches 'bags' but 'eat' no
	longer fires inside 'seat'); very short general-domain words such as
	'hi' or 'no' must match a whole word.

	Args:
		user_input: The raw user utterance (str). It is lower-cased before
			matching, so every extracted value is lower-case.

	Returns:
		A ``(domain, intent, entities)`` tuple. ``domain`` is one of
		'restaurant', 'hotel', 'flight' or 'general'. ``intent`` falls back
		to 'greeting' when no rule of the detected domain matches, which
		means it may not be a valid intent for that domain. ``entities``
		maps entity names to extracted strings; entities listed in
		``ENTITIES[domain]`` that were not found are filled with defaults.
	"""
	# Normalize text
	text = user_input.lower()
	entities = {}

	def has_keyword(keywords, whole_word=False):
		"""Return True if any keyword begins a word (or is a whole word) in the text."""
		tail = r'\b' if whole_word else ''
		return any(re.search(r'\b' + re.escape(kw) + tail, text) for kw in keywords)

	# Simple keyword matching for domain and intent detection
	# In a real system, this would be a trained classifier
	domain_keywords = {
		'restaurant': ['restaurant', 'table', 'dinner', 'lunch', 'eat', 'food', 'menu', 'cuisine', 'meal'],
		'hotel': ['hotel', 'room', 'suite', 'stay', 'accommodation', 'lodge', 'night', 'motel'],
		'flight': ['flight', 'fly', 'airport', 'plane', 'ticket', 'travel', 'trip', 'departure']
	}

	# The first domain (in the order above) with a matching keyword wins.
	domain = next(
		(name for name, kws in domain_keywords.items() if has_keyword(kws)),
		'general'
	)

	# Keyword groups shared by several domains
	intent_keywords = {
		'book': ['book', 'reserve', 'make', 'schedule', 'get'],
		'info': ['what', 'tell', 'information', 'about', 'kind', 'type'],
		'hours': ['time', 'hour', 'open', 'close', 'until', 'when'],
		'location': ['where', 'location', 'address', 'located', 'direction', 'far'],
		'price': ['cost', 'price', 'expensive', 'inexpensive', 'cheap', 'afford', 'budget', 'money', 'much']
	}

	# Per-domain rules as (intent, keywords, whole_word), checked in priority order.
	intent_rules = {
		'restaurant': [
			('book_table', intent_keywords['book'], False),
			('menu_info', intent_keywords['info'], False),
			('opening_hours', intent_keywords['hours'], False),
			('location', intent_keywords['location'], False),
			('price_range', intent_keywords['price'], False),
		],
		'hotel': [
			('book_room', intent_keywords['book'], False),
			('check_availability', ['available', 'availability'], False),
			('amenities', ['amenities', 'amenity', 'facility', 'facilities', 'service'], False),
			('location', intent_keywords['location'], False),
			('price_range', intent_keywords['price'], False),
		],
		'flight': [
			# NOTE: 'schedule' is also a booking keyword, so "flight schedule"
			# is still classified as book_flight.
			('book_flight', intent_keywords['book'], False),
			# Specific phrases come before the generic 'when'/'time' schedule
			# words so "on time" and "when can I check in" are not swallowed.
			('baggage_info', ['bag', 'luggage'], False),
			('check_in', ['check in', 'check-in', 'checkin'], False),
			('flight_status', ['status', 'delay', 'on time'], False),
			('check_schedule', ['schedule', 'when', 'time'], False),
		],
		'general': [
			# Short words must match whole words: 'hi' is inside "this",
			# 'no' is inside "know".
			('greeting', ['hello', 'hi', 'hey'], True),
			('thanks', ['thank'], False),
			('goodbye', ['bye', 'goodbye'], False),
			('help', ['help', 'can you'], False),
			('affirm', ['yes', 'sure', 'correct'], True),
			('deny', ['no', 'not', "don't"], True),
		],
	}

	# Default intent if nothing is matched
	intent = 'greeting'
	for candidate, kws, whole_word in intent_rules[domain]:
		if has_keyword(kws, whole_word):
			intent = candidate
			break

	# Simple entity extraction (very basic)
	weekdays = 'monday|tuesday|wednesday|thursday|friday|saturday|sunday'
	months = (
		'january|february|march|april|may|june|july|august|'
		'september|october|november|december'
	)
	# A date is a relative day, a weekday, a month (optionally with a day
	# number) or a numeric d/m or d-m pair.
	date_token = (
		r'(?:today|tomorrow|' + weekdays
		+ r'|(?:' + months + r')(?:\s+\d{1,2})?'
		+ r'|\d{1,2}[/-]\d{1,2})'
	)

	# Each pattern exposes the value to store in its (single non-None) group.
	regex_entities = {
		# "for N" is not a party size when N is a duration or a clock time.
		'people': (
			r'\b(\d+) people\b'
			r'|\bfor (\d+)\b(?!\s*(?:nights?|days?|weeks?|[ap]\.?m\b|[:/-]))'
		),
		'date': r'\b(' + date_token + r')\b',
		# A bare number is not a time: require am/pm or minutes.
		'time': (
			r'\b(\d{1,2}(?::\d{2})?\s*(?:(?:am|pm)\b|[ap]\.m\.)'
			r'|\d{1,2}:\d{2}\b|noon\b|midnight\b)'
		),
		'check_in': r'\bfrom (' + date_token + r')\b',
		'check_out': r'\b(?:until|to) (' + date_token + r')\b',
		# Only the single word after the marker is captured ("new" for "new york").
		'origin': r"\b(?:departing from|leaving from|leaving|from)\s+([a-z][a-z'-]*)",
		'passengers': r'\b(\d+) (?:passengers?|persons?|people|travelers?)\b',
	}
	for entity_name, pattern in regex_entities.items():
		match = re.search(pattern, text)
		if match:
			# Use the first non-None group
			entities[entity_name] = next(
				(g for g in match.groups() if g is not None), match.group(0)
			)

	# Heuristic: an infinitive "to" ("want to book ...") usually precedes the
	# route, so the last "to <word>" is taken as the destination.
	destinations = re.findall(r"\b(?:arriving at|going to|to)\s+([a-z][a-z'-]*)", text)
	if destinations:
		entities['destination'] = destinations[-1]

	# Keyword-valued entities: the first listed option found as a whole word
	# (optionally pluralised) is stored as the value.
	keyword_entities = {
		'cuisine': ['italian', 'chinese', 'mexican', 'french', 'indian', 'japanese', 'thai'],
		'location': ['downtown', 'city center', 'uptown'],
		'room_type': ['single', 'double', 'suite', 'deluxe', 'standard', 'king', 'queen'],
		# 'premium economy' is listed first so plain 'economy' does not shadow it.
		'class': ['premium economy', 'economy', 'business', 'first class']
	}
	for entity_name, options in keyword_entities.items():
		found = next(
			(opt for opt in options if re.search(r'\b' + re.escape(opt) + r's?\b', text)),
			None
		)
		if found is not None:
			entities[entity_name] = found

	# No named area found: fall back to the word following a proximity marker.
	if 'location' not in entities:
		nearby = re.search(r"\b(?:near|next to|by the)\s+(?:the\s+)?([a-z][a-z'-]*)", text)
		if nearby:
			entities['location'] = nearby.group(1)

	# Fill in default values for required entities if not found
	default_entities = {
		'cuisine': 'Italian',
		'people': '2',
		'date': 'tomorrow',
		'time': '7:00pm',
		'location': 'downtown',
		'room_type': 'standard',
		'check_in': 'tomorrow',
		'check_out': 'in 3 days',
		'origin': 'New York',
		'destination': 'London',
		'passengers': '1',
		'class': 'economy'
	}

	# Only add defaults for the entities the detected domain's responses use
	for entity in ENTITIES.get(domain, []):
		if entity not in entities and entity in default_entities:
			entities[entity] = default_entities[entity]

	return domain, intent, entities

class DialogueManager:
	"""Track dialogue state across turns and produce templated replies.

	Attributes (all cleared by ``reset``):
		current_domain: Last non-general domain detected, or the first
			domain seen; None before any input.
		current_intent: Last intent that was valid for its detected domain.
		collected_entities: Entities accumulated over the whole dialogue.
		conversation_history: List of ``(speaker, text)`` tuples.
		state_memory: One state snapshot dict per processed user turn.
	"""

	def __init__(self):
		self.reset()

	def process_input(self, user_input):
		"""Update the state from one user utterance and return the reply.

		Args:
			user_input: The raw user utterance.

		Returns:
			The system response string, which is also appended to
			``conversation_history``.
		"""
		domain, intent, entities = extract_intent_and_entities(user_input)

		# A general utterance (e.g. "thanks") must not wipe out the task
		# domain the user is already in.
		if domain != 'general' or self.current_domain is None:
			self.current_domain = domain

		# The extractor falls back to 'greeting' when nothing matched; only
		# accept intents that are valid for the detected domain.
		if intent in INTENTS[domain]:
			self.current_intent = intent

		# Update collected entities
		self.collected_entities.update(entities)

		# Log a snapshot of the current state (entities copied so later
		# turns do not mutate it)
		current_state = {
			'domain': self.current_domain,
			'intent': self.current_intent,
			'entities': self.collected_entities.copy()
		}
		self.state_memory.append(current_state)

		# Add to conversation history
		self.conversation_history.append(('user', user_input))

		# Generate response
		response = self.generate_response()
		self.conversation_history.append(('system', response))

		return response

	def generate_response(self):
		"""Build a reply for the current domain/intent from ``RESPONSES``.

		Returns:
			A filled-in template, or a clarification request when no
			template exists for the current state or a placeholder has no
			collected value.
		"""
		clarification = "I'm not sure what you're asking about. Could you clarify?"
		if not self.current_domain or not self.current_intent:
			return clarification

		# Get response templates for the current domain and intent
		templates = RESPONSES.get(self.current_domain, {}).get(self.current_intent)
		if templates is None:
			# The domain is sticky, so a general intent such as 'thanks' can
			# be current while a task domain is active; answer it from the
			# general templates instead of raising KeyError.
			templates = RESPONSES['general'].get(self.current_intent)
		if not templates:
			# Stale intent left over from a previous domain: ask again.
			return clarification

		# Select a random template
		template = random.choice(templates)

		# Fill in entity values using collected entities
		try:
			return template.format(**self.collected_entities)
		except KeyError:
			# If we're missing required entities, fall back to a generic response
			return f"I understand you want to {self.current_intent.replace('_', ' ')}. Could you provide more details?"

	def reset(self):
		"""Clear all dialogue state so a new conversation can start."""
		self.current_domain = None
		self.current_intent = None
		self.collected_entities = {}
		self.conversation_history = []
		self.state_memory = []

class RLDialogueAgent:
	"""Tabular Q-learning agent that picks a response template per dialogue state.

	An action is an index into ``all_responses``, the flattened list of
	``(domain, intent, template)`` tuples built from ``RESPONSES``. The
	Q-table maps a state key (see ``get_state_key``) to one value per action.
	"""

	def __init__(self):
		self.dialogue_manager = DialogueManager()
		self.alpha = 0.1  # Learning rate
		self.gamma = 0.9  # Discount factor
		self.epsilon = 0.3  # Exploration rate
		self.decay_rate = 0.995  # Decay rate for exploration

		# Flatten all responses for action selection
		self.all_responses = []
		for domain, intents in RESPONSES.items():
			for intent, templates in intents.items():
				self.all_responses.extend((domain, intent, template) for template in templates)

		# One Q-value per action. The row length must equal the number of
		# flattened templates; sizing it by len(RESPONSES) (the number of
		# domains) makes every action index past the first few out of range.
		self.q_table = defaultdict(lambda: np.zeros(len(self.all_responses)))

	def get_state_key(self, state):
		"""Convert state to a hashable key for Q-table.

		Args:
			state: Dict with 'domain', 'intent' and optionally 'entities'.

		Returns:
			``"domain:intent:entities"`` where entities is a sorted,
			comma-joined list of the cuisine / room_type / class values
			present ('unknown' replaces a missing domain or intent).
		"""
		domain = state['domain'] if state['domain'] else 'unknown'
		intent = state['intent'] if state['intent'] else 'unknown'

		# Include only the most important entities in the state key
		key_entities = []
		if 'entities' in state and state['entities']:
			for entity_type in ['cuisine', 'room_type', 'class']:
				if entity_type in state['entities']:
					key_entities.append(f"{entity_type}:{state['entities'][entity_type]}")

		return f"{domain}:{intent}:{','.join(sorted(key_entities))}"

	def _candidate_actions(self, state):
		"""Return action indices matching the state's domain and intent,
		or every general-domain action when there is no such match."""
		matching = [
			idx for idx, (domain, intent, _) in enumerate(self.all_responses)
			if domain == state['domain'] and intent == state['intent']
		]
		if matching:
			return matching
		return [idx for idx, (domain, _, _) in enumerate(self.all_responses) if domain == 'general']

	def select_response(self, state, explore=True):
		"""Select response based on Q-values and exploration strategy.

		Args:
			state: Dialogue state dict (see ``get_state_key``).
			explore: When True, pick a random candidate with probability
				``epsilon``; otherwise always pick greedily.

		Returns:
			The index into ``all_responses`` of the chosen template.
		"""
		state_key = self.get_state_key(state)
		candidates = self._candidate_actions(state)

		if explore and random.random() < self.epsilon:
			# Exploration: random response among the candidates
			if candidates:
				return random.choice(candidates)
			return random.randint(0, len(self.all_responses) - 1)

		q_values = self.q_table[state_key]
		if candidates:
			# Exploitation: highest Q-value among the candidates; on ties the
			# earliest candidate wins.
			return max(candidates, key=lambda idx: q_values[idx])

		# No candidates at all: best known action, or random when nothing
		# has been learned for this state yet.
		if np.sum(q_values) > 0:
			return int(np.argmax(q_values))
		return random.randint(0, len(self.all_responses) - 1)

	def update_q_table(self, state, action, reward, next_state):
		"""Update Q-value based on reward and estimated future reward.

		Applies ``Q(s,a) += alpha * (reward + gamma * max Q(s',.) - Q(s,a))``.

		Args:
			state: State the action was taken in.
			action: Index of the chosen response template.
			reward: Reward received for the action.
			next_state: State observed after the action.
		"""
		state_key = self.get_state_key(state)
		next_state_key = self.get_state_key(next_state)

		# Calculate target value (reward + discounted future reward)
		next_max = np.max(self.q_table[next_state_key])
		target = reward + self.gamma * next_max

		# Update Q-value
		q_row = self.q_table[state_key]
		q_row[action] += self.alpha * (target - q_row[action])

	def decay_exploration(self):
		"""Reduce exploration rate over time"""
		self.epsilon *= self.decay_rate

class DialogueSimulator:
	def __init__(self):
		"""Create the learning agent, the simulated user and empty metric stores."""
		# Participants of the simulated dialogue
		self.rl_agent = RLDialogueAgent()
		self.user_simulator = SimpleUserSimulator()

		# Per-dialogue metrics, one entry appended per finished dialogue
		self.reward_history, self.conversation_lengths, self.satisfaction_scores = [], [], []

		# How often each domain and "domain:intent" pair has been seen
		self.domain_coverage, self.intent_coverage = Counter(), Counter()
	
	def run_dialogue(self, max_turns=10):
		"""Run a single dialogue between agent and simulated user.

		Each turn the agent picks a response template for the current
		state, the simulated user rates it and produces the next message,
		and the Q-table is updated with the scaled rating as reward.

		Args:
			max_turns: Maximum number of agent turns before the dialogue
				is cut off.

		Returns:
			Dict with 'turns', 'avg_reward' and 'total_reward' for this
			dialogue. The average reward is also appended to
			``reward_history`` and ``satisfaction_scores``, and the turn
			count to ``conversation_lengths``.
		"""
		agent = self.rl_agent
		manager = agent.dialogue_manager
		manager.reset()
		self.user_simulator.reset()

		def latest_state():
			"""Return the newest state snapshot, or an empty state before any input."""
			if manager.state_memory:
				return manager.state_memory[-1]
			return {'domain': None, 'intent': None, 'entities': {}}

		# Initial user message
		user_message, true_domain, true_intent = self.user_simulator.generate_message()
		self.domain_coverage[true_domain] += 1
		self.intent_coverage[f"{true_domain}:{true_intent}"] += 1

		# Feed the opening message to the dialogue manager so the first
		# action is chosen from the state it implies; without this the first
		# response is always selected from an empty "unknown" state.
		manager.process_input(user_message)

		dialogue_rewards = []
		turns = 0
		done = False

		while not done and turns < max_turns:
			# Get current state before response
			current_state = latest_state()

			# Agent selects response
			action_idx = agent.select_response(current_state)
			_, _, response_template = agent.all_responses[action_idx]

			# Agent generates response using the selected template
			try:
				response = response_template.format(**manager.collected_entities)
			except KeyError:
				# If we're missing required entities, show the placeholder
				# names without their braces
				response = response_template.replace("{", "").replace("}", "")

			# Update dialogue manager with the selected response
			manager.conversation_history.append(('system', response))

			# Get user feedback on the response
			user_rating = self.user_simulator.rate_response(response, true_domain, true_intent)
			# Scale rating to 0-1 (presumably ratings are on a 0-5 scale;
			# verify against the user simulator)
			reward = user_rating / 5.0

			# User generates next message or ends dialogue
			next_user_message, next_domain, next_intent = self.user_simulator.generate_message(response)

			if next_user_message.lower() in ['bye', 'goodbye', 'exit', 'end']:
				done = True
			else:
				# Process user message
				manager.process_input(next_user_message)

				# Track domains and intents seen
				self.domain_coverage[next_domain] += 1
				self.intent_coverage[f"{next_domain}:{next_intent}"] += 1

				# Update for next turn
				true_domain, true_intent = next_domain, next_intent

			# Get next state after user message and processing (unchanged
			# from current_state when the user ended the dialogue)
			next_state = latest_state()

			# Update Q-table with reward
			agent.update_q_table(current_state, action_idx, reward, next_state)

			# Track reward
			dialogue_rewards.append(reward)
			turns += 1

		# Calculate dialogue success metrics
		total_reward = sum(dialogue_rewards)
		avg_reward = total_reward / turns if turns > 0 else 0
		self.reward_history.append(avg_reward)
		self.conversation_lengths.append(turns)
		self.satisfaction_scores.append(avg_reward)

		return {
			'turns': turns,
			'avg_reward': avg_reward,
			'total_reward': total_reward
		}
	
	def train(self, episodes=500):
		"""Train the agent over multiple dialogues"""
		episode_rewards = []
		episode_turns = []
		episode_satisfaction = []
		
		for episode in range(episodes):
			results = self.run_dialogue()
			episode_rewards.append(results['avg_reward'])
			episode_turns.append(results['turns'])
			episode_satisfaction.append(results['avg_reward'])
			
			# Decay exploration rate
			self.rl_agent.decay_exploration()
			
			# Print progress
			if (episode + 1) % 50 == 0:
					print(f"Episode {episode+1}/{episodes}, " 
							f"Avg Reward: {np.mean(episode_rewards[-50:]):.3f}, "
							f"Avg Turns: {np.mean(episode_turns[-50:]):.1f}, "
							f"Exploration Rate: {self.rl_agent.epsilon:.3f}")
		
		# Return training metrics
		return {
			'rewards': episode_rewards,
			'turns': episode_turns,
			'satisfaction': episode_satisfaction,
			'domain_coverage': dict(self.domain_coverage),
			'intent_coverage': dict(self.intent_coverage)
		}
	
	def generate_conversation(self, max_turns=10):
		"""Generate a conversation using the trained model (no exploration)"""
		self.rl_agent.dialogue_manager.reset()
		self.user_simulator.reset()
		
		conversation = []
		
		# Initial user message
		user_message, true_domain, true_intent = self.user_simulator.generate_message()
		conversation.append(('User', user

SyntaxError: incomplete input (3227908792.py, line 684)