In [1]:
import numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW

from torch.utils.data import Dataset, DataLoader, ConcatDataset
import pandas as pd
import re
import torch
from sklearn.metrics.pairwise import cosine_similarity
import os
from nltk.corpus import wordnet as wn
import nltk
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts



# Load data from files and handle missing data
def load_data(file_path, file_type='csv'):
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        return None
    try:
        if file_type == 'csv':
            return pd.read_csv(file_path)
        elif file_type == 'json':
            return pd.read_json(file_path)
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None

# Load all required datasets
hotel_df = load_data('new_hotel_data.csv')
ratings_df = load_data('clean_hotel_review_data.json', 'json')
restaurant_df = load_data('new_restaurant_db.restaurants_data.json', 'json')
attraction_df = load_data('attraction.csv')
if hotel_df is None or ratings_df is None or restaurant_df is None or attraction_df is None:
    print("Error: One or more required datasets are missing or improperly loaded. Please check your data files.")
    exit()

# Merge hotel ratings with hotel data on 'hotel_id'
merged_df = hotel_df.merge(ratings_df, on='hotel_id', how='left')
average_ratings = merged_df.groupby('hotel_id')['rating'].mean().reset_index()
average_ratings.columns = ['hotel_id', 'average_rating']
hotel_df = hotel_df.merge(average_ratings, on='hotel_id', how='left')
hotel_df.rename(columns={'average_rating': 'Rating'}, inplace=True)

restaurant_ratings_df = load_data('new_restaurant_db.restaurants_reviews.json', 'json')
restaurant_merged_df = restaurant_df.merge(restaurant_ratings_df, on='restaurant_id', how='left')
average_restaurant_ratings = restaurant_merged_df.groupby('restaurant_id')['rating'].mean().reset_index()
average_restaurant_ratings.columns = ['restaurant_id', 'average_restaurant_ratings']
restaurant_df = restaurant_df.merge(average_restaurant_ratings, on='restaurant_id', how='left')
restaurant_df.rename(columns={'average_restaurant_ratings': 'Rating'}, inplace=True)
restaurant_df['Rating'] = restaurant_df['Rating'].fillna(3.5)

# List of facilities
facilities_list = ['Cable TV', 'Entire apartment', 'Oven stove', 'Car Rental Service', 'Ironing board', 
                   'bathroom', 'Washing machine', 'Cot', 'Entire bungalow', 'Conference Room', 
                   'Fitness Center', 'Outdoor grill', 'Fitness centre', 'Pet-friendly', 'Entire house',
                   'Pet Friendly', 'Airport shuttle', 'Terrace', 'Room Service', 'BBQ Facilities', 
                   '24-Hour Front Desk', 'Child-friendly', 'Kitchen', 'Microwave', 'Free parking', 
                   'Breakfast Buffet', 'Swimming Pool', 'Paid parking', 'Entire villa', 'Air conditioning', 
                   'Balcony', 'Spa', 'Business Center', 'Free breakfast', 'Currency Exchange', 
                   'Outdoor pool', 'Air Conditioning', 'Laundry', 'Golf Course', 'cafe', 'Indoor pool', 
                   'Parking', 'Garden', 'Patio', 'Fireplace', 'Restaurant', 'Smoking Rooms', 
                   'Babysitting Service', 'Heating', 'Paid Wi-Fi', 'Wheelchair accessible', 'Entire chalet', 
                   'Lift', 'Smoke-free', 'Entire cottage', 'Library', 'Hot tub', 'Beach access', 
                   'Family Rooms', 'Free Wi-Fi']


def preprocess_hotel_data(row):
    description = f"{row['name']} and its address is {row['address']} in {row['city']}, costing Rs{row['price']} per night. Small description: {row['about']}."
    available_facilities = [facility for facility in facilities_list if row.get(facility, 0) == 1]
    if available_facilities:
        description += " Available facilities: " + ", ".join(available_facilities) + "."
    else:
        description += " No special facilities are listed."
    return description.strip()

hotel_df['description'] = hotel_df.apply(preprocess_hotel_data, axis=1)

def preprocess_restaurant_data(row):
    return f"{row['name']} located at {row['address']} and its phone number is {row['phone_number']} in {row['city']}. The provided services are {row['service']} and opening hour is {row['Open_hour']}."

def preprocess_attraction_data(row):
    return f"{row['Name']} is a popular attraction located in {row['city']}. Its address is {row['Address']}, its category is {row['Category']}, its rating is {row['Rating']}, and its availability time is {row['Availability']}. Description: {row['Description']}."

restaurant_df['description'] = restaurant_df.apply(preprocess_restaurant_data, axis=1)
attraction_df['description'] = attraction_df.apply(preprocess_attraction_data, axis=1)





# Load the GPT-2 tokenizer and model
gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt_model = GPT2LMHeadModel.from_pretrained('gpt2')

# Set the padding token for the GPT-2 tokenizer
gpt_tokenizer.pad_token = gpt_tokenizer.eos_token  # Set padding token to EOS token

class GPT2BERTDataset(Dataset):
    def __init__(self, descriptions, embeddings, tokenizer):
        self.descriptions = descriptions
        self.embeddings = embeddings
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.descriptions)

    def __getitem__(self, idx):
        # Convert text description to tokens
        description = self.descriptions[idx]
        embedding = self.embeddings[idx]
        
        # Tokenize description with padding
        tokenized_input = self.tokenizer.encode(description, return_tensors='pt', padding='max_length', max_length=100, truncation=True).squeeze(0)
        
        return tokenized_input, torch.tensor(embedding, dtype=torch.float32)

# Load embeddings from the specified files
hotel_embedding_matrix = np.load('hotel_embeddings.npy')  # Load hotel embeddings
restaurant_embedding_matrix = np.load('restaurant_embeddings.npy')  # Load restaurant embeddings
attraction_embedding_matrix = np.load('attraction_embeddings.npy')  # Load attraction embeddings

# Prepare datasets for hotels, restaurants, and attractions
hotel_descriptions = hotel_df['description'].tolist()  # Hotel descriptions
restaurant_descriptions = restaurant_df['description'].tolist()  # Restaurant descriptions
attraction_descriptions = attraction_df['description'].tolist()  # Attraction descriptions

# Create datasets for each type
hotel_dataset = GPT2BERTDataset(hotel_descriptions, hotel_embedding_matrix, gpt_tokenizer)
restaurant_dataset = GPT2BERTDataset(restaurant_descriptions, restaurant_embedding_matrix, gpt_tokenizer)
attraction_dataset = GPT2BERTDataset(attraction_descriptions, attraction_embedding_matrix, gpt_tokenizer)

# Combine datasets into one
combined_dataset = ConcatDataset([hotel_dataset, restaurant_dataset, attraction_dataset])
dataloader = DataLoader(combined_dataset, batch_size=8, shuffle=True)

# Fine-tuning GPT-2 with BERT embeddings
optimizer = AdamW(gpt_model.parameters(), lr=5e-5,weight_decay=1e-2)

scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-6)

# Training loop
for epoch in range(5):  # Adjust number of epochs as necessary
    gpt_model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(dataloader):
        inputs, bert_embeddings = batch
        
        # Move tensors to the appropriate device (GPU if available)
        inputs = inputs.to(gpt_model.device)
        bert_embeddings = bert_embeddings.to(gpt_model.device)

        # Forward pass through GPT-2
        outputs = gpt_model(inputs, labels=inputs)
        
        # Calculate loss and backpropagate
        loss = outputs.loss
        total_loss += loss.item()  # Accumulate total loss
        optimizer.zero_grad()
        
        loss.backward()       
        # Apply gradient clipping
        torch.nn.utils.clip_grad_norm_(gpt_model.parameters(), max_norm=1.0)       
        # Update parameters and scheduler
        optimizer.step()
        scheduler.step()

    
        
        # Print details of each batch
        print(f"Epoch {epoch + 1}, Batch {batch_idx + 1}, Loss: {loss.item()}, Inputs shape: {inputs.shape}, Bert Embeddings shape: {bert_embeddings.shape}")

    # Print the average loss after each epoch
    average_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch + 1} completed. Average Loss: {average_loss}")

# Save the fine-tuned GPT-2 model and tokenizer
gpt_model.save_pretrained('fine_tuned_gpt2')
gpt_tokenizer.save_pretrained('fine_tuned_gpt2')

def generate_response_gpt(prompt):
    inputs = gpt_tokenizer.encode(prompt, return_tensors='pt')
    with torch.no_grad():
        # Adjust the model to take embeddings into account if modified
        outputs = gpt_model.generate(inputs, max_length=100, num_return_sequences=1, temperature=0.7, top_p=0.9)
    
    response = gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated Response: {response}")  # Print the generated response
    return response

# Example usage of the response generation
prompt = "What is the best hotel in Islamabad?"
generate_response_gpt(prompt)




Epoch 1, Batch 1, Loss: 7.114327430725098, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 2, Loss: 5.512115955352783, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 3, Loss: 4.516782283782959, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 4, Loss: 4.149557590484619, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 5, Loss: 3.452545404434204, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 6, Loss: 3.707782506942749, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 7, Loss: 3.983088970184326, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 8, Loss: 3.302690029144287, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 9

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Response: What is the best hotel in Islamabad? and its address is address not available in Islamabad, costing Rs12648 per night. Small description: Check-in time: 14:00Check-out time: 12:00. Available facilities: Conference Room, Spa, Free Wi-Fi.


'What is the best hotel in Islamabad? and its address is address not available in Islamabad, costing Rs12648 per night. Small description: Check-in time: 14:00Check-out time: 12:00. Available facilities: Conference Room, Spa, Free Wi-Fi.'

In [1]:
import numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW

from torch.utils.data import Dataset, DataLoader, ConcatDataset
import pandas as pd
import re
import torch
from sklearn.metrics.pairwise import cosine_similarity
import os
from nltk.corpus import wordnet as wn
import nltk
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts



# Load data from files and handle missing data
def load_data(file_path, file_type='csv'):
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        return None
    try:
        if file_type == 'csv':
            return pd.read_csv(file_path)
        elif file_type == 'json':
            return pd.read_json(file_path)
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None

# Load all required datasets
hotel_df = load_data('new_hotel_data.csv')
ratings_df = load_data('clean_hotel_review_data.json', 'json')
restaurant_df = load_data('new_restaurant_db.restaurants_data.json', 'json')
attraction_df = load_data('attraction.csv')
if hotel_df is None or ratings_df is None or restaurant_df is None or attraction_df is None:
    print("Error: One or more required datasets are missing or improperly loaded. Please check your data files.")
    exit()

# Merge hotel ratings with hotel data on 'hotel_id'
merged_df = hotel_df.merge(ratings_df, on='hotel_id', how='left')
average_ratings = merged_df.groupby('hotel_id')['rating'].mean().reset_index()
average_ratings.columns = ['hotel_id', 'average_rating']
hotel_df = hotel_df.merge(average_ratings, on='hotel_id', how='left')
hotel_df.rename(columns={'average_rating': 'Rating'}, inplace=True)

restaurant_ratings_df = load_data('new_restaurant_db.restaurants_reviews.json', 'json')
restaurant_merged_df = restaurant_df.merge(restaurant_ratings_df, on='restaurant_id', how='left')
average_restaurant_ratings = restaurant_merged_df.groupby('restaurant_id')['rating'].mean().reset_index()
average_restaurant_ratings.columns = ['restaurant_id', 'average_restaurant_ratings']
restaurant_df = restaurant_df.merge(average_restaurant_ratings, on='restaurant_id', how='left')
restaurant_df.rename(columns={'average_restaurant_ratings': 'Rating'}, inplace=True)
restaurant_df['Rating'] = restaurant_df['Rating'].fillna(3.5)

# List of facilities
facilities_list = ['Cable TV', 'Entire apartment', 'Oven stove', 'Car Rental Service', 'Ironing board', 
                   'bathroom', 'Washing machine', 'Cot', 'Entire bungalow', 'Conference Room', 
                   'Fitness Center', 'Outdoor grill', 'Fitness centre', 'Pet-friendly', 'Entire house',
                   'Pet Friendly', 'Airport shuttle', 'Terrace', 'Room Service', 'BBQ Facilities', 
                   '24-Hour Front Desk', 'Child-friendly', 'Kitchen', 'Microwave', 'Free parking', 
                   'Breakfast Buffet', 'Swimming Pool', 'Paid parking', 'Entire villa', 'Air conditioning', 
                   'Balcony', 'Spa', 'Business Center', 'Free breakfast', 'Currency Exchange', 
                   'Outdoor pool', 'Air Conditioning', 'Laundry', 'Golf Course', 'cafe', 'Indoor pool', 
                   'Parking', 'Garden', 'Patio', 'Fireplace', 'Restaurant', 'Smoking Rooms', 
                   'Babysitting Service', 'Heating', 'Paid Wi-Fi', 'Wheelchair accessible', 'Entire chalet', 
                   'Lift', 'Smoke-free', 'Entire cottage', 'Library', 'Hot tub', 'Beach access', 
                   'Family Rooms', 'Free Wi-Fi']


def preprocess_hotel_data(row):
    description = f"{row['name']} and its address is {row['address']} in {row['city']}, costing Rs{row['price']} per night. Small description: {row['about']}."
    available_facilities = [facility for facility in facilities_list if row.get(facility, 0) == 1]
    if available_facilities:
        description += " Available facilities: " + ", ".join(available_facilities) + "."
    else:
        description += " No special facilities are listed."
    return description.strip()

hotel_df['description'] = hotel_df.apply(preprocess_hotel_data, axis=1)

def preprocess_restaurant_data(row):
    return f"{row['name']} located at {row['address']} and its phone number is {row['phone_number']} in {row['city']}. The provided services are {row['service']} and opening hour is {row['Open_hour']}."

def preprocess_attraction_data(row):
    return f"{row['Name']} is a popular attraction located in {row['city']}. Its address is {row['Address']}, its category is {row['Category']}, its rating is {row['Rating']}, and its availability time is {row['Availability']}. Description: {row['Description']}."

restaurant_df['description'] = restaurant_df.apply(preprocess_restaurant_data, axis=1)
attraction_df['description'] = attraction_df.apply(preprocess_attraction_data, axis=1)





# Load the GPT-2 tokenizer and model
gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt_model = GPT2LMHeadModel.from_pretrained('gpt2')

# Set the padding token for the GPT-2 tokenizer
gpt_tokenizer.pad_token = gpt_tokenizer.eos_token  # Set padding token to EOS token

class GPT2BERTDataset(Dataset):
    def __init__(self, descriptions, embeddings, tokenizer):
        self.descriptions = descriptions
        self.embeddings = embeddings
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.descriptions)

    def __getitem__(self, idx):
        # Convert text description to tokens
        description = self.descriptions[idx]
        embedding = self.embeddings[idx]
        
        # Tokenize description with padding
        tokenized_input = self.tokenizer.encode(description, return_tensors='pt', padding='max_length', max_length=100, truncation=True).squeeze(0)
        
        return tokenized_input, torch.tensor(embedding, dtype=torch.float32)

# Load embeddings from the specified files
hotel_embedding_matrix = np.load('hotel_embeddings.npy')  # Load hotel embeddings
restaurant_embedding_matrix = np.load('restaurant_embeddings.npy')  # Load restaurant embeddings
attraction_embedding_matrix = np.load('attraction_embeddings.npy')  # Load attraction embeddings

# Prepare datasets for hotels, restaurants, and attractions
hotel_descriptions = hotel_df['description'].tolist()  # Hotel descriptions
restaurant_descriptions = restaurant_df['description'].tolist()  # Restaurant descriptions
attraction_descriptions = attraction_df['description'].tolist()  # Attraction descriptions

# Create datasets for each type
hotel_dataset = GPT2BERTDataset(hotel_descriptions, hotel_embedding_matrix, gpt_tokenizer)
restaurant_dataset = GPT2BERTDataset(restaurant_descriptions, restaurant_embedding_matrix, gpt_tokenizer)
attraction_dataset = GPT2BERTDataset(attraction_descriptions, attraction_embedding_matrix, gpt_tokenizer)

# Combine datasets into one
combined_dataset = ConcatDataset([hotel_dataset, restaurant_dataset, attraction_dataset])
dataloader = DataLoader(combined_dataset, batch_size=8, shuffle=True)

# Fine-tuning GPT-2 with BERT embeddings
optimizer = AdamW(gpt_model.parameters(), lr=5e-5,weight_decay=1e-2)

scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-6)

# Training loop
for epoch in range(5):  # Adjust number of epochs as necessary
    gpt_model.train()
    total_loss = 0
    for batch_idx, batch in enumerate(dataloader):
        inputs, bert_embeddings = batch
        
        # Move tensors to the appropriate device (GPU if available)
        inputs = inputs.to(gpt_model.device)
        bert_embeddings = bert_embeddings.to(gpt_model.device)

        # Forward pass through GPT-2
        outputs = gpt_model(inputs, labels=inputs)
        
        # Calculate loss and backpropagate
        loss = outputs.loss
        total_loss += loss.item()  # Accumulate total loss
        optimizer.zero_grad()
        
        loss.backward()       
        # Apply gradient clipping
        torch.nn.utils.clip_grad_norm_(gpt_model.parameters(), max_norm=1.0)       
        # Update parameters and scheduler
        optimizer.step()
        scheduler.step()

    
        
        # Print details of each batch
        print(f"Epoch {epoch + 1}, Batch {batch_idx + 1}, Loss: {loss.item()}, Inputs shape: {inputs.shape}, Bert Embeddings shape: {bert_embeddings.shape}")

    # Print the average loss after each epoch
    average_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch + 1} completed. Average Loss: {average_loss}")

# Save the fine-tuned GPT-2 model and tokenizer
gpt_model.save_pretrained('fine_tuned_gpt2')
gpt_tokenizer.save_pretrained('fine_tuned_gpt2')

def generate_response_gpt(prompt):
    inputs = gpt_tokenizer.encode(prompt, return_tensors='pt')
    with torch.no_grad():
        # Adjust the model to take embeddings into account if modified
        outputs = gpt_model.generate(inputs, max_length=100, num_return_sequences=1, temperature=0.7, top_p=0.9)
    
    response = gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Generated Response: {response}")  # Print the generated response
    return response

# Example usage of the response generation
prompt = "What is the best hotel in Islamabad?"
generate_response_gpt(prompt)




Epoch 1, Batch 1, Loss: 7.442346572875977, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 2, Loss: 6.737992763519287, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 3, Loss: 5.2877092361450195, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 4, Loss: 4.9482269287109375, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 5, Loss: 4.0254998207092285, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 6, Loss: 4.365288734436035, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 7, Loss: 4.031280517578125, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batch 8, Loss: 4.055359840393066, Inputs shape: torch.Size([8, 100]), Bert Embeddings shape: torch.Size([8, 768])
Epoch 1, Batc

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Response: What is the best hotel in Islamabad? and its address is address not available in Islamabad, costing Rs12648 per night. Small description: Check-out time: 12:00. Available facilities: Terrace, Spa, Restaurant.


'What is the best hotel in Islamabad? and its address is address not available in Islamabad, costing Rs12648 per night. Small description: Check-out time: 12:00. Available facilities: Terrace, Spa, Restaurant.'

In [10]:
import numpy as np
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW
from sklearn.metrics.pairwise import cosine_similarity

# Load fine-tuned model and tokenizer
gpt_tokenizer = GPT2Tokenizer.from_pretrained('fine_tuned_gpt2')
gpt_model = GPT2LMHeadModel.from_pretrained('fine_tuned_gpt2')
gpt_model.eval()  # Set model to evaluation mode

# Set the pad_token_id to match the EOS token to handle padding correctly
gpt_tokenizer.pad_token = gpt_tokenizer.eos_token  # Set padding token to EOS token
gpt_model.config.pad_token_id = gpt_tokenizer.eos_token_id  # Set pad token ID to eos_token_id

# Load embeddings from the specified files
hotel_embedding_matrix = np.load('hotel_embeddings.npy')  # Load hotel embeddings
restaurant_embedding_matrix = np.load('restaurant_embeddings.npy')  # Load restaurant embeddings
attraction_embedding_matrix = np.load('attraction_embeddings.npy')  # Load attraction embeddings


# Generate response based on the prompt and add top suggestions if requested
def generate_response_gpt(prompt):
    # Encode the prompt and set up attention mask
    inputs = gpt_tokenizer.encode(prompt, return_tensors='pt', padding=True, truncation=True, max_length=100)
    attention_mask = (inputs != gpt_tokenizer.pad_token_id).long()

    with torch.no_grad():
        # Generate response with attention mask and pad token ID
        outputs = gpt_model.generate(
            inputs,
            max_length=100,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            attention_mask=attention_mask,
            pad_token_id=gpt_tokenizer.eos_token_id
        )

    response = gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)

    return response

# User interaction loop
def chatbot():
    print("Welcome to the STELS Chatbot! Type 'exit' to end the chat.")
    while True:
        user_input = input("You: ")
        
        # Exit condition
        if user_input.lower() == "exit":
            print("Chatbot: Thank you for chatting. Goodbye!")
            break
        
        # Generate and print response
        response = generate_response_gpt(user_input)
        print(f"Chatbot: {response}")

# Run chatbot interaction
chatbot()


Welcome to the STELS Chatbot! Type 'exit' to end the chat.


You:  suggest top 5 hotel according to rating inLahore


Chatbot: suggest top 5 hotel according to rating inLahore and its address is address not available in Lahore, costing Rs12648 per night. Small description: Check-out time: 12:00. Available facilities: Terrace, Spa, Restaurant.


You:  suggest  attraction spot in Islmabad  


Chatbot: suggest  attraction spot in Islmabad   located at WJ2X+QG9, Naran, Mansehra, Khyber Pakhtunkhwa and its phone number is 0300 8480022 in Naran. The provided services are not available and opening hour is not available.


You:  exit


Chatbot: Thank you for chatting. Goodbye!
