<a href="https://colab.research.google.com/github/Savisolanki/Flight_Booking-NLP/blob/main/Flight_Booking_using_NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import torch
from transformers import BertTokenizer, BertModel

# Load the pretrained uncased BERT encoder and its matching tokenizer once, at
# notebook start-up. Both come from the same 'bert-base-uncased' checkpoint and
# are kept as module-level globals.
model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


In [3]:
def preprocess_text(text: str) -> str:
    """Normalize a piece of text for matching.

    The text is converted to lower case and then has its leading and trailing
    whitespace removed.

    Args:
        text: The string to normalize.

    Returns:
        The lower-cased, whitespace-trimmed string.
    """
    lowered = text.lower()
    return lowered.strip()

# Function to generate BERT embeddings
def generate_bert_embeddings(text):
    """Encode ``text`` with the global BERT model and return its first-token embedding.

    The text is tokenized with the module-level ``tokenizer`` (truncated to at
    most 512 tokens) and passed through the module-level ``model``.

    Args:
        text: The input handed to the tokenizer (a single string in this notebook).

    Returns:
        A NumPy array holding position 0 of ``last_hidden_state`` along the
        sequence axis, i.e. one embedding row per encoded input.
    """
    encoded = tokenizer(text, max_length=512, truncation=True, return_tensors='pt')
    # Inference only: disable gradient tracking so the result can be converted
    # to NumPy directly.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    # Keep only the first position of every sequence (the original code names
    # this the CLS embedding).
    return hidden_states[:, 0, :].numpy()


In [4]:
# Load the raw flight-booking dataset from the Colab upload location.
df_flight = pd.read_csv('/content/Flight Booking.csv')

# Work on a sample of at most SAMPLE_SIZE records for faster execution.
# - The sample size is capped at the dataset length so that a file with fewer
#   rows does not make DataFrame.sample raise ValueError.
# - The seed is fixed so that "Restart & Run All" reproduces the same sample
#   (and therefore the same recommendations).
SAMPLE_SIZE = 1000
RANDOM_SEED = 42
df_flight_sample = df_flight.sample(
    n=min(SAMPLE_SIZE, len(df_flight)),
    random_state=RANDOM_SEED,
).copy()

# Normalize the city names, build the "<source> to <destination>" route text
# and compute one BERT embedding per sampled row.
df_flight_sample['source_city'] = df_flight_sample['source_city'].apply(preprocess_text)
df_flight_sample['destination_city'] = df_flight_sample['destination_city'].apply(preprocess_text)
df_flight_sample['text_combined'] = df_flight_sample['source_city'] + ' to ' + df_flight_sample['destination_city']
df_flight_sample['bert_embeddings'] = df_flight_sample['text_combined'].apply(generate_bert_embeddings)


In [5]:
def calculate_similarity(user_input_embedding, flight_embeddings):
    """Compute the cosine similarity between one query embedding and many flight embeddings.

    Args:
        user_input_embedding: Array holding the query embedding; it is reshaped
            to a single row before the comparison.
        flight_embeddings: 2-D array with one flight embedding per row.

    Returns:
        A flat 1-D array with one similarity score per row of
        ``flight_embeddings``.
    """
    query_row = user_input_embedding.reshape(1, -1)
    score_matrix = cosine_similarity(query_row, flight_embeddings)
    # cosine_similarity returns a (1, n_flights) matrix; callers want a flat vector.
    return score_matrix.flatten()

def generate_bert_embeddings_batch(texts):
    """Encode a batch of texts with the global BERT model in one forward pass.

    The texts are tokenized together with the module-level ``tokenizer``
    (padded to a common length, truncated to at most 512 tokens) and passed
    through the module-level ``model``.

    Args:
        texts: The batch of inputs handed to the tokenizer (a list of strings
            in this notebook).

    Returns:
        A NumPy array holding position 0 of ``last_hidden_state`` along the
        sequence axis, one row per input text.
    """
    encoded_batch = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors='pt',
    )
    # Inference only: disable gradient tracking so the result can be converted
    # to NumPy directly.
    with torch.no_grad():
        hidden_states = model(**encoded_batch).last_hidden_state
    return hidden_states[:, 0, :].numpy()


In [8]:
def process_and_recommend(user_input, df_sample, batch_size=100, top_n=5):
    """Rank the flights in ``df_sample`` by similarity to ``user_input``.

    The user's text and every row's ``text_combined`` value are embedded with
    BERT, compared with cosine similarity, and the best-scoring rows are
    returned. ``df_sample`` itself is left untouched: scores are attached to a
    new frame, so passing a filtered slice does not trigger pandas'
    SettingWithCopyWarning and no state leaks between calls.

    Args:
        user_input: Free-text request typed by the user.
        df_sample: DataFrame with at least the columns ``text_combined``,
            ``airline``, ``flight`` and ``price``.
        batch_size: Number of rows embedded per BERT forward pass.
        top_n: Number of recommendations to return.

    Returns:
        A DataFrame with the columns ``airline``, ``flight`` and ``price`` for
        the ``top_n`` most similar rows, best match first. It is empty when
        ``df_sample`` has no rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    result_columns = ['airline', 'flight', 'price']

    # Nothing to rank (for example an airline filter matched no rows): return
    # an empty result without paying for a BERT forward pass.
    if len(df_sample) == 0:
        return df_sample[result_columns].head(top_n)

    # Embed the user's request once; it is compared against every batch.
    user_input_embedding = generate_bert_embeddings(user_input)

    route_texts = df_sample['text_combined'].tolist()
    similarity_batches = []
    for start in range(0, len(route_texts), batch_size):
        batch_embeddings = generate_bert_embeddings_batch(route_texts[start:start + batch_size])
        similarity_batches.append(calculate_similarity(user_input_embedding, batch_embeddings))

    # assign() builds a new DataFrame, so the caller's frame is not mutated.
    # The concatenated scores are in row order and are attached positionally.
    df_scored = df_sample.assign(similarity_score=np.concatenate(similarity_batches))

    # Sort by similarity score, best match first.
    df_sorted = df_scored.sort_values(by='similarity_score', ascending=False)
    return df_sorted[result_columns].head(top_n)

# Function to filter flights by airline
def filter_flights_by_airline(df_sample, airline):
    """Return the rows of ``df_sample`` whose airline matches ``airline``.

    Matching is case-insensitive and treats underscores and runs of whitespace
    as a single space, so a user-typed "air india" matches a dataset value such
    as "Air_India". Rows with a missing airline never match.

    Args:
        df_sample: DataFrame with an ``airline`` column of strings.
        airline: Airline name to look for.

    Returns:
        An independent copy of the matching rows (possibly empty), so callers
        can add columns to it without affecting ``df_sample``.
    """
    # Normalize the requested name: lower case, underscores -> spaces,
    # collapse/trim whitespace.
    wanted = ' '.join(airline.lower().replace('_', ' ').split())

    # Apply the same normalization to the column; missing values stay NaN and
    # therefore compare as False.
    airline_names = (
        df_sample['airline']
        .str.lower()
        .str.replace('_', ' ', regex=False)
        .str.split()
        .str.join(' ')
    )
    return df_sample[airline_names == wanted].copy()


In [10]:
def _extract_airline(user_input):
    """Return the airline name that follows the word "of" in ``user_input``.

    The lookup is case-insensitive and ignores punctuation attached to words,
    so "Show flights Of Vistara." yields "Vistara". An empty string is
    returned when the text has no stand-alone "of" or nothing follows it.
    """
    words = [word.strip('.,!?;:') for word in user_input.split()]
    lowered_words = [word.lower() for word in words]
    if 'of' not in lowered_words:
        return ''
    trailing_words = words[lowered_words.index('of') + 1:]
    return ' '.join(word for word in trailing_words if word)


# Interactive chat loop: read a request, optionally narrow the candidates to
# one airline, and print the best-matching flights. Type "exit" to stop.
while True:
    user_input = input("You: ")
    if user_input.strip().lower() == 'exit':
        print("Bot: Goodbye!")
        break

    # An airline filter is applied only when a name can actually be extracted
    # after a stand-alone "of". Substring checks for 'only' / 'flights' / 'of'
    # are not used: they also fire on words such as "off", and without an
    # "of" they produce an empty airline name that filters out every flight.
    candidates = df_flight_sample
    airline = _extract_airline(user_input)
    if airline:
        df_filtered = filter_flights_by_airline(df_flight_sample, airline)
        if df_filtered.empty:
            # Do not present an empty table as "recommendations".
            print(f"Bot: No flights matched airline '{airline}'; searching all airlines instead.")
        else:
            candidates = df_filtered

    recommendations = process_and_recommend(user_input, candidates)

    print("Bot: Here are the top flight recommendations based on your input:")
    print(recommendations)


You: I want to book a ticket from Mumbai To Delhi
Bot: Here are the top flight recommendations based on your input:
        airline   flight    price
152638  AirAsia  I5-1543   6131.0
153225  Vistara   UK-874  10018.0
153601   Indigo   6E-913   9073.0
154395  AirAsia   I5-974   2228.0
153266  Vistara   UK-894  11488.0
You: Show me the flight with less than 4 hours.
Bot: Here are the top flight recommendations based on your input:
        airline   flight   price
30397   Vistara   UK-927  5772.0
33061    Indigo  6E-6205  3013.0
29168   Vistara   UK-995  9840.0
31726   Vistara   UK-637  6533.0
32916  GO_FIRST   G8-422  2098.0
You: I need a morning flight from Mumbai To Kolkata M
Bot: Here are the top flight recommendations based on your input:
        airline   flight    price
3679  Air_India   AI-429   4170.0
4769  Air_India   AI-429   4170.0
8748   GO_FIRST   G8-266  11291.0
3254    Vistara   UK-879   5227.0
4193  Air_India  AI-9609   6654.0
You: exit
Bot: Goodbye!
