In [1]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

In [2]:
# Fetch the NLTK resources used by preprocess_text: the tokenizer models behind
# word_tokenize and the English stop-word list.
# NLTK 3.8.2+ loads the tokenizer from 'punkt_tab' rather than the older pickled
# 'punkt' package, so both are requested to keep the notebook runnable on either.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to C:\Users\Aaditya
[nltk_data]     Mehta\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Aaditya
[nltk_data]     Mehta\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
# Flight schedule table; the path is relative to the notebook's working directory.
FLIGHTS_CSV = "Flight_Booking_System.flights.csv"
data = pd.read_csv(FLIGHTS_CSV)

In [4]:
# Preview the first five rows of the loaded flight table.
data.head(n=5)

Unnamed: 0,_id,airline,flightNumber,origin,destination,daysOfWeek,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday,scheduledDepartureTime,scheduledArrivalTime,flightTime,flightTimeMinutes,rate,seatAvail
0,65e3679ccb66225971331563,GoAir,562,Kochi,Mumbai,"Sunday,Monday,Tuesday,Wednesday,Thursday,Frida...",Y,Y,Y,Y,Y,Y,Y,00:00,10:50,10:50,650,1200.0,180
1,65e3679ccb66225971331564,IndiGo,122,Pune,Goa,"Sunday,Monday,Tuesday,Wednesday,Thursday,Frida...",Y,Y,Y,Y,Y,Y,Y,00:05,01:05,01:00,60,2040.0,180
2,65e3679ccb66225971331565,IndiGo,6825,Bengaluru,Chennai,"Sunday,Monday,Tuesday,Wednesday,Thursday,Frida...",Y,Y,Y,Y,Y,Y,Y,00:05,01:10,01:05,65,2210.0,180
3,65e3679ccb66225971331566,IndiGo,5221,Mumbai,Goa,Tuesday,N,Y,N,N,N,N,N,00:05,01:15,01:10,70,2380.0,180
4,65e3679ccb66225971331567,GoAir,380,Goa,Mumbai,"Sunday,Monday,Tuesday,Wednesday,Thursday,Frida...",Y,Y,Y,Y,Y,Y,Y,00:05,01:20,01:15,75,2550.0,180


In [5]:
def preprocess_text(text):
    """Tokenize free text into lowercase alphabetic words without stop words.

    Parameters:
    - text (str): The raw user query.

    Returns:
    - list[str]: Lowercased tokens that are purely alphabetic and are not
      English stop words, in their original order.
    """
    stop_words = set(stopwords.words('english'))
    # Lowercase BEFORE the stop-word test: the stop-word entries are lowercase,
    # so checking the original casing let words such as "The" or "Is" through.
    lowered_tokens = (word.lower() for word in word_tokenize(text))
    return [word for word in lowered_tokens if word.isalpha() and word not in stop_words]

In [6]:
def display_flights(origin, dest):
    """Return the direct flights in ``data`` from ``origin`` to ``dest``.

    The comparison ignores letter case so that it agrees with
    check_direct_flight_exists; with an exact-case match, input such as
    "pune" passed the existence check but selected no rows here.

    Parameters:
    - origin (str): Value to match against the ``origin`` column.
    - dest (str): Value to match against the ``destination`` column.

    Returns:
    - pandas.DataFrame: The matching rows of ``data`` (possibly empty).
    """
    origin_matches = data['origin'].str.lower() == origin.lower()
    dest_matches = data['destination'].str.lower() == dest.lower()
    return data[origin_matches & dest_matches]

In [7]:
def handle_specific_query(filtered_data, query):
    """Answer a natural-language question about a set of direct flights.

    Keyword groups are checked in a fixed priority order: cheapest, costliest,
    shortest, longest, named days of the week, then weekday/weekend. The first
    group with a keyword present in the query decides the answer.

    Parameters:
    - filtered_data (pandas.DataFrame): Candidate flights; the columns read are
      '_id', 'airline', 'flightNumber', 'origin', 'destination', 'rate',
      'flightTime', 'flightTimeMinutes' and 'daysOfWeek'.
    - query (str): The user's question.

    Returns:
    - str: A text rendering of the matching flight(s), or an explanatory
      message when nothing matches or the question is not understood.
    """
    # Guard first: idxmin/idxmax raise ValueError on an empty selection.
    if filtered_data is None or filtered_data.empty:
        return "No flights are available for this route."

    tokens = preprocess_text(query)
    summary_columns = ['_id', 'airline', 'flightNumber', 'origin', 'destination']
    working_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    weekend_days = ['Saturday', 'Sunday']

    def mentions(keywords):
        """True when the query contains at least one of the keywords."""
        return any(word in tokens for word in keywords)

    def describe_extreme(column, pick, shown_column):
        """Render the row where ``column`` is minimal/maximal (``pick``)."""
        row = filtered_data.loc[getattr(filtered_data[column], pick)()]
        return row[summary_columns + [shown_column]].to_string()

    def operating_days(row):
        """Set of day names a flight operates on; empty if the field is missing."""
        days = row['daysOfWeek']
        if not isinstance(days, str):
            # A missing value is read by pandas as a float NaN, which has no .split.
            return set()
        return {day.strip() for day in days.split(',')}

    def describe_flights_on(wanted_days):
        """Render every flight operating on at least one of ``wanted_days``."""
        response = "".join(
            row[summary_columns + ['rate']].to_string() + "\n\n"
            for _, row in filtered_data.iterrows()
            if wanted_days & operating_days(row)
        )
        return response if response else "No flights operate on the given day(s)."

    # (keywords, column to rank by, Series method picking the row, extra column shown)
    extreme_queries = [
        (['cheapest', 'least'], 'rate', 'idxmin', 'rate'),
        (['costliest', 'expensive'], 'rate', 'idxmax', 'rate'),
        (['shortest', 'fastest', 'quickest', 'quick', 'short', 'quickly'],
         'flightTimeMinutes', 'idxmin', 'flightTime'),
        (['longest', 'long', 'slowest', 'slowly', 'slow'],
         'flightTimeMinutes', 'idxmax', 'flightTime'),
    ]
    for keywords, column, pick, shown_column in extreme_queries:
        if mentions(keywords):
            return describe_extreme(column, pick, shown_column)

    # Named days: tokens are lowercase, the data stores capitalised day names.
    named_days = {day for day in working_days + weekend_days if day.lower() in tokens}
    if named_days:
        return describe_flights_on(named_days)

    # Weekday / weekend, singular or plural.
    wanted_days = set()
    if mentions(['weekend', 'weekends']):
        wanted_days.update(weekend_days)
    if mentions(['weekday', 'weekdays']):
        wanted_days.update(working_days)
    if wanted_days:
        return describe_flights_on(wanted_days)

    return "I'm sorry, I can't answer that question."


In [8]:
def handle_connecting_flights(mydata, user_query):
    """Answer a natural-language question about a set of connecting itineraries.

    Parameters:
    - mydata (pandas.DataFrame or str): The result of find_connecting_flights.
      That function returns a message string instead of a frame when nothing
      connects; the string is passed straight back to the caller.
    - user_query (str): The user's question.

    Returns:
    - str: A text rendering of the cheapest / most expensive / shortest /
      longest itinerary, the "no connections" message, or an apology when the
      question is not understood.
    """
    # Indexing the message string like a DataFrame would raise TypeError.
    if isinstance(mydata, str):
        return mydata
    # idxmin/idxmax raise ValueError on an empty selection.
    if mydata is None or mydata.empty:
        return "No connecting flights found."

    tokens = preprocess_text(user_query)

    # Function to convert minutes to an "Xh Ym" display string
    def minutes_to_hours(minutes):
        return f"{minutes // 60}h {minutes % 60}m"

    summary_columns = ['Origin', 'Layover City', 'Destination', 'Departure from Origin',
                       'Arrival at Layover', 'Departure from Layover', 'Arrival at Destination',
                       'Total Rate', 'Total Flight Time']

    # (keywords, column to rank by, Series method picking the row)
    extreme_queries = [
        (['cheapest', 'least'], 'Total Rate', 'idxmin'),
        (['expensive', 'costliest'], 'Total Rate', 'idxmax'),
        (['shortest', 'quickest', 'fastest'], 'Total Flight Time Minutes', 'idxmin'),
        (['longest', 'slowest'], 'Total Flight Time Minutes', 'idxmax'),
    ]
    for keywords, column, pick in extreme_queries:
        if any(word in tokens for word in keywords):
            # Copy the row so the display-only field is never written back
            # into mydata (and pandas does not warn about a chained write).
            itinerary = mydata.loc[getattr(mydata[column], pick)()].copy()
            itinerary['Total Flight Time'] = minutes_to_hours(itinerary['Total Flight Time Minutes'])
            return itinerary[summary_columns].to_string()

    # Previously fell off the end and returned None, which the caller printed.
    return "I'm sorry, I can't answer that question."



In [9]:
def check_direct_flight_exists(origin, destination):
    """
    Reports whether ``data`` holds at least one flight from origin to destination.

    Both values are compared case-insensitively against the ``origin`` and
    ``destination`` columns of the module-level ``data`` frame.

    Parameters:
    - origin (str): The origin to look for, as stored in the ``origin`` column.
    - destination (str): The destination to look for, as stored in the
      ``destination`` column.

    Returns:
    - bool: True if at least one row matches both values, False otherwise.
    """
    wanted_origin = origin.lower()
    wanted_destination = destination.lower()
    origin_hits = data['origin'].str.lower() == wanted_origin
    destination_hits = data['destination'].str.lower() == wanted_destination
    # A single row satisfying both conditions is enough.
    return bool((origin_hits & destination_hits).any())

In [10]:
def find_connecting_flights(origin, dest):
    """Build one-stop itineraries from ``origin`` to ``dest`` out of ``data``.

    Every flight leaving ``origin`` is paired with every flight arriving at
    ``dest`` whose departure city equals the first flight's arrival city, and
    only pairs where the first leg lands before the second leg departs are kept.

    NOTE(review): only the clock times are compared; days of operation and
    legs that cross midnight are not considered here.

    Parameters:
    - origin (str): Value matched case-insensitively against the ``origin`` column.
    - dest (str): Value matched case-insensitively against the ``destination`` column.

    Returns:
    - pandas.DataFrame: The merged legs (columns suffixed ``_first`` /
      ``_second``) plus summary columns '_id', 'Origin', 'Layover City',
      'Destination', the four departure/arrival times, 'Total Rate' and
      'Total Flight Time Minutes' (sum of both legs, layover not included).
    - str: "No connecting flights found." when no pair qualifies.
    """
    first_legs = data[data['origin'].str.lower() == origin.lower()]
    second_legs = data[data['destination'].str.lower() == dest.lower()]

    paired_legs = pd.merge(first_legs, second_legs, left_on='destination', right_on='origin',
                           suffixes=('_first', '_second'))
    # Times are compared as strings; this orders correctly for the zero-padded
    # 'HH:MM' values seen in the data preview.
    lands_before_departure = (
        paired_legs['scheduledArrivalTime_first'] < paired_legs['scheduledDepartureTime_second']
    )
    potential_connections = paired_legs[lands_before_departure]

    if potential_connections.empty:
        return "No connecting flights found."

    # assign() returns a new frame, so the summary columns are never written
    # onto a filtered slice (which triggers pandas' SettingWithCopyWarning).
    return potential_connections.assign(**{
        '_id': potential_connections['_id_first'].astype(str) + "," + potential_connections['_id_second'].astype(str),
        'Origin': potential_connections['origin_first'],
        'Layover City': potential_connections['destination_first'],
        'Destination': potential_connections['destination_second'],
        'Departure from Origin': potential_connections['scheduledDepartureTime_first'],
        'Arrival at Layover': potential_connections['scheduledArrivalTime_first'],
        'Departure from Layover': potential_connections['scheduledDepartureTime_second'],
        'Arrival at Destination': potential_connections['scheduledArrivalTime_second'],
        'Total Rate': potential_connections['rate_first'] + potential_connections['rate_second'],
        # Time in the air only: the two leg durations added together.
        'Total Flight Time Minutes': (
            potential_connections['flightTimeMinutes_first'] + potential_connections['flightTimeMinutes_second']
        ),
    })


In [None]:
# Interactive entry point: ask for a route, then answer questions about its
# direct flights, or about one-stop connections when no direct flight exists.
# Surrounding whitespace is stripped so " Pune " still matches the data.
origin = input("This is your flight booking assistant. Enter your flight origin: ").strip()
destination = input("Enter your flight destination: ").strip()

if check_direct_flight_exists(origin, destination):
    filtered_data = display_flights(origin, destination)
    while True:
        user_query = input("Ask me about the flight minutes, rate, or days of operation (or type 'quit' to exit): ")
        if user_query.strip().lower() == 'quit':
            break
        response = handle_specific_query(filtered_data, user_query)
        print("Response:")
        print(response)
else:
    mydata = find_connecting_flights(origin, destination)
    if isinstance(mydata, str):
        # find_connecting_flights returns a message string, not a DataFrame,
        # when nothing connects; there is nothing to ask questions about.
        print("There are no direct flights available. " + mydata)
    else:
        print("There are no direct flights available. Below is the list of connecting flights:")
        print(mydata[['Origin', 'Layover City', 'Destination', 'Departure from Origin',
                      'Arrival at Layover', 'Departure from Layover', 'Arrival at Destination',
                      'Total Rate', 'Total Flight Time Minutes']].to_string(index=False))
        while True:
            user_query2 = input("Ask me about the flight minutes or rate (or type 'quit' to exit): ")
            if user_query2.strip().lower() == 'quit':
                break
            response2 = handle_connecting_flights(mydata, user_query2)
            print("Response:")
            print(response2)