In [3]:
import os
import json
import openai
from datetime import datetime, timedelta
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
from amadeus import Client, ResponseError

In [4]:
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

In [6]:
amadeus = Client(
     client_id='H4S446vjDRHVJn8C0ZDRkXSLt03AvOGp',
     client_secret='2MCfJEBzkSLqYla0',
     #log_level='debug'

 )


In [7]:
client = OpenAI()

In [8]:
def get_airport_code(location):
    """Get the airport code for a given location string."""
    try:
        response = amadeus.reference_data.locations.get(
            keyword=location,
            subType='AIRPORT'
        )
        if response.data:
            # Return the first matching airport code
            return response.data[0]['iataCode']
        else:
            print(f"No airport code found for {location}")
            return None
    except ResponseError as error:
        print(f"Error finding airport code for {location}: {error}")
        return None

# Flight info API

In [10]:
def extract_flight_info_from_query(query):
    today = datetime.now()
    current_date_str = today.strftime('%Y-%m-%d')
    messages = [
        {
            "role": "system",
            "content": (
                "You are an assistant that helps extract flight information from user queries. "
                f"Today is {current_date_str}. Extract the following details from the query: "
                "1. location_origin: The departure city or airport, ensure it corresponds to a valid airport code "
                "2. location_destination: The destination city or airport, ensure it corresponds to a valid airport code "
                "3. departure_date: The date of departure "
                "4. adults: The number of adult passengers. (If there is no information regarding this then consider only 1 Adult is there)"
                "If the query specifies a relative date (e.g., 'next Monday'), convert it to an absolute date. "
                "Provide the information in JSON format as follows: "
                '{"location_origin": "origin", "location_destination": "destination", "departure_date": "YYYY-MM-DD", "adults": number_of_adults}'
            )
        },
        {
            "role": "user",
            "content": query
        }
    ]

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=300,
        temperature=0.1 
    )

    response_text = response.choices[0].message.content.strip()
    try:
        flight_info = json.loads(response_text)
        # Basic validation
        required_keys = ["location_origin", "location_destination", "departure_date", "adults"]
        if not all(key in flight_info for key in required_keys):
            raise ValueError("Incomplete response from LLM")
        return flight_info
    except (json.JSONDecodeError, ValueError) as e:
        print(f"Error extracting flight info: {e}")
        return None


In [11]:
def get_flight_info(location_origin, location_destination, departure_date, adults=1):
    """Get flight information between two locations."""
    origin_code = location_origin
    destination_code = location_destination
    
    if not origin_code or not destination_code:
        return "Could not find airport codes for the provided locations."

    try:
        response = amadeus.shopping.flight_offers_search.get(
            originLocationCode=origin_code,
            destinationLocationCode=destination_code,
            departureDate=departure_date,
            adults=adults
        )
        return response.data
    except ResponseError as error:
        return str(error)

In [12]:
def create_flight_dataframe(flight_data):
    """Create a Pandas DataFrame from the flight data."""
    flight_details = []

    # Get the airline names from the flight offers
    airlines = set()
    for offer in flight_data:
        for segment in offer['itineraries'][0]['segments']:
            airlines.add(segment['carrierCode'])

    # Fetch airline names using the IATA codes
    airline_names = {}
    for airline_code in airlines:
        airline_response = amadeus.reference_data.airlines.get(airlineCodes=airline_code)
        if airline_response.data:
                airline_names[airline_code] = airline_response.data[0]['commonName']

    for flight in flight_data:
        total_price = flight['price'].get('total', '')
        currency = flight['price'].get('currency', '')
        one_way = len(flight['itineraries']) == 1
        
        for itinerary in flight['itineraries']:
            num_stops = len(itinerary['segments']) - 1
            
            for segment in itinerary['segments']:
                airline_code = segment.get('carrierCode', '')
                airline_name =  airline_names.get(segment['carrierCode'], '')
                from_ = segment.get(0, {}).get('departure', {}).get('iataCode', '')
                from_terminal = segment.get(0, {}).get('departure', {}).get('terminal', '')
                to = segment.get(0, {}).get('arrival', {}).get('iataCode', '')
                to_terminal = segment.get(0, {}).get('arrival', {}).get('terminal', '')
                departure = segment['departure'].get('at', '')
                arrival = segment['arrival'].get('at', '')
                cabin = segment.get('travelerPricings', [{}])[0].get('fareDetailsBySegment', [{}])[0].get('cabin', '')
                pricing_detail = segment.get('pricingDetailPerAdult', {})
               

                flight_details.append({
                    "Airline Code": airline_code,
                    "Airline Name": airline_name,
                    "Departure": departure,
                    "Arrival": arrival,
                    "Total Price": total_price,
                    "Currency": currency,
                    "Number of Stops": num_stops,
                    "Cabin": cabin,
                    "One Way": one_way})
                   
    data = pd.DataFrame(flight_details)
    data.drop_duplicates(inplace=True)
    return data

In [13]:
query = "Give me flight information for Bangalore to Mumbai for next Monday"
result = extract_flight_info_from_query(query)

In [14]:
result

{'location_origin': 'BLR',
 'location_destination': 'BOM',
 'departure_date': '2024-05-27',
 'adults': 1}

In [15]:
origin = result['location_origin']
destination = result['location_destination']
departure_date = result['departure_date']
adults = result['adults']

flight_info = get_flight_info(origin, destination, departure_date)

# Convert the flight info to a DataFrame
if isinstance(flight_info, list):
    flight_df = create_flight_dataframe(flight_info)
else:
    print("Error in forming Dataframe")

1. CHeapest One
2. Less Hops
3. Less travel time
4. User Currency (better to ask / follow up question)

In [16]:
flight_df

Unnamed: 0,Airline Code,Airline Name,Departure,Arrival,Total Price,Currency,Number of Stops,Cabin,One Way
0,UK,VISTARA,2024-05-27T08:30:00,2024-05-27T10:10:00,52.34,EUR,0,,True
1,UK,VISTARA,2024-05-27T06:40:00,2024-05-27T08:25:00,52.34,EUR,0,,True
2,AI,AIR INDIA,2024-05-27T06:40:00,2024-05-27T08:35:00,53.86,EUR,0,,True
3,AI,AIR INDIA,2024-05-27T21:25:00,2024-05-27T23:20:00,53.86,EUR,0,,True
4,AI,AIR INDIA,2024-05-27T09:20:00,2024-05-27T11:20:00,53.86,EUR,0,,True
...,...,...,...,...,...,...,...,...,...
111,H1,HAHN AIR SYSTEMS,2024-05-28T12:00:00,2024-05-28T14:25:00,440.85,EUR,1,,True
113,H1,HAHN AIR SYSTEMS,2024-05-27T18:20:00,2024-05-27T20:45:00,440.85,EUR,1,,True
115,H1,HAHN AIR SYSTEMS,2024-05-27T19:00:00,2024-05-27T21:25:00,440.85,EUR,1,,True
116,H1,HAHN AIR SYSTEMS,2024-05-27T05:40:00,2024-05-27T08:20:00,469.20,EUR,1,,True


# Using Agent

In [40]:
df1 = flight_df.copy()

In [54]:
def prompt_query(query,origin,destination):
    system_prompt = "System Prompt : " + f"You are an assistant that helps to answer queries based on the dataframe provided. This is a dataframe containing flight information from {origin} to {destination}. So answer the question by analyzing dataframe and give information useful for ticket booking only, please refrain from explaining how things are calculated. Don't give the code but analyze the dataframe, calculate values if required by yourself and answer the query."
    query_ask = "Query : " + query
    return f"{system_prompt}\n{query_ask}"


In [76]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI

In [80]:
agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df1, verbose=True)

In [93]:
agent = create_pandas_dataframe_agent(
    ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    df1,
    verbose=False,
    handle_parsing_errors=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)



In [94]:
query = "Give me cheapest flight for Bangalore to Mumbai for next Monday"
query_to_ask = prompt_query(query,origin,destination)

In [95]:
res = agent.invoke(query_to_ask)
res['output'].strip()

TypeError: Completions.create() got an unexpected keyword argument 'functions'

https://python.langchain.com/v0.2/docs/how_to/sql_csv/

# Using Pandas Toolkit

In [17]:
df = flight_df.copy()

In [22]:
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI

In [23]:
llm = ChatOpenAI(model="gpt-3.5-turbo-0613")

In [24]:
def prompt_query(query,origin,destination):
    main_prompt = f"""
    "You are an assistant that helps to answer queries based on the flight information dataframe provided. 
    This is a dataframe containing flight information from {origin} to {destination}. 
    So answer the question by analyzing dataframe and give direct answers to query and please refrain from explaining how things are calculated. 
    Don't give the code but analyze the dataframe, calculate values if required by yourself and answer the query."

    """
    system_prompt = "System Prompt : " + main_prompt
    query_ask = "Query : " + query
    return f"{system_prompt}\n{query_ask}"


In [25]:
query = "give top 3 flights which are cheapest and also have less travel time"
query_to_ask = prompt_query(query,origin,destination)

In [26]:
print(query_to_ask)

System Prompt : 
    "You are an assistant that helps to answer queries based on the flight information dataframe provided. 
    This is a dataframe containing flight information from BLR to BOM. 
    So answer the question by analyzing dataframe and give direct answers to query and please refrain from explaining how things are calculated. 
    Don't give the code but analyze the dataframe, calculate values if required by yourself and answer the query."

    
Query : give top 3 flights which are cheapest and also have less travel time


In [27]:
agent = create_pandas_dataframe_agent(llm, df, agent_type="openai-tools", verbose=False)
res = agent.invoke(
    {
        "input": query_to_ask
    }
)

print(res['output'])

To find the top 3 flights that are cheapest and have less travel time, we need to sort the dataframe by both the "Total Price" and the time difference between "Departure" and "Arrival". 

Here is the answer:
1. Flight with Airline Code "UK" (VISTARA), Total Price of 52.34 EUR, and travel time of 1 hour and 40 minutes.
2. Flight with Airline Code "AI" (AIR INDIA), Total Price of 52.34 EUR, and travel time of 1 hour and 45 minutes.
3. Flight with Airline Code "AI" (AIR INDIA), Total Price of 52.34 EUR, and travel time of 2 hours.

Please note that the above information is based on the provided dataframe.


# Currency Converter

In [29]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType

In [30]:

tools = load_tools(["serpapi", "llm-math"], llm=llm)

agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

ValidationError: 1 validation error for SerpAPIWrapper
__root__
  Did not find serpapi_api_key, please add an environment variable `SERPAPI_API_KEY` which contains it, or pass `serpapi_api_key` as a named parameter. (type=value_error)

In [6]:
!pip install chardet

Looking in indexes: https://pypi.org/simple/


In [1]:
import pandas as pd

In [2]:
!pip install openpyxl

Looking in indexes: https://pypi.org/simple/


In [4]:
df = pd.read_excel("IATA_code.xlsx")

ImportError: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("code_reference.csv")

In [19]:
df = df[["IATA","Airline"]]

In [20]:
airline = list(df.Airline.values)
code = list(df.IATA.values)

In [21]:
code_dict = dict(zip(code,airline))

In [22]:
code_dict

{'M3': 'ABSA Cargo Airline',
 'JP': 'Adria Airways',
 'A3': 'Aegean Airlines',
 'EI': 'Aer Lingus',
 'P5': 'Aero República',
 'SU': 'Aeroflot',
 'AR': 'Aerolineas Argentinas',
 '2K': 'Aerolineas Galapagos S.A. Aerogal',
 'AM': 'Aeromexico',
 '8U': 'Afriqiyah Airways',
 'ZI': 'Aigle Azur',
 'AH': 'Air Algérie',
 'G9': 'Air Arabia',
 'KC': 'Air Astana',
 'UU': 'Air Austral',
 'BT': 'Air Baltic',
 'AB': 'Air Berlin',
 'BP': 'Air Botswana',
 'TY': 'Air Caledonie',
 'AC': 'Air Canada',
 'CA': 'Air China Limited',
 'XK': 'Air Corsica',
 'UX': 'Air Europa',
 'AF': 'Air France',
 'AI': 'Air India',
 'JS': 'Air Koryo',
 'NX': 'Air Macau',
 'MD': 'Air Madagascar',
 'KM': 'Air Malta',
 'MK': 'Air Mauritius',
 '9U': 'Air Moldova',
 'SW': 'Air Namibia',
 'NZ': 'Air New Zealand',
 'PX': 'Air Niugini',
 'YW': 'Air Nostrum',
 'AP': 'Air One S.p.A.',
 'JU': 'Air SERBIA a.d. Beograd',
 'HM': 'Air Seychelles',
 'VT': 'Air Tahiti',
 'TN': 'Air Tahiti Nui',
 'TS': 'Air Transat',
 'U7': 'Air Uganda',
 'NF':

In [24]:
airlines = {'KL', 'WY', 'LO', 'CX', 'TK', 'JL', 'UL', 'QR', 'VS', 'UK', 'AF', 'SQ', 'EY', 'TG', 'EK', 'LH', 'BR', 'AI', 'QF', '6X'}

airline_names_1 = {}
for airline_code in airlines:
    if airline_code in code_dict:
        airline_names_1[airline_code] = code_dict[airline_code]
        #airlines.discard(airline_code)


In [26]:
airline_new = set(airline_names_1.keys())
airline_names_2 = airlines - airline_new

{'6X', 'UK'}

In [29]:
d1 = {"a":1,"b":2}
d2 = {"c":3,"d":4}

d3 = d1 | d2
d3

{'a': 1, 'b': 2, 'c': 3, 'd': 4}

In [21]:
import os
import json
import openai
from datetime import datetime, timedelta
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
import time
from amadeus import Client, ResponseError
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI

In [32]:
df_reference = pd.read_csv("code_reference.csv")
df_reference = df_reference[["IATA","Airline"]]

airline = list(df_reference.Airline.values)
code = list(df_reference.IATA.values)

code_dict = dict(zip(code,airline))

In [33]:
client = OpenAI()

In [34]:
from amadeus import Client, ResponseError

amadeus = Client(
    client_id='H4S446vjDRHVJn8C0ZDRkXSLt03AvOGp',
    client_secret='2MCfJEBzkSLqYla0'

 )

In [35]:
def extract_flight_info_from_query(query):
    today = datetime.now()
    current_date_str = today.strftime('%Y-%m-%d')
    messages = [
        {
            "role": "system",
            "content": (
                "You are an assistant that helps extract flight information from user queries. "
                f"Today is {current_date_str}. Extract the following details from the query: "
                "1. location_origin: The departure city or airport, ensure it corresponds to a valid airport code "
                "2. location_destination: The destination city or airport, ensure it corresponds to a valid airport code "
                "3. departure_date: The date of departure "
                "4. adults: The number of adult passengers. (If there is no information regarding this then consider only 1 Adult is there)"
                "If the query specifies a relative date (e.g., 'next Monday'), convert it to an absolute date. "
                "Provide the information in JSON format as follows: "
                '{"location_origin": "origin", "location_destination": "destination", "departure_date": "YYYY-MM-DD", "adults": number_of_adults}'
            )
        },
        {
            "role": "user",
            "content": query
        }
    ]

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=300,
        temperature=0.1 
    )

    response_text = response.choices[0].message.content.strip()
    try:
        flight_info = json.loads(response_text)
        # Basic validation
        required_keys = ["location_origin", "location_destination", "departure_date", "adults"]
        if not all(key in flight_info for key in required_keys):
            raise ValueError("Incomplete response from LLM")
        return flight_info
    except (json.JSONDecodeError, ValueError) as e:
        print(f"Error extracting flight info: {e}")
        return None

def get_flight_info(location_origin, location_destination, departure_date, adults=1,retries=3, delay=2):
    """Get flight information between two locations."""
    origin_code = location_origin
    destination_code = location_destination
    
    if not origin_code or not destination_code:
        return "Could not find airport codes for the provided locations."

    try:
        #response = amadeus.shopping.flight_dates.get(origin=location_origin, destination=location_destination)
        response = amadeus.shopping.flight_offers_search.get(
            originLocationCode=origin_code,
            destinationLocationCode=destination_code,
            departureDate=departure_date,
            adults=adults
        )
        return response.data
    except Exception as error:
        if retries > 0:
            print(f"Rate limit exceeded. Retrying in {delay} seconds...")
            time.sleep(delay)
            return get_flight_info(location_origin, location_destination, departure_date, retries - 1, delay * 2)
        else:
            print(f"Failed to get flight information: {error}")
            return None

def create_flight_dataframe(flight_data):
    """Create a Pandas DataFrame from the flight data."""
    flight_details = []

    # Get the airline names from the flight offers
    airlines = set()
    for offer in flight_data:
        for segment in offer['itineraries'][0]['segments']:
            airlines.add(segment['carrierCode'])

    #print("Airlines :", airlines)

    # Fetch Airline names using reference data
    airline_names_f1 = {}
    for airline_code in airlines:
        if airline_code in code_dict:
            airline_names_f1[airline_code] = code_dict[airline_code]

    #print("airline_names_f1 : ",airline_names_f1)
    
    airline_new = set(airline_names_f1.keys())
    airline_names_2 = airlines - airline_new    


    # Fetch airline names using the IATA codes
    airline_names_f2 = {}
    for airline_code in airline_names_2:
        try:
            time.sleep(3)
            airline_response = amadeus.reference_data.airlines.get(airlineCodes=airline_code)
            if airline_response.data:
                    airline_names_f2[airline_code] = airline_response.data[0]['commonName']
        except Exception as e:
            airline_names_f2[airline_code] = ""

    #print("airline_names_f2 : ",airline_names_f2)

    airline_names_final = airline_names_f1 | airline_names_f2

    # print("Airline names : ",airline_names)

    for flight in flight_data:
        total_price = flight['price'].get('total', '')
        currency = flight['price'].get('currency', '')
        one_way = len(flight['itineraries']) == 1
        
        for itinerary in flight['itineraries']:
            num_stops = len(itinerary['segments']) - 1
            
            for segment in itinerary['segments']:
                airline_code = segment.get('carrierCode', '')
                airline_name =  airline_names_final.get(segment['carrierCode'], '')
                from_ = segment.get(0, {}).get('departure', {}).get('iataCode', '')
                from_terminal = segment.get(0, {}).get('departure', {}).get('terminal', '')
                to = segment.get(0, {}).get('arrival', {}).get('iataCode', '')
                to_terminal = segment.get(0, {}).get('arrival', {}).get('terminal', '')
                departure = segment['departure'].get('at', '')
                arrival = segment['arrival'].get('at', '')
                cabin = segment.get('travelerPricings', [{}])[0].get('fareDetailsBySegment', [{}])[0].get('cabin', '')
                pricing_detail = segment.get('pricingDetailPerAdult', {})
               

                flight_details.append({
                    "Airline Code": airline_code,
                    "Airline Name": airline_name,
                    "Departure": departure,
                    "Arrival": arrival,
                    "Total Price": total_price,
                    "Currency": currency,
                    "Number of Stops": num_stops,
                    #"Cabin": cabin,
                    "One Way": one_way})
                   
    data = pd.DataFrame(flight_details)
    data.drop_duplicates(inplace=True)
    return data

In [36]:
def run(query):
    print("Extracting fields from Query")
    print("*"*125)
    result = extract_flight_info_from_query(query)
    print("Information extracted from query : ",result)
    print("*"*125)

    origin = result['location_origin']
    destination = result['location_destination']
    departure_date = result['departure_date']
    adults = result['adults']

    flight_info = get_flight_info(origin, destination, departure_date)

    # Convert the flight info to a DataFrame
    if isinstance(flight_info, list):
        flight_df = create_flight_dataframe(flight_info)
        print("Dataframe : ",flight_df)
        print("*"*125)
        flight_df.to_csv(f"{origin}" + "_" + f"{destination}.csv")
        return flight_df,origin,destination
    else:
        print("Error in forming Dataframe")

In [41]:
query = "give me cheapest flights for Bangalore to Chandigarh till 1st july"
df_,a,b = run(query)

Extracting fields from Query
*****************************************************************************************************************************
Information extracted from query :  {'location_origin': 'BLR', 'location_destination': 'IXC', 'departure_date': '2024-07-01', 'adults': 1}
*****************************************************************************************************************************
Dataframe :      Airline Code Airline Name            Departure              Arrival  \
0             UK      VISTARA  2024-07-01T11:30:00  2024-07-01T14:35:00   
1             UK      VISTARA  2024-07-01T11:30:00  2024-07-01T14:10:00   
2             UK      VISTARA  2024-07-01T17:10:00  2024-07-01T18:15:00   
3             UK      VISTARA  2024-07-01T08:00:00  2024-07-01T10:40:00   
5             UK      VISTARA  2024-07-01T07:00:00  2024-07-01T09:40:00   
..           ...          ...                  ...                  ...   
140           AI    Air India  2024-07-01T

In [44]:
df_['Departure'] = pd.to_datetime(df_['Departure'])
df_['Arrival'] = pd.to_datetime(df_['Arrival'])

# Calculate the journey duration
df_['Journey Duration'] = df_['Arrival'] - df_['Departure']

In [45]:
df_


Unnamed: 0,Airline Code,Airline Name,Departure,Arrival,Total Price,Currency,Number of Stops,One Way,Journey Duration
0,UK,VISTARA,2024-07-01 11:30:00,2024-07-01 14:35:00,107.69,EUR,0,True,0 days 03:05:00
1,UK,VISTARA,2024-07-01 11:30:00,2024-07-01 14:10:00,125.13,EUR,1,True,0 days 02:40:00
2,UK,VISTARA,2024-07-01 17:10:00,2024-07-01 18:15:00,125.13,EUR,1,True,0 days 01:05:00
3,UK,VISTARA,2024-07-01 08:00:00,2024-07-01 10:40:00,125.13,EUR,1,True,0 days 02:40:00
5,UK,VISTARA,2024-07-01 07:00:00,2024-07-01 09:40:00,125.13,EUR,1,True,0 days 02:40:00
...,...,...,...,...,...,...,...,...,...
140,AI,Air India,2024-07-01 15:00:00,2024-07-01 17:55:00,703.09,EUR,1,True,0 days 02:55:00
150,AI,Air India,2024-07-01 12:05:00,2024-07-01 14:10:00,710.44,EUR,1,True,0 days 02:05:00
151,UK,VISTARA,2024-07-01 16:50:00,2024-07-01 19:10:00,710.44,EUR,1,True,0 days 02:20:00
152,AI,Air India,2024-07-01 09:20:00,2024-07-01 11:20:00,710.44,EUR,1,True,0 days 02:00:00


# Currency converter

In [47]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI
#from decouple import config


llm = OpenAI(openai_api_key="sk-proj-zDiVuxfiSAsawWDGRemmT3BlbkFJWwbyyWNBFK4lQZ8KIJ9w", temperature=0)

tools = load_tools(["serpapi", "llm-math"], llm=llm,serpapi_api_key = "0ed8ec9c15be4500a7cb42ab2568f2d820fa0de9273db1c45686cacc613c432e")

agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

ValidationError: 1 validation error for SerpAPIWrapper
__root__
  Did not find serpapi_api_key, please add an environment variable `SERPAPI_API_KEY` which contains it, or pass `serpapi_api_key` as a named parameter. (type=value_error)

In [None]:
#