In [3]:
import pandas as pd
from datetime import datetime
from pprint import pprint

In [4]:
flights_all = pd.read_csv('flights.csv', delimiter=';')

In [5]:
flights_all.head()

Unnamed: 0,source,destination,departure,arrival,flight_number,price,bags_allowed,bag_price
0,USM,HKT,2017-02-11T06:25:00,2017-02-11T07:25:00,PV404,24,1,9
1,USM,HKT,2017-02-12T12:15:00,2017-02-12T13:15:00,PV755,23,2,9
2,USM,HKT,2017-02-12T21:15:00,2017-02-12T22:15:00,PV729,25,1,14
3,USM,HKT,2017-02-11T14:50:00,2017-02-11T15:50:00,PV966,21,1,17
4,USM,HKT,2017-02-12T00:35:00,2017-02-12T01:35:00,PV398,24,1,14


## Utils

In [6]:
# Preprocessing
def date2timestamp(date):
    return mktime(datetime.strptime(date, "%Y-%m-%dT%H:%M:%S").timetuple())

def preprocess(flights):
    flights_prices = flights[['price', 'bag_price']]
    flights_prices.index = flights['flight_number']
    
    flights = flights.drop(columns=['price', 'bag_price'])
    flights['departure'] = flights['departure'].apply(lambda x: date2timestamp(x))
    flights['arrival'] = flights['arrival'].apply(lambda x: date2timestamp(x))
    return flights_prices, flights

def make_subtree(flight, flights):
    sub_tree = {}
    row = flights[flights['flight_number'] == flight]
    time_now, airport_now = row[['arrival', 'destination']].values[0]
    flights_now = flights[(flights['departure'] > time_now + 60*60) & \
                          (flights['departure'] < time_now + 4*60*60)]

    if len(flights_now) == 0 or airport_now not in flights_now['source'].values:
        return sub_tree
    else:
        airport_flights_now = flights_now['flight_number'][ flights_now['source'] == airport_now ]
        for flight_now in airport_flights_now:
            sub_tree[flight_now] = make_subtree(flight_now, flights_now)
            
        return sub_tree

# search for possible combination tree
def make_tree(flights, num_bags=0):
    tree = {}
    airports = flights['source'].unique()
    
    for airport in airports:     
        tree[airport] = {}
        airport_flights = flights['flight_number'][(flights['source'] == airport) & \
                                                   (flights['bags_allowed'] >= num_bags)]
    
        for flight in airport_flights:
            sub_tree = make_subtree(flight, flights)
            tree[airport][flight] = sub_tree
            
    return tree

# assuming there are no more than 2 stopovers
def search_combinations(flights_tree):
    combinations_all = []
    for airport in flights_tree:
        for flight in tree[airport]:
            combination = [flight]
            combinations_all += [combination]
            
            for flight_next in tree[airport][flight]:
                combination_next = [flight_next]
                combinations_all += [combination_next]
                combinations_all += [combination + combination_next]
                
                for flight_next_next in tree[airport][flight][flight_next]:
                    combination_next_next = [flight_next_next]
                    combinations_all += [combination_next_next]
                    combinations_all += [combination_next + combination_next_next]
                    combinations_all += [combination + combination_next + combination_next_next]
        
    return combinations_all

def add_prices(combinations: list, flights_prices, num_bags=0):
    num_combination = len(combinations)
    for i in range(num_combination):
        combination_price = 0
        for flight in combinations[i]:      
            prices = flights_prices.loc[flight]
            combination_price += prices['price'] + num_bags * prices['bag_price']
        
        combinations[i] += [combination_price]
        
    return combinations

In [7]:
prices, flights = preprocess(flights_all)
flights.head()

Unnamed: 0,source,destination,departure,arrival,flight_number,bags_allowed
0,USM,HKT,1486791000.0,1486794000.0,PV404,1
1,USM,HKT,1486898000.0,1486902000.0,PV755,2
2,USM,HKT,1486930000.0,1486934000.0,PV729,1
3,USM,HKT,1486821000.0,1486825000.0,PV966,1
4,USM,HKT,1486856000.0,1486860000.0,PV398,1


In [8]:
prices.head()

Unnamed: 0_level_0,price,bag_price
flight_number,Unnamed: 1_level_1,Unnamed: 2_level_1
PV404,24,9
PV755,23,9
PV729,25,14
PV966,21,17
PV398,24,14


In [9]:
bags = 2
tree = make_tree(flights, num_bags=bags)
print("\n Flights tree with {} number of bags allowed:".format(bags))
pprint(tree)


 Flights tree with 2 number of bags allowed:
{'BWN': {'PV042': {}, 'PV213': {'PV197': {}}, 'PV278': {}, 'PV999': {}},
 'DPS': {'PV519': {'PV442': {}},
         'PV620': {'PV042': {}},
         'PV699': {'PV634': {}}},
 'HKT': {'PV101': {}, 'PV146': {}, 'PV672': {}},
 'USM': {'PV275': {},
         'PV290': {},
         'PV540': {'PV634': {}},
         'PV755': {'PV634': {}},
         'PV876': {'PV442': {}}}}


In [10]:
combinations = search_combinations(tree)
pprint(combinations)
len(combinations)

[['PV755'],
 ['PV634'],
 ['PV755', 'PV634'],
 ['PV540'],
 ['PV634'],
 ['PV540', 'PV634'],
 ['PV290'],
 ['PV876'],
 ['PV442'],
 ['PV876', 'PV442'],
 ['PV275'],
 ['PV146'],
 ['PV101'],
 ['PV672'],
 ['PV999'],
 ['PV213'],
 ['PV197'],
 ['PV213', 'PV197'],
 ['PV278'],
 ['PV042'],
 ['PV620'],
 ['PV042'],
 ['PV620', 'PV042'],
 ['PV699'],
 ['PV634'],
 ['PV699', 'PV634'],
 ['PV519'],
 ['PV442'],
 ['PV519', 'PV442']]


29

In [11]:
add_prices(combinations, prices, num_bags=bags)

[['PV755', 41],
 ['PV634', 45],
 ['PV755', 'PV634', 86],
 ['PV540', 52],
 ['PV634', 45],
 ['PV540', 'PV634', 97],
 ['PV290', 35],
 ['PV876', 57],
 ['PV442', 39],
 ['PV876', 'PV442', 96],
 ['PV275', 58],
 ['PV146', 31],
 ['PV101', 32],
 ['PV672', 34],
 ['PV999', 100],
 ['PV213', 99],
 ['PV197', 110],
 ['PV213', 'PV197', 209],
 ['PV278', 85],
 ['PV042', 118],
 ['PV620', 93],
 ['PV042', 118],
 ['PV620', 'PV042', 211],
 ['PV699', 160],
 ['PV634', 45],
 ['PV699', 'PV634', 205],
 ['PV519', 167],
 ['PV442', 39],
 ['PV519', 'PV442', 206]]