In [38]:
import pandas as pd 
import json
from datetime import datetime, timedelta 
import sys
from tqdm import tqdm

class Flight:
    
    def __init__(self, data, origin, destination, bags = 0, ret = False):
        self.origin = origin
        self.destination = destination
        self.data = pd.read_csv(data)

        # all unique possible combinations between origin and destination
        self.data_c = self.data[["origin", "destination"]].drop_duplicates()

        # all unique cities in a dataframe
        self.data_s = self.data["origin"].drop_duplicates()

        # count of bags
        self.bags = bags
        
        self.ret = ret

    # will create a graph from dataframe
    # the output of this function is a dictionary with all unique cities in df as a key
    # every key has list of values (cities), where is possible to fly straight from the key
    def create_graph(self):
        dictt = {}
        for i in self.data_s:
            a = self.data_c[self.data_c["destination"] == i]
            values = []
            for j in a["origin"]:
                values.append(j)
            dictt[i] = values
        return dictt
    
    # will find all connections between two cities in a dataframe
    def _find_connection(self, a, b, time = 0, ret = False):
        if time == 0:
            return self.data[(self.data["origin"] == a) & (self.data["destination"] == b)] 
        else:
            if ret == True:
                df = self.data[(self.data["origin"] == a) & (self.data["destination"] == b)]
                adj_df = pd.DataFrame()
                for i in range(df.shape[0]):
                    if self._return_date(df.iloc[i]["departure"]) > self._return_date(time):
                        adj_df = adj_df.append(df.iloc[i])
                return adj_df
            else:
                df = self.data[(self.data["origin"] == a) & (self.data["destination"] == b)]
                adj_df = pd.DataFrame()
                for i in range(df.shape[0]):
                    if timedelta(seconds=21600) + self._return_date(time) >= self._return_date(df.iloc[i]["departure"]) >= timedelta(seconds=3600) + self._return_date(time):
                        adj_df = adj_df.append(df.iloc[i])
                return adj_df

    # will return a datetime from string
    def _return_date(self, s):
        return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S")
    
    def _split_df(self, splt, df):
        df = df.reset_index(drop = True)
        df1 = pd.DataFrame()
        df2 = pd.DataFrame()
        break_rows = 0
        for row in range(len(df)):
            if df.iloc[row]["destination"] == splt:
                break_rows = row + 1
        df1 = df.iloc[range(0, break_rows)]
        df2 = df.iloc[range(break_rows, (len(df)))]
        return [df1, df2]
    
    # will parse seconds to HH:MM:SS format
    def _parse_time(self, s):
        hours = int(s / 3600)
        minutes = int((s - hours * 3600) / 60)
        seconds = int(s - hours * 3600 - minutes * 60)
        if hours < 10:
            hours = str(0) + str(hours)
        if minutes < 10:
            minutes = str(0) + str(minutes)
        if seconds < 10:
            seconds = str(0) + str(seconds)
        return  str(hours) + ":" + str(minutes) + ":" + str(seconds)
    
    # will return required json format from pandas dataframe
    def _return_dict_format(self, df, end):
        df = df.reset_index(drop = True)
        if str(df.iloc[df.shape[0] - 1]["destination"]) == str(end):
            ret_dict = {"flights": []}
            for row in range(len(df)):
                dictt = {}
                for col in df.columns:
                    dictt[col] = df.iloc[row][col]
                ret_dict["flights"].append(dictt)

            ret_dict["bags_allowed"] = df["bags_allowed"].min()
            ret_dict["bags_count"] = self.bags
            ret_dict["destination"] = df.loc[df.index[df.shape[0] - 1]]["destination"]
            ret_dict["origin"] = df.loc[df.index[0]]["origin"]
            ret_dict["total_price"] = df["base_price"].sum() + self.bags * df["bag_price"].sum()
            ret_dict["travel_time"] = self._parse_time((self._return_date(df.iloc[len(df) - 1]["arrival"]) 
                                                        - self._return_date(df.iloc[0]["departure"])).total_seconds())
            return ret_dict
        else:
            df1 = self._split_df(str(end), df)[0]
            df1 = df1.reset_index(drop = True)
            df2 = self._split_df(str(end), df)[1]
            df2 = df2.reset_index(drop = True)
            
            ret_dict = {"return flights": []}
            dict_to_dest = {"flights to dest.": []}
            dict_from_dest = {"flights from dest.": []}
            
            for row in range(len(df1)):
                dictt = {}
                for col in df1.columns:
                    dictt[col] = df1.iloc[row][col]
                dict_to_dest["flights to dest."].append(dictt) 
    
            dict_to_dest["bags_allowed"] = df1["bags_allowed"].min()
            dict_to_dest["bags_count"] = self.bags
            dict_to_dest["destination"] = df1.loc[df1.index[df1.shape[0] - 1]]["destination"]
            dict_to_dest["origin"] = df1.loc[df1.index[0]]["origin"]
            dict_to_dest["total_price"] = df1["base_price"].sum() + self.bags * df1["bag_price"].sum()
            dict_to_dest["travel_time"] = self._parse_time((self._return_date(df1.iloc[len(df1) - 1]["arrival"]) 
                                                        - self._return_date(df1.iloc[0]["departure"])).total_seconds())
            ret_dict["return flights"].append(dict_to_dest)
    
            for row in range(len(df2)):
                dictt = {}
                for col in df2.columns:
                    dictt[col] = df2.iloc[row][col]
                dict_from_dest["flights from dest."].append(dictt) 
            dict_from_dest["bags_allowed"] = df2["bags_allowed"].min()
            dict_from_dest["bags_count"] = self.bags
            dict_from_dest["destination"] = df2.loc[df2.index[df2.shape[0] - 1]]["destination"]
            dict_from_dest["origin"] = df2.loc[df2.index[0]]["origin"]
            dict_from_dest["total_price"] = df2["base_price"].sum() + self.bags * df2["bag_price"].sum()
            dict_from_dest["travel_time"] = self._parse_time((self._return_date(df2.iloc[len(df2) - 1]["arrival"]) 
                                                        - self._return_date(df2.iloc[0]["departure"])).total_seconds())
            ret_dict["return flights"].append(dict_from_dest)
            
            ret_dict["bags_allowed"] = min(int(dict_to_dest["bags_allowed"]), int(dict_from_dest["bags_allowed"]))
            ret_dict["bags_count"] = self.bags
            ret_dict["destination"] = df2.loc[df2.index[df2.shape[0] - 1]]["destination"]
            ret_dict["origin"] = df1.loc[df1.index[0]]["origin"]
            ret_dict["total_price"] = int(dict_to_dest["total_price"]) + int(dict_from_dest["total_price"])
            ret_dict["travel_time"] = self._parse_time(((self._return_date(df1.iloc[len(df1) - 1]["arrival"]) 
                                                        - self._return_date(df1.iloc[0]["departure"])).total_seconds())
                                                       + (self._return_date(df2.iloc[len(df2) - 1]["arrival"]) - 
                                                          self._return_date(df2.iloc[0]["departure"])).total_seconds())
            return ret_dict
            
    # will sort list on a total_price basis               
    def _sort_function(self, e):
        return e["total_price"]
    
    def _find_flights(self, start, end, graph, path, all_path, df, all_df, ret, ret_time):
        
        path.append(start)

        if (start == end):
            if ret == False:
                all_path.append(list(path))
                if df["bags_allowed"].min() >= self.bags:
                    all_df.append(self._return_dict_format(df, self.destination))   
            else:
                all_path.append(list(path))
                path2 = []
                ret = False
                ret_time = True
                self._find_flights(end, self.origin, graph, path2, all_path, df, all_df, ret, ret_time)  
        else:
            for i in graph[start]:
                if (i not in path):
                    if df.empty == True:
                        flights = self._find_connection(start, i)
                    else:
                        if ret_time == True:
                            flights = self._find_connection(start, i, str(df.iloc[df.shape[0] - 1]["arrival"]), True)
                        else:
                            flights = self._find_connection(start, i, str(df.iloc[df.shape[0] - 1]["arrival"]))                     
                    if flights.empty == False:
                        for flight in range(flights.shape[0]):
                            df = df.append(flights.iloc[flight])
                            self._find_flights(i, end, graph, path, all_path, df, all_df, ret, False)
                            df = df.drop(df.index[df.shape[0] - 1])
        path.pop()
    
    # will return all flights from origin to destination available in dataframe
    # it goes through each combination from dfs() function and call _find_flights() function
    def return_all_flights(self):
        graph = self.create_graph()
        path = []
        all_path = []
        df = pd.DataFrame()
        all_df = []
        self._find_flights(self.origin, self.destination, graph, path, all_path, df, all_df, self.ret, False)
        all_df.sort(key = self._sort_function)
        if len(all_df) > 0:
            return all_df
        else:
            return "No flights found."

In [40]:
f = Flight("examples/example3.csv", "IUT", "GXV", 0,True)
f.create_graph()
#f.return_all_flights()

{'WUE': ['JBN', 'NNB', 'EZO', 'BPZ', 'ZRW', 'VVH', 'WTN'],
 'EZO': ['WUE', 'JBN', 'NNB', 'BPZ', 'VVH', 'WTN', 'ZRW'],
 'NNB': ['WUE', 'EZO', 'BPZ', 'ZRW', 'JBN', 'VVH', 'WTN'],
 'JBN': ['WUE', 'EZO', 'NNB', 'ZRW', 'VVH', 'WTN', 'BPZ'],
 'BPZ': ['VVH', 'ZRW', 'NNB', 'EZO', 'WUE', 'JBN'],
 'VVH': ['BPZ', 'NNB', 'EZO', 'ZRW', 'WUE', 'WTN', 'JBN'],
 'ZRW': ['NNB', 'JBN', 'BPZ', 'VVH', 'WUE', 'EZO', 'WTN'],
 'WTN': ['VVH', 'ZRW', 'NNB', 'JBN', 'WUE', 'EZO']}

In [42]:
pd.read_csv("examples/example3.csv")

Unnamed: 0,flight_no,origin,destination,departure,arrival,base_price,bag_price,bags_allowed
0,JT808,WUE,JBN,2021-09-01T00:05:00,2021-09-01T01:15:00,36.0,11,2
1,JT465,WUE,NNB,2021-09-01T00:15:00,2021-09-01T03:30:00,71.0,11,1
2,YL471,EZO,JBN,2021-09-01T00:20:00,2021-09-01T02:55:00,31.0,10,1
3,JT457,NNB,ZRW,2021-09-01T00:30:00,2021-09-01T03:30:00,77.0,11,2
4,CC515,WUE,JBN,2021-09-01T00:45:00,2021-09-01T01:55:00,18.0,9,2
...,...,...,...,...,...,...,...,...
673,YL690,BPZ,ZRW,2021-09-18T23:30:00,2021-09-19T02:40:00,73.0,10,1
674,CC211,ZRW,EZO,2021-09-19T00:00:00,2021-09-19T05:40:00,325.0,9,2
675,JT178,JBN,VVH,2021-09-19T00:05:00,2021-09-19T01:55:00,30.0,11,1
676,CC202,VVH,WUE,2021-09-19T01:50:00,2021-09-19T04:30:00,41.0,9,2
