In [38]:
import pandas as pd
import numpy as np
import datetime

In [83]:
df_seats = pd.read_excel('Week 14 - Input.xlsx', sheet_name='SeatList').melt(id_vars='Row', value_name='Seat', var_name='Letter')
df_seats['Position'] = np.where(df_seats['Letter'].isin(['A','F']), 'Window', np.where(df_seats['Letter'].isin(['B','E']), 'Aisle', 'Middle'))
df_seats

Unnamed: 0,Row,Letter,Seat,Position
0,1,A,1,Window
1,2,A,7,Window
2,3,A,13,Window
3,4,A,19,Window
4,5,A,25,Window
...,...,...,...,...
115,16,F,96,Window
116,17,F,102,Window
117,18,F,108,Window
118,19,F,114,Window


In [84]:
df_flights = pd.read_excel('Week 14 - Input.xlsx', sheet_name='FlightDetails')
df_flights[['Flight ID', 'From', 'To', 'Date', 'Time']] = df_flights['[FlightID|DepAir|ArrAir|DepDate|DepTime]'].str.strip('[]').str.split('|', expand=True)
df_flights['DateTime'] = pd.to_datetime(df_flights['Date'] + ' ' + df_flights['Time'], format='%Y-%m-%d %H:%M:%S')
df_flights['Time of day'] =  np.where(df_flights['DateTime'].dt.time < datetime.time(12,0,0), 'Morning',
                                  np.where(df_flights['DateTime'].dt.time <= datetime.time(18,0,0), 'Afternoon', 'Evening'))
df_flights.rename(columns={'Flight ID':'Flight'}, inplace=True)
df_flights['Flight'] = df_flights['Flight'].astype('int')
df_flights

Unnamed: 0,[FlightID|DepAir|ArrAir|DepDate|DepTime],Flight,From,To,Date,Time,DateTime,Time of day
0,[1|LHR|SEA|2020-10-08|14:53:00],1,LHR,SEA,2020-10-08,14:53:00,2020-10-08 14:53:00,Afternoon
1,[2|MTY|JFK|2020-12-03|06:51:00],2,MTY,JFK,2020-12-03,06:51:00,2020-12-03 06:51:00,Morning
2,[3|SEA|BOS|2020-11-21|20:45:00],3,SEA,BOS,2020-11-21,20:45:00,2020-11-21 20:45:00,Evening
3,[4|LHR|BOS|2020-10-31|21:01:00],4,LHR,BOS,2020-10-31,21:01:00,2020-10-31 21:01:00,Evening
4,[5|MTY|CAI|2020-12-07|09:33:00],5,MTY,CAI,2020-12-07,09:33:00,2020-12-07 09:33:00,Morning
5,[6|JFK|LHR|2020-11-10|05:05:00],6,JFK,LHR,2020-11-10,05:05:00,2020-11-10 05:05:00,Morning
6,[7|TPE|LHR|2020-12-01|14:22:00],7,TPE,LHR,2020-12-01,14:22:00,2020-12-01 14:22:00,Afternoon
7,[8|BOS|SEA|2020-12-26|16:45:00],8,BOS,SEA,2020-12-26,16:45:00,2020-12-26 16:45:00,Afternoon
8,[9|JFK|LHR|2020-10-23|19:06:00],9,JFK,LHR,2020-10-23,19:06:00,2020-10-23 19:06:00,Evening
9,[10|CAI|LHR|2020-12-22|10:54:00],10,CAI,LHR,2020-12-22,10:54:00,2020-12-22 10:54:00,Morning


In [85]:
df_flight_classes = pd.read_excel('Week 14 - Input.xlsx', sheet_name='PlaneDetails')
df_flight_classes['last business row'] = df_flight_classes['Business Class'].str.extract('.*\-(.*)').astype('int')
df_flight_classes.rename(columns={'FlightNo.':'Flight'}, inplace=True)
df_flight_classes

Unnamed: 0,Flight,Business Class,last business row
0,1,1-5,5
1,2,1-8,8
2,3,1-10,10
3,4,1-5,5
4,5,1-5,5
5,6,1-5,5
6,7,1-2,2
7,8,1-3,3
8,9,1-5,5
9,10,1-5,5


In [86]:
df_passengers = pd.read_excel('Week 14 - Input.xlsx', sheet_name='Passenger List')
df_passengers.rename(columns=lambda x : x.replace('_',' ').title(), inplace=True)
df_passengers.rename(columns={'Flight Number':'Flight', 'Passenger Number':'Seat'}, inplace=True)
df_passengers.dropna(axis=1,inplace=True)
df_passengers

Unnamed: 0,First Name,Last Name,Seat,Flight,Purchase Amount
0,Jerrylee,Rein,1,1,48.29
1,Forester,Iashvili,2,1,0.00
2,Shaun,Sherwill,3,1,0.00
3,Werner,Basile,4,1,58.21
4,Kerwinn,Skillen,5,1,41.96
...,...,...,...,...,...
995,Skye,McLaverty,106,10,10.46
996,Margaux,Rymour,107,10,0.00
997,Corny,Vaszoly,108,10,44.60
998,Vittorio,Rushbrook,109,10,0.00


In [90]:
df_output = pd.merge(df_passengers, df_seats, on='Seat').merge(df_flights, on='Flight').merge(df_flight_classes, on='Flight')
df_output['Class'] = np.where(df_output['Row'] <= df_output['last business row'], 'Business', 'Economy')
df_output

Unnamed: 0,First Name,Last Name,Seat,Flight,Purchase Amount,Row,Letter,Position,[FlightID|DepAir|ArrAir|DepDate|DepTime],From,To,Date,Time,DateTime,Time of day,Business Class,last business row,Class
0,Jerrylee,Rein,1,1,48.29,1,A,Window,[1|LHR|SEA|2020-10-08|14:53:00],LHR,SEA,2020-10-08,14:53:00,2020-10-08 14:53:00,Afternoon,1-5,5,Business
1,Forester,Iashvili,2,1,0.00,1,B,Aisle,[1|LHR|SEA|2020-10-08|14:53:00],LHR,SEA,2020-10-08,14:53:00,2020-10-08 14:53:00,Afternoon,1-5,5,Business
2,Shaun,Sherwill,3,1,0.00,1,C,Middle,[1|LHR|SEA|2020-10-08|14:53:00],LHR,SEA,2020-10-08,14:53:00,2020-10-08 14:53:00,Afternoon,1-5,5,Business
3,Werner,Basile,4,1,58.21,1,D,Middle,[1|LHR|SEA|2020-10-08|14:53:00],LHR,SEA,2020-10-08,14:53:00,2020-10-08 14:53:00,Afternoon,1-5,5,Business
4,Kerwinn,Skillen,5,1,41.96,1,E,Aisle,[1|LHR|SEA|2020-10-08|14:53:00],LHR,SEA,2020-10-08,14:53:00,2020-10-08 14:53:00,Afternoon,1-5,5,Business
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Skye,McLaverty,106,10,10.46,18,D,Middle,[10|CAI|LHR|2020-12-22|10:54:00],CAI,LHR,2020-12-22,10:54:00,2020-12-22 10:54:00,Morning,1-5,5,Economy
996,Margaux,Rymour,107,10,0.00,18,E,Aisle,[10|CAI|LHR|2020-12-22|10:54:00],CAI,LHR,2020-12-22,10:54:00,2020-12-22 10:54:00,Morning,1-5,5,Economy
997,Corny,Vaszoly,108,10,44.60,18,F,Window,[10|CAI|LHR|2020-12-22|10:54:00],CAI,LHR,2020-12-22,10:54:00,2020-12-22 10:54:00,Morning,1-5,5,Economy
998,Vittorio,Rushbrook,109,10,0.00,19,A,Window,[10|CAI|LHR|2020-12-22|10:54:00],CAI,LHR,2020-12-22,10:54:00,2020-12-22 10:54:00,Morning,1-5,5,Economy


In [97]:
df_avg_per_time = df_output.where(df_output['Class']!='Business').groupby(['Flight', 'Time of day'],as_index=False)['Purchase Amount'].sum().groupby('Time of day')['Purchase Amount'].mean()
df_avg_per_time

Time of day
Afternoon    1959.096667
Evening      1185.263333
Morning      1478.312500
Name: Purchase Amount, dtype: float64

In [98]:
df_seat_tot = df_output.where(df_output['Class']!='Business').groupby('Position')['Purchase Amount'].sum()
df_seat_tot

Position
Aisle     4797.72
Middle    4888.19
Window    5660.42
Name: Purchase Amount, dtype: float64

In [92]:
df_tot_per_class = df_output.groupby('Class')['Purchase Amount'].sum()
df_tot_per_class

Class
Business     7207.69
Economy     15346.33
Name: Purchase Amount, dtype: float64