In [1]:
import pandas as pd

In [2]:
# Load the two Week 1 output extracts (the Flow Card and Non-Flow Card files)
df_flow, df_noflow = (
    pd.read_csv(week_1_csv)
    for week_1_csv in (
        'PD 2024 Wk 1 Output Flow Card.csv',
        'PD 2024 Wk 1 Output Non-Flow Card.csv',
    )
)

In [3]:
# df_flow.head()
# df_noflow.head()

In [4]:
# Stack the Flow Card rows on top of the Non-Flow Card rows and renumber the index from 0
df_Week_1 = pd.concat(objs=[df_flow, df_noflow], axis=0, ignore_index=True)

df_Week_1.head()

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,22/07/2024,PA010,Tokyo,New York,Economy,2380.0,Yes,0,Egg Free
1,20/04/2024,PA002,New York,London,Economy,3490.0,Yes,1,Vegan
2,23/01/2024,PA010,Tokyo,New York,Premium Economy,825.0,Yes,1,Vegetarian
3,05/06/2024,PA006,Tokyo,London,First Class,618.0,Yes,3,Vegan
4,30/03/2024,PA004,Perth,London,First Class,446.0,Yes,1,Nut Free


In [5]:
# Correct the "Class" labels following the Week 2 findings.
# The two lists are paired by position: each label in `to_replace` becomes the
# label at the same position in `value`.
# NOTE: the Economy <-> First swap is not idempotent - running this cell a second
# time on the already-corrected labels would turn the corrected "Economy" rows into "First".
df_Week_1["Class"] = df_Week_1["Class"].replace(
    to_replace=["Economy", "First Class", "Business Class", "Premium Economy"],
    value=["First", "Economy", "Business", "Premium"],
)

In [6]:
# Shorten each corrected class name to its code: FC, E, PE or BC
df_Week_1["Class"] = df_Week_1["Class"].replace(
    dict(First="FC", Economy="E", Premium="PE", Business="BC")
)

df_Week_1.head()

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,22/07/2024,PA010,Tokyo,New York,FC,2380.0,Yes,0,Egg Free
1,20/04/2024,PA002,New York,London,FC,3490.0,Yes,1,Vegan
2,23/01/2024,PA010,Tokyo,New York,PE,825.0,Yes,1,Vegetarian
3,05/06/2024,PA006,Tokyo,London,E,618.0,Yes,3,Vegan
4,30/03/2024,PA004,Perth,London,E,446.0,Yes,1,Nut Free


In [7]:
# Parse the day/month/year strings in "Date" into datetimes, then add the month number.
# `assign` evaluates its keyword arguments in order, so the "Month" callable already
# sees the parsed "Date" column.
df_Week_1 = df_Week_1.assign(
    Date=lambda frame: pd.to_datetime(frame["Date"], format="%d/%m/%Y"),
    Month=lambda frame: frame["Date"].dt.month,
)

df_Week_1.head()

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type,Month
0,2024-07-22,PA010,Tokyo,New York,FC,2380.0,Yes,0,Egg Free,7
1,2024-04-20,PA002,New York,London,FC,3490.0,Yes,1,Vegan,4
2,2024-01-23,PA010,Tokyo,New York,PE,825.0,Yes,1,Vegetarian,1
3,2024-06-05,PA006,Tokyo,London,E,618.0,Yes,3,Vegan,6
4,2024-03-30,PA004,Perth,London,E,446.0,Yes,1,Nut Free,3


In [8]:
# Remove the columns that the sales roll-up does not use.
# `drop(columns=...)` hands back a new frame, so df_Week_1 itself is left untouched.
df_Week_1_Cleaned = df_Week_1.drop(
    columns=[
        'Date',
        'Flight Number',
        'From',
        'To',
        'Flow Card?',
        'Bags Checked',
        'Meal Type',
    ]
)

df_Week_1_Cleaned.head()

Unnamed: 0,Class,Price,Month
0,FC,2380.0,7
1,FC,3490.0,4
2,PE,825.0,1
3,E,618.0,6
4,E,446.0,3


In [16]:
# Sum "Price" for every (Month, Class) pair; as_index=False keeps the keys as ordinary columns
df_Sales = df_Week_1_Cleaned.groupby(["Month", "Class"], as_index=False)["Price"].sum()

df_Sales.head()

Unnamed: 0,Month,Class,Price
0,1,BC,48555.6
1,1,E,36081.0
2,1,FC,193960.0
3,1,PE,67297.5
4,2,BC,46335.6


In [10]:
# Read the four quarterly CSVs of the Week 3 input, one frame per quarter
df_q1, df_q2, df_q3, df_q4 = map(
    pd.read_csv,
    (
        'PD 2024 Wk 3 Input.xlsx - Q1.csv',
        'PD 2024 Wk 3 Input.xlsx - Q2.csv',
        'PD 2024 Wk 3 Input.xlsx - Q3.csv',
        'PD 2024 Wk 3 Input.xlsx - Q4.csv',
    ),
)

In [11]:
# df_q1
# df_q2
# df_q3
# df_q4

In [12]:
# Stack the four quarterly frames into one and renumber the index from 0.
# df_Target and df_Week_3 are two names for the same frame (no copy is made).
df_Target = df_Week_3 = pd.concat(
    objs=[df_q1, df_q2, df_q3, df_q4],
    axis=0,
    ignore_index=True,
)

df_Target.head()

Unnamed: 0,Month,Class,Target
0,1,FC,120000
1,2,FC,130000
2,3,FC,140000
3,1,BC,85000
4,2,BC,86000


In [13]:
# Join sales to targets on their shared keys, "Month" and "Class".
#   how="inner"           - keep only the (Month, Class) pairs present in both frames
#   validate="one_to_one" - raise pandas.errors.MergeError if either frame repeats a
#                           (Month, Class) key, rather than silently duplicating rows
#                           and distorting the comparison against the target
df_Metric = pd.merge(
    df_Sales,
    df_Target,
    on=["Month", "Class"],
    how="inner",
    validate="one_to_one",
)

df_Metric

Unnamed: 0,Month,Class,Price,Target
0,1,BC,48555.6,85000
1,1,E,36081.0,31000
2,1,FC,193960.0,120000
3,1,PE,67297.5,40000
4,2,BC,46335.6,86000
5,2,E,30968.0,31500
6,2,FC,145665.0,130000
7,2,PE,69222.5,40500
8,3,BC,47875.2,87000
9,3,E,31829.0,32000


In [14]:
# Sales minus target: positive means the target was beaten, negative means a shortfall
df_Metric["Difference to Target"] = df_Metric["Price"].sub(df_Metric["Target"])

df_Metric

Unnamed: 0,Month,Class,Price,Target,Difference to Target
0,1,BC,48555.6,85000,-36444.4
1,1,E,36081.0,31000,5081.0
2,1,FC,193960.0,120000,73960.0
3,1,PE,67297.5,40000,27297.5
4,2,BC,46335.6,86000,-39664.4
5,2,E,30968.0,31500,-532.0
6,2,FC,145665.0,130000,15665.0
7,2,PE,69222.5,40500,28722.5
8,3,BC,47875.2,87000,-39124.8
9,3,E,31829.0,32000,-171.0


In [15]:
# Write the final metrics table to CSV, leaving out the row index
df_Metric.to_csv(path_or_buf='Week_3_Data_Cleaning.csv', index=False)