In [1]:
import pandas as pd

In [2]:
# Load the raw weekly input file and peek at the first rows
input_file = 'PD_2024_Wk_1_Input.csv'
df = pd.read_csv(filepath_or_buffer=input_file)
df.head(n=5)

Unnamed: 0,Flight Details,Flow Card?,Bags Checked,Meal Type
0,2024-07-22//PA010//Tokyo-New York//Economy//2380,1,0,Egg Free
1,2024-09-28//PA008//Perth-New York//Economy//1855,0,2,Vegetarian
2,2024-04-20//PA002//New York-London//Economy//3490,1,1,Vegan
3,2024-01-23//PA010//Tokyo-New York//Premium Eco...,1,1,Vegetarian
4,2024-10-01//PA008//Perth-New York//Business Cl...,0,0,Vegetarian


In [3]:
# Inspect column dtypes and non-null counts to spot columns with missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3778 entries, 0 to 3777
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Flight Details  3778 non-null   object
 1   Flow Card?      3778 non-null   int64 
 2   Bags Checked    3778 non-null   int64 
 3   Meal Type       3189 non-null   object
dtypes: int64(2), object(2)
memory usage: 118.2+ KB


In [4]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns
df.describe()

Unnamed: 0,Flow Card?,Bags Checked
count,3778.0,3778.0
mean,0.498412,1.507941
std,0.500064,1.121228
min,0.0,0.0
25%,0.0,1.0
50%,0.0,2.0
75%,1.0,3.0
max,1.0,3.0


In [5]:
# One independent, empty accumulator list per field that will be pulled
# out of the "Flight Details" strings.
(
    t_date,
    t_flight_number,
    t_route_from,
    t_route_to,
    t_class_type,
    t_price,
) = ([] for _ in range(6))

In [6]:
# Split every "Flight Details" string into its component fields.
def _parse_flight_details(flight):
    """Split one 'Flight Details' string into its six fields.

    Parameters
    ----------
    flight : str
        Text of the form ``date//flight number//origin-destination//class//price``,
        e.g. ``"2024-07-22//PA010//Tokyo-New York//Economy//2380"``.

    Returns
    -------
    tuple
        ``(date, flight_number, route_from, route_to, class_type, price)`` where
        ``date`` is re-ordered to day/month/year (``"22/07/2024"``) and ``price``
        is a float rounded to 2 decimal places.

    Raises
    ------
    ValueError
        If the string does not have exactly five ``//``-separated parts, the
        route does not contain exactly one ``-``, or the price is not numeric.
    """
    # Extracting data from the string
    date, flight_number, route, class_type, price = flight.split("//")

    # Extracting Origin and Destination
    route_from, route_to = route.split("-")

    # Tidying-up the Date:
    #   "2024-07-22" -> "2024/07/22" -> ['2024', '07', '22']
    #   reversed     -> ['22', '07', '2024'] -> "22/07/2024"
    date = "/".join(reversed(date.replace("-", "/").split("/")))

    # Rounding the price
    price = round(float(price), 2)

    return date, flight_number, route_from, route_to, class_type, price


# Build the lists from scratch on every run: appending to lists created in an
# earlier cell would duplicate every row if this cell were executed twice, and
# the column assignment that follows would then fail on a length mismatch.
_parsed_rows = [_parse_flight_details(flight) for flight in df["Flight Details"]]

# zip(*rows) transposes rows into columns; an empty frame yields six empty lists.
(t_date, t_flight_number, t_route_from,
 t_route_to, t_class_type, t_price) = (
    [list(column) for column in zip(*_parsed_rows)]
    if _parsed_rows
    else [[] for _ in range(6)]
)

In [7]:
# Attach each parsed list to the frame under its output column name
parsed_columns = {
    "Date": t_date,
    "Flight Number": t_flight_number,
    "From": t_route_from,
    "To": t_route_to,
    "Class": t_class_type,
    "Price": t_price,
}
for column_name, column_values in parsed_columns.items():
    df[column_name] = column_values

df.head()

Unnamed: 0,Flight Details,Flow Card?,Bags Checked,Meal Type,Date,Flight Number,From,To,Class,Price
0,2024-07-22//PA010//Tokyo-New York//Economy//2380,1,0,Egg Free,22/07/2024,PA010,Tokyo,New York,Economy,2380.0
1,2024-09-28//PA008//Perth-New York//Economy//1855,0,2,Vegetarian,28/09/2024,PA008,Perth,New York,Economy,1855.0
2,2024-04-20//PA002//New York-London//Economy//3490,1,1,Vegan,20/04/2024,PA002,New York,London,Economy,3490.0
3,2024-01-23//PA010//Tokyo-New York//Premium Eco...,1,1,Vegetarian,23/01/2024,PA010,Tokyo,New York,Premium Economy,825.0
4,2024-10-01//PA008//Perth-New York//Business Cl...,0,0,Vegetarian,01/10/2024,PA008,Perth,New York,Business Class,634.8


In [8]:
# Keep only the output fields, in their final order (this drops the raw
# "Flight Details" column now that it has been split)
output_columns = [
    'Date',
    'Flight Number',
    'From',
    'To',
    'Class',
    'Price',
    'Flow Card?',
    'Bags Checked',
    'Meal Type',
]
df = df[output_columns]

In [9]:
# Preview of Output Data: display the reordered frame to check the new
# column layout before the remaining clean-up steps
df

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,22/07/2024,PA010,Tokyo,New York,Economy,2380.0,1,0,Egg Free
1,28/09/2024,PA008,Perth,New York,Economy,1855.0,0,2,Vegetarian
2,20/04/2024,PA002,New York,London,Economy,3490.0,1,1,Vegan
3,23/01/2024,PA010,Tokyo,New York,Premium Economy,825.0,1,1,Vegetarian
4,01/10/2024,PA008,Perth,New York,Business Class,634.8,0,0,Vegetarian
...,...,...,...,...,...,...,...,...,...
3773,05/05/2024,PA009,New York,Tokyo,Economy,1360.0,0,3,Nut Free
3774,14/06/2024,PA008,Perth,New York,First Class,245.0,0,1,Dairy Free
3775,16/01/2024,PA010,Tokyo,New York,Economy,2410.0,0,2,Egg Free
3776,16/08/2024,PA005,London,Tokyo,Premium Economy,960.0,0,0,Nut Free


In [10]:
# Recode the 1/0 flag in "Flow Card?" as readable Yes/No labels
flow_card_labels = {1: "Yes", 0: "No"}
df['Flow Card?'] = df['Flow Card?'].replace(flow_card_labels)
df.head()

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,22/07/2024,PA010,Tokyo,New York,Economy,2380.0,Yes,0,Egg Free
1,28/09/2024,PA008,Perth,New York,Economy,1855.0,No,2,Vegetarian
2,20/04/2024,PA002,New York,London,Economy,3490.0,Yes,1,Vegan
3,23/01/2024,PA010,Tokyo,New York,Premium Economy,825.0,Yes,1,Vegetarian
4,01/10/2024,PA008,Perth,New York,Business Class,634.8,No,0,Vegetarian


In [11]:
# First output table: rows whose "Flow Card?" value is "Yes"
is_flowcard_yes = df["Flow Card?"] == "Yes"
df_flowcard_yes = df.loc[is_flowcard_yes]
df_flowcard_yes

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,22/07/2024,PA010,Tokyo,New York,Economy,2380.0,Yes,0,Egg Free
2,20/04/2024,PA002,New York,London,Economy,3490.0,Yes,1,Vegan
3,23/01/2024,PA010,Tokyo,New York,Premium Economy,825.0,Yes,1,Vegetarian
6,05/06/2024,PA006,Tokyo,London,First Class,618.0,Yes,3,Vegan
8,30/03/2024,PA004,Perth,London,First Class,446.0,Yes,1,Nut Free
...,...,...,...,...,...,...,...,...,...
3764,23/11/2024,PA005,London,Tokyo,Economy,2070.0,Yes,2,Egg Free
3766,04/11/2024,PA003,London,Perth,First Class,210.0,Yes,3,Nut Free
3770,29/04/2024,PA012,Tokyo,Perth,Economy,3490.0,Yes,0,Dairy Free
3772,26/09/2024,PA001,London,New York,First Class,207.0,Yes,2,Vegetarian


In [12]:
# Second output table: rows whose "Flow Card?" value is "No"
is_flowcard_no = df["Flow Card?"] == "No"
df_flowcard_no = df.loc[is_flowcard_no]
df_flowcard_no

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
1,28/09/2024,PA008,Perth,New York,Economy,1855.0,No,2,Vegetarian
4,01/10/2024,PA008,Perth,New York,Business Class,634.8,No,0,Vegetarian
5,04/03/2024,PA007,New York,Perth,Business Class,458.4,No,3,Nut Free
7,25/02/2024,PA010,Tokyo,New York,Premium Economy,1435.0,No,0,
13,29/03/2024,PA004,Perth,London,Economy,2730.0,No,2,Vegan
...,...,...,...,...,...,...,...,...,...
3771,06/03/2024,PA006,Tokyo,London,Premium Economy,940.0,No,2,Vegetarian
3773,05/05/2024,PA009,New York,Tokyo,Economy,1360.0,No,3,Nut Free
3774,14/06/2024,PA008,Perth,New York,First Class,245.0,No,1,Dairy Free
3775,16/01/2024,PA010,Tokyo,New York,Economy,2410.0,No,2,Egg Free


In [13]:
# Write the "Yes" table to CSV, leaving out the pandas row index
df_flowcard_yes.to_csv(
    path_or_buf='Week_1.1_Data_Cleaning.csv',
    index=False,
)

In [14]:
# Write the "No" table to CSV, leaving out the pandas row index
df_flowcard_no.to_csv(
    path_or_buf='Week_1.2_Data_Cleaning.csv',
    index=False,
)