In [36]:
import os
import pandas as pd
import numpy as np

# Notebook configuration: widen pandas' display limits and define the
# input/output CSV locations relative to the notebook's working directory.
for option_name in ("display.max_columns", "display.width"):
    pd.set_option(option_name, 200)

PROCESSED_DIR = os.path.join("..", "data", "processed")
IN_PATH = os.path.join(PROCESSED_DIR, "week1_sample.csv")
OUT_PATH = os.path.join(PROCESSED_DIR, "week2_processed.csv")

print("Input path:", IN_PATH)
print("Output path:", OUT_PATH)
# Reports whether the input is present; it does not stop the notebook if not.
print("Input file exists?", os.path.exists(IN_PATH))


Input path: ..\data\processed\week1_sample.csv
Output path: ..\data\processed\week2_processed.csv
Input file exists? True


In [37]:
# Load the CSV at IN_PATH into the working frame `df`, then report its
# dimensions and preview the first five rows (head() defaults to n=5).
df = pd.read_csv(IN_PATH)

print("Data loaded successfully.")
print("Shape:", df.shape)
df.head()


Data loaded successfully.
Shape: (98619, 15)


Unnamed: 0,Passenger ID,First Name,Last Name,Gender,Age,Nationality,Airport Name,Airport Country Code,Country Name,Airport Continent,Continents,Departure Date,Arrival Airport,Pilot Name,Flight Status
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,6/28/2022,CXF,Fransisco Hazeldine,On Time
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,12/26/2022,YCO,Marla Parsonage,On Time
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,1/18/2022,GNB,Rhonda Amber,On Time
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,9/16/2022,YND,Kacie Commucci,Delayed
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2/25/2022,SEE,Ebonee Tree,On Time


In [38]:
# Normalise headers: trim surrounding whitespace and replace inner spaces
# with underscores (e.g. "Passenger ID" -> "Passenger_ID").
df.columns = df.columns.map(lambda header: header.strip().replace(" ", "_"))

print("Column names cleaned")
list(df.columns[:30])


Column names cleaned


['Passenger_ID',
 'First_Name',
 'Last_Name',
 'Gender',
 'Age',
 'Nationality',
 'Airport_Name',
 'Airport_Country_Code',
 'Country_Name',
 'Airport_Continent',
 'Continents',
 'Departure_Date',
 'Arrival_Airport',
 'Pilot_Name',
 'Flight_Status']

In [39]:
# Per-column null counts before imputation, largest first.
null_counts_before = df.isna().sum()
missing_before = null_counts_before.sort_values(ascending=False)
missing_before.iloc[:15]


Passenger_ID            0
First_Name              0
Last_Name               0
Gender                  0
Age                     0
Nationality             0
Airport_Name            0
Airport_Country_Code    0
Country_Name            0
Airport_Continent       0
Continents              0
Departure_Date          0
Arrival_Airport         0
Pilot_Name              0
Flight_Status           0
dtype: int64

In [40]:
# Impute missing values by column type: numeric columns receive their own
# median, every other column receives the literal "Unknown". Columns that
# have no missing values are left untouched.
num_cols = list(df.select_dtypes(include="number").columns)
cat_cols = list(df.select_dtypes(exclude="number").columns)

# One pass over the frame; filling one column never changes another's flag.
columns_with_gaps = df.isna().any()

for name in num_cols:
    if not columns_with_gaps[name]:
        continue
    df[name] = df[name].fillna(df[name].median())

for name in cat_cols:
    if not columns_with_gaps[name]:
        continue
    df[name] = df[name].fillna("Unknown")

print("Missing values handled/updated (numeric=median, text='Unknown')")


Missing values handled/updated (numeric=median, text='Unknown')


In [41]:
# Per-column null counts after imputation, largest first.
null_counts_after = df.isna().sum()
missing_after = null_counts_after.sort_values(ascending=False)
missing_after.iloc[:15]


Passenger_ID            0
First_Name              0
Last_Name               0
Gender                  0
Age                     0
Nationality             0
Airport_Name            0
Airport_Country_Code    0
Country_Name            0
Airport_Continent       0
Continents              0
Departure_Date          0
Arrival_Airport         0
Pilot_Name              0
Flight_Status           0
dtype: int64

In [42]:
# Convert Departure_Date to datetime.
# errors="coerce" turns every string the bulk parse cannot handle into NaT
# without any signal, which then propagates as missing values into all
# derived date features. To avoid silently losing dates:
#   1. run the fast bulk parse;
#   2. retry only the rows that had a value but came back NaT, one value at
#      a time, so each string is parsed on its own rather than against a
#      format chosen for the column as a whole;
#   3. report how many values are still unparseable.
raw_departure = df["Departure_Date"]
parsed_departure = pd.to_datetime(raw_departure, errors="coerce")

needs_retry = parsed_departure.isna() & raw_departure.notna()
if needs_retry.any():
    parsed_departure.loc[needs_retry] = raw_departure.loc[needs_retry].map(
        lambda value: pd.to_datetime(value, errors="coerce")
    )

df["Departure_Date"] = parsed_departure
print("Departure_Date converted to datetime")

# Surface any remaining failures instead of hiding them behind the coercion.
unparsed_count = int((parsed_departure.isna() & raw_departure.notna()).sum())
if unparsed_count:
    print(f"Warning: {unparsed_count} Departure_Date value(s) could not be parsed and are NaT")

df["Departure_Date"].head()


Departure_Date converted to datetime


0   2022-06-28
1   2022-12-26
2   2022-01-18
3   2022-09-16
4   2022-02-25
Name: Departure_Date, dtype: datetime64[ns]

In [43]:
# Preview the first five rows of the working frame (head() defaults to n=5).
df.head()

Unnamed: 0,Passenger_ID,First_Name,Last_Name,Gender,Age,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Departure_Date,Arrival_Airport,Pilot_Name,Flight_Status
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,2022-06-28,CXF,Fransisco Hazeldine,On Time
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,2022-12-26,YCO,Marla Parsonage,On Time
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,2022-01-18,GNB,Rhonda Amber,On Time
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,2022-09-16,YND,Kacie Commucci,Delayed
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2022-02-25,SEE,Ebonee Tree,On Time


In [44]:
# Derive calendar features from Departure_Date via the .dt accessor:
# numeric year / month / day-of-month and the weekday name.
departure = df["Departure_Date"].dt
time_features = {
    "Departure_Year": departure.year,
    "Departure_Month": departure.month,
    "Departure_Day": departure.day,
    "Departure_DayOfWeek": departure.day_name(),
}
for feature_name, feature_values in time_features.items():
    df[feature_name] = feature_values

print("Time-based features created:")
df[list(time_features)].head()


Time-based features created:


Unnamed: 0,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek
0,2022.0,6.0,28.0,Tuesday
1,2022.0,12.0,26.0,Monday
2,2022.0,1.0,18.0,Tuesday
3,2022.0,9.0,16.0,Friday
4,2022.0,2.0,25.0,Friday


In [45]:
# Preview the first five rows of the working frame (head() defaults to n=5).
df.head()

Unnamed: 0,Passenger_ID,First_Name,Last_Name,Gender,Age,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Departure_Date,Arrival_Airport,Pilot_Name,Flight_Status,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,2022-06-28,CXF,Fransisco Hazeldine,On Time,2022.0,6.0,28.0,Tuesday
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,2022-12-26,YCO,Marla Parsonage,On Time,2022.0,12.0,26.0,Monday
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,2022-01-18,GNB,Rhonda Amber,On Time,2022.0,1.0,18.0,Tuesday
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,2022-09-16,YND,Kacie Commucci,Delayed,2022.0,9.0,16.0,Friday
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2022-02-25,SEE,Ebonee Tree,On Time,2022.0,2.0,25.0,Friday


In [46]:
# Cast the numeric date-part columns to pandas' nullable "Int64" dtype so
# they display as whole numbers (no trailing .0) while still being able to
# hold missing values. Columns that are absent are skipped.
date_part_dtypes = {
    name: "Int64"
    for name in ("Departure_Year", "Departure_Month", "Departure_Day")
    if name in df.columns
}
df = df.astype(date_part_dtypes)

df[["Departure_Year", "Departure_Month", "Departure_Day", "Departure_DayOfWeek"]].head()


Unnamed: 0,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek
0,2022,6,28,Tuesday
1,2022,12,26,Monday
2,2022,1,18,Tuesday
3,2022,9,16,Friday
4,2022,2,25,Friday


In [47]:
# Preview the first five rows of the working frame (head() defaults to n=5).
df.head()


Unnamed: 0,Passenger_ID,First_Name,Last_Name,Gender,Age,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Departure_Date,Arrival_Airport,Pilot_Name,Flight_Status,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,2022-06-28,CXF,Fransisco Hazeldine,On Time,2022,6,28,Tuesday
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,2022-12-26,YCO,Marla Parsonage,On Time,2022,12,26,Monday
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,2022-01-18,GNB,Rhonda Amber,On Time,2022,1,18,Tuesday
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,2022-09-16,YND,Kacie Commucci,Delayed,2022,9,16,Friday
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2022-02-25,SEE,Ebonee Tree,On Time,2022,2,25,Friday


In [62]:
# Bucket passenger ages into labelled groups.
# Bins are closed on the right: (0, 12], (12, 19], (19, 30], (30, 55],
# (55, 100]. include_lowest=True additionally closes the first bin on the
# left, so an age of exactly 0 is labelled "Child" instead of falling
# outside every bin and becoming NaN. Labels for ages 1-100 are unchanged.
# Ages below 0 or above 100 still fall outside the bins and yield NaN.
df["Age_Group"] = pd.cut(
    df["Age"],
    bins=[0, 12, 19, 30, 55, 100],
    labels=["Child", "Teen", "Youth", "Adult", "Senior Citizen"],
    include_lowest=True,
)

print("Age_Group feature created")
df[["Age", "Age_Group"]].head(10)


Age_Group feature created


Unnamed: 0,Age,Age_Group
0,62,Senior Citizen
1,62,Senior Citizen
2,67,Senior Citizen
3,71,Senior Citizen
4,21,Youth
5,55,Adult
6,73,Senior Citizen
7,36,Adult
8,35,Adult
9,13,Teen


In [63]:
# Preview the first five rows of the working frame (head() defaults to n=5).
df.head()

Unnamed: 0,Passenger_ID,First_Name,Last_Name,Gender,Age,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Departure_Date,Arrival_Airport,Pilot_Name,Flight_Status,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek,Passenger_Full_Name,Is_Flight_Delayed,Airport_Location,Flight_Type,Age_Group
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,2022-06-28,CXF,Fransisco Hazeldine,On Time,2022,6,28,Tuesday,Edithe Leggis,0,"Coldfoot Airport, United States",International,Senior Citizen
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,2022-12-26,YCO,Marla Parsonage,On Time,2022,12,26,Monday,Elwood Catt,0,"Kugluktuk Airport, Canada",International,Senior Citizen
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,2022-01-18,GNB,Rhonda Amber,On Time,2022,1,18,Tuesday,Darby Felgate,0,"Grenoble-Isère Airport, France",International,Senior Citizen
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,2022-09-16,YND,Kacie Commucci,Delayed,2022,9,16,Friday,Dominica Pyle,1,"Ottawa / Gatineau Airport, Canada",International,Senior Citizen
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2022-02-25,SEE,Ebonee Tree,On Time,2022,2,25,Friday,Bay Pencost,0,"Gillespie Field, United States",International,Youth


In [64]:
# Join first and last name with a single space into one display column.
df["Passenger_Full_Name"] = df["First_Name"].str.cat(df["Last_Name"], sep=" ")

print("Passenger_Full_Name created")
df[["Passenger_Full_Name"]].head()


Passenger_Full_Name created


Unnamed: 0,Passenger_Full_Name
0,Edithe Leggis
1,Elwood Catt
2,Darby Felgate
3,Dominica Pyle
4,Bay Pencost


In [65]:
# Preview the first five rows of the working frame (head() defaults to n=5).
df.head()

Unnamed: 0,Passenger_ID,First_Name,Last_Name,Gender,Age,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Departure_Date,Arrival_Airport,Pilot_Name,Flight_Status,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek,Passenger_Full_Name,Is_Flight_Delayed,Airport_Location,Flight_Type,Age_Group
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,2022-06-28,CXF,Fransisco Hazeldine,On Time,2022,6,28,Tuesday,Edithe Leggis,0,"Coldfoot Airport, United States",International,Senior Citizen
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,2022-12-26,YCO,Marla Parsonage,On Time,2022,12,26,Monday,Elwood Catt,0,"Kugluktuk Airport, Canada",International,Senior Citizen
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,2022-01-18,GNB,Rhonda Amber,On Time,2022,1,18,Tuesday,Darby Felgate,0,"Grenoble-Isère Airport, France",International,Senior Citizen
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,2022-09-16,YND,Kacie Commucci,Delayed,2022,9,16,Friday,Dominica Pyle,1,"Ottawa / Gatineau Airport, Canada",International,Senior Citizen
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2022-02-25,SEE,Ebonee Tree,On Time,2022,2,25,Friday,Bay Pencost,0,"Gillespie Field, United States",International,Youth


In [66]:
# Binary flag derived from Flight_Status: 0 when the status, compared
# case-insensitively, is one of the on-time spellings below; 1 otherwise.
# NOTE(review): every status outside this list is flagged 1, including
# "Cancelled" — confirm that is the intended meaning of "delayed".
on_time_statuses = ["on time", "ontime", "scheduled"]
status_lower = df["Flight_Status"].astype(str).str.lower()
df["Is_Flight_Delayed"] = (~status_lower.isin(on_time_statuses)).astype("int64")

print("Is_Flight_Delayed feature created")
df[["Flight_Status", "Is_Flight_Delayed"]].head(10)


Is_Flight_Delayed feature created


Unnamed: 0,Flight_Status,Is_Flight_Delayed
0,On Time,0
1,On Time,0
2,On Time,0
3,Delayed,1
4,On Time,0
5,On Time,0
6,Cancelled,1
7,Cancelled,1
8,On Time,0
9,On Time,0


In [67]:
# Preview the first five rows of the working frame (head() defaults to n=5).
df.head()

Unnamed: 0,Passenger_ID,First_Name,Last_Name,Gender,Age,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Departure_Date,Arrival_Airport,Pilot_Name,Flight_Status,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek,Passenger_Full_Name,Is_Flight_Delayed,Airport_Location,Flight_Type,Age_Group
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,2022-06-28,CXF,Fransisco Hazeldine,On Time,2022,6,28,Tuesday,Edithe Leggis,0,"Coldfoot Airport, United States",International,Senior Citizen
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,2022-12-26,YCO,Marla Parsonage,On Time,2022,12,26,Monday,Elwood Catt,0,"Kugluktuk Airport, Canada",International,Senior Citizen
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,2022-01-18,GNB,Rhonda Amber,On Time,2022,1,18,Tuesday,Darby Felgate,0,"Grenoble-Isère Airport, France",International,Senior Citizen
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,2022-09-16,YND,Kacie Commucci,Delayed,2022,9,16,Friday,Dominica Pyle,1,"Ottawa / Gatineau Airport, Canada",International,Senior Citizen
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2022-02-25,SEE,Ebonee Tree,On Time,2022,2,25,Friday,Bay Pencost,0,"Gillespie Field, United States",International,Youth


In [68]:
# Combine airport and country into a single "<airport>, <country>" label.
airport_with_separator = df["Airport_Name"].add(", ")
df["Airport_Location"] = airport_with_separator.add(df["Country_Name"])

print("Airport_Location feature created")
df[["Airport_Location"]].head()


Airport_Location feature created


Unnamed: 0,Airport_Location
0,"Coldfoot Airport, United States"
1,"Kugluktuk Airport, Canada"
2,"Grenoble-Isère Airport, France"
3,"Ottawa / Gatineau Airport, Canada"
4,"Gillespie Field, United States"


In [69]:
# Preview the first five rows of the working frame (head() defaults to n=5).
df.head()

Unnamed: 0,Passenger_ID,First_Name,Last_Name,Gender,Age,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Departure_Date,Arrival_Airport,Pilot_Name,Flight_Status,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek,Passenger_Full_Name,Is_Flight_Delayed,Airport_Location,Flight_Type,Age_Group
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,2022-06-28,CXF,Fransisco Hazeldine,On Time,2022,6,28,Tuesday,Edithe Leggis,0,"Coldfoot Airport, United States",International,Senior Citizen
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,2022-12-26,YCO,Marla Parsonage,On Time,2022,12,26,Monday,Elwood Catt,0,"Kugluktuk Airport, Canada",International,Senior Citizen
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,2022-01-18,GNB,Rhonda Amber,On Time,2022,1,18,Tuesday,Darby Felgate,0,"Grenoble-Isère Airport, France",International,Senior Citizen
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,2022-09-16,YND,Kacie Commucci,Delayed,2022,9,16,Friday,Dominica Pyle,1,"Ottawa / Gatineau Airport, Canada",International,Senior Citizen
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2022-02-25,SEE,Ebonee Tree,On Time,2022,2,25,Friday,Bay Pencost,0,"Gillespie Field, United States",International,Youth


In [70]:
# Label each row "Domestic" when the passenger's Nationality equals the
# airport's Country_Name, otherwise "International".
# The comparison is done column-wise with np.where rather than calling a
# Python lambda once per row via df.apply(axis=1); the labels are the same
# (missing values never compare equal, so they stay "International") but
# no per-row Series has to be built.
# NOTE(review): this compares passenger nationality with the airport's
# country — confirm that is the intended definition of a domestic flight.
same_country = df["Nationality"] == df["Country_Name"]
df["Flight_Type"] = np.where(same_country, "Domestic", "International")

print("Flight_Type feature created")
df[["Nationality", "Country_Name", "Flight_Type"]].head(10)


Flight_Type feature created


Unnamed: 0,Nationality,Country_Name,Flight_Type
0,Japan,United States,International
1,Nicaragua,Canada,International
2,Russia,France,International
3,China,Canada,International
4,China,United States,International
5,Brazil,Brazil,Domestic
6,Ivory Coast,United Kingdom,International
7,Vietnam,Brazil,International
8,Palestinian Territory,Italy,International
9,Thailand,Canada,International


In [71]:
# Preview the first five rows of the working frame (head() defaults to n=5).
df.head()

Unnamed: 0,Passenger_ID,First_Name,Last_Name,Gender,Age,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Departure_Date,Arrival_Airport,Pilot_Name,Flight_Status,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek,Passenger_Full_Name,Is_Flight_Delayed,Airport_Location,Flight_Type,Age_Group
0,ABVWIg,Edithe,Leggis,Female,62,Japan,Coldfoot Airport,US,United States,NAM,North America,2022-06-28,CXF,Fransisco Hazeldine,On Time,2022,6,28,Tuesday,Edithe Leggis,0,"Coldfoot Airport, United States",International,Senior Citizen
1,jkXXAX,Elwood,Catt,Male,62,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,2022-12-26,YCO,Marla Parsonage,On Time,2022,12,26,Monday,Elwood Catt,0,"Kugluktuk Airport, Canada",International,Senior Citizen
2,CdUz2g,Darby,Felgate,Male,67,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,2022-01-18,GNB,Rhonda Amber,On Time,2022,1,18,Tuesday,Darby Felgate,0,"Grenoble-Isère Airport, France",International,Senior Citizen
3,BRS38V,Dominica,Pyle,Female,71,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,2022-09-16,YND,Kacie Commucci,Delayed,2022,9,16,Friday,Dominica Pyle,1,"Ottawa / Gatineau Airport, Canada",International,Senior Citizen
4,9kvTLo,Bay,Pencost,Male,21,China,Gillespie Field,US,United States,NAM,North America,2022-02-25,SEE,Ebonee Tree,On Time,2022,2,25,Friday,Bay Pencost,0,"Gillespie Field, United States",International,Youth


In [72]:
# Report the frame's dimensions and list every column name.
print("Final dataset shape:", df.shape)
print("Final columns:")
list(df.columns)


Final dataset shape: (98619, 24)
Final columns:


['Passenger_ID',
 'First_Name',
 'Last_Name',
 'Gender',
 'Age',
 'Nationality',
 'Airport_Name',
 'Airport_Country_Code',
 'Country_Name',
 'Airport_Continent',
 'Continents',
 'Departure_Date',
 'Arrival_Airport',
 'Pilot_Name',
 'Flight_Status',
 'Departure_Year',
 'Departure_Month',
 'Departure_Day',
 'Departure_DayOfWeek',
 'Passenger_Full_Name',
 'Is_Flight_Delayed',
 'Airport_Location',
 'Flight_Type',
 'Age_Group']

In [73]:
# Reorder columns into a clean, logical sequence: passenger details first,
# then airport details, then flight details. Selecting with this list
# raises KeyError if any listed column is missing and drops any column
# that is not listed.
passenger_columns = [
    "Passenger_ID",
    "First_Name",
    "Last_Name",
    "Passenger_Full_Name",
    "Gender",
    "Age",
    "Age_Group",
    "Nationality",
]
airport_columns = [
    "Airport_Name",
    "Airport_Country_Code",
    "Country_Name",
    "Airport_Continent",
    "Continents",
    "Airport_Location",
]
flight_columns = [
    "Departure_Date",
    "Departure_Year",
    "Departure_Month",
    "Departure_Day",
    "Departure_DayOfWeek",
    "Arrival_Airport",
    "Pilot_Name",
    "Flight_Status",
    "Is_Flight_Delayed",
    "Flight_Type",
]
final_column_order = passenger_columns + airport_columns + flight_columns

df = df.loc[:, final_column_order]

print("Columns reordered successfully")
df.head()


Columns reordered successfully


Unnamed: 0,Passenger_ID,First_Name,Last_Name,Passenger_Full_Name,Gender,Age,Age_Group,Nationality,Airport_Name,Airport_Country_Code,Country_Name,Airport_Continent,Continents,Airport_Location,Departure_Date,Departure_Year,Departure_Month,Departure_Day,Departure_DayOfWeek,Arrival_Airport,Pilot_Name,Flight_Status,Is_Flight_Delayed,Flight_Type
0,ABVWIg,Edithe,Leggis,Edithe Leggis,Female,62,Senior Citizen,Japan,Coldfoot Airport,US,United States,NAM,North America,"Coldfoot Airport, United States",2022-06-28,2022,6,28,Tuesday,CXF,Fransisco Hazeldine,On Time,0,International
1,jkXXAX,Elwood,Catt,Elwood Catt,Male,62,Senior Citizen,Nicaragua,Kugluktuk Airport,CA,Canada,NAM,North America,"Kugluktuk Airport, Canada",2022-12-26,2022,12,26,Monday,YCO,Marla Parsonage,On Time,0,International
2,CdUz2g,Darby,Felgate,Darby Felgate,Male,67,Senior Citizen,Russia,Grenoble-Isère Airport,FR,France,EU,Europe,"Grenoble-Isère Airport, France",2022-01-18,2022,1,18,Tuesday,GNB,Rhonda Amber,On Time,0,International
3,BRS38V,Dominica,Pyle,Dominica Pyle,Female,71,Senior Citizen,China,Ottawa / Gatineau Airport,CA,Canada,NAM,North America,"Ottawa / Gatineau Airport, Canada",2022-09-16,2022,9,16,Friday,YND,Kacie Commucci,Delayed,1,International
4,9kvTLo,Bay,Pencost,Bay Pencost,Male,21,Youth,China,Gillespie Field,US,United States,NAM,North America,"Gillespie Field, United States",2022-02-25,2022,2,25,Friday,SEE,Ebonee Tree,On Time,0,International


In [74]:
# Write the processed frame to OUT_PATH as defined in the configuration
# cell at the top of the notebook. The path is deliberately not re-assigned
# here: a second hard-coded literal would shadow the configured value, so
# editing the configuration would silently have no effect on where the file
# is written.
# index=False keeps the row index out of the CSV.
df.to_csv(OUT_PATH, index=False)

print("Week 2 processed file saved successfully")
print("Saved at:", OUT_PATH)


Week 2 processed file saved successfully
Saved at: ../data/processed/week2_processed.csv
