In [1]:
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical operations
import matplotlib.pyplot as plot  # For creating visualizations
import seaborn as sns  # For advanced data visualizations

# This is a magic command (used only in Jupyter) to display plots inline below the code cell
%matplotlib inline

In [2]:
df = pd.read_excel("flight_price.xlsx")  # Load the Excel dataset into a DataFrame
df.head()  # Display the first 5 rows of the dataset


Unnamed: 0,Airline,Date_of_Journey,Source,Destination,Route,Dep_Time,Arrival_Time,Duration,Total_Stops,Additional_Info,Price
0,IndiGo,24/03/2019,Banglore,New Delhi,BLR → DEL,22:20,01:10 22 Mar,2h 50m,non-stop,No info,3897
1,Air India,1/05/2019,Kolkata,Banglore,CCU → IXR → BBI → BLR,05:50,13:15,7h 25m,2 stops,No info,7662
2,Jet Airways,9/06/2019,Delhi,Cochin,DEL → LKO → BOM → COK,09:25,04:25 10 Jun,19h,2 stops,No info,13882
3,IndiGo,12/05/2019,Kolkata,Banglore,CCU → NAG → BLR,18:05,23:30,5h 25m,1 stop,No info,6218
4,IndiGo,01/03/2019,Banglore,New Delhi,BLR → NAG → DEL,16:50,21:35,4h 45m,1 stop,No info,13302


In [3]:
df.info()  # Displays the summary of the DataFrame: column names, data types, non-null counts, and memory usage


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10683 entries, 0 to 10682
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Airline          10683 non-null  object
 1   Date_of_Journey  10683 non-null  object
 2   Source           10683 non-null  object
 3   Destination      10683 non-null  object
 4   Route            10682 non-null  object
 5   Dep_Time         10683 non-null  object
 6   Arrival_Time     10683 non-null  object
 7   Duration         10683 non-null  object
 8   Total_Stops      10682 non-null  object
 9   Additional_Info  10683 non-null  object
 10  Price            10683 non-null  int64 
dtypes: int64(1), object(10)
memory usage: 918.2+ KB


In [4]:
# Filling missing values in 'Route' column with the most frequent value (mode) Since it is categorical data
df["Route"].fillna(df["Route"].mode()[0], inplace=True)

# Filling missing values in 'Total_Stops' column with the most frequent value (mode) since it is a categorical data
df["Total_Stops"].fillna(df["Total_Stops"].mode()[0], inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Route"].fillna(df["Route"].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Total_Stops"].fillna(df["Total_Stops"].mode()[0], inplace=True)


In [5]:
#No null values in data
df.isnull().sum()

Airline            0
Date_of_Journey    0
Source             0
Destination        0
Route              0
Dep_Time           0
Arrival_Time       0
Duration           0
Total_Stops        0
Additional_Info    0
Price              0
dtype: int64

In [6]:
# Extracting Day from 'Date_of_Journey' and storing it in a new column 'Date of Journey'
df["Date of Journey"] = df["Date_of_Journey"].str.split("/").str[0]

# Extracting Month from 'Date_of_Journey' and storing it in a new column 'Month of Journey'
df["Month of Journey"] = df["Date_of_Journey"].str.split("/").str[1]

# Extracting Year from 'Date_of_Journey' and storing it in a new column 'Year of Journey'
df["Year of Journey"] = df["Date_of_Journey"].str.split("/").str[2]


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10683 entries, 0 to 10682
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Airline           10683 non-null  object
 1   Date_of_Journey   10683 non-null  object
 2   Source            10683 non-null  object
 3   Destination       10683 non-null  object
 4   Route             10683 non-null  object
 5   Dep_Time          10683 non-null  object
 6   Arrival_Time      10683 non-null  object
 7   Duration          10683 non-null  object
 8   Total_Stops       10683 non-null  object
 9   Additional_Info   10683 non-null  object
 10  Price             10683 non-null  int64 
 11  Date of Journey   10683 non-null  object
 12  Month of Journey  10683 non-null  object
 13  Year of Journey   10683 non-null  object
dtypes: int64(1), object(13)
memory usage: 1.1+ MB


In [8]:
# Converting 'Date of Journey' column to integer type
df["Date of Journey"] = df["Date of Journey"].astype(int)

# Converting 'Month of Journey' column to integer type
df["Month of Journey"] = df["Month of Journey"].astype(int)

# Converting 'Year of Journey' column to integer type
df["Year of Journey"] = df["Year of Journey"].astype(int)


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10683 entries, 0 to 10682
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Airline           10683 non-null  object
 1   Date_of_Journey   10683 non-null  object
 2   Source            10683 non-null  object
 3   Destination       10683 non-null  object
 4   Route             10683 non-null  object
 5   Dep_Time          10683 non-null  object
 6   Arrival_Time      10683 non-null  object
 7   Duration          10683 non-null  object
 8   Total_Stops       10683 non-null  object
 9   Additional_Info   10683 non-null  object
 10  Price             10683 non-null  int64 
 11  Date of Journey   10683 non-null  int64 
 12  Month of Journey  10683 non-null  int64 
 13  Year of Journey   10683 non-null  int64 
dtypes: int64(4), object(10)
memory usage: 1.1+ MB


In [10]:
# Dropping the original 'Date_of_Journey' column as it's now split into separate day, month, and year columns
df.drop("Date_of_Journey", axis=1, inplace=True)


In [11]:
df.head(2)

Unnamed: 0,Airline,Source,Destination,Route,Dep_Time,Arrival_Time,Duration,Total_Stops,Additional_Info,Price,Date of Journey,Month of Journey,Year of Journey
0,IndiGo,Banglore,New Delhi,BLR → DEL,22:20,01:10 22 Mar,2h 50m,non-stop,No info,3897,24,3,2019
1,Air India,Kolkata,Banglore,CCU → IXR → BBI → BLR,05:50,13:15,7h 25m,2 stops,No info,7662,1,5,2019


In [12]:
df["Arrival_Time"] = df["Arrival_Time"].str.split(" ").str[0]


In [13]:
# Extracting hour from 'Arrival_Time' column and storing in new column 'Arrival_hour'
df["Arrival_hour"] = df["Arrival_Time"].str.split(":").str[0]

# Extracting minute from 'Arrival_Time' column and storing in new column 'Arrival_minute'
df["Arrival_minute"] = df["Arrival_Time"].str.split(":").str[1]


In [14]:
df.head(2)

Unnamed: 0,Airline,Source,Destination,Route,Dep_Time,Arrival_Time,Duration,Total_Stops,Additional_Info,Price,Date of Journey,Month of Journey,Year of Journey,Arrival_hour,Arrival_minute
0,IndiGo,Banglore,New Delhi,BLR → DEL,22:20,01:10,2h 50m,non-stop,No info,3897,24,3,2019,1,10
1,Air India,Kolkata,Banglore,CCU → IXR → BBI → BLR,05:50,13:15,7h 25m,2 stops,No info,7662,1,5,2019,13,15


In [15]:
df["Arrival_hour"]=df["Arrival_hour"].astype(int)
df["Arrival_minute"]=df["Arrival_minute"].astype(int)

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10683 entries, 0 to 10682
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Airline           10683 non-null  object
 1   Source            10683 non-null  object
 2   Destination       10683 non-null  object
 3   Route             10683 non-null  object
 4   Dep_Time          10683 non-null  object
 5   Arrival_Time      10683 non-null  object
 6   Duration          10683 non-null  object
 7   Total_Stops       10683 non-null  object
 8   Additional_Info   10683 non-null  object
 9   Price             10683 non-null  int64 
 10  Date of Journey   10683 non-null  int64 
 11  Month of Journey  10683 non-null  int64 
 12  Year of Journey   10683 non-null  int64 
 13  Arrival_hour      10683 non-null  int64 
 14  Arrival_minute    10683 non-null  int64 
dtypes: int64(6), object(9)
memory usage: 1.2+ MB


In [None]:
# Drop the original 'Arrival_Time' column as we've extracted hour and minute separately
df.drop("Arrival_Time", axis=1, inplace=True)

In [19]:
df["Departure_hour"]=df["Dep_Time"].str.split(":").str[0]
df["Departure_minute"]=df["Dep_Time"].str.split(":").str[1]

In [18]:
# Extracting hour part from 'Dep_Time' column
df["Departure_hour"] = df["Dep_Time"].str.split(":").str[0]

# Extracting minute part from 'Dep_Time' column
df["Departure_minute"] = df["Dep_Time"].str.split(":").str[1]


In [19]:
df.drop("Dep_Time",inplace=True,axis=1)

In [20]:
df.head(2)

Unnamed: 0,Airline,Source,Destination,Route,Duration,Total_Stops,Additional_Info,Price,Date of Journey,Month of Journey,Year of Journey,Arrival_hour,Arrival_minute,Departure_hour,Departure_minute
0,IndiGo,Banglore,New Delhi,BLR → DEL,2h 50m,non-stop,No info,3897,24,3,2019,1,10,22,20
1,Air India,Kolkata,Banglore,CCU → IXR → BBI → BLR,7h 25m,2 stops,No info,7662,1,5,2019,13,15,5,50


In [21]:
#drop route as we already have source destination
df.drop("Route",axis=1,inplace=True)

In [22]:
df["Total_Stops"].unique()

array(['non-stop', '2 stops', '1 stop', '3 stops', '4 stops'],
      dtype=object)

In [23]:
#we can convert this categarical data to ordinal data as we know more number of stops more the price 
df["Total_Stops"]=df["Total_Stops"].map({
    "non-stop":0,
    "1 stop":1,
    "2 stops":2,
    "3 stops":3,
    "4 stops":4
})

In [26]:
df.head()

Unnamed: 0,Airline,Source,Destination,Duration,Total_Stops,Additional_Info,Price,Date of Journey,Month of Journey,Year of Journey,Arrival_hour,Arrival_minute,Departure_hour,Departure_minute
0,IndiGo,Banglore,New Delhi,2h 50m,0,No info,3897,24,3,2019,1,10,22,20
1,Air India,Kolkata,Banglore,7h 25m,2,No info,7662,1,5,2019,13,15,5,50
2,Jet Airways,Delhi,Cochin,19h,2,No info,13882,9,6,2019,4,25,9,25
3,IndiGo,Kolkata,Banglore,5h 25m,1,No info,6218,12,5,2019,23,30,18,5
4,IndiGo,Banglore,New Delhi,4h 45m,1,No info,13302,1,3,2019,21,35,16,50


In [24]:
# Extracting hours from 'Duration' column using regex (e.g., "2h 50m" → 2)
df['Duration_hour'] = df['Duration'].str.extract(r'(\d+)h')

# Extracting minutes from 'Duration' column using regex (e.g., "2h 50m" → 50)
df['Duration_minute'] = df['Duration'].str.extract(r'(\d+)m')



In [25]:
#Filling null values with 0
df['Duration_hour']=df['Duration_hour'].fillna(0)
df['Duration_minute']=df['Duration_minute'].fillna(0)


In [26]:
df['Duration_hour']=df['Duration_hour'].astype(int)
df['Duration_minute']=df['Duration_minute'].astype(int)

In [27]:
df.head(2)

Unnamed: 0,Airline,Source,Destination,Duration,Total_Stops,Additional_Info,Price,Date of Journey,Month of Journey,Year of Journey,Arrival_hour,Arrival_minute,Departure_hour,Departure_minute,Duration_hour,Duration_minute
0,IndiGo,Banglore,New Delhi,2h 50m,0,No info,3897,24,3,2019,1,10,22,20,2,50
1,Air India,Kolkata,Banglore,7h 25m,2,No info,7662,1,5,2019,13,15,5,50,7,25


In [28]:
# Converting total flight duration into minutes 
df["Duration in minute"] = df["Duration_hour"] * 60 + df["Duration_minute"]


In [29]:
df.head(3)

Unnamed: 0,Airline,Source,Destination,Duration,Total_Stops,Additional_Info,Price,Date of Journey,Month of Journey,Year of Journey,Arrival_hour,Arrival_minute,Departure_hour,Departure_minute,Duration_hour,Duration_minute,Duration in minute
0,IndiGo,Banglore,New Delhi,2h 50m,0,No info,3897,24,3,2019,1,10,22,20,2,50,170
1,Air India,Kolkata,Banglore,7h 25m,2,No info,7662,1,5,2019,13,15,5,50,7,25,445
2,Jet Airways,Delhi,Cochin,19h,2,No info,13882,9,6,2019,4,25,9,25,19,0,1140


In [30]:
df.drop(["Duration","Duration_hour","Duration_minute"],axis=1,inplace=True)

In [31]:
df["Airline"].unique()

array(['IndiGo', 'Air India', 'Jet Airways', 'SpiceJet',
       'Multiple carriers', 'GoAir', 'Vistara', 'Air Asia',
       'Vistara Premium economy', 'Jet Airways Business',
       'Multiple carriers Premium economy', 'Trujet'], dtype=object)

In [32]:
df["Source"].unique()

array(['Banglore', 'Kolkata', 'Delhi', 'Chennai', 'Mumbai'], dtype=object)

In [33]:
df["Destination"].unique()

array(['New Delhi', 'Banglore', 'Cochin', 'Kolkata', 'Delhi', 'Hyderabad'],
      dtype=object)

In [35]:
from sklearn.preprocessing import OneHotEncoder

In [36]:
encoder=OneHotEncoder()

In [37]:
# One-Hot Encoding applied to 'Airline', 'Source', and 'Destination' together
# Reason: These features are strongly related to the travel route and have a direct impact on price.
# 'Additional_Info' is NOT included here because:
# rare categories
# - We will handle or encode it separately to avoid introducing noise or sparsity in the dataset.
encoder.fit_transform(df[["Airline","Source","Destination"]]).toarray()

array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.]], shape=(10683, 23))

In [38]:
# sparse matrix to dataframe
df1=pd.DataFrame(encoder.fit_transform(df[['Airline','Source','Destination']]).toarray(),columns=encoder.get_feature_names_out())
df1

Unnamed: 0,Airline_Air Asia,Airline_Air India,Airline_GoAir,Airline_IndiGo,Airline_Jet Airways,Airline_Jet Airways Business,Airline_Multiple carriers,Airline_Multiple carriers Premium economy,Airline_SpiceJet,Airline_Trujet,...,Source_Chennai,Source_Delhi,Source_Kolkata,Source_Mumbai,Destination_Banglore,Destination_Cochin,Destination_Delhi,Destination_Hyderabad,Destination_Kolkata,Destination_New Delhi
0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10678,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
10679,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
10680,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
10681,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [39]:
df=pd.concat([df,df1],axis=1)

In [40]:
df.head(12)

Unnamed: 0,Airline,Source,Destination,Total_Stops,Additional_Info,Price,Date of Journey,Month of Journey,Year of Journey,Arrival_hour,...,Source_Chennai,Source_Delhi,Source_Kolkata,Source_Mumbai,Destination_Banglore,Destination_Cochin,Destination_Delhi,Destination_Hyderabad,Destination_Kolkata,Destination_New Delhi
0,IndiGo,Banglore,New Delhi,0,No info,3897,24,3,2019,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,Air India,Kolkata,Banglore,2,No info,7662,1,5,2019,13,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,Jet Airways,Delhi,Cochin,2,No info,13882,9,6,2019,4,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,IndiGo,Kolkata,Banglore,1,No info,6218,12,5,2019,23,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,IndiGo,Banglore,New Delhi,1,No info,13302,1,3,2019,21,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,SpiceJet,Kolkata,Banglore,0,No info,3873,24,6,2019,11,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
6,Jet Airways,Banglore,New Delhi,1,In-flight meal not included,11087,12,3,2019,10,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
7,Jet Airways,Banglore,New Delhi,1,No info,22270,1,3,2019,5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
8,Jet Airways,Banglore,New Delhi,1,In-flight meal not included,11087,12,3,2019,10,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
9,Multiple carriers,Delhi,Cochin,1,No info,8625,27,5,2019,19,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [41]:
df["Additional_Info"].unique()

array(['No info', 'In-flight meal not included',
       'No check-in baggage included', '1 Short layover', 'No Info',
       '1 Long layover', 'Change airports', 'Business class',
       'Red-eye flight', '2 Long layover'], dtype=object)

In [42]:
#no info and No info are same 
df['Additional_Info'] = df['Additional_Info'].str.lower().str.strip()


In [43]:
df["Additional_Info"].unique()

array(['no info', 'in-flight meal not included',
       'no check-in baggage included', '1 short layover',
       '1 long layover', 'change airports', 'business class',
       'red-eye flight', '2 long layover'], dtype=object)

In [44]:
r=OneHotEncoder()

In [45]:
encoded=r.fit_transform(df[["Additional_Info"]]).toarray()

In [46]:
df1=pd.DataFrame(encoded,columns=r.get_feature_names_out())
df1

Unnamed: 0,Additional_Info_1 long layover,Additional_Info_1 short layover,Additional_Info_2 long layover,Additional_Info_business class,Additional_Info_change airports,Additional_Info_in-flight meal not included,Additional_Info_no check-in baggage included,Additional_Info_no info,Additional_Info_red-eye flight
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...
10678,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
10679,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
10680,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
10681,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [47]:
df=pd.concat([df,df1],axis=1)

In [81]:
df.head(2)

Unnamed: 0,Airline,Source,Destination,Total_Stops,Additional_Info,Price,Date of Journey,Month of Journey,Year of Journey,Arrival_hour,...,Destination_New Delhi,Additional_Info_1 long layover,Additional_Info_1 short layover,Additional_Info_2 long layover,Additional_Info_business class,Additional_Info_change airports,Additional_Info_in-flight meal not included,Additional_Info_no check-in baggage included,Additional_Info_no info,Additional_Info_red-eye flight
0,IndiGo,Banglore,New Delhi,0,no info,3897,24,3,2019,1,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,Air India,Kolkata,Banglore,2,no info,7662,1,5,2019,13,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [48]:
# Remove all unecessary columns
df.drop(['Airline', 'Source', 'Destination','Additional_Info'], axis=1, inplace=True)


In [50]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10683 entries, 0 to 10682
Data columns (total 42 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   Total_Stops                                   10683 non-null  int64  
 1   Price                                         10683 non-null  int64  
 2   Date of Journey                               10683 non-null  int64  
 3   Month of Journey                              10683 non-null  int64  
 4   Year of Journey                               10683 non-null  int64  
 5   Arrival_hour                                  10683 non-null  int64  
 6   Arrival_minute                                10683 non-null  int64  
 7   Departure_hour                                10683 non-null  object 
 8   Departure_minute                              10683 non-null  object 
 9   Duration in minute                            10683 non-null 

In [51]:
df.head()

Unnamed: 0,Total_Stops,Price,Date of Journey,Month of Journey,Year of Journey,Arrival_hour,Arrival_minute,Departure_hour,Departure_minute,Duration in minute,...,Destination_New Delhi,Additional_Info_1 long layover,Additional_Info_1 short layover,Additional_Info_2 long layover,Additional_Info_business class,Additional_Info_change airports,Additional_Info_in-flight meal not included,Additional_Info_no check-in baggage included,Additional_Info_no info,Additional_Info_red-eye flight
0,0,3897,24,3,2019,1,10,22,20,170,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,2,7662,1,5,2019,13,15,5,50,445,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,2,13882,9,6,2019,4,25,9,25,1140,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1,6218,12,5,2019,23,30,18,5,325,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1,13302,1,3,2019,21,35,16,50,285,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [52]:
# our data is ready for machine learning input