In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

In [2]:
df=pd.read_csv('titanic.csv')
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [5]:
df_drop_rows=df.dropna()
print(f"\nAfter dropping rows with missing values: {df_drop_rows.shape}")


After dropping rows with missing values: (183, 12)


In [6]:
df_drop_rows

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7000,G6,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.5500,C103,S
...,...,...,...,...,...,...,...,...,...,...,...,...
871,872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47.0,1,1,11751,52.5542,D35,S
872,873,0,1,"Carlsson, Mr. Frans Olof",male,33.0,0,0,695,5.0000,B51 B53 B55,S
879,880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56.0,0,1,11767,83.1583,C50,C
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S


In [7]:
df_drop_columns=df.dropna(axis=1)
df_drop_columns

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,SibSp,Parch,Ticket,Fare
0,1,0,3,"Braund, Mr. Owen Harris",male,1,0,A/5 21171,7.2500
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,1,0,PC 17599,71.2833
2,3,1,3,"Heikkinen, Miss. Laina",female,0,0,STON/O2. 3101282,7.9250
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,1,0,113803,53.1000
4,5,0,3,"Allen, Mr. William Henry",male,0,0,373450,8.0500
...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,0,0,211536,13.0000
887,888,1,1,"Graham, Miss. Margaret Edith",female,0,0,112053,30.0000
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,1,2,W./C. 6607,23.4500
889,890,1,1,"Behr, Mr. Karl Howell",male,0,0,111369,30.0000


In [8]:
df_mean=df.copy()
df_mean['Age'].fillna(df_mean['Age'].mean())

df_mean

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [9]:
df_mean

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [10]:
df_mean['Age'].fillna(df_mean['Age'].mode()[0],inplace=True)
print(f"\nAfter filling with mean/mode:")
df_mean


After filling with mean/mode:


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_mean['Age'].fillna(df_mean['Age'].mode()[0],inplace=True)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,24.0,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [11]:
df_ffill = df.fillna(method='ffill')
df_bfill = df.fillna(method='bfill')
df_ffill

  df_ffill = df.fillna(method='ffill')
  df_bfill = df.fillna(method='bfill')


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,C85,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,C123,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,C50,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,19.0,1,2,W./C. 6607,23.4500,B42,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [12]:
df_bfill

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,C85,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,C123,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,E46,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,B42,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,26.0,1,2,W./C. 6607,23.4500,C148,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [17]:
imputer_mean = SimpleImputer(strategy='mean')
imputer_mode = SimpleImputer(strategy='most_frequent')
numerical_cols = ['PassengerId', 'Survived', 'Pclass', 'Age','SibSp','Parch','Fare']

df_imputed = df.copy()
df_imputed[numerical_cols] = imputer_mean.fit_transform(df_imputed[numerical_cols])
df_imputed[['Age']] = imputer_mode.fit_transform(df_imputed[['Age']])
print(f"\nAfter SimpleImputer:")
df_imputed


After SimpleImputer:


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1.0,0.0,3.0,"Braund, Mr. Owen Harris",male,22.000000,1.0,0.0,A/5 21171,7.2500,,S
1,2.0,1.0,1.0,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.000000,1.0,0.0,PC 17599,71.2833,C85,C
2,3.0,1.0,3.0,"Heikkinen, Miss. Laina",female,26.000000,0.0,0.0,STON/O2. 3101282,7.9250,,S
3,4.0,1.0,1.0,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.000000,1.0,0.0,113803,53.1000,C123,S
4,5.0,0.0,3.0,"Allen, Mr. William Henry",male,35.000000,0.0,0.0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887.0,0.0,2.0,"Montvila, Rev. Juozas",male,27.000000,0.0,0.0,211536,13.0000,,S
887,888.0,1.0,1.0,"Graham, Miss. Margaret Edith",female,19.000000,0.0,0.0,112053,30.0000,B42,S
888,889.0,0.0,3.0,"Johnston, Miss. Catherine Helen ""Carrie""",female,29.699118,1.0,2.0,W./C. 6607,23.4500,,S
889,890.0,1.0,1.0,"Behr, Mr. Karl Howell",male,26.000000,0.0,0.0,111369,30.0000,C148,C


In [18]:
knn_imputer = KNNImputer(n_neighbors=2)
df_knn = df.copy()
df_knn[numerical_cols] = knn_imputer.fit_transform(df_knn[numerical_cols])
print(f"\nAfter KNN Imputer (numerical columns only):")
print(df_knn[numerical_cols])


After KNN Imputer (numerical columns only):
     PassengerId  Survived  Pclass   Age  SibSp  Parch     Fare
0            1.0       0.0     3.0  22.0    1.0    0.0   7.2500
1            2.0       1.0     1.0  38.0    1.0    0.0  71.2833
2            3.0       1.0     3.0  26.0    0.0    0.0   7.9250
3            4.0       1.0     1.0  35.0    1.0    0.0  53.1000
4            5.0       0.0     3.0  35.0    0.0    0.0   8.0500
..           ...       ...     ...   ...    ...    ...      ...
886        887.0       0.0     2.0  27.0    0.0    0.0  13.0000
887        888.0       1.0     1.0  19.0    0.0    0.0  30.0000
888        889.0       0.0     3.0  32.5    1.0    2.0  23.4500
889        890.0       1.0     1.0  26.0    0.0    0.0  30.0000
890        891.0       0.0     3.0  32.0    0.0    0.0   7.7500

[891 rows x 7 columns]


In [20]:
df_1=pd.read_csv('flights.csv')
print(df_1)
print(df_1.describe())

     year      month  passengers
0    1949    January         112
1    1949   February         118
2    1949      March         132
3    1949      April         129
4    1949        May         121
..    ...        ...         ...
139  1960     August         606
140  1960  September         508
141  1960    October         461
142  1960   November         390
143  1960   December         432

[144 rows x 3 columns]
              year  passengers
count   144.000000  144.000000
mean   1954.500000  280.298611
std       3.464102  119.966317
min    1949.000000  104.000000
25%    1951.750000  180.000000
50%    1954.500000  265.500000
75%    1957.250000  360.500000
max    1960.000000  622.000000


In [23]:
df_1.sample(5)

Unnamed: 0,year,month,passengers
65,1954,June,264
59,1953,December,201
110,1958,March,362
112,1958,May,363
103,1957,August,467


In [26]:
scaler_minmax = MinMaxScaler()
numeric_cols = df_1.select_dtypes(include=['number']).columns
df_1_scaled = df_1.copy()
df_1_scaled[numeric_cols] = scaler_minmax.fit_transform(df_1[numeric_cols])

print("\nAfter Min-Max Scaling (0-1 range):")
print(df_1_scaled.describe())


After Min-Max Scaling (0-1 range):
             year  passengers
count  144.000000  144.000000
mean     0.500000    0.340345
std      0.314918    0.231595
min      0.000000    0.000000
25%      0.250000    0.146718
50%      0.500000    0.311776
75%      0.750000    0.495174
max      1.000000    1.000000


In [27]:
df_1_scaled.sample(5)

Unnamed: 0,year,month,passengers
103,0.727273,August,0.700772
114,0.818182,July,0.747104
83,0.545455,December,0.335907
141,1.0,October,0.689189
87,0.636364,April,0.403475


In [29]:
scaler_standard = StandardScaler()
numeric_cols = df_1.select_dtypes(include=['number']).columns
housing_data_scaled = df_1.copy()
housing_data_scaled[numeric_cols] = scaler_standard.fit_transform(df_1[numeric_cols])

print("\nAfter Standardization (mean=0, std=1):")
print(housing_data_scaled.describe())


After Standardization (mean=0, std=1):
               year    passengers
count  1.440000e+02  1.440000e+02
mean   9.868649e-17  1.973730e-16
std    1.003490e+00  1.003490e+00
min   -1.593255e+00 -1.474697e+00
25%   -7.966275e-01 -8.389746e-01
50%    0.000000e+00 -1.237869e-01
75%    7.966275e-01  6.708660e-01
max    1.593255e+00  2.858253e+00


In [30]:
housing_data_scaled.sample(5)

Unnamed: 0,year,month,passengers
54,-0.434524,July,-0.136334
23,-1.303572,December,-1.173565
34,-1.01389,November,-1.123377
42,-0.724207,July,-0.420736
8,-1.593255,September,-1.207024


In [32]:
scaler_robust = RobustScaler()
numeric_cols = df_1.select_dtypes(include=['number']).columns
housing_robust = df_1.copy()
housing_robust[numeric_cols] = scaler_robust.fit_transform(df_1[numeric_cols])

print("\nAfter Robust Scaling:")
print(housing_robust.describe())


After Robust Scaling:
               year  passengers
count  1.440000e+02  144.000000
mean   4.934325e-17    0.081987
std    6.298367e-01    0.664633
min   -1.000000e+00   -0.894737
25%   -5.000000e-01   -0.473684
50%    0.000000e+00    0.000000
75%    5.000000e-01    0.526316
max    1.000000e+00    1.975069


In [34]:
from sklearn.preprocessing import normalize
housing_unit = df_1.copy()
numeric_cols = df_1.select_dtypes(include=['number']).columns
housing_unit[numeric_cols] = normalize(df_1[numeric_cols], norm='l2')

print("\nAfter Unit Vector Scaling:")
print(housing_unit.describe())


After Unit Vector Scaling:
             year  passengers
count  144.000000  144.000000
mean     0.988231    0.141059
std      0.009518    0.058616
min      0.953155    0.053285
25%      0.983486    0.091812
50%      0.990895    0.134638
75%      0.995776    0.180983
max      0.998579    0.302481


In [35]:
housing_unit.head(5)

Unnamed: 0,year,month,passengers
0,0.998353,January,0.057371
1,0.998172,February,0.060433
2,0.997714,March,0.067572
3,0.997817,April,0.066043
4,0.998078,May,0.061964


In [36]:
categorical_label = df_1.copy()
le_month = LabelEncoder()
categorical_label['month_encoded'] = le_month.fit_transform(categorical_label['month'])

print("\nAfter Label Encoding:")
print(categorical_label[['month', 'month_encoded']].head())


After Label Encoding:
      month  month_encoded
0   January              4
1  February              3
2     March              7
3     April              0
4       May              8


In [37]:
categorical_label.sample(5)

Unnamed: 0,year,month,passengers,month_encoded
93,1956,October,306,10
90,1956,July,413,5
46,1952,November,172,9
50,1953,March,236,7
29,1951,June,178,6


In [38]:
categorical_onehot = pd.get_dummies(df_1, columns=['month'], prefix=['month'])
print("\nAfter One-Hot Encoding:")
print(categorical_onehot.head())



After One-Hot Encoding:
   year  passengers  month_April  month_August  month_December  \
0  1949         112        False         False           False   
1  1949         118        False         False           False   
2  1949         132        False         False           False   
3  1949         129         True         False           False   
4  1949         121        False         False           False   

   month_February  month_January  month_July  month_June  month_March  \
0           False           True       False       False        False   
1            True          False       False       False        False   
2           False          False       False       False         True   
3           False          False       False       False        False   
4           False          False       False       False        False   

   month_May  month_November  month_October  month_September  
0      False           False          False            False  
1      False 

In [39]:
categorical_onehot.head()

Unnamed: 0,year,passengers,month_April,month_August,month_December,month_February,month_January,month_July,month_June,month_March,month_May,month_November,month_October,month_September
0,1949,112,False,False,False,False,True,False,False,False,False,False,False,False
1,1949,118,False,False,False,True,False,False,False,False,False,False,False,False
2,1949,132,False,False,False,False,False,False,False,True,False,False,False,False
3,1949,129,True,False,False,False,False,False,False,False,False,False,False,False
4,1949,121,False,False,False,False,False,False,False,False,True,False,False,False


In [40]:
from sklearn.preprocessing import OrdinalEncoder
month_order = [['January', 'February', 'March', 'April', 'May', 'June',
                'July', 'August', 'September', 'October', 'November', 'December']]

ordinal_encoder = OrdinalEncoder(categories=month_order)
month_ordinal = ordinal_encoder.fit_transform(df_1[['month']])

print("\nOrdinal Encoding for 'month':")
print("Original:", df_1['month'].tolist()[:12])
print("Encoded:", month_ordinal.flatten().astype(int).tolist()[:12])


Ordinal Encoding for 'month':
Original: ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
Encoded: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]


In [41]:
df_1['passengers_bins_equal'] = pd.cut(
    df_1['passengers'],
    bins=4,
    labels=['Low', 'Moderate', 'High', 'Very High']
)

print("\nEqual-width binning for passengers:")
print(df_1[['passengers', 'passengers_bins_equal']].head(10))


Equal-width binning for passengers:
   passengers passengers_bins_equal
0         112                   Low
1         118                   Low
2         132                   Low
3         129                   Low
4         121                   Low
5         135                   Low
6         148                   Low
7         148                   Low
8         136                   Low
9         119                   Low


In [42]:
df_1.head(5)

Unnamed: 0,year,month,passengers,passengers_bins_equal
0,1949,January,112,Low
1,1949,February,118,Low
2,1949,March,132,Low
3,1949,April,129,Low
4,1949,May,121,Low


In [43]:
df_1['passengers_bins_quantile'] = pd.qcut(
    df_1['passengers'],
    q=4,
    labels=['Q1', 'Q2', 'Q3', 'Q4']
)

print("\nEqual-frequency binning for passengers:")
print(df_1[['passengers', 'passengers_bins_quantile']].head(10))


Equal-frequency binning for passengers:
   passengers passengers_bins_quantile
0         112                       Q1
1         118                       Q1
2         132                       Q1
3         129                       Q1
4         121                       Q1
5         135                       Q1
6         148                       Q1
7         148                       Q1
8         136                       Q1
9         119                       Q1


In [44]:
df_1.head(5)

Unnamed: 0,year,month,passengers,passengers_bins_equal,passengers_bins_quantile
0,1949,January,112,Low,Q1
1,1949,February,118,Low,Q1
2,1949,March,132,Low,Q1
3,1949,April,129,Low,Q1
4,1949,May,121,Low,Q1


In [45]:
custom_bins = [0, 100, 200, 300, 500]
custom_labels = ['Low', 'Moderate', 'High', 'Very High']

df_1['passengers_bins_custom'] = pd.cut(
    df_1['passengers'],
    bins=custom_bins,
    labels=custom_labels
)

print("\nCustom binning for passengers:")
print(df_1[['passengers', 'passengers_bins_custom']].head(10))



Custom binning for passengers:
   passengers passengers_bins_custom
0         112               Moderate
1         118               Moderate
2         132               Moderate
3         129               Moderate
4         121               Moderate
5         135               Moderate
6         148               Moderate
7         148               Moderate
8         136               Moderate
9         119               Moderate


In [46]:
df_1.head()

Unnamed: 0,year,month,passengers,passengers_bins_equal,passengers_bins_quantile,passengers_bins_custom
0,1949,January,112,Low,Q1,Moderate
1,1949,February,118,Low,Q1,Moderate
2,1949,March,132,Low,Q1,Moderate
3,1949,April,129,Low,Q1,Moderate
4,1949,May,121,Low,Q1,Moderate
