# Import Libraries

In [48]:
import pandas as pd 
import numpy as np
import re
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as mae


In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Load the Data

In [3]:
data = pd.read_csv('train.csv')

# Getting to Know the Data

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


Notes:

Looks like there is some missing data in the **Age** and **Cabin** and **Embarked**.

In [5]:
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


# Clean the Data

## Dropping the Cabin Column - Only 22% of the cabin numbers were provided.

In [6]:
data = data.drop(['Cabin'], axis=1)

## Removing Passengers that have unkown Embarked Locations.

Note: Only two of the remaining passangers have are missing there Embarked locations.

In [7]:
Embarked_rms = [n for n, x in zip(data.index, data['Embarked'].isnull()) if x is True]

In [8]:
data = data.drop(Embarked_rms, axis=0)

In [9]:
data = data.reset_index()

## Filling in Missing Ages

In [10]:
print(f"There are {data['Age'].isnull().sum()} missing age entries and that is {(data['Age'].isnull().sum()/len(data))*100:0.2f}% of the remaining data.")

There are 177 missing age entries and that is 19.91% of the remaining data.


In [11]:
print(f"The average age is {data['Age'].mean():0.2f} +/- {data['Age'].std():0.2f} years.")

The average age is 29.64 +/- 14.49 years.


## Make New Column of Data Using Titles

In [12]:
titles = []
for n, name, sex, age in zip(data.index, data['Name'], data['Sex'], data['Age']):
    split = re.split('\s+', name)
    for word in split:
        if "." in word and len(word) > 2:
            if word == "Mlle." or word == "Mme.": 
                titles.append("Miss.")
            else:
                titles.append(word) 
            
    if '.' not in name:
        print(name)

Does each person have a title?

In [13]:
print(f"{len(data)} = {len(titles)}")

889 = 889


In [14]:
data['Title'] = titles

In [15]:
data

Unnamed: 0,index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked,Title
0,0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,S,Mr.
1,1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C,Mrs.
2,2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,S,Miss.
3,3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,S,Mrs.
4,4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,S,Mr.
5,5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,Q,Mr.
6,6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,S,Mr.
7,7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,S,Master.
8,8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,S,Mrs.
9,9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,C,Mrs.


## Making New Columns by Decomposing Ticket Information

In [16]:
check = "A12"
print(check.isdigit())

False


In [21]:
Ticket_Ex = []
#T1 = 
#T2 = 
#T3 = 
for tick in data['Ticket']:
    if tick.isdigit():
        Ticket_Ex.append(0)
    else: 
        Ticket_Ex.append(1)

In [22]:
data['Ticket_Ex'] = Ticket_Ex

In [23]:
data

Unnamed: 0,index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked,Title,Ticket_Ex
0,0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,S,Mr.,1
1,1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C,Mrs.,1
2,2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,S,Miss.,1
3,3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,S,Mrs.,0
4,4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,S,Mr.,0
5,5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,Q,Mr.,0
6,6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,S,Mr.,0
7,7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,S,Master.,0
8,8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,S,Mrs.,0
9,9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,C,Mrs.,0


## Converting All Categorical Data to Numerical Data

For now I will remove all strings that can not be converted into numerical data such as the names of the passanger (**Name**) and ticket IDs (**Ticket**).

In [24]:
data = data.drop(['Name','Ticket'], axis=1)

In [25]:
data.head()

Unnamed: 0,index,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,Ticket_Ex
0,0,1,0,3,male,22.0,1,0,7.25,S,Mr.,1
1,1,2,1,1,female,38.0,1,0,71.2833,C,Mrs.,1
2,2,3,1,3,female,26.0,0,0,7.925,S,Miss.,1
3,3,4,1,1,female,35.0,1,0,53.1,S,Mrs.,0
4,4,5,0,3,male,35.0,0,0,8.05,S,Mr.,0


No I will use the OrdinalEncoder to convert the **Sex** and **Embarked** columns into numerical data.

In [26]:
o_encoder = OrdinalEncoder()
columns_4_o_encoder = ['Sex', 'Embarked']
data[columns_4_o_encoder] = o_encoder.fit_transform(data[columns_4_o_encoder])

In [27]:
data.head()

Unnamed: 0,index,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,Ticket_Ex
0,0,1,0,3,1.0,22.0,1,0,7.25,2.0,Mr.,1
1,1,2,1,1,0.0,38.0,1,0,71.2833,0.0,Mrs.,1
2,2,3,1,3,0.0,26.0,0,0,7.925,2.0,Miss.,1
3,3,4,1,1,0.0,35.0,1,0,53.1,2.0,Mrs.,0
4,4,5,0,3,1.0,35.0,0,0,8.05,2.0,Mr.,0


### Use One-hot Enconding on the Passenger Titles.

Since there ~15 different passenger titles (rather low cardinality) they will be treated using One-hot Encoding.

In [28]:
OHE = OneHotEncoder(handle_unknown='ignore', sparse=False)
OHE_df = pd.DataFrame(OHE.fit_transform(data['Title'].to_numpy().reshape(-1,1)))
data = pd.concat([data, OHE_df], axis=1).drop(['Title'], axis=1)

# Current State of the Data

In [29]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 889 entries, 0 to 888
Data columns (total 26 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   index        889 non-null    int64  
 1   PassengerId  889 non-null    int64  
 2   Survived     889 non-null    int64  
 3   Pclass       889 non-null    int64  
 4   Sex          889 non-null    float64
 5   Age          712 non-null    float64
 6   SibSp        889 non-null    int64  
 7   Parch        889 non-null    int64  
 8   Fare         889 non-null    float64
 9   Embarked     889 non-null    float64
 10  Ticket_Ex    889 non-null    int64  
 11  0            889 non-null    float64
 12  1            889 non-null    float64
 13  2            889 non-null    float64
 14  3            889 non-null    float64
 15  4            889 non-null    float64
 16  5            889 non-null    float64
 17  6            889 non-null    float64
 18  7            889 non-null    float64
 19  8       

The ages are still missing, but all the rest of the data is filled in. Now I will attempt to calculate the missing ages using a regressor. 

# Setting up and Using Several Machine Learning Methods

## Create a new dataframe that will be used to train and test regressor for prediciting the missing ages.

In [30]:
passengers_w_missing_ages = [n for n, x in zip(data.index, data['Age'].isnull()) if x is True]

The following dataframe is what you will use you trained model on.

In [31]:
passengers_wo_ages_test = data[data.index.isin(passengers_w_missing_ages)]

In [32]:
passengers_wo_ages_test

Unnamed: 0,index,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Ticket_Ex,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
5,5,6,0,3,1.0,,0,0,8.4583,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
17,17,18,1,2,1.0,,0,0,13.0,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
19,19,20,1,3,0.0,,0,0,7.225,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
26,26,27,0,3,1.0,,0,0,7.225,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
28,28,29,1,3,0.0,,0,0,7.8792,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
29,29,30,0,3,1.0,,0,0,7.8958,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
31,31,32,1,1,0.0,,1,0,146.5208,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
32,32,33,1,3,0.0,,0,0,7.75,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
36,36,37,1,3,1.0,,0,0,7.2292,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
42,42,43,0,3,1.0,,0,0,7.8958,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


This is the data set that you will use to train your regressor.

In [33]:
passengers_w_ages_train = data.drop(passengers_w_missing_ages, axis=0)

In [34]:
passengers_w_ages_train

Unnamed: 0,index,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Ticket_Ex,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,0,1,0,3,1.0,22.0,1,0,7.25,2.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,1,2,1,1,0.0,38.0,1,0,71.2833,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,2,3,1,3,0.0,26.0,0,0,7.925,2.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,3,4,1,1,0.0,35.0,1,0,53.1,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,4,5,0,3,1.0,35.0,0,0,8.05,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
6,6,7,0,1,1.0,54.0,0,0,51.8625,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,7,8,0,3,1.0,2.0,3,1,21.075,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
8,8,9,1,3,0.0,27.0,0,2,11.1333,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
9,9,10,1,2,0.0,14.0,1,0,30.0708,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
10,10,11,1,3,0.0,4.0,1,1,16.7,2.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


## Creating Train and Testing Data to Build and Test the Model

In [35]:
features = [x for x in passengers_w_ages_train.columns if x != 'Age' if x != 'PassengerId']

In [36]:
print(f"Total Number of Features: {len(features)}")

Total Number of Features: 24


In [37]:
x = passengers_w_ages_train[features]
y = passengers_w_ages_train['Age']

In [38]:
x_train, x_valid, y_train, y_valid = train_test_split(x, y, train_size=0.8, test_size=0.2, random_state=42)

## Using the Random Forest Regressor

In [39]:
from sklearn.ensemble import RandomForestRegressor

In [40]:
RF_model = RandomForestRegressor(n_estimators=100, random_state=42)
RF_model.fit(x_train, y_train)
RF_preds = RF_model.predict(x_valid)

In [41]:
print(f"The Mean Absolute Error in the Age Prediction is {mae(y_valid, RF_preds):0.2f} years.")
print(f"The Mean Absolute Error in using the mean age is {mae(y_valid, [data['Age'].mean() for i in range(len(y_valid))]):0.2f} years.")

The Mean Absolute Error in the Age Prediction is 8.85 years.
The Mean Absolute Error in using the mean age is 12.61 years.


In [42]:
from sklearn.ensemble import GradientBoostingRegressor

In [82]:
def booster(xs, ys, zs, x_train, y_train, x_valid, y_valid):
    check = []
    for x in xs:
        for y in ys:
            for z in zs:
                GBR_model = GradientBoostingRegressor(learning_rate=x, n_estimators=z, subsample=y, random_state=42)
                GBR_model.fit(x_train, y_train)
                GBR_preds = GBR_model.predict(x_valid)
                check.append(mae(y_valid, GBR_preds))
                print(mae(y_valid, GBR_preds), x, y, z)
    return(min(check))

In [83]:
xs = list(np.linspace(0.001,0.1, 40))
ys = list(np.linspace(0.01,  1, 20))
zs = [50, 100, 150, 200]

In [84]:
booster(xs, ys, zs, x_train, y_train, x_valid, y_valid)

12.553501805956731 0.001 0.01 50
12.493577206202225 0.001 0.01 100
12.462449902831219 0.001 0.01 150
12.41820064060368 0.001 0.01 200
12.48578672358777 0.001 0.06210526315789474 50
12.35037416725419 0.001 0.06210526315789474 100
12.25513766966424 0.001 0.06210526315789474 150
12.140766942688305 0.001 0.06210526315789474 200
12.444076666976239 0.001 0.11421052631578947 50
12.295427367210468 0.001 0.11421052631578947 100
12.164596017524444 0.001 0.11421052631578947 150
12.037470313774065 0.001 0.11421052631578947 200
12.447174737222515 0.001 0.16631578947368422 50
12.295810563566748 0.001 0.16631578947368422 100
12.137894336834307 0.001 0.16631578947368422 150
11.996784199991026 0.001 0.16631578947368422 200
12.427319120166601 0.001 0.21842105263157896 50
12.256447392858773 0.001 0.21842105263157896 100
12.088180293552849 0.001 0.21842105263157896 150
11.93207410408103 0.001 0.21842105263157896 200
12.405304354095653 0.001 0.2705263157894737 50
12.214804472803339 0.001 0.2705263157894737

10.318912770518464 0.0035384615384615385 0.9478947368421053 200
11.832025935130215 0.0035384615384615385 1.0 50
11.17458199785341 0.0035384615384615385 1.0 100
10.692892733104442 0.0035384615384615385 1.0 150
10.322040140068221 0.0035384615384615385 1.0 200
12.304738072414999 0.006076923076923077 0.01 50
11.99702279039528 0.006076923076923077 0.01 100
11.828854329010792 0.006076923076923077 0.01 150
11.634818855348461 0.006076923076923077 0.01 200
11.917671196100228 0.006076923076923077 0.06210526315789474 50
11.276180357502206 0.006076923076923077 0.06210526315789474 100
10.910899362663248 0.006076923076923077 0.06210526315789474 150
10.642151244795558 0.006076923076923077 0.06210526315789474 200
11.687013533679826 0.006076923076923077 0.11421052631578947 50
11.064176883545679 0.006076923076923077 0.11421052631578947 100
10.611386812905426 0.006076923076923077 0.11421052631578947 150
10.296142010208028 0.006076923076923077 0.11421052631578947 200
11.721866353183119 0.00607692307692307

11.024978697585649 0.008615384615384615 0.6873684210526316 50
10.070755689382295 0.008615384615384615 0.6873684210526316 100
9.53313982376904 0.008615384615384615 0.6873684210526316 150
9.226689762323833 0.008615384615384615 0.6873684210526316 200
11.025278748142167 0.008615384615384615 0.7394736842105263 50
10.069945853760299 0.008615384615384615 0.7394736842105263 100
9.511759465356587 0.008615384615384615 0.7394736842105263 150
9.25051377471201 0.008615384615384615 0.7394736842105263 200
10.99855257546448 0.008615384615384615 0.791578947368421 50
10.077699881308922 0.008615384615384615 0.791578947368421 100
9.515623000827453 0.008615384615384615 0.791578947368421 150
9.239412101410881 0.008615384615384615 0.791578947368421 200
10.996404722638351 0.008615384615384615 0.8436842105263158 50
10.038135848376161 0.008615384615384615 0.8436842105263158 100
9.506290616884177 0.008615384615384615 0.8436842105263158 150
9.191942535081987 0.008615384615384615 0.8436842105263158 200
10.97873547

9.187227243046589 0.013692307692307691 0.37473684210526315 150
8.993807201982468 0.013692307692307691 0.37473684210526315 200
10.464618631429287 0.013692307692307691 0.4268421052631579 50
9.590392769192787 0.013692307692307691 0.4268421052631579 100
9.178081907789597 0.013692307692307691 0.4268421052631579 150
8.950031722740693 0.013692307692307691 0.4268421052631579 200
10.475407555261933 0.013692307692307691 0.4789473684210527 50
9.616952729035138 0.013692307692307691 0.4789473684210527 100
9.213115111132069 0.013692307692307691 0.4789473684210527 150
8.963766764324403 0.013692307692307691 0.4789473684210527 200
10.44748867011139 0.013692307692307691 0.5310526315789474 50
9.553487823087224 0.013692307692307691 0.5310526315789474 100
9.17600990886579 0.013692307692307691 0.5310526315789474 150
8.996786017314111 0.013692307692307691 0.5310526315789474 200
10.402652875515782 0.013692307692307691 0.5831578947368421 50
9.44194273966602 0.013692307692307691 0.5831578947368421 100
9.0971832

9.417796482733918 0.01876923076923077 0.06210526315789474 200
10.481525908063505 0.01876923076923077 0.11421052631578947 50
9.586647190665005 0.01876923076923077 0.11421052631578947 100
9.322335811327056 0.01876923076923077 0.11421052631578947 150
9.110133941710686 0.01876923076923077 0.11421052631578947 200
10.57354440356358 0.01876923076923077 0.16631578947368422 50
9.744130092472842 0.01876923076923077 0.16631578947368422 100
9.371151017568788 0.01876923076923077 0.16631578947368422 150
9.217870978250366 0.01876923076923077 0.16631578947368422 200
10.316057037340078 0.01876923076923077 0.21842105263157896 50
9.507900703711412 0.01876923076923077 0.21842105263157896 100
9.225941609760866 0.01876923076923077 0.21842105263157896 150
9.069291245683738 0.01876923076923077 0.21842105263157896 200
10.183405706039727 0.01876923076923077 0.2705263157894737 50
9.427138725311677 0.01876923076923077 0.2705263157894737 100
9.070910422240562 0.01876923076923077 0.2705263157894737 150
8.9116495364

8.605070813248584 0.02130769230769231 0.8436842105263158 200
9.742415799085606 0.02130769230769231 0.8957894736842106 50
8.963977019176667 0.02130769230769231 0.8957894736842106 100
8.726117822527211 0.02130769230769231 0.8957894736842106 150
8.633478700019495 0.02130769230769231 0.8957894736842106 200
9.743239507018362 0.02130769230769231 0.9478947368421053 50
8.966728451035566 0.02130769230769231 0.9478947368421053 100
8.75211283339238 0.02130769230769231 0.9478947368421053 150
8.61054523935846 0.02130769230769231 0.9478947368421053 200
9.742857822768274 0.02130769230769231 1.0 50
8.98381827380663 0.02130769230769231 1.0 100
8.758747722758963 0.02130769230769231 1.0 150
8.65994105107598 0.02130769230769231 1.0 200
11.545275165040277 0.023846153846153847 0.01 50
10.85029950897301 0.023846153846153847 0.01 100
10.9351298824837 0.023846153846153847 0.01 150
10.41777864655335 0.023846153846153847 0.01 200
10.63235048308411 0.023846153846153847 0.06210526315789474 50
9.574547495262259 0.0

8.898799966250538 0.026384615384615385 0.5831578947368421 100
8.701475134725356 0.026384615384615385 0.5831578947368421 150
8.663806635665134 0.026384615384615385 0.5831578947368421 200
9.505046060224846 0.026384615384615385 0.6352631578947369 50
8.879059153080288 0.026384615384615385 0.6352631578947369 100
8.691154451689684 0.026384615384615385 0.6352631578947369 150
8.564584309454517 0.026384615384615385 0.6352631578947369 200
9.53236432515002 0.026384615384615385 0.6873684210526316 50
8.920695246727952 0.026384615384615385 0.6873684210526316 100
8.758789350313137 0.026384615384615385 0.6873684210526316 150
8.527299937418697 0.026384615384615385 0.6873684210526316 200
9.489675219044262 0.026384615384615385 0.7394736842105263 50
8.83882036638946 0.026384615384615385 0.7394736842105263 100
8.643215531409814 0.026384615384615385 0.7394736842105263 150
8.52997717655823 0.026384615384615385 0.7394736842105263 200
9.504324955775093 0.026384615384615385 0.791578947368421 50
8.81943157733898

8.598139060205257 0.031461538461538464 0.32263157894736844 150
8.516707854628978 0.031461538461538464 0.32263157894736844 200
9.381287820829739 0.031461538461538464 0.37473684210526315 50
8.882819569704157 0.031461538461538464 0.37473684210526315 100
8.681623213501636 0.031461538461538464 0.37473684210526315 150
8.689717437744703 0.031461538461538464 0.37473684210526315 200
9.395407673771281 0.031461538461538464 0.4268421052631579 50
8.785382360336438 0.031461538461538464 0.4268421052631579 100
8.612100895140118 0.031461538461538464 0.4268421052631579 150
8.566004029818812 0.031461538461538464 0.4268421052631579 200
9.381806214887687 0.031461538461538464 0.4789473684210527 50
8.858751462038564 0.031461538461538464 0.4789473684210527 100
8.660929160932909 0.031461538461538464 0.4789473684210527 150
8.56777122770962 0.031461538461538464 0.4789473684210527 200
9.352953251987985 0.031461538461538464 0.5310526315789474 50
8.875975550111821 0.031461538461538464 0.5310526315789474 100
8.76656

8.584151443227476 0.03653846153846154 0.32263157894736844 150
8.493099487700652 0.03653846153846154 0.32263157894736844 200
9.261931933825144 0.03653846153846154 0.37473684210526315 50
8.81170779019701 0.03653846153846154 0.37473684210526315 100
8.717097365112537 0.03653846153846154 0.37473684210526315 150
8.75160545006713 0.03653846153846154 0.37473684210526315 200
9.225325504867492 0.03653846153846154 0.4268421052631579 50
8.679072624887706 0.03653846153846154 0.4268421052631579 100
8.573632758818555 0.03653846153846154 0.4268421052631579 150
8.574929737335076 0.03653846153846154 0.4268421052631579 200
9.1987298907142 0.03653846153846154 0.4789473684210527 50
8.746094375601956 0.03653846153846154 0.4789473684210527 100
8.661096154420404 0.03653846153846154 0.4789473684210527 150
8.586821739452741 0.03653846153846154 0.4789473684210527 200
9.253457171560624 0.03653846153846154 0.5310526315789474 50
8.825126120838904 0.03653846153846154 0.5310526315789474 100
8.656465310980586 0.036538

9.084459996578376 0.04161538461538462 0.06210526315789474 200
9.499495483408976 0.04161538461538462 0.11421052631578947 50
8.914383129636327 0.04161538461538462 0.11421052631578947 100
8.720602348578797 0.04161538461538462 0.11421052631578947 150
8.622361670760865 0.04161538461538462 0.11421052631578947 200
9.614338316870727 0.04161538461538462 0.16631578947368422 50
8.97890988196833 0.04161538461538462 0.16631578947368422 100
8.722239862809115 0.04161538461538462 0.16631578947368422 150
8.791666010567408 0.04161538461538462 0.16631578947368422 200
9.386474628332842 0.04161538461538462 0.21842105263157896 50
8.902035352404853 0.04161538461538462 0.21842105263157896 100
8.818716161481 0.04161538461538462 0.21842105263157896 150
8.753614124127994 0.04161538461538462 0.21842105263157896 200
9.408910362955746 0.04161538461538462 0.2705263157894737 50
8.958010075096608 0.04161538461538462 0.2705263157894737 100
8.771743397105555 0.04161538461538462 0.2705263157894737 150
8.643010784548407 0

8.619074035360738 0.044153846153846155 0.8436842105263158 100
8.557214436837192 0.044153846153846155 0.8436842105263158 150
8.498048217958248 0.044153846153846155 0.8436842105263158 200
8.86662893468643 0.044153846153846155 0.8957894736842106 50
8.529676410746507 0.044153846153846155 0.8957894736842106 100
8.437206822864832 0.044153846153846155 0.8957894736842106 150
8.43713821741337 0.044153846153846155 0.8957894736842106 200
8.928412219032582 0.044153846153846155 0.9478947368421053 50
8.580439311272452 0.044153846153846155 0.9478947368421053 100
8.487425786036615 0.044153846153846155 0.9478947368421053 150
8.492033305247219 0.044153846153846155 0.9478947368421053 200
8.934821158894687 0.044153846153846155 1.0 50
8.6132636810994 0.044153846153846155 1.0 100
8.528188960643257 0.044153846153846155 1.0 150
8.566980494527025 0.044153846153846155 1.0 200
10.958391127532577 0.04669230769230769 0.01 50
10.075221881915844 0.04669230769230769 0.01 100
10.13649796551909 0.04669230769230769 0.01

8.67661442539775 0.04923076923076923 0.5831578947368421 100
8.631269843663217 0.04923076923076923 0.5831578947368421 150
8.652427560695791 0.04923076923076923 0.5831578947368421 200
8.966476211828256 0.04923076923076923 0.6352631578947369 50
8.545107398640152 0.04923076923076923 0.6352631578947369 100
8.453049857318407 0.04923076923076923 0.6352631578947369 150
8.375567504646847 0.04923076923076923 0.6352631578947369 200
8.916953888143745 0.04923076923076923 0.6873684210526316 50
8.652400294020186 0.04923076923076923 0.6873684210526316 100
8.550406313845453 0.04923076923076923 0.6873684210526316 150
8.564354479061942 0.04923076923076923 0.6873684210526316 200
8.942327339592746 0.04923076923076923 0.7394736842105263 50
8.570962524376364 0.04923076923076923 0.7394736842105263 100
8.602237497862522 0.04923076923076923 0.7394736842105263 150
8.550211987763715 0.04923076923076923 0.7394736842105263 200
8.875326665180218 0.04923076923076923 0.791578947368421 50
8.518520379562416 0.0492307692

8.660713023228432 0.05430769230769231 0.32263157894736844 150
8.513112924042012 0.05430769230769231 0.32263157894736844 200
8.906028116258002 0.05430769230769231 0.37473684210526315 50
8.702252077526834 0.05430769230769231 0.37473684210526315 100
8.626073500345539 0.05430769230769231 0.37473684210526315 150
8.57446199677499 0.05430769230769231 0.37473684210526315 200
8.93606251691347 0.05430769230769231 0.4268421052631579 50
8.69896819968147 0.05430769230769231 0.4268421052631579 100
8.75114629277215 0.05430769230769231 0.4268421052631579 150
8.72880527405166 0.05430769230769231 0.4268421052631579 200
8.936031999223912 0.05430769230769231 0.4789473684210527 50
8.697428580060478 0.05430769230769231 0.4789473684210527 100
8.72044276260546 0.05430769230769231 0.4789473684210527 150
8.65496775067736 0.05430769230769231 0.4789473684210527 200
8.872232181736203 0.05430769230769231 0.5310526315789474 50
8.626754293023318 0.05430769230769231 0.5310526315789474 100
8.642580680763794 0.054307692

9.344677117832784 0.05938461538461538 0.06210526315789474 200
9.26285800714564 0.05938461538461538 0.11421052631578947 50
8.691695712414434 0.05938461538461538 0.11421052631578947 100
8.747098636956709 0.05938461538461538 0.11421052631578947 150
8.659450556523872 0.05938461538461538 0.11421052631578947 200
9.403262324648196 0.05938461538461538 0.16631578947368422 50
8.848293922158632 0.05938461538461538 0.16631578947368422 100
8.680716792736652 0.05938461538461538 0.16631578947368422 150
8.812442544875582 0.05938461538461538 0.16631578947368422 200
9.07605494235685 0.05938461538461538 0.21842105263157896 50
8.765073056640603 0.05938461538461538 0.21842105263157896 100
8.66852764566902 0.05938461538461538 0.21842105263157896 150
8.649816137863928 0.05938461538461538 0.21842105263157896 200
9.19411713429138 0.05938461538461538 0.2705263157894737 50
8.912837757910149 0.05938461538461538 0.2705263157894737 100
8.726059319142442 0.05938461538461538 0.2705263157894737 150
8.657798707884046 0

8.420230424060911 0.06192307692307693 0.8436842105263158 100
8.40271073401488 0.06192307692307693 0.8436842105263158 150
8.428737953239764 0.06192307692307693 0.8436842105263158 200
8.718884482888143 0.06192307692307693 0.8957894736842106 50
8.469064951086915 0.06192307692307693 0.8957894736842106 100
8.520266842489729 0.06192307692307693 0.8957894736842106 150
8.605272030159782 0.06192307692307693 0.8957894736842106 200
8.724253354091749 0.06192307692307693 0.9478947368421053 50
8.46535710699198 0.06192307692307693 0.9478947368421053 100
8.451702673456378 0.06192307692307693 0.9478947368421053 150
8.550674046016821 0.06192307692307693 0.9478947368421053 200
8.694341815849738 0.06192307692307693 1.0 50
8.549846640946681 0.06192307692307693 1.0 100
8.543394520092328 0.06192307692307693 1.0 150
8.599251306532652 0.06192307692307693 1.0 200
10.689731687409669 0.06446153846153846 0.01 50
10.001682899397883 0.06446153846153846 0.01 100
9.93038908160505 0.06446153846153846 0.01 150
10.935776

8.528862566305573 0.067 0.7394736842105263 100
8.586663224792456 0.067 0.7394736842105263 150
8.591818349387411 0.067 0.7394736842105263 200
8.72618343849525 0.067 0.791578947368421 50
8.587023146517438 0.067 0.791578947368421 100
8.689757143542774 0.067 0.791578947368421 150
8.657599632194955 0.067 0.791578947368421 200
8.726019462928846 0.067 0.8436842105263158 50
8.474969778443022 0.067 0.8436842105263158 100
8.490737508943242 0.067 0.8436842105263158 150
8.49457166050407 0.067 0.8436842105263158 200
8.738844056109313 0.067 0.8957894736842106 50
8.538370577607633 0.067 0.8957894736842106 100
8.546686658554343 0.067 0.8957894736842106 150
8.625172285474157 0.067 0.8957894736842106 200
8.713422473562396 0.067 0.9478947368421053 50
8.557389704624558 0.067 0.9478947368421053 100
8.516311219711973 0.067 0.9478947368421053 150
8.620665155689654 0.067 0.9478947368421053 200
8.71588422057642 0.067 1.0 50
8.540971456999552 0.067 1.0 100
8.606728142371534 0.067 1.0 150
8.68159111545404 0.067 

8.680783932459603 0.07207692307692308 0.5310526315789474 100
8.712733550309997 0.07207692307692308 0.5310526315789474 150
8.580945909437128 0.07207692307692308 0.5310526315789474 200
8.876343047395542 0.07207692307692308 0.5831578947368421 50
8.769952344387177 0.07207692307692308 0.5831578947368421 100
8.654209716919741 0.07207692307692308 0.5831578947368421 150
8.775144237038464 0.07207692307692308 0.5831578947368421 200
8.798374505302478 0.07207692307692308 0.6352631578947369 50
8.42600377097232 0.07207692307692308 0.6352631578947369 100
8.476425739758213 0.07207692307692308 0.6352631578947369 150
8.490126609285033 0.07207692307692308 0.6352631578947369 200
8.65752784325471 0.07207692307692308 0.6873684210526316 50
8.549388666226688 0.07207692307692308 0.6873684210526316 100
8.578710915780485 0.07207692307692308 0.6873684210526316 150
8.622494651664578 0.07207692307692308 0.6873684210526316 200
8.826425420420259 0.07207692307692308 0.7394736842105263 50
8.487049196076228 0.0720769230

8.418143004015375 0.07715384615384616 0.32263157894736844 100
8.481139231127749 0.07715384615384616 0.32263157894736844 150
8.63980311162334 0.07715384615384616 0.32263157894736844 200
8.813057421882986 0.07715384615384616 0.37473684210526315 50
8.852189517695422 0.07715384615384616 0.37473684210526315 100
8.720628450063552 0.07715384615384616 0.37473684210526315 150
8.651038913976121 0.07715384615384616 0.37473684210526315 200
8.773000494319005 0.07715384615384616 0.4268421052631579 50
8.611312337240212 0.07715384615384616 0.4268421052631579 100
8.537929592973025 0.07715384615384616 0.4268421052631579 150
8.691927190915454 0.07715384615384616 0.4268421052631579 200
8.695611108644364 0.07715384615384616 0.4789473684210527 50
8.526545732725726 0.07715384615384616 0.4789473684210527 100
8.684629987937607 0.07715384615384616 0.4789473684210527 150
8.715409634174003 0.07715384615384616 0.4789473684210527 200
8.730540673125109 0.07715384615384616 0.5310526315789474 50
8.562083292804752 0.07

9.169858370278753 0.08223076923076923 0.06210526315789474 200
9.166096781091976 0.08223076923076923 0.11421052631578947 50
9.172126951457257 0.08223076923076923 0.11421052631578947 100
9.206334673869568 0.08223076923076923 0.11421052631578947 150
9.16614394541482 0.08223076923076923 0.11421052631578947 200
9.274350571715487 0.08223076923076923 0.16631578947368422 50
8.803751688262647 0.08223076923076923 0.16631578947368422 100
8.676464983406541 0.08223076923076923 0.16631578947368422 150
8.897012147466008 0.08223076923076923 0.16631578947368422 200
8.914617950567411 0.08223076923076923 0.21842105263157896 50
8.49801501547444 0.08223076923076923 0.21842105263157896 100
8.70337819552383 0.08223076923076923 0.21842105263157896 150
8.719760006911148 0.08223076923076923 0.21842105263157896 200
8.839330826254317 0.08223076923076923 0.2705263157894737 50
8.413199692325794 0.08223076923076923 0.2705263157894737 100
8.358560542824344 0.08223076923076923 0.2705263157894737 150
8.48491736607044 0

8.491246257812476 0.08476923076923078 0.8436842105263158 200
8.629882552628457 0.08476923076923078 0.8957894736842106 50
8.506562511234847 0.08476923076923078 0.8957894736842106 100
8.467959291768432 0.08476923076923078 0.8957894736842106 150
8.604415744901067 0.08476923076923078 0.8957894736842106 200
8.456460254850812 0.08476923076923078 0.9478947368421053 50
8.47967956001719 0.08476923076923078 0.9478947368421053 100
8.547209222594915 0.08476923076923078 0.9478947368421053 150
8.623374698119981 0.08476923076923078 0.9478947368421053 200
8.615382768573328 0.08476923076923078 1.0 50
8.45591595319072 0.08476923076923078 1.0 100
8.479090008515318 0.08476923076923078 1.0 150
8.641792081324747 0.08476923076923078 1.0 200
11.044320964140926 0.08730769230769231 0.01 50
10.38849286349546 0.08730769230769231 0.01 100
10.233192985798862 0.08730769230769231 0.01 150
10.896397518795272 0.08730769230769231 0.01 200
9.802082827593196 0.08730769230769231 0.06210526315789474 50
9.348020137887103 0.0

8.68562914355604 0.08984615384615385 0.6352631578947369 50
8.484377128557114 0.08984615384615385 0.6352631578947369 100
8.571555012045664 0.08984615384615385 0.6352631578947369 150
8.567840673528405 0.08984615384615385 0.6352631578947369 200
8.701711813247142 0.08984615384615385 0.6873684210526316 50
8.614502273423218 0.08984615384615385 0.6873684210526316 100
8.66357491359992 0.08984615384615385 0.6873684210526316 150
8.760943891279968 0.08984615384615385 0.6873684210526316 200
8.558589761278844 0.08984615384615385 0.7394736842105263 50
8.520444728429862 0.08984615384615385 0.7394736842105263 100
8.612791700266438 0.08984615384615385 0.7394736842105263 150
8.602079082561632 0.08984615384615385 0.7394736842105263 200
8.614522342729273 0.08984615384615385 0.791578947368421 50
8.580386939039567 0.08984615384615385 0.791578947368421 100
8.675388828427417 0.08984615384615385 0.791578947368421 150
8.669440189511562 0.08984615384615385 0.791578947368421 200
8.73372161065338 0.089846153846153

8.765177815448315 0.09492307692307693 0.37473684210526315 200
8.596759401022615 0.09492307692307693 0.4268421052631579 50
8.53463125356481 0.09492307692307693 0.4268421052631579 100
8.688590089816818 0.09492307692307693 0.4268421052631579 150
8.857156848244095 0.09492307692307693 0.4268421052631579 200
8.623756732920508 0.09492307692307693 0.4789473684210527 50
8.772414551146857 0.09492307692307693 0.4789473684210527 100
8.864968414203963 0.09492307692307693 0.4789473684210527 150
8.889394585494607 0.09492307692307693 0.4789473684210527 200
8.79517739059284 0.09492307692307693 0.5310526315789474 50
8.580461314918221 0.09492307692307693 0.5310526315789474 100
8.431517496067881 0.09492307692307693 0.5310526315789474 150
8.527561340760894 0.09492307692307693 0.5310526315789474 200
8.610069237715715 0.09492307692307693 0.5831578947368421 50
8.538117630168703 0.09492307692307693 0.5831578947368421 100
8.496642915309286 0.09492307692307693 0.5831578947368421 150
8.590648579539584 0.094923076

8.991025992340814 0.1 0.16631578947368422 150
8.918457087583327 0.1 0.16631578947368422 200
8.650966167935364 0.1 0.21842105263157896 50
8.513771421709416 0.1 0.21842105263157896 100
8.88125307179861 0.1 0.21842105263157896 150
9.020998597484203 0.1 0.21842105263157896 200
8.989410516042758 0.1 0.2705263157894737 50
8.64511548390999 0.1 0.2705263157894737 100
8.797433976738766 0.1 0.2705263157894737 150
9.008780113516293 0.1 0.2705263157894737 200
8.696696755378097 0.1 0.32263157894736844 50
8.800808661601652 0.1 0.32263157894736844 100
9.01232461957106 0.1 0.32263157894736844 150
9.201136431989944 0.1 0.32263157894736844 200
8.49024126324251 0.1 0.37473684210526315 50
8.322933068074642 0.1 0.37473684210526315 100
8.580296255417762 0.1 0.37473684210526315 150
8.670300634276183 0.1 0.37473684210526315 200
8.594157562537024 0.1 0.4268421052631579 50
8.610279011929329 0.1 0.4268421052631579 100
8.58571205339382 0.1 0.4268421052631579 150
8.82206752258527 0.1 0.4268421052631579 200
8.68240

8.288999750809802

In [87]:
GBR_model = GradientBoostingRegressor(learning_rate=0.08223076923076923, n_estimators=150, subsample=0.4268421052631579, random_state=42)
GBR_model.fit(x_train, y_train)
GBR_preds = GBR_model.predict(x_valid)

In [88]:
print(f"The Mean Absolute Error in the Age Prediction is {mae(y_valid, GBR_preds):0.2f} years.")
print(f"The Mean Absolute Error in using the mean age is {mae(y_valid, [data['Age'].mean() for i in range(len(y_valid))]):0.2f} years.")

The Mean Absolute Error in the Age Prediction is 8.29 years.
The Mean Absolute Error in using the mean age is 12.61 years.


# Conclusion 

In conclusion there is a slight increase in the age prediction compared to using a standard mean value for all missing ages and should be added back into the main dataset. 

# Finally Prediction of the Missing Ages

Using the best model from above generate predictions of the missing ages. 

In [89]:
final_x = passengers_wo_ages_test[features]

In [90]:
final_ages = GBR_model.predict(final_x)

In [91]:
final_ages

array([34.40928611, 36.53011749, 32.87838269, 28.426806  , 23.96020775,
       35.40278562, 32.1698923 , 33.97374283, 22.2799067 , 24.05280737,
       26.44249426, 28.68676644, 23.04746895, 22.23175344, 43.13678949,
       45.40572015,  4.3718939 , 27.81338077, 30.09612662, 20.98844269,
       29.46970526, 30.32662408, 28.69482114, 26.3544944 , 18.03127099,
       29.60062186, 34.62610051,  7.04082209, 24.47313947, 31.40981822,
       34.40384854,  9.20587019, 36.47224212, 47.37986555,  6.56745677,
       10.49968726, 31.95869194, 48.18497636, 27.57594841, 35.52564659,
       26.83748171, 19.13988339, 31.82855583, 28.55803169,  9.46501548,
       24.57818532, 19.96064027, 18.84227865, 30.08828417, 41.08131694,
       38.14823126, 31.82799924, 48.03248137, 29.6532407 , 33.62821669,
       48.53536579, 43.58125776, 38.88248596, 26.21363427, 23.03051333,
       25.32174692, 30.82509898, 30.64242392, 19.98178511, 15.19545631,
       34.28703317, 29.091462  , 27.21750108, 49.02779975, 27.50

# Replace Data

In [92]:
missing_ages_filled = passengers_wo_ages_test.drop(['Age'], axis=1)

In [93]:
missing_ages_filled['Age'] = final_ages

In [94]:
missing_ages_filled = missing_ages_filled[passengers_w_ages_train.columns.to_list()] 

In [95]:
missing_ages_filled

Unnamed: 0,index,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Ticket_Ex,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
5,5,6,0,3,1.0,34.409286,0,0,8.4583,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
17,17,18,1,2,1.0,36.530117,0,0,13.0,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
19,19,20,1,3,0.0,32.878383,0,0,7.225,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
26,26,27,0,3,1.0,28.426806,0,0,7.225,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
28,28,29,1,3,0.0,23.960208,0,0,7.8792,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
29,29,30,0,3,1.0,35.402786,0,0,7.8958,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
31,31,32,1,1,0.0,32.169892,1,0,146.5208,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
32,32,33,1,3,0.0,33.973743,0,0,7.75,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
36,36,37,1,3,1.0,22.279907,0,0,7.2292,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
42,42,43,0,3,1.0,24.052807,0,0,7.8958,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [96]:
final_data = pd.concat([passengers_w_ages_train, missing_ages_filled], axis=0).drop(['index'], axis=1)
final_data.sort_values(['PassengerId'], inplace=True)

In [97]:
final_data

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Ticket_Ex,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,1,0,3,1.0,22.0,1,0,7.25,2.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,2,1,1,0.0,38.0,1,0,71.2833,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,3,1,3,0.0,26.0,0,0,7.925,2.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,4,1,1,0.0,35.0,1,0,53.1,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,5,0,3,1.0,35.0,0,0,8.05,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5,6,0,3,1.0,34.409286,0,0,8.4583,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
6,7,0,1,1.0,54.0,0,0,51.8625,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,8,0,3,1.0,2.0,3,1,21.075,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
8,9,1,3,0.0,27.0,0,2,11.1333,2.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
9,10,1,2,0.0,14.0,1,0,30.0708,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [98]:
final_data.to_csv('new_data_v2.csv', index=False) 