In [1]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json

In [2]:
!kaggle competitions download -c titanic

Downloading titanic.zip to /content
  0% 0.00/34.1k [00:00<?, ?B/s]
100% 34.1k/34.1k [00:00<00:00, 21.2MB/s]


In [3]:
!unzip titanic.zip

Archive:  titanic.zip
  inflating: gender_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [126]:
!pip install -q pycaret

In [114]:
!pip install -q catboost

[K     |████████████████████████████████| 76.6 MB 1.4 MB/s 
[?25h

# Here We Go!

In [152]:
#imports
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsRegressor

import scipy.stats

from sklearn.preprocessing import StandardScaler

from pycaret.regression import setup, compare_models

from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import KFold, cross_val_score

## Load Data

In [5]:
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

In [6]:
df_train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [7]:
df_train.shape

(891, 12)

In [9]:
df_test.shape

(418, 11)

## Combine train and test data

In [8]:
target = df_train['Survived']
testIds = df_test['PassengerId']

train = df_train.drop(['PassengerId', 'Survived'], axis = 1)
test = df_test.drop('PassengerId', axis=1)

data = pd.concat([train, test], axis=0).reset_index(drop=True)
data

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...
1304,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
1305,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
1306,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
1307,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


## Primer acercamiento y entendimiento de los datos

In [10]:
data.select_dtypes(np.number)

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare
0,3,22.0,1,0,7.2500
1,1,38.0,1,0,71.2833
2,3,26.0,0,0,7.9250
3,1,35.0,1,0,53.1000
4,3,35.0,0,0,8.0500
...,...,...,...,...,...
1304,3,,0,0,8.0500
1305,1,39.0,0,0,108.9000
1306,3,38.5,0,0,7.2500
1307,3,,0,0,8.0500


In [11]:
data.select_dtypes(np.object_)

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
0,"Braund, Mr. Owen Harris",male,A/5 21171,,S
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,PC 17599,C85,C
2,"Heikkinen, Miss. Laina",female,STON/O2. 3101282,,S
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,113803,C123,S
4,"Allen, Mr. William Henry",male,373450,,S
...,...,...,...,...,...
1304,"Spector, Mr. Woolf",male,A.5. 3236,,S
1305,"Oliva y Ocana, Dona. Fermina",female,PC 17758,C105,C
1306,"Saether, Mr. Simon Sivertsen",male,SOTON/O.Q. 3101262,,S
1307,"Ware, Mr. Frederick",male,359309,,S


In [13]:
pd.unique(data['Cabin'])

array([nan, 'C85', 'C123', 'E46', 'G6', 'C103', 'D56', 'A6',
       'C23 C25 C27', 'B78', 'D33', 'B30', 'C52', 'B28', 'C83', 'F33',
       'F G73', 'E31', 'A5', 'D10 D12', 'D26', 'C110', 'B58 B60', 'E101',
       'F E69', 'D47', 'B86', 'F2', 'C2', 'E33', 'B19', 'A7', 'C49', 'F4',
       'A32', 'B4', 'B80', 'A31', 'D36', 'D15', 'C93', 'C78', 'D35',
       'C87', 'B77', 'E67', 'B94', 'C125', 'C99', 'C118', 'D7', 'A19',
       'B49', 'D', 'C22 C26', 'C106', 'C65', 'E36', 'C54',
       'B57 B59 B63 B66', 'C7', 'E34', 'C32', 'B18', 'C124', 'C91', 'E40',
       'T', 'C128', 'D37', 'B35', 'E50', 'C82', 'B96 B98', 'E10', 'E44',
       'A34', 'C104', 'C111', 'C92', 'E38', 'D21', 'E12', 'E63', 'A14',
       'B37', 'C30', 'D20', 'B79', 'E25', 'D46', 'B73', 'C95', 'B38',
       'B39', 'B22', 'C86', 'C70', 'A16', 'C101', 'C68', 'A10', 'E68',
       'B41', 'A20', 'D19', 'D50', 'D9', 'A23', 'B50', 'A26', 'D48',
       'E58', 'C126', 'B71', 'B51 B53 B55', 'D49', 'B5', 'B20', 'F G63',
       'C62 C64',

In [14]:
pd.unique(data['Embarked'])

array(['S', 'C', 'Q', nan], dtype=object)

In [103]:
pd.unique(data['Ticket'])

array(['A/5 21171', 'PC 17599', 'STON/O2. 3101282', '113803', '373450',
       '330877', '17463', '349909', '347742', '237736', 'PP 9549',
       '113783', 'A/5. 2151', '347082', '350406', '248706', '382652',
       '244373', '345763', '2649', '239865', '248698', '330923', '113788',
       '347077', '2631', '19950', '330959', '349216', 'PC 17601',
       'PC 17569', '335677', 'C.A. 24579', 'PC 17604', '113789', '2677',
       'A./5. 2152', '345764', '2651', '7546', '11668', '349253',
       'SC/Paris 2123', '330958', 'S.C./A.4. 23567', '370371', '14311',
       '2662', '349237', '3101295', 'A/4. 39886', 'PC 17572', '2926',
       '113509', '19947', 'C.A. 31026', '2697', 'C.A. 34651', 'CA 2144',
       '2669', '113572', '36973', '347088', 'PC 17605', '2661',
       'C.A. 29395', 'S.P. 3464', '3101281', '315151', 'C.A. 33111',
       'S.O.C. 14879', '2680', '1601', '348123', '349208', '374746',
       '248738', '364516', '345767', '345779', '330932', '113059',
       'SO/C 14885', '31012

In [16]:
data = data.drop('Name', axis=1)

In [104]:
data = data.drop('Ticket', axis=1)

## Taking Care of Missing Data

### Categorical Features

In [17]:
data.select_dtypes(np.object_).loc[:, data.isna().sum() > 0].columns

Index(['Cabin', 'Embarked'], dtype='object')

In [18]:
for column in ['Cabin', 'Embarked']:
  data[column].fillna(data[column].mode()[0], inplace = True)

In [19]:
data.select_dtypes(np.object_).loc[:, data.isna().sum() > 0].columns

Index([], dtype='object')

### Numeric Features

In [20]:
data.select_dtypes(np.number).loc[:, data.isna().sum() > 0].columns

Index(['Age', 'Fare'], dtype='object')

In [22]:
def knn_impute (df, column_na):
  df = df.copy()

  numeric_df = df.select_dtypes(np.number)
  non_na_columns = numeric_df.loc[: , numeric_df.isna().sum() == 0].columns

  y_train = numeric_df.loc[numeric_df[column_na].isna() == False, column_na]
  X_train = numeric_df.loc[numeric_df[column_na].isna() == False, non_na_columns]
  X_test = numeric_df.loc[numeric_df[column_na].isna() == True, non_na_columns]

  knn = KNeighborsRegressor()
  knn.fit(X_train, y_train)

  y_pred = knn.predict(X_test)

  df.loc[df[column_na].isna() == True, column_na] = y_pred

  return df

In [23]:
for column in ['Age', 'Fare']:
  data = knn_impute(data, column)

In [24]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Pclass    1309 non-null   int64  
 1   Sex       1309 non-null   object 
 2   Age       1309 non-null   float64
 3   SibSp     1309 non-null   int64  
 4   Parch     1309 non-null   int64  
 5   Ticket    1309 non-null   object 
 6   Fare      1309 non-null   float64
 7   Cabin     1309 non-null   object 
 8   Embarked  1309 non-null   object 
dtypes: float64(2), int64(3), object(4)
memory usage: 92.2+ KB


In [36]:
data1 = data.copy()

## Feature Engineer

In [40]:
data1[['1stCabin', '2ndCabin', '3tdCabin', '4thCabin']] = data1['Cabin'].str.split(' ', 4, expand=True)

In [43]:
data1.drop('Cabin', axis=1, inplace=True)

In [89]:
data2 = data1.copy()

In [90]:
data2[['space', '1stCabin-Letter', '1stCabin-Number']] = data2['1stCabin'].str.split('', 2, expand=True)
data2[['space', '2ndCabin-Letter', '2ndCabin-Number']] = data2['2ndCabin'].str.split('', 2, expand=True)
data2[['space', '3tdCabin-Letter', '3tdCabin-Number']] = data2['3tdCabin'].str.split('', 2, expand=True)
data2[['space', '4thCabin-Letter', '4thCabin-Number']] = data2['4thCabin'].str.split('', 2, expand=True)

In [91]:
data2.drop(['space', '1stCabin', '2ndCabin', '3tdCabin', '4thCabin'], axis=1, inplace=True)

In [92]:
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Pclass           1309 non-null   int64  
 1   Sex              1309 non-null   object 
 2   Age              1309 non-null   float64
 3   SibSp            1309 non-null   int64  
 4   Parch            1309 non-null   int64  
 5   Ticket           1309 non-null   object 
 6   Fare             1309 non-null   float64
 7   Embarked         1309 non-null   object 
 8   1stCabin-Letter  1309 non-null   object 
 9   1stCabin-Number  1309 non-null   object 
 10  2ndCabin-Letter  1055 non-null   object 
 11  2ndCabin-Number  1055 non-null   object 
 12  3tdCabin-Letter  1029 non-null   object 
 13  3tdCabin-Number  1029 non-null   object 
 14  4thCabin-Letter  5 non-null      object 
 15  4thCabin-Number  5 non-null      object 
dtypes: float64(2), int64(3), object(11)
memory usage: 163.8+ KB


In [93]:
data2.select_dtypes(np.object_).loc[:, data2.isna().sum() > 0].columns

Index(['2ndCabin-Letter', '2ndCabin-Number', '3tdCabin-Letter',
       '3tdCabin-Number', '4thCabin-Letter', '4thCabin-Number'],
      dtype='object')

### Taking care of the new missing data

In [94]:
for column in ['2ndCabin-Letter', '2ndCabin-Number', '3tdCabin-Letter',
       '3tdCabin-Number', '4thCabin-Letter', '4thCabin-Number']:
  data2[column].fillna("None", inplace = True)

In [95]:
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Pclass           1309 non-null   int64  
 1   Sex              1309 non-null   object 
 2   Age              1309 non-null   float64
 3   SibSp            1309 non-null   int64  
 4   Parch            1309 non-null   int64  
 5   Ticket           1309 non-null   object 
 6   Fare             1309 non-null   float64
 7   Embarked         1309 non-null   object 
 8   1stCabin-Letter  1309 non-null   object 
 9   1stCabin-Number  1309 non-null   object 
 10  2ndCabin-Letter  1309 non-null   object 
 11  2ndCabin-Number  1309 non-null   object 
 12  3tdCabin-Letter  1309 non-null   object 
 13  3tdCabin-Number  1309 non-null   object 
 14  4thCabin-Letter  1309 non-null   object 
 15  4thCabin-Number  1309 non-null   object 
dtypes: float64(2), int64(3), object(11)
memory usage: 163.8+ KB


## Feature Transformations

### Transform numeric features with skew normal distribution

In [96]:
skew_df = pd.DataFrame(data2.select_dtypes(np.number).columns, columns=['Feature'])
skew_df['Skew'] = skew_df['Feature'].apply(lambda feature: scipy.stats.skew(data2[feature]))
skew_df['Absolute Skew'] = skew_df['Skew'].apply(abs)
skew_df['Skewed'] = skew_df['Absolute Skew'].apply(lambda x: True if x>=0.5 else False)
skew_df

Unnamed: 0,Feature,Skew,Absolute Skew,Skewed
0,Pclass,-0.597961,0.597961,True
1,Age,0.513989,0.513989,True
2,SibSp,3.839814,3.839814,True
3,Parch,3.664872,3.664872,True
4,Fare,4.36459,4.36459,True


In [97]:
data2[skew_df.query("Skewed == True")['Feature'].values].describe()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare
count,1309.0,1309.0,1309.0,1309.0,1309.0
mean,2.294882,29.286636,0.498854,0.385027,33.283922
std,0.837836,13.627259,1.041658,0.86556,51.740569
min,1.0,0.17,0.0,0.0,0.0
25%,2.0,22.0,0.0,0.0,7.8958
50%,3.0,26.0,0.0,0.0,14.4542
75%,3.0,38.0,1.0,0.0,31.275
max,3.0,80.0,8.0,9.0,512.3292


In [98]:
for column in skew_df.query("Skewed == True")['Feature'].values:
  data2[column] = np.log1p(data2[column])

In [102]:
data3 = data2.copy()

In [105]:
data3 = data3.drop('Ticket', axis=1)

## Enconding Categoricals

In [106]:
data3 = pd.get_dummies(data3)
data3

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S,...,3tdCabin-Letter_None,3tdCabin-Number_27,3tdCabin-Number_55,3tdCabin-Number_56,3tdCabin-Number_63,3tdCabin-Number_None,4thCabin-Letter_B,4thCabin-Letter_None,4thCabin-Number_66,4thCabin-Number_None
0,1.386294,3.135494,0.693147,0.000000,2.110213,0,1,0,0,1,...,0,1,0,0,0,0,0,1,0,1
1,0.693147,3.663562,0.693147,0.000000,4.280593,1,0,1,0,0,...,1,0,0,0,0,1,0,1,0,1
2,1.386294,3.295837,0.000000,0.000000,2.188856,1,0,0,0,1,...,0,1,0,0,0,0,0,1,0,1
3,0.693147,3.583519,0.693147,0.000000,3.990834,1,0,0,0,1,...,1,0,0,0,0,1,0,1,0,1
4,1.386294,3.583519,0.000000,0.000000,2.202765,0,1,0,0,1,...,0,1,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,1.386294,3.161247,0.000000,0.000000,2.202765,0,1,0,0,1,...,0,1,0,0,0,0,0,1,0,1
1305,0.693147,3.688879,0.000000,0.000000,4.699571,1,0,1,0,0,...,1,0,0,0,0,1,0,1,0,1
1306,1.386294,3.676301,0.000000,0.000000,2.110213,0,1,0,0,1,...,0,1,0,0,0,0,0,1,0,1
1307,1.386294,3.161247,0.000000,0.000000,2.202765,0,1,0,0,1,...,0,1,0,0,0,0,0,1,0,1


In [107]:
data4 = data3.copy()

## Scaling

In [109]:
scaler = StandardScaler()
scaler.fit(data4)
data4 = pd.DataFrame(scaler.transform(data4), index=data4.index, columns=data4.columns)

In [110]:
data4

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S,...,3tdCabin-Letter_None,3tdCabin-Number_27,3tdCabin-Number_55,3tdCabin-Number_56,3tdCabin-Number_63,3tdCabin-Number_None,4thCabin-Letter_B,4thCabin-Letter_None,4thCabin-Number_66,4thCabin-Number_None
0,0.808483,-0.220274,0.932011,-0.518791,-0.897977,-0.743497,0.743497,-0.50977,-0.32204,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
1,-1.607929,0.633646,0.932011,-0.518791,1.344317,1.344995,-1.344995,1.96167,-0.32204,-1.526692,...,1.917029,-1.878673,-0.047928,-0.02765,-0.061922,1.917029,-0.061922,0.061922,-0.061922,0.061922
2,0.808483,0.039011,-0.615607,-0.518791,-0.816728,1.344995,-1.344995,-0.50977,-0.32204,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
3,-1.607929,0.504212,0.932011,-0.518791,1.044957,1.344995,-1.344995,-0.50977,-0.32204,0.655011,...,1.917029,-1.878673,-0.047928,-0.02765,-0.061922,1.917029,-0.061922,0.061922,-0.061922,0.061922
4,0.808483,0.504212,-0.615607,-0.518791,-0.802359,-0.743497,0.743497,-0.50977,-0.32204,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,0.808483,-0.178631,-0.615607,-0.518791,-0.802359,-0.743497,0.743497,-0.50977,-0.32204,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
1305,-1.607929,0.674586,-0.615607,-0.518791,1.777177,1.344995,-1.344995,1.96167,-0.32204,-1.526692,...,1.917029,-1.878673,-0.047928,-0.02765,-0.061922,1.917029,-0.061922,0.061922,-0.061922,0.061922
1306,0.808483,0.654246,-0.615607,-0.518791,-0.897977,-0.743497,0.743497,-0.50977,-0.32204,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
1307,0.808483,-0.178631,-0.615607,-0.518791,-0.802359,-0.743497,0.743497,-0.50977,-0.32204,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922


## Split train and test data

In [111]:
train_final = data4.loc[:train.index.max(), :].copy()
test_final = data4.loc[train.index.max()+1:, :].reset_index(drop=True).copy()

In [112]:
test_final

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S,...,3tdCabin-Letter_None,3tdCabin-Number_27,3tdCabin-Number_55,3tdCabin-Number_56,3tdCabin-Number_63,3tdCabin-Number_None,4thCabin-Letter_B,4thCabin-Letter_None,4thCabin-Number_66,4thCabin-Number_None
0,0.808483,0.481595,-0.615607,-0.518791,-0.827878,-0.743497,0.743497,-0.50977,3.105202,-1.526692,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
1,0.808483,0.969412,0.932011,-0.518791,-0.929768,1.344995,-1.344995,-0.50977,-0.322040,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
2,-0.194418,1.409147,-0.615607,-0.518791,-0.630538,-0.743497,0.743497,-0.50977,3.105202,-1.526692,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
3,0.808483,0.097820,-0.615607,-0.518791,-0.734701,-0.743497,0.743497,-0.50977,-0.322040,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
4,0.808483,-0.220274,0.932011,1.150572,-0.405574,1.344995,-1.344995,-0.50977,-0.322040,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
413,0.808483,-0.178631,-0.615607,-0.518791,-0.802359,-0.743497,0.743497,-0.50977,-0.322040,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
414,-1.607929,0.674586,-0.615607,-0.518791,1.777177,1.344995,-1.344995,1.96167,-0.322040,-1.526692,...,1.917029,-1.878673,-0.047928,-0.02765,-0.061922,1.917029,-0.061922,0.061922,-0.061922,0.061922
415,0.808483,0.654246,-0.615607,-0.518791,-0.897977,-0.743497,0.743497,-0.50977,-0.322040,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922
416,0.808483,-0.178631,-0.615607,-0.518791,-0.802359,-0.743497,0.743497,-0.50977,-0.322040,0.655011,...,-0.521641,0.532291,-0.047928,-0.02765,-0.061922,-0.521641,-0.061922,0.061922,-0.061922,0.061922


## Training Model

In [153]:
classifier = GradientBoostingClassifier()
classifier.fit(train_final, target)

GradientBoostingClassifier()

In [154]:
final_predictions = classifier.predict(test_final)

In [148]:
final_predictions

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1,
       1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
       0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,

## Make Submission

In [155]:
submission = pd.concat([testIds, pd.Series(final_predictions, name='Survived')], axis=1)

In [156]:
submission.to_csv('./submission.csv', index=False, header=True)