# Code for estimating survival on Titanic (Kaggle Competition)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
train = pd.read_csv("~/samurai/kaggle/titanic/data/train.csv")
test = pd.read_csv("~/samurai/kaggle/titanic/data/test.csv")

In [3]:
train

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [4]:
test.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [5]:
test = test.drop(columns=("Embarked"))

In [6]:
test = test.drop(columns=("Cabin"))

In [7]:
test = test.drop(columns=("Ticket"))

## Pclass

In [8]:
pclass_table = pd.crosstab(
    train["Survived"]==1,
    train["Pclass"],
    margins=True,
    normalize="columns" 
)

In [9]:
pclass_table

Pclass,1,2,3,All
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,0.37037,0.527174,0.757637,0.616162
True,0.62963,0.472826,0.242363,0.383838


In [10]:
pclass_table[1][1]

0.6296296296296297

In [11]:
# Given the Pclass,
# Pclass(1) = 62.9% Chance of survival
# Pclass(2) = 47.3% Chance of survival
# Pclass(3) = 24.2% Chance of survival

## Sex

In [12]:
sex_table = pd.crosstab(
    train["Survived"]==1,
    train["Sex"],
    margins=True,
    normalize="columns"
)

In [13]:
sex_table

Sex,female,male,All
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0.257962,0.811092,0.616162
True,0.742038,0.188908,0.383838


In [14]:
# Given Sex,
# Male = 18.9% Chance of survival
# Female = 74.2% Chance of survival

## Age

In [15]:
age_table =pd.crosstab(
    train["Survived"]==1,
    train["Age"],
    normalize="columns"
)

In [16]:
age_table

Age,0.42,0.67,0.75,0.83,0.92,1.00,2.00,3.00,4.00,5.00,...,62.00,63.00,64.00,65.00,66.00,70.00,70.50,71.00,74.00,80.00
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,0.0,0.0,0.0,0.0,0.0,0.285714,0.7,0.166667,0.3,0.0,...,0.5,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
True,1.0,1.0,1.0,1.0,1.0,0.714286,0.3,0.833333,0.7,1.0,...,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [17]:
age_table[1][1]

0.7142857142857143

In [18]:
age_table[min(age_table, key=lambda x:abs(x-1))][1] #pd.crosstabのmarginsを消さないとエラーが出る？

0.7142857142857143

In [19]:
# Use age table to calculate chance of survival per age
# use later

## SibSp

In [20]:
sibsp_table = pd.crosstab(
    train["Survived"]==1,
    train["SibSp"],
    margins=True,
    normalize="columns"
)

In [21]:
sibsp_table

SibSp,0,1,2,3,4,5,8,All
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
False,0.654605,0.464115,0.535714,0.75,0.833333,1.0,1.0,0.616162
True,0.345395,0.535885,0.464286,0.25,0.166667,0.0,0.0,0.383838


In [22]:
sibsp_table[0][1]

0.34539473684210525

## Parch

In [23]:
parch_table = pd.crosstab(
    train["Survived"]==0,
    train["Parch"],
    normalize="columns"
)

In [24]:
parch_table

Parch,0,1,2,3,4,5,6
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
False,0.343658,0.550847,0.5,0.6,0.0,0.2,0.0
True,0.656342,0.449153,0.5,0.4,1.0,0.8,1.0


## Fare

In [25]:
titanic_fare = train[["Survived","Fare"]]

In [26]:
fare_table = pd.crosstab(
    train["Survived"]==1,
    train["Fare"],
    normalize="columns"
)

In [27]:
fare_table

Fare,0.0000,4.0125,5.0000,6.2375,6.4375,6.4500,6.4958,6.7500,6.8583,6.9500,...,153.4625,164.8667,211.3375,211.5000,221.7792,227.5250,247.5208,262.3750,263.0000,512.3292
Survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,0.933333,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.333333,0.0,0.0,1.0,1.0,0.25,0.5,0.0,0.5,0.0
True,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.666667,1.0,1.0,0.0,0.0,0.75,0.5,1.0,0.5,1.0


In [28]:
fare_table[min(fare_table, key=lambda x:abs(x-0))][1]

0.06666666666666667

# Time to test

In [29]:
test.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Fare
0,892,3,"Kelly, Mr. James",male,34.5,0,0,7.8292
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,7.0
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,9.6875
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,8.6625
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,12.2875


In [30]:
test = test.drop(columns=("Name"))

In [31]:
test

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare
0,892,3,male,34.5,0,0,7.8292
1,893,3,female,47.0,1,0,7.0000
2,894,2,male,62.0,0,0,9.6875
3,895,3,male,27.0,0,0,8.6625
4,896,3,female,22.0,1,1,12.2875
...,...,...,...,...,...,...,...
413,1305,3,male,,0,0,8.0500
414,1306,1,female,39.0,0,0,108.9000
415,1307,3,male,38.5,0,0,7.2500
416,1308,3,male,,0,0,8.0500


In [32]:
test["Age"].describe()

count    332.000000
mean      30.272590
std       14.181209
min        0.170000
25%       21.000000
50%       27.000000
75%       39.000000
max       76.000000
Name: Age, dtype: float64

In [33]:
test.isna().sum()

PassengerId     0
Pclass          0
Sex             0
Age            86
SibSp           0
Parch           0
Fare            1
dtype: int64

In [34]:
test["Age"].fillna(30.27,inplace=True)#NaNには平均年齢を代用

In [35]:
test.isna().sum()

PassengerId    0
Pclass         0
Sex            0
Age            0
SibSp          0
Parch          0
Fare           1
dtype: int64

In [36]:
test["Fare"].describe()

count    417.000000
mean      35.627188
std       55.907576
min        0.000000
25%        7.895800
50%       14.454200
75%       31.500000
max      512.329200
Name: Fare, dtype: float64

In [37]:
test["Fare"].fillna(35.6, inplace=True)

In [38]:
test.isna().sum()

PassengerId    0
Pclass         0
Sex            0
Age            0
SibSp          0
Parch          0
Fare           0
dtype: int64

## Calculating data based off test.csv

In [39]:
# 考えているコードの書き方のまとめ
# すごく雑で効率がすごく悪い気がしますが、取り敢えず試してみます
# Get range of test list = 417
# Use the crosstab tables made earlier to get the survival rate and add it to a 
# Add all of the values and divide by the number of factors

In [40]:
test_pclass_calculated = []
for x in range(418):
    test_pclass_calculated.append(pclass_table[(test["Pclass"][x])][1])

In [41]:
test_sex_calculated = []
for x in range(418):
    test_sex_calculated.append(sex_table[(test["Sex"][x])][1])

In [42]:
test_age_calculated = []
for x in range(418):
    b = test["Age"][x]
    test_age_calculated.append(
        age_table[
            min(age_table, key=lambda x:abs(x-b))][1]
    )

In [43]:
test_sibsp_calculated = []
for x in range(418):
    test_sibsp_calculated.append(sibsp_table[(test["SibSp"][x])][1])

In [44]:
test_parch_calculated = []
for x in range(418):
    c = test["Parch"][x]
    test_parch_calculated.append(
        parch_table[
            min(parch_table, key=lambda x:abs(x-c))][1]
    )

In [45]:
test_fare_calculated = []
for x in range(418):
    d = test["Fare"][x]
    test_fare_calculated.append(
        fare_table[
            min(fare_table, key=lambda x:abs(x-d))][1]
    )

In [46]:
#全部入ったか確かめる

In [47]:
test_fare_calculated

[0.5,
 0.5,
 0.5,
 0.07692307692307693,
 1.0,
 0.0,
 0.0,
 1.0,
 0.26666666666666666,
 0.125,
 0.02631578947368421,
 0.4838709677419355,
 0.5,
 0.4838709677419355,
 0.0,
 0.2,
 0.6666666666666666,
 0.25,
 0.4444444444444444,
 0.25,
 1.0,
 0.0,
 0.75,
 0.0,
 1.0,
 0.2857142857142857,
 0.0,
 0.25,
 0.8,
 0.0,
 0.4838709677419355,
 0.75,
 0.5,
 0.0,
 1.0,
 0.26666666666666666,
 0.11627906976744186,
 0.07692307692307693,
 0.2222222222222222,
 0.7142857142857143,
 1.0,
 0.5333333333333333,
 0.23076923076923078,
 0.38095238095238093,
 1.0,
 0.4444444444444444,
 0.3333333333333333,
 0.35294117647058826,
 1.0,
 1.0,
 1.0,
 0.0,
 1.0,
 0.5,
 0.0,
 0.0,
 0.02631578947368421,
 0.25,
 0.2222222222222222,
 1.0,
 0.02631578947368421,
 0.25,
 0.35294117647058826,
 0.0,
 1.0,
 0.3333333333333333,
 1.0,
 0.0,
 0.0,
 0.5,
 0.35294117647058826,
 0.02631578947368421,
 0.4444444444444444,
 0.2,
 0.0,
 0.0,
 0.11627906976744186,
 0.0,
 0.38095238095238093,
 0.35294117647058826,
 0.6,
 0.0,
 0.48387096774193

In [48]:
test_age_calculated

[0.0,
 0.1111111111111111,
 0.5,
 0.6111111111111112,
 0.4074074074074074,
 0.5,
 0.4,
 0.3333333333333333,
 0.34615384615384615,
 0.20833333333333334,
 0.0,
 0.0,
 0.3333333333333333,
 1.0,
 0.1111111111111111,
 0.5,
 0.6111111111111112,
 0.20833333333333334,
 0.6111111111111112,
 0.4166666666666667,
 0.5,
 0.25,
 0.0,
 0.20833333333333334,
 0.6666666666666666,
 0.5,
 0.4074074074074074,
 0.4074074074074074,
 0.3333333333333333,
 0.0,
 0.5,
 0.5,
 0.4,
 0.0,
 0.4,
 0.34615384615384615,
 0.0,
 0.20833333333333334,
 0.2608695652173913,
 0.0,
 0.35714285714285715,
 0.0,
 0.3333333333333333,
 0.4,
 0.4166666666666667,
 0.2608695652173913,
 0.4166666666666667,
 0.0,
 0.5,
 0.5,
 0.5,
 0.6111111111111112,
 0.2,
 0.28,
 0.0,
 0.0,
 0.6111111111111112,
 0.2608695652173913,
 0.0,
 0.5,
 0.46153846153846156,
 0.5,
 0.34615384615384615,
 0.4074074074074074,
 1.0,
 0.0,
 0.34615384615384615,
 0.1111111111111111,
 0.47058823529411764,
 0.5,
 0.5,
 0.20833333333333334,
 0.4,
 0.0,
 0.61111111111111

In [49]:
test_parch_calculated

[0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.4491525423728814,
 0.6563421828908554,
 0.6563421828908554,
 0.4491525423728814,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.4491525423728814,
 0.6563421828908554,
 0.4491525423728814,
 0.4,
 0.6563421828908554,
 0.4491525423728814,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.5,
 0.5,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.4491525423728814,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.6563421828908554,
 0.5,
 0.6563421

In [50]:
test_pclass_calculated

[0.24236252545824846,
 0.24236252545824846,
 0.47282608695652173,
 0.24236252545824846,
 0.24236252545824846,
 0.24236252545824846,
 0.24236252545824846,
 0.47282608695652173,
 0.24236252545824846,
 0.24236252545824846,
 0.24236252545824846,
 0.6296296296296297,
 0.6296296296296297,
 0.47282608695652173,
 0.6296296296296297,
 0.47282608695652173,
 0.47282608695652173,
 0.24236252545824846,
 0.24236252545824846,
 0.24236252545824846,
 0.6296296296296297,
 0.24236252545824846,
 0.6296296296296297,
 0.6296296296296297,
 0.6296296296296297,
 0.24236252545824846,
 0.6296296296296297,
 0.24236252545824846,
 0.6296296296296297,
 0.24236252545824846,
 0.47282608695652173,
 0.47282608695652173,
 0.24236252545824846,
 0.24236252545824846,
 0.6296296296296297,
 0.24236252545824846,
 0.24236252545824846,
 0.24236252545824846,
 0.24236252545824846,
 0.24236252545824846,
 0.24236252545824846,
 0.6296296296296297,
 0.24236252545824846,
 0.47282608695652173,
 0.6296296296296297,
 0.24236252545824846,


In [51]:
test_age_calculated

[0.0,
 0.1111111111111111,
 0.5,
 0.6111111111111112,
 0.4074074074074074,
 0.5,
 0.4,
 0.3333333333333333,
 0.34615384615384615,
 0.20833333333333334,
 0.0,
 0.0,
 0.3333333333333333,
 1.0,
 0.1111111111111111,
 0.5,
 0.6111111111111112,
 0.20833333333333334,
 0.6111111111111112,
 0.4166666666666667,
 0.5,
 0.25,
 0.0,
 0.20833333333333334,
 0.6666666666666666,
 0.5,
 0.4074074074074074,
 0.4074074074074074,
 0.3333333333333333,
 0.0,
 0.5,
 0.5,
 0.4,
 0.0,
 0.4,
 0.34615384615384615,
 0.0,
 0.20833333333333334,
 0.2608695652173913,
 0.0,
 0.35714285714285715,
 0.0,
 0.3333333333333333,
 0.4,
 0.4166666666666667,
 0.2608695652173913,
 0.4166666666666667,
 0.0,
 0.5,
 0.5,
 0.5,
 0.6111111111111112,
 0.2,
 0.28,
 0.0,
 0.0,
 0.6111111111111112,
 0.2608695652173913,
 0.0,
 0.5,
 0.46153846153846156,
 0.5,
 0.34615384615384615,
 0.4074074074074074,
 1.0,
 0.0,
 0.34615384615384615,
 0.1111111111111111,
 0.47058823529411764,
 0.5,
 0.5,
 0.20833333333333334,
 0.4,
 0.0,
 0.61111111111111

In [52]:
test_sibsp_calculated

[0.34539473684210525,
 0.5358851674641149,
 0.34539473684210525,
 0.34539473684210525,
 0.5358851674641149,
 0.34539473684210525,
 0.34539473684210525,
 0.5358851674641149,
 0.34539473684210525,
 0.4642857142857143,
 0.34539473684210525,
 0.34539473684210525,
 0.5358851674641149,
 0.5358851674641149,
 0.5358851674641149,
 0.5358851674641149,
 0.34539473684210525,
 0.34539473684210525,
 0.5358851674641149,
 0.34539473684210525,
 0.5358851674641149,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.5358851674641149,
 0.5358851674641149,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.4642857142857143,
 0.5358851674641149,
 0.4642857142857143,
 0.5358851674641149,
 0.5358851674641149,
 0.5358851674641149,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.34539473684210525,
 0.5358851674641149,
 0.34539473684210525,
 0.3453

In [53]:
survival_sum1 = np.add(
    test_pclass_calculated, 
    test_sex_calculated
)
#一気に全てのリストを足す方法がわからないから一つづつ

In [54]:
survival_sum2 = np.add(
    survival_sum1, 
    test_age_calculated)

In [55]:
survival_sum3 = np.add(
    survival_sum2, 
    test_sibsp_calculated)

In [56]:
survival_sum4 = np.add(
    survival_sum3, 
    test_parch_calculated)

In [57]:
survival_total = np.add(
    survival_sum4, 
    test_fare_calculated
)

In [58]:
survival_total

array([1.93300759, 2.7877392 , 2.66347115, 2.12104178, 3.37684586,
       1.93300759, 2.38613766, 2.98010528, 2.59895817, 1.8852319 ,
       1.45932338, 2.30414566, 3.39722853, 3.33783255, 2.67500631,
       3.10709165, 2.94124893, 1.89134092, 3.23218365, 2.65280433,
       3.51076513, 1.47581795, 3.12340477, 1.82141839, 3.97421968,
       2.40921231, 2.57362253, 2.090415  , 2.95360803, 1.55189857,
       2.83783255, 3.03236213, 2.92028591, 2.02028591, 3.41076513,
       2.0458281 , 2.10241673, 2.27139407, 1.91609938, 2.14729331,
       2.58296081, 2.35360803, 1.99711015, 2.9975536 , 3.98056186,
       2.1383216 , 2.57027469, 1.78594877, 3.87340477, 3.32979548,
       3.51076513, 2.27458226, 3.32830256, 2.90166785, 1.66347115,
       1.04708988, 2.07043449, 1.94387716, 1.84572024, 3.87340477,
       1.92086184, 2.41347115, 2.13210261, 2.39354507, 3.78282349,
       2.54993456, 3.33229151, 1.93138581, 2.29086293, 3.90755301,
       2.83907884, 1.66765671, 2.83058211, 2.02027469, 2.98451

In [59]:
survival_final_preprocessed = survival_total/5

In [60]:
len(survival_final_preprocessed)

418

In [61]:
survival_final_preprocessed

array([0.38660152, 0.55754784, 0.53269423, 0.42420836, 0.67536917,
       0.38660152, 0.47722753, 0.59602106, 0.51979163, 0.37704638,
       0.29186468, 0.46082913, 0.67944571, 0.66756651, 0.53500126,
       0.62141833, 0.58824979, 0.37826818, 0.64643673, 0.53056087,
       0.70215303, 0.29516359, 0.62468095, 0.36428368, 0.79484394,
       0.48184246, 0.51472451, 0.418083  , 0.59072161, 0.31037971,
       0.56756651, 0.60647243, 0.58405718, 0.40405718, 0.68215303,
       0.40916562, 0.42048335, 0.45427881, 0.38321988, 0.42945866,
       0.51659216, 0.47072161, 0.39942203, 0.59951072, 0.79611237,
       0.42766432, 0.51405494, 0.35718975, 0.77468095, 0.6659591 ,
       0.70215303, 0.45491645, 0.66566051, 0.58033357, 0.33269423,
       0.20941798, 0.4140869 , 0.38877543, 0.36914405, 0.77468095,
       0.38417237, 0.48269423, 0.42642052, 0.47870901, 0.7565647 ,
       0.50998691, 0.6664583 , 0.38627716, 0.45817259, 0.7815106 ,
       0.56781577, 0.33353134, 0.56611642, 0.40405494, 0.59690

In [62]:
# ここで問題が生存する確率がどこを上まったら生き残るの仮定するか
# 取り敢えず x > 0.5なら生存するとする

In [63]:
survival_final = []
for x in survival_final_preprocessed:
    if x>0.5:
        survival_final.append(1)
    else:
        survival_final.append(0)

In [64]:
survival_final

[0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,


In [65]:
test.head()

Unnamed: 0,PassengerId,Pclass,Sex,Age,SibSp,Parch,Fare
0,892,3,male,34.5,0,0,7.8292
1,893,3,female,47.0,1,0,7.0
2,894,2,male,62.0,0,0,9.6875
3,895,3,male,27.0,0,0,8.6625
4,896,3,female,22.0,1,1,12.2875


In [66]:
test["Survived"] = survival_final

In [67]:
test = test.drop(columns=("Pclass"))

In [68]:
test = test.drop(columns=("Fare"))
test = test.drop(columns=("Parch"))
test = test.drop(columns=("SibSp"))
test = test.drop(columns=("Age"))
test = test.drop(columns=("Sex"))
test

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,1
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [69]:
test

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,1
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [70]:
test.to_csv("submit.csv", index=False)