In [43]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf

In [44]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import BinaryCrossentropy

In [45]:
train = pd.read_csv('../titanic-raw/train.csv')
test = pd.read_csv('../titanic-raw/test.csv')


In [46]:
train.head()


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [47]:
test.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [48]:
train.shape , test.shape

((891, 12), (418, 11))

In [49]:
print(list(train.columns))

['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']


In [50]:
train.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [51]:
train.describe()


Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [52]:
train.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [53]:
test.isnull().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

In [54]:
#fixing train_data

train2 = train.copy()
train2['Age'] = train2['Age'].replace(np.NAN, train2['Age'].mean())
train['Age'].mean() , train2['Age'].mean()
train2['Cabin'] = train2['Cabin'].replace(np.NAN, 'XX')
train2['Cabin'].value_counts()
train2['Embarked'] = train2['Embarked'].replace(np.NAN, 'YY')
train2['Embarked'].value_counts()
train2.isnull().sum()


PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Cabin          0
Embarked       0
dtype: int64

In [55]:
#cleaning test data

test2 = test.copy()
test2['Age'] = test2['Age'].replace(np.NAN, test2['Age'].mean())
test['Age'].mean() , test2['Age'].mean()
test2['Cabin'] = test2['Cabin'].replace(np.NAN, 'XX')
test2['Cabin'].value_counts(dropna = False)
test2['Fare'] = test2['Fare'].replace(np.NAN, test2['Fare'].mean())
test['Fare'].mean() , test2['Fare'].mean()
test2.isnull().sum()

PassengerId    0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Cabin          0
Embarked       0
dtype: int64

In [56]:
trainLM = train2.copy()
testLM = test2.copy()

In [57]:
trainLM.Sex.value_counts()

m1 = {'male':0, 'female':1}
trainLM['Sex'] = trainLM['Sex'].map(m1)
testLM['Sex'] = testLM['Sex'].map(m1) 

# note : running this snippet 2nd time creates NaN value
print(trainLM['Sex'].value_counts())
testLM['Sex'].value_counts()
trainLM.dtypes
testLM.dtypes
trainLM2 = trainLM.iloc[:, [1,2,4,5,6,7,9,]]
print(trainLM2)

0    577
1    314
Name: Sex, dtype: int64
     Survived  Pclass  Sex        Age  SibSp  Parch     Fare
0           0       3    0  22.000000      1      0   7.2500
1           1       1    1  38.000000      1      0  71.2833
2           1       3    1  26.000000      0      0   7.9250
3           1       1    1  35.000000      1      0  53.1000
4           0       3    0  35.000000      0      0   8.0500
..        ...     ...  ...        ...    ...    ...      ...
886         0       2    0  27.000000      0      0  13.0000
887         1       1    1  19.000000      0      0  30.0000
888         0       3    1  29.699118      1      2  23.4500
889         1       1    0  26.000000      0      0  30.0000
890         0       3    0  32.000000      0      0   7.7500

[891 rows x 7 columns]


In [58]:
trainLM2.isnull().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
dtype: int64

In [59]:
testLM2 = testLM.iloc[:, [1,3,4,5,6,8]]
print(testLM2)

     Pclass  Sex       Age  SibSp  Parch      Fare
0         3    0  34.50000      0      0    7.8292
1         3    1  47.00000      1      0    7.0000
2         2    0  62.00000      0      0    9.6875
3         3    0  27.00000      0      0    8.6625
4         3    1  22.00000      1      1   12.2875
..      ...  ...       ...    ...    ...       ...
413       3    0  30.27259      0      0    8.0500
414       1    1  39.00000      0      0  108.9000
415       3    0  38.50000      0      0    7.2500
416       3    0  30.27259      0      0    8.0500
417       3    0  30.27259      1      1   22.3583

[418 rows x 6 columns]


In [60]:
#K time to prep train data for neural network
X=tf.convert_to_tensor(trainLM2.drop('Survived',axis=1))
Y=tf.convert_to_tensor(trainLM2['Survived'])


In [61]:
type(X)

tensorflow.python.framework.ops.EagerTensor

In [62]:
X

<tf.Tensor: shape=(891, 6), dtype=float64, numpy=
array([[ 3.        ,  0.        , 22.        ,  1.        ,  0.        ,
         7.25      ],
       [ 1.        ,  1.        , 38.        ,  1.        ,  0.        ,
        71.2833    ],
       [ 3.        ,  1.        , 26.        ,  0.        ,  0.        ,
         7.925     ],
       ...,
       [ 3.        ,  1.        , 29.69911765,  1.        ,  2.        ,
        23.45      ],
       [ 1.        ,  0.        , 26.        ,  0.        ,  0.        ,
        30.        ],
       [ 3.        ,  0.        , 32.        ,  0.        ,  0.        ,
         7.75      ]])>

In [63]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(X)


In [64]:
#making our model

model=Sequential([
    normalizer,
    tf.keras.layers.Input(shape=(6,)),
    Dense(units=20,activation="sigmoid"),
    Dense(units=15,activation="sigmoid"),
    Dense(units=5,activation="sigmoid"),
    Dense(units=1,activation="sigmoid"),

])


In [65]:
model.compile(loss=BinaryCrossentropy())


In [66]:
model.fit(X,Y,epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f05e6dee990>