In [33]:
import numpy as np
from sklearn import preprocessing

import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import pandas as pd


In [34]:
raw_csv = pd.read_csv('train.csv')
raw_csv.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [35]:
raw_csv.drop(['Name','Ticket','Cabin'],axis=1, inplace=True)
raw_csv.head()

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,1,0,3,male,22.0,1,0,7.25,S
1,2,1,1,female,38.0,1,0,71.2833,C
2,3,1,3,female,26.0,0,0,7.925,S
3,4,1,1,female,35.0,1,0,53.1,S
4,5,0,3,male,35.0,0,0,8.05,S


In [36]:
embarked_dummies = pd.get_dummies(raw_csv.Embarked, prefix='Embarked')

sex_dummies = pd.get_dummies(raw_csv.Sex, prefix='Sex')

raw_csv.drop(['Sex','Embarked'],axis=1, inplace=True)

encoded_data = df = pd.concat([raw_csv, sex_dummies, embarked_dummies], axis=1)

encoded_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,1,0,3,22.0,1,0,7.25,0,1,0,0,1
1,2,1,1,38.0,1,0,71.2833,1,0,1,0,0
2,3,1,3,26.0,0,0,7.925,1,0,0,0,1
3,4,1,1,35.0,1,0,53.1,1,0,0,0,1
4,5,0,3,35.0,0,0,8.05,0,1,0,0,1


In [37]:
encoded_data = encoded_data[['PassengerId','Pclass','Age','SibSp','Parch','Fare','Sex_female','Embarked_Q','Embarked_C','Survived']]
encoded_data.dropna(subset = ["Age"], inplace=True)
encoded_data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare,Sex_female,Embarked_Q,Embarked_C,Survived
0,1,3,22.0,1,0,7.25,0,0,0,0
1,2,1,38.0,1,0,71.2833,1,0,1,1
2,3,3,26.0,0,0,7.925,1,0,0,1
3,4,1,35.0,1,0,53.1,1,0,0,1
4,5,3,35.0,0,0,8.05,0,0,0,0


In [38]:
encoded_data.to_csv('encoded_data.csv',index=False, header=False)

In [43]:
encoded_data_csv = np.loadtxt('encoded_data.csv', delimiter = ',')

unscaled_inputs_all = encoded_data_csv[:,1:-1]
targets_all = encoded_data_csv[:,-1]

In [45]:
scaled_inputs = preprocessing.scale(unscaled_inputs_all)

In [50]:
samples_count = scaled_inputs.shape[0]

train_samples_count = int(.85*samples_count)
validation_samples = samples_count - train_samples_count

In [51]:
train_inputs = scaled_inputs[:train_samples_count]
train_targets= targets_all[:train_samples_count]

validation_inputs = scaled_inputs[train_samples_count:]
validation_targets = targets_all[train_samples_count:]

In [52]:
np.savez('train_data', inputs = train_inputs, targets = train_targets)
np.savez('validation_data', inputs = validation_inputs, targets = validation_targets)


In [53]:
npz = np.load('train_data.npz')

train_inputs = npz['inputs'].astype(np.float)
train_targets = npz['targets'].astype(np.int)

npz = np.load('validation_data.npz')

validation_inputs = npz['inputs'].astype(np.float)
validation_targets = npz['targets'].astype(np.int)

In [55]:
input_size = 8
output_size = 2
hidden_layer_size = 150

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='softmax')    
])

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy', metrics=['accuracy'])

batch_size = 200
max_epochs = 100
early_stopping = tf.keras.callbacks.EarlyStopping(patience = 5)

model.fit(train_inputs , train_targets,
         batch_size = batch_size,
         epochs = max_epochs,
         callbacks = [early_stopping],
         validation_data=(validation_inputs,validation_targets),
         verbose = 2)

Epoch 1/100
4/4 - 0s - loss: 0.6657 - accuracy: 0.6122 - val_loss: 0.5821 - val_accuracy: 0.7685
Epoch 2/100
4/4 - 0s - loss: 0.5766 - accuracy: 0.7541 - val_loss: 0.5132 - val_accuracy: 0.8426
Epoch 3/100
4/4 - 0s - loss: 0.5287 - accuracy: 0.7673 - val_loss: 0.4822 - val_accuracy: 0.8056
Epoch 4/100
4/4 - 0s - loss: 0.5049 - accuracy: 0.7805 - val_loss: 0.4620 - val_accuracy: 0.7870
Epoch 5/100
4/4 - 0s - loss: 0.4858 - accuracy: 0.7937 - val_loss: 0.4459 - val_accuracy: 0.8056
Epoch 6/100
4/4 - 0s - loss: 0.4708 - accuracy: 0.8069 - val_loss: 0.4346 - val_accuracy: 0.8148
Epoch 7/100
4/4 - 0s - loss: 0.4604 - accuracy: 0.7970 - val_loss: 0.4234 - val_accuracy: 0.8241
Epoch 8/100
4/4 - 0s - loss: 0.4550 - accuracy: 0.7871 - val_loss: 0.4156 - val_accuracy: 0.8333
Epoch 9/100
4/4 - 0s - loss: 0.4518 - accuracy: 0.7888 - val_loss: 0.4064 - val_accuracy: 0.8333
Epoch 10/100
4/4 - 0s - loss: 0.4494 - accuracy: 0.7888 - val_loss: 0.3965 - val_accuracy: 0.8333
Epoch 11/100
4/4 - 0s - loss:

<tensorflow.python.keras.callbacks.History at 0x214d1ea3288>