In [1]:
# Imports
import os, sys

# pandas
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt

#keras
from keras.models import Model
from keras.layers.core import Dense, Activation, Dropout
from keras.layers import Input, concatenate, Flatten, BatchNormalization
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
sys.path.append('../../lotufo')
from my_keras_utilities import (get_available_gpus, load_model_and_history, save_model_and_history, TrainingPlotter)

Using TensorFlow backend.


## Load data

In [2]:
titanic_train = pd.read_csv('../Data/train.csv')
titanic_test = pd.read_csv('../Data/test.csv')

# print(titanic_train.info())
# print('-'*100)
# print(titanic_test.info())
# # titanic_train.head(-1)

## Remove not interesting data

In [3]:
titanic_train = titanic_train.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
titanic_test = titanic_test.drop(['Name', 'Ticket', 'Cabin'], axis=1)
# print(titanic_train.info())
# print('-'*100)
# print(titanic_test.info())

## Remove rows with missing data

In [4]:
titanic_train = titanic_train[titanic_train.Age.notnull()]
titanic_test = titanic_test[titanic_test.Age.notnull()]

titanic_train = titanic_train[titanic_train.Embarked.notnull()]
titanic_test = titanic_test[titanic_test.Embarked.notnull()]

# print(titanic_train.info())
# print('-'*100)
# print(titanic_test.info())
titanic_train.head(1)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S


## Normalize categorical data

In [5]:
#train
Pclass_classes_train, Pclass_norm_train = np.unique(titanic_train.Pclass.as_matrix(), return_inverse=True)
Sex_classes_train, Sex_norm_train = np.unique(titanic_train.Sex.as_matrix(), return_inverse=True)
# SibSp_classes_train, SibSp_norm_train = np.unique(titanic_train.SibSp.as_matrix(), return_inverse=True)
# Parch_classes_train, Parch_norm_train = np.unique(titanic_train.Parch.as_matrix(), return_inverse=True)
Embarked_classes_train, Embarked_norm_train = np.unique(titanic_train.Embarked.as_matrix(), return_inverse=True)

#test
Pclass_classes_test, Pclass_norm_test = np.unique(titanic_test.Pclass.as_matrix(), return_inverse=True)
Sex_classes_test, Sex_norm_test = np.unique(titanic_test.Sex.as_matrix(), return_inverse=True)
# SibSp_classes_test, SibSp_norm_test = np.unique(titanic_test.SibSp.as_matrix(), return_inverse=True)
# Parch_classes_test, Parch_norm_test = np.unique(titanic_test.Parch.as_matrix(), return_inverse=True)
Embarked_classes_test, Embarked_norm_test = np.unique(titanic_test.Embarked.as_matrix(), return_inverse=True)

## Normalize non categorical data

In [None]:
#train
Pclass_classes_train, Pclass_norm_train = np.unique(titanic_train.Pclass.as_matrix(), return_inverse=True)


#test
Pclass_classes_test, Pclass_norm_test = np.unique(titanic_test.Pclass.as_matrix(), return_inverse=True)

In [7]:
#Survived Pclass Sex Age SibSp Parch Fare Embarked
def build_keras_model():
    Pclass_in = Input(shape=(1,), dtype='int64', name='Pclass_in')
    x = Embedding(Pclass_classes_train.size, 2, input_length=1, name='Pclass_embedding')(Pclass_in)
    Pclass_emb = Flatten(name='Pclass_flatten')(x)
    
    Sex_in = Input(shape=(1,), name='Sex_in')
    Sex_out = Dense(1, input_dim=1)(Sex_in)
    
    Age_in = Input(shape=(1,), name='Age_in')
    Age_out = Dense(1, input_dim=1)(Age_in)
    
    SibSp_in = Input(shape=(1,), name='SibSp_in')
    SibSp_out = Dense(1, input_dim=1)(SibSp_in)
    
    Parch_in = Input(shape=(1,), name='Parch_in')
    Parch_out = Dense(1, input_dim=1)(Parch_in)
    
    Fare_in = Input(shape=(1,), name='Fare_in')
    Fare_out = Dense(1, input_dim=1)(Fare_in)
    
    Embarked_in = Input(shape=(1,), dtype='int64', name='Embarked')
    x = Embedding(3, 2, input_length=1, name='Embarked_embedding')(Embarked_in)
    Embarked_emb = Flatten(name='Embarked_flatten')(x)
    
    
    xin = concatenate([Pclass_emb, Sex_out, Age_out, SibSp_out, Parch_out, Fare_out, Embarked_emb])
    x = Dense(1000, kernel_initializer='uniform', activation='relu')(xin)
    x = Dense(500, kernel_initializer='uniform', activation='relu')(x)
    x_out = Dense(1, activation='sigmoid')(x)
    
    return Model([Pclass_in, Sex_in, Age_in, SibSp_in, Parch_in, Fare_in, Embarked_in], x_out)

model_ti = build_keras_model()
model_ti.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
Pclass_in (InputLayer)           (None, 1)             0                                            
____________________________________________________________________________________________________
Embarked (InputLayer)            (None, 1)             0                                            
____________________________________________________________________________________________________
Pclass_embedding (Embedding)     (None, 1, 2)          6           Pclass_in[0][0]                  
____________________________________________________________________________________________________
Sex_in (InputLayer)              (None, 1)             0                                            
___________________________________________________________________________________________

In [40]:
num_records = len(titanic_train['Survived'].as_matrix())
train_ratio = 0.7
train_size = int(train_ratio * num_records)


X_train_p = [Pclass_norm_train[:train_size], Sex_norm_train[:train_size], titanic_train['Age'].as_matrix()[:train_size],
             titanic_train['SibSp'].as_matrix()[:train_size], titanic_train['Parch'].as_matrix()[:train_size],
             titanic_train['Fare'].as_matrix()[:train_size], Embarked_norm_train[:train_size]]
y_train = titanic_train['Survived'].as_matrix()[:train_size]

X_val_p = [Pclass_norm_train[train_size:], Sex_norm_train[train_size:], titanic_train['Age'].as_matrix()[train_size:],
             titanic_train['SibSp'].as_matrix()[train_size:], titanic_train['Parch'].as_matrix()[train_size:],
             titanic_train['Fare'].as_matrix()[train_size:], Embarked_norm_train[train_size:]]
y_val = titanic_train['Survived'].as_matrix()[train_size:]

In [41]:
# For a mean squared error regression problem
model_ti.compile(optimizer='rmsprop', loss='mse')

# Train the model, iterating on the data in batches of 32 samples
model_ti.fit(X_train_p, y_train, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x25b1dabaa90>

In [42]:
loss = model_ti.evaluate(X_val_p, y_val, verbose=0)
print('Final loss:',loss)

W = model_ti.get_weights()
print('Bias:\n', W[1])
print('W:\n', W[0])

Final loss: 0.10290114888
Bias:
 [[-0.42652225  0.42300346]
 [-0.14842899  0.19342794]
 [ 0.03573124 -0.0676112 ]]
W:
 [[-0.2773459  -0.18338183]
 [-0.22380771 -0.31480265]
 [ 0.23386265  0.26994267]]


In [43]:
Y_hat = model_ti.predict(X_val_p)

In [45]:
Y_hat
# y_val

accuracy = sum((Y_hat.reshape(len(y_val))>0.5) == y_val)/(float(len(y_val)))
accuracy



0.83644859813084116

In [40]:
# from keras.utils import plot_model
# plot_model(model_rs, to_file='model_rs.png')
# print pydot.find_graphviz()

# from IPython.display import SVG
# from keras.utils.vis_utils import model_to_dot
# SVG(model_to_dot(model_rs).create(prog='dot', format='svg'))

In [57]:
class MyCb(TrainingPlotter):
    
    def on_epoch_end(self, epoch, logs={}):
        super().on_epoch_end(epoch, logs)
        
def train_network(model, X_train, y_train, Xval, yval, 
                  model_name = None,
                  loss="categorical_crossentropy",
                  opt='rmsprop', batch_size=60, nepochs=100, patience=10, nr_seed=20170522, 
                  shuffle=True,
                  reset=False, ploss=1.0):

    do_plot = (ploss > 0.0)
    
    model_fn = model_name + '.model'
    if reset and os.path.isfile(model_fn):
        os.unlink(model_name + '.model')
        
    if not os.path.isfile(model_fn):
        # initialize the optimizer and model
        print("[INFO] compiling model...")
        model.compile(loss=loss, optimizer=opt, metrics=["accuracy"])    

        # History, checkpoint, earlystop, plot losses:
        cb = MyCb(n=1, filepath=model_name, patience=patience, plot_losses=do_plot)
        
    else:
        print("[INFO] loading model...")
        model, cb = load_model_and_history(model_name)
        cb.patience = patience

    past_epochs = cb.get_nepochs()
    tr_epochs = nepochs - past_epochs
    
    if do_plot:
        import matplotlib.pyplot as plot
        vv = 0
        fig = plot.figure(figsize=(15,6))
        plot.ylim(0.0, ploss)
        plot.xlim(0, nepochs)
        plot.grid(True)
    else:
        vv = 2

    print("[INFO] training for {} epochs...".format(tr_epochs))
    try:
        h = model.fit(X_train, y_train, batch_size=60, epochs=tr_epochs, verbose=0, 
                      validation_data=(Xval, yval),
                      shuffle=shuffle,
                      callbacks=[cb])
    except KeyboardInterrupt:
        pass

    return model, cb

NameError: name 'TrainingPlotter' is not defined

In [53]:
num_records = len(titanic_train['Survived'].as_matrix())
train_ratio = 0.9
train_size = int(train_ratio * num_records)


X_train_p = [Pclass_norm_train[:train_size], Sex_norm_train[:train_size], titanic_train['Age'].as_matrix()[:train_size],
             titanic_train['SibSp'].as_matrix()[:train_size], titanic_train['Parch'].as_matrix()[:train_size],
             titanic_train['Fare'].as_matrix()[:train_size], Embarked_norm_train[:train_size]]
y_train = titanic_train['Survived'].as_matrix()[:train_size]

X_val_p = [Pclass_norm_train[train_size:], Sex_norm_train[train_size:], titanic_train['Age'].as_matrix()[train_size:],
             titanic_train['SibSp'].as_matrix()[train_size:], titanic_train['Parch'].as_matrix()[train_size:],
             titanic_train['Fare'].as_matrix()[train_size:], Embarked_norm_train[train_size:]]
y_val = titanic_train['Survived'].as_matrix()[train_size:]

In [56]:
model_name = '../../models/rossmann'
fit_params = {
    'model_name': model_name,
    'loss': 'mean_absolute_error',
    'opt':        Adam(), 
    'batch_size': 128, 
    'nepochs':    5,
    'patience':   5,
    'ploss':      0.015,
    'shuffle':    False,
    'reset':      True,
}

train_network(model_rs, X_train_p, y_train, X_val_p, y_val, **fit_params);

[INFO] compiling model...


NameError: name 'MyCb' is not defined

In [11]:
# titanic_train.Sex.as_matrix()
# titanic_train.Sex = 1
import graphviz 

In [None]:
classes, i_norm = np.unique(titanic_train.Sex.as_matrix(), return_inverse=True)
# titanic_train.Sex_norm = np.unique(titanic_train.Sex.as_matrix(), return_inverse=True)
titanic_train.Sex_norm = i_norm
i_norm

In [None]:
#normalize
titanic_train = titanic_train.as_matrix()
X_norm = np.empty_like(titanic_train)
ndex_norm_coll = [1, 2, -1]
for i in ndex_norm_coll: # para cada coluna (atributo)
    classes, i_norm = np.unique(titanic_train[:,i], return_inverse=True)
    titanic_train[:,i] = i_norm
    


In [None]:
# titanic_train.values[:,0]
titanic_train.as_matrix([5])

In [None]:
class trainset(object):
    PassengerId = 0

In [None]:
NomeDaClasse.seq