In [1]:
import numpy as np
import tflearn 

# Fetch the Titanic CSV through tflearn's dataset helper; the same file name is
# read back just below.
from tflearn.datasets import titanic
titanic.download_dataset("titanic_dataset.csv")

# Load the CSV, using column 0 as the label column. The rows come back as lists
# of strings (see the raw rows printed further down).
# categorical_labels / n_classes presumably turn the labels into 2-class one-hot
# vectors, matching the 2-unit softmax output used later -- confirm in the tflearn docs.
from tflearn.data_utils import load_csv
data, labels = load_csv("titanic_dataset.csv", target_column = 0,
                       categorical_labels = True, n_classes = 2)

Instructions for updating:
non-resource variables are not supported in the long term


In [14]:
# Show the feature matrix.
# NOTE(review): this cell carries execution count 14 and its output is the float32
# array produced by preprocessing, so it was run out of order. On a fresh
# top-to-bottom run `data` is still the raw list of string rows at this point.
data

array([[  1.    ,   1.    ,  29.    ,   0.    ,   0.    , 211.3375],
       [  1.    ,   0.    ,   0.9167,   1.    ,   2.    , 151.55  ],
       [  1.    ,   1.    ,   2.    ,   1.    ,   2.    , 151.55  ],
       ...,
       [  3.    ,   0.    ,  26.5   ,   0.    ,   0.    ,   7.225 ],
       [  3.    ,   0.    ,  27.    ,   0.    ,   0.    ,   7.225 ],
       [  3.    ,   0.    ,  29.    ,   0.    ,   0.    ,   7.875 ]],
      dtype=float32)

In [2]:
# Peek at raw rows 1-4 as loaded from the CSV: every field is still a string, and
# the name (index 1) and ticket (index 6) columns are still present.
data[1:5]

[['1',
  'Allison, Master. Hudson Trevor',
  'male',
  '0.9167',
  '1',
  '2',
  '113781',
  '151.5500'],
 ['1',
  'Allison, Miss. Helen Loraine',
  'female',
  '2',
  '1',
  '2',
  '113781',
  '151.5500'],
 ['1',
  'Allison, Mr. Hudson Joshua Creighton',
  'male',
  '30',
  '1',
  '2',
  '113781',
  '151.5500'],
 ['1',
  'Allison, Mrs. Hudson J C (Bessie Waldo Daniels)',
  'female',
  '25',
  '1',
  '2',
  '113781',
  '151.5500']]

In [3]:
# Preprocessing the data (define the process): We are not looking for the name of passenger or the ticket number
def preprocess(passengers, columns_to_delete):
    """Turn raw Titanic passenger rows into a float32 feature matrix.

    The rows are copied before any column is removed, so the caller's data is
    left untouched and the function can safely be called again on the same
    raw input.

    Args:
        passengers: Sequence of passenger rows (lists or tuples). Once the
            requested columns are removed, index 1 of every row is treated as
            the "sex" field; all other fields must be convertible to float.
        columns_to_delete: Iterable of column indices to drop from every row.

    Returns:
        np.ndarray of dtype float32 with one row per passenger. The sex field
        is encoded as 1.0 for "female" and 0.0 for any other value.

    Raises:
        IndexError: If a column index does not exist in a row.
        ValueError: If a remaining field cannot be converted to float.
    """
    # Work on copies so deleting columns never mutates the caller's rows.
    rows = [list(passenger) for passenger in passengers]
    # Delete from the highest index down so earlier pops do not shift later targets.
    for column_to_delete in sorted(columns_to_delete, reverse = True):
        for row in rows:
            row.pop(column_to_delete)
    for row in rows:
        # Encode the "sex" field as a float (expected at index 1 after the deletions).
        row[1] = 1. if row[1] == "female" else 0.
    return np.array(rows, dtype = "float32")

# Column indices to drop from each raw row: 1 is the passenger name, 6 is the ticket number.
to_ignore = [1,6]

# Replace the raw rows with the float32 feature matrix.
# NOTE(review): this rebinds `data`, so the cell is not re-runnable -- a second run
# would feed the already-preprocessed array back into preprocess(). Run it exactly
# once after loading the CSV.
data = preprocess(data, to_ignore)

In [4]:
# Feed-forward classifier over the 6 preprocessed passenger features.
net = tflearn.input_data(shape=[None, 6])  # batch dimension left open (None)
# Stack the two 32-unit hidden layers.
for _hidden_layer in range(2):
    net = tflearn.fully_connected(net, 32)
# Two-unit softmax output, one unit per label class.
net = tflearn.fully_connected(net, 2, activation="softmax")
# Add tflearn's regression (estimator) layer, leaving optimizer and loss at their defaults.
net = tflearn.regression(net)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [5]:
# Wrap the network in a tflearn DNN model.
model = tflearn.DNN(net)
# Train for 10 epochs in mini-batches of 16, reporting accuracy alongside the loss.
fit_options = {"n_epoch": 10, "batch_size": 16, "show_metric": True}
model.fit(data, labels, **fit_options)

Training Step: 819  | total loss: [1m[32m0.49442[0m[0m | time: 0.296s
| Adam | epoch: 010 | loss: 0.49442 - acc: 0.7952 -- iter: 1296/1309
Training Step: 820  | total loss: [1m[32m0.47708[0m[0m | time: 0.299s
| Adam | epoch: 010 | loss: 0.47708 - acc: 0.8094 -- iter: 1309/1309
--


In [6]:
# Two made-up passengers in the same 8-field raw layout as the CSV rows
# (name at index 1, ticket at index 6).
raw_passengers = [
    [3, "Jack Dawson", "male", 19, 0, 0, "N/A", 5.0000],
    [1, "Rose DeWitt Bukater", "female", 17, 1, 2, "N/A", 100.0000],
]
# Run both through the same preprocessing as the training data.
dicaprio, winslet = preprocess(raw_passengers, to_ignore)
# One prediction row per passenger; index 1 of a row is read as the survival probability.
pred = model.predict([dicaprio, winslet])
dicaprio_probs, winslet_probs = pred[0], pred[1]
print("Dicaprio Surviving Rate:", dicaprio_probs[1])
print("Winslet Surviving Rate:", winslet_probs[1])

Dicaprio Surviving Rate: 0.13026267
Winslet Surviving Rate: 0.7997492


### Implementing an RNN with tflearn

In [7]:
# import numpy as np
# import tflearn  # We already have those loaded
from __future__ import division, print_function, absolute_import

In [8]:
# Load MNIST, already split into training and test sets, with one-hot labels
import tflearn.datasets.mnist as mnist
X, Y, testX, testY = mnist.load_data(one_hot = True)  # X / Y: training images and their labels
# testX / testY: test images and their labels
# Reshape every image to 28 x 28 to match the [None, 28, 28] input layer of the LSTM network below
X = np.reshape(X, (-1, 28, 28))  # Training set
testX = np.reshape(testX, (-1, 28, 28))  # Testing set

Extracting mnist/train-images-idx3-ubyte.gz
Extracting mnist/train-labels-idx1-ubyte.gz
Extracting mnist/t10k-images-idx3-ubyte.gz
Extracting mnist/t10k-labels-idx1-ubyte.gz


In [9]:
# Take a look at a few of the test labels (one-hot vectors of length 10)
testY[2]  # The 1 sits at index 1, so the digit in this image is 1

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
testY[1]  # The 1 sits at index 2, so the digit in this image is 2

array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])

In [11]:
testY[0]  # The 1 sits at index 7, so the digit in this image is 7

array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.])

In [12]:
# Build the recurrent network.
# NOTE(review): this network is created in the same notebook session as the Titanic
# network above, and the fit() call in the next cell was recorded failing with
# "IndexError: list index out of range". Presumably both networks end up in
# TensorFlow's shared default graph; resetting the default graph before building
# this network is the usual remedy -- confirm before relying on it.
net = tflearn.input_data(shape = [None, 28, 28])  # 3D input: open batch dimension, then 28 x 28 per image
net = tflearn.lstm(net, 128, return_seq= True)  # return_seq=True presumably hands the full output sequence to the next LSTM -- confirm
net = tflearn.lstm(net, 128)   # Two stacked LSTM layers with 128 units each
net = tflearn.fully_connected(net, 10, activation = "softmax")  # 10 outputs, matching the length-10 one-hot labels
net = tflearn.regression(net, optimizer = "adam",
                        loss = "categorical_crossentropy", name = "output1")

Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API


In [13]:
# Build the model and train it for one epoch.
# This rebinds `model`, replacing the Titanic model from the earlier cells.
# validation_set = 0.1 presumably holds out 10% of the training data for validation -- confirm.
# NOTE(review): the recorded run of this cell ended with
# "IndexError: list index out of range", so the cells below have no output.
model = tflearn.DNN(net)
model.fit(X, Y, n_epoch = 1, validation_set = 0.1, show_metric = True, 
         snapshot_step = 100)

IndexError: list index out of range

In [None]:
# Run the model on every test image (needs the fit() call above to have succeeded)
result = model.predict(testX)

In [None]:
result[1]  # Model output for test image 1, to compare against its label testY[1]

In [None]:
testY[1]  # True one-hot label of test image 1 (the digit 2, as shown earlier)