# Import libraries

In [1]:
import pandas as pd
import numpy as np
import tflearn
from tflearn.data_utils import load_csv
from tflearn.datasets import titanic
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns

Instructions for updating:
non-resource variables are not supported in the long term
curses is not supported on this machine (please install/reinstall curses for an optimal experience)


In [2]:
# Fetch the Titanic data set into the working directory.
titanic.download_dataset('titanic_dataset.csv')

# Parse the CSV: column 0 is the target, returned as 2-class categorical
# labels; the first line of the file is a header row.
data, labels = load_csv(
    'titanic_dataset.csv',
    target_column=0,
    categorical_labels=True,
    n_classes=2,
    has_header=True,
)

df = pd.DataFrame(data)

# Hold out 20 % of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    labels,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Preview the first rows of the raw training features (columns are still labelled 0-7).
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7
772,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958
543,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5
289,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65
10,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525
147,1,"Harrington, Mr. Charles H",male,0,0,0,113796,42.4


# Transformationen
- Spalten 1 (Name) und 6 (Ticketnummer) entfernen
- Spalte 2 (Geschlecht) in Kategorie-Codes umcodieren
- Alle Spalten in den Typ float32 umwandeln

In [4]:
# Show the raw rows again so the column numbers used in the transformation list can be matched to their contents.
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7
772,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958
543,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5
289,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65
10,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525
147,1,"Harrington, Mr. Charles H",male,0,0,0,113796,42.4


In [5]:
def preprocess(r, sex_categories=('female', 'male')):
    """Turn a raw passenger frame into a float32 feature array.

    Steps:
      * drop column 1 and column 6 (silently skipped when absent),
      * replace the strings in column 2 by integer category codes,
      * cast every remaining column to ``np.float32``.

    Parameters
    ----------
    r : pandas.DataFrame
        Raw features with positional integer column labels. Not modified.
    sex_categories : sequence of str, optional
        Ordered vocabulary for column 2; a value's code is its position in
        this sequence. The default reproduces the codes the data-driven
        encoding yields when both values are present (female=0, male=1).

    Returns
    -------
    numpy.ndarray
        2-D ``float32`` array with one row per input row.

    Notes
    -----
    The vocabulary is pinned instead of being inferred from ``r``. Inferring
    it per call makes the code of a value depend on which other values happen
    to be in the frame (a frame holding only ``'male'`` would encode it as 0),
    so train, test and ad-hoc prediction frames could disagree.
    Values outside ``sex_categories`` (including missing values) get code -1.
    """
    # drop() returns a new frame, so the caller's DataFrame stays untouched.
    features = r.drop([1, 6], axis=1, errors='ignore')
    encoded = pd.Categorical(features[2], categories=list(sex_categories))
    features[2] = encoded.codes
    return features.astype(np.float32).values

In [6]:
# Convert the raw training frame into the numeric array the network consumes.
# Guarded so that re-running this cell is harmless: after the first run
# X_train is already an ndarray, and preprocess() only accepts a DataFrame.
if isinstance(X_train, pd.DataFrame):
    X_train = preprocess(X_train)
pd.DataFrame(X_train).head()

Unnamed: 0,0,1,2,3,4,5
0,3.0,1.0,17.0,0.0,0.0,7.8958
1,2.0,1.0,36.0,0.0,0.0,10.5
2,1.0,0.0,18.0,0.0,2.0,79.650002
3,1.0,1.0,47.0,1.0,0.0,227.524994
4,1.0,1.0,0.0,0.0,0.0,42.400002


In [7]:
# Dimensions of the preprocessed training feature array.
X_train.shape

(1047, 6)

In [8]:
# Dimensions of the training label array (load_csv was called with categorical_labels=True, n_classes=2).
y_train.shape

(1047, 2)

# Aufbau des neuronalen Netzes

In [9]:
import tensorflow as tf

# Network definition: 6 input features -> two dense layers of 32 units -> 2-way softmax.
# The hidden layers pass no activation argument, so tflearn's default applies.
input_layer = tflearn.input_data(shape=[None, 6])
hidden_1 = tflearn.fully_connected(input_layer, 32)
hidden_2 = tflearn.fully_connected(hidden_1, 32)
class_probs = tflearn.fully_connected(hidden_2, 2, activation="softmax")

# Attach the training layer with tflearn's default settings; `net` is what the DNN wrappers consume.
net = tflearn.regression(class_probs)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


# Train it

In [10]:
# Definieren das model
model = tflearn.DNN(net)
# Start training (apply gradient descent algorithm)
model.fit(X_train, y_train, n_epoch=20, batch_size=32, show_metric=True)
# Mit cross validation
model2 = tflearn.DNN(net)
model2.fit(X_train, y_train, n_epoch=20, batch_size=32, show_metric=True, validation_set=0.1)

Training Step: 599  | time: 0.154s
| Adam | epoch: 020 | loss: 0.00000 - acc: 0.0000 -- iter: 928/942
Training Step: 600  | time: 1.167s
| Adam | epoch: 020 | loss: 0.00000 - acc: 0.0000 | val_loss: 0.44775 - val_acc: 0.8286 -- iter: 942/942
--


# Modell bewerten

In [11]:
# Apply the same preprocessing to the held-out rows.
# Guarded so that re-running this cell is harmless: after the first run
# X_test is already an ndarray, and preprocess() only accepts a DataFrame.
if isinstance(X_test, pd.DataFrame):
    X_test = preprocess(X_test)

# Score both models on the training data and on the held-out test data.
metric_train = model.evaluate(X_train, y_train)
metric_test = model.evaluate(X_test, y_test)
metric_train_1 = model2.evaluate(X_train, y_train)
metric_test_1 = model2.evaluate(X_test, y_test)

In [12]:
# Report the first entry of each evaluate() result, one line per model and data split.
for template, metric in (
    ('Model 1 Accuracy on train set: %.9f', metric_train),
    ("Model 1 Accuracy on test set: %.9f", metric_test),
    ('Model 2 Accuracy on train set: %.9f', metric_train_1),
    ("Model 2 Accuracy on test set: %.9f", metric_test_1),
):
    print(template % metric[0])

Model 1 Accuracy on train set: 0.786055396
Model 1 Accuracy on test set: 0.763358779
Model 2 Accuracy on train set: 0.793696276
Model 2 Accuracy on test set: 0.763358779


# Fiktive Daten für Leo und Kate erstellen

In [13]:
# Hand-written passenger records for the two film characters, laid out in the
# same eight columns as the raw data set.
dicaprio = [3, 'Jack Dawson', 'male', 19, 0, 0, 'N/A', 5.0000]
winslet = [1, 'Rose DeWitt Bukater', 'female', 17, 1, 2, 'N/A', 100.0000]

# Run both rows through the same preprocessing as the training data; the
# resulting two-row array unpacks into one feature vector per passenger.
passengers = pd.DataFrame([dicaprio, winslet])
dicaprio, winslet = preprocess(passengers)

# Predict class probabilities and print the entry at index 1 of each output
# (presumably the "survived" class - confirm against the label encoding).
pred = model.predict([dicaprio, winslet])
print("DiCaprio Surviving Rate:", pred[0][1])
print("Winslet Surviving Rate:", pred[1][1])

DiCaprio Surviving Rate: 0.12484403
Winslet Surviving Rate: 0.8591321
