# Titanic StatML_Adaboost

## 概要：
- 运行时间比较长的训练，还是应该弄个TensorBoard，方便监控结果，免得每次都需要用鼠标手动托页面查看最新的运行结果。
- 模型为全链接神经网络和统计学习方法。

## Result:


Reference: 
1. https://www.kaggle.com/c/titanic#tutorials
2. https://www.kaggle.com/sinakhorami/titanic-best-working-classifier
3. https://www.kaggle.com/arthurtok/introduction-to-ensembling-stacking-in-python/notebook


## 1. Preprocess

### Import pkgs

In [1]:
import os
import time
import pdb

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display

%matplotlib inline

random_state = 258

### Import original data as DataFrame

In [2]:
data_train = pd.read_csv('./input/train.csv')
data_test = pd.read_csv('./input/test.csv')

display(data_train.head(2))
display(data_test.head(2))
data_train.loc[2, 'Ticket']

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S


'STON/O2. 3101282'

### Show columns of dataframe

In [3]:
data_train_original_col = data_train.columns
data_test_original_col = data_test.columns
print(data_train_original_col)
print(data_test_original_col)
# data_train0 = data_train.drop(data_train_original_col, axis = 1)
# data_test0  = data_test.drop(data_test_original_col, axis = 1)
# display(data_train0.head(2))
# display(data_test0.head(2))

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')
Index(['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch',
       'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')


### Preprocess features

In [4]:
full_data = [data_train, data_test]

In [5]:
# Pclass
for dataset in full_data:
    temp = dataset[dataset['Pclass'].isnull()]
    if len(temp) == 0:
        print('Do not have null value!')
    else:
        temp.head(2)
        
for dataset in full_data:
    dataset['a_Pclass'] = dataset['Pclass']
#     display(dataset.head())

Do not have null value!
Do not have null value!


In [6]:
# Name
for dataset in full_data:
    dataset['a_Name_Length'] = dataset['Name'].apply(len)
#     display(dataset.head(2))

In [7]:
# Sex
for dataset in full_data:
    dataset['a_Sex'] = dataset['Sex'].map({'female': 0, 'male': 1}).astype(int)
#     display(dataset.head(2))

In [8]:
# Age
def is_child(age):
    if age >= 0 and age <=15:
        return 1
    return 0

def age_level(age):
    if age >= 0 and age <=16:
        return 0
    elif age > 16 and age <=32:
        return 1
    elif age > 32 and age <=48:
        return 2
    else:
        return 3

for dataset in full_data:
    dataset['a_Age'] = dataset['Age'].fillna(-1)
    dataset['a_Have_Age'] = dataset['Age'].isnull().map({True: 0, False: 1}).astype(int)
    dataset['a_Is_Child'] = dataset['a_Age'].apply(is_child)
    dataset['a_Age_Level'] = dataset['a_Age'].apply(age_level)
#     display(dataset[dataset['Age'].isnull()].head(2))
#     display(dataset[dataset['Age']<=15].head(2))
#     display(dataset[dataset['a_Age_Level']>2].head(2))
#     display(dataset.head(2))

In [9]:
# SibSp and Parch
for dataset in full_data:
    dataset['a_FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1
    dataset['a_IsAlone'] = dataset['a_FamilySize'].apply(lambda x: 1 if x<=1 else 0)
#     display(dataset.head(2))

In [10]:
# Ticket(Very one have a ticket)
for dataset in full_data:
    dataset['a_Have_Ticket'] = dataset['Ticket'].isnull().map({True: 0, False: 1}).astype(int)
#     display(dataset[dataset['Ticket'].isnull()].head(2))
#     display(dataset.head(2))

In [11]:
# Fare
def fare_level(fare):
    if fare < 0:
        return -1
    elif fare >= 0 and fare <=7.91:
        return 0
    elif fare > 7.91 and fare <=14.454:
        return 1
    elif fare > 14.454 and fare <=31:
        return 2
    else:
        return 3

for dataset in full_data:
    dataset['a_Fare'] = dataset['Fare'].fillna(-1)
    dataset['a_Fare_Level'] = dataset['a_Fare'].apply(fare_level)
    dataset['a_Have_Fare'] = dataset['Fare'].isnull().map({True: 0, False: 1}).astype(int)
    dataset['a_Income'] = dataset['a_Fare'] / dataset['a_FamilySize']
#     display(dataset[dataset['Fare'].isnull()].head(2))
    display(dataset[dataset['Fare']==0].head(2))
#     display(dataset.head(2))

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,...,a_Have_Age,a_Is_Child,a_Age_Level,a_FamilySize,a_IsAlone,a_Have_Ticket,a_Fare,a_Fare_Level,a_Have_Fare,a_Income
179,180,0,3,"Leonard, Mr. Lionel",male,36.0,0,0,LINE,0.0,...,1,0,2,1,1,1,0.0,0,1,0.0
263,264,0,1,"Harrison, Mr. William",male,40.0,0,0,112059,0.0,...,1,0,2,1,1,1,0.0,0,1,0.0


Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,...,a_Have_Age,a_Is_Child,a_Age_Level,a_FamilySize,a_IsAlone,a_Have_Ticket,a_Fare,a_Fare_Level,a_Have_Fare,a_Income
266,1158,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0.0,,...,0,0,3,1,1,1,0.0,0,1,0.0
372,1264,1,"Ismay, Mr. Joseph Bruce",male,49.0,0,0,112058,0.0,B52 B54 B56,...,1,0,3,1,1,1,0.0,0,1,0.0


In [12]:
# Cabin
for dataset in full_data:
    dataset['a_Have_Cabin'] = dataset['Cabin'].isnull().map({True: 0, False: 1}).astype(int)
#     display(dataset[dataset['Cabin'].isnull()].head(2))
#     display(dataset.head(2))

In [13]:
# Embarked
for dataset in full_data:
#     dataset['Embarked'] = dataset['Embarked'].fillna('N')
    dataset['a_Embarked'] = dataset['Embarked'].map( {'S': 0, 'C': 1, 'Q': 2, None: 3} ).astype(int)
    dataset['a_Have_Embarked'] = dataset['Embarked'].isnull().map({True: 0, False: 1}).astype(int)
#     display(dataset[dataset['Embarked'].isnull()].head(2))
#     display(dataset.head(2))

Name words segmentation and one-hote

In [14]:
# Name words segmentation
import re
name_words = []

# Inorder to allign columns of data_train and data_test, only data_train to fetch word
for name in data_train['Name']:
#     print(name)
    words = re.findall(r"[\w']+", name)
#     print(len(words))
#     print(words)
    for w in words:
        if w not in name_words:
            name_words.append(w)
# print(len(name_words))
name_words.sort()
# print(name_words)

In [15]:
# Add columns
for dataset in full_data:
    for w in name_words:
        col_name = 'a_Name_' + w
        dataset[col_name] = 0
    dataset.head(1)

In [16]:
# Name words one-hote
for dataset in full_data:
    for i, row in dataset.iterrows():
    #     print(row['Name'])
        words = re.findall(r"[\w']+", row['Name'])
        for w in words:
            if w in name_words:
                col_name = 'a_Name_' + w
                dataset.loc[i, col_name] = 1
#     display(dataset[dataset['a_Name_Braund'] == 1])

Cabin segmentation and one-hote

In [17]:
# Get cabin segmentation words
import re
cabin_words = []

# Inorder to allign columns of data_train and data_test, only data_train to fetch number
for c in data_train['Cabin']:
#     print(c)
    if c is not np.nan:
        word = re.findall(r"[a-zA-Z]", c)
#         print(words[0])
        cabin_words.append(word[0])
print(len(cabin_words))
cabin_words.sort()
print(np.unique(cabin_words))
cabin_words_unique = list(np.unique(cabin_words))

204
['A' 'B' 'C' 'D' 'E' 'F' 'G' 'T']


In [18]:
def get_cabin_word(cabin):
    if cabin is not np.nan:
        word = re.findall(r"[a-zA-Z]", cabin)
        if word:
            return cabin_words_unique.index(word[0])
    return -1

for dataset in full_data:
    dataset['a_Cabin_Word'] = dataset['Cabin'].apply(get_cabin_word)
    # dataset['a_Cabin_Word'].head(100)

In [19]:
def get_cabin_number(cabin):
    if cabin is not np.nan:
        word = re.findall(r"[0-9]+", cabin)
        if word:
            return int(word[0])
    return -1

for dataset in full_data:
    dataset['a_Cabin_Number'] = dataset['Cabin'].apply(get_cabin_number)
    # dataset['a_Cabin_Number'].head(100)

In [20]:
# Clean data
# Reference: 
#    1. https://www.kaggle.com/sinakhorami/titanic-best-working-classifier
#    2. https://www.kaggle.com/arthurtok/introduction-to-ensembling-stacking-in-python/notebook
# full_data = [data_train, data_test]
# for dataset in full_data:
#     dataset['a_Name_length'] = dataset['Name'].apply(len)
#     #dataset['Sex'] = (dataset['Sex']=='male').astype(int)
#     dataset['a_Sex'] = dataset['Sex'].map( {'female': 0, 'male': 1} ).astype(int)
#     dataset['a_Age'] = dataset['Age'].fillna(0)
#     dataset['a_Age_IsNull'] = dataset['Age'].isnull()
#     dataset['a_FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1
#     dataset['a_IsAlone'] = dataset['a_FamilySize'].apply(lambda x: 1 if x<=1 else 0)
#     dataset['a_Fare'] = dataset['Fare'].fillna(dataset['Fare'].median())
#     #dataset['Has_Cabin'] = dataset['Cabin'].apply(lambda x: 1 if type(x) == str else 0) # same as below
#     dataset['a_Has_Cabin'] = dataset['Cabin'].apply(lambda x: 0 if type(x) == float else 1)
#     dataset['a_Has_Embarked'] = dataset['Embarked'].isnull()
#     dataset['Embarked'] = dataset['Embarked'].fillna('N')
#     dataset['a_Embarked'] = dataset['Embarked'].map( {'S': 0, 'C': 1, 'Q': 2, 'N': 3} ).astype(int)
#     dataset['Embarked'] = dataset['Embarked'].fillna('S')
    
# display(data_train.head(2))
# display(data_test.head(2))

In [21]:
survived = data_train['Survived']
data_train0 = data_train.drop(data_train_original_col, axis = 1)
data_test0  = data_test.drop(data_test_original_col, axis = 1)
display(data_train0.head(2))
display(data_test0.head(2))

features = data_train0
display(features.head(2))

Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_Is_Child,a_Age_Level,a_FamilySize,a_IsAlone,a_Have_Ticket,...,a_Name_de,a_Name_del,a_Name_der,a_Name_hoef,a_Name_of,a_Name_the,a_Name_van,a_Name_y,a_Cabin_Word,a_Cabin_Number
0,3,23,1,22.0,1,0,1,2,0,1,...,0,0,0,0,0,0,0,0,-1,-1
1,1,51,0,38.0,1,0,2,2,0,1,...,0,0,0,0,0,0,0,0,2,85


Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_Is_Child,a_Age_Level,a_FamilySize,a_IsAlone,a_Have_Ticket,...,a_Name_de,a_Name_del,a_Name_der,a_Name_hoef,a_Name_of,a_Name_the,a_Name_van,a_Name_y,a_Cabin_Word,a_Cabin_Number
0,3,16,1,34.5,1,0,2,1,1,1,...,0,0,0,0,0,0,0,0,-1,-1
1,3,32,0,47.0,1,0,2,2,0,1,...,0,0,0,0,0,0,0,0,-1,-1


Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_Is_Child,a_Age_Level,a_FamilySize,a_IsAlone,a_Have_Ticket,...,a_Name_de,a_Name_del,a_Name_der,a_Name_hoef,a_Name_of,a_Name_the,a_Name_van,a_Name_y,a_Cabin_Word,a_Cabin_Number
0,3,23,1,22.0,1,0,1,2,0,1,...,0,0,0,0,0,0,0,0,-1,-1
1,1,51,0,38.0,1,0,2,2,0,1,...,0,0,0,0,0,0,0,0,2,85


Check and confirm all columns is proccessed

In [22]:
for col in features.columns:
    if not col.startswith('a_'):
        print(col)

In [23]:
# Shuffle and split the train_data into train, crossvalidation and testing subsets
x_train, x_val, y_train, y_val = train_test_split(features, survived, test_size=0.2, random_state=random_state)

In [24]:
# Show distribute of abave data sets
print(x_train.shape)
print(x_val.shape)
print(y_train.shape)
print(y_val.shape)
display(x_train.head(2))
display(y_train.head(2))

(712, 1548)
(179, 1548)
(712,)
(179,)


Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_Is_Child,a_Age_Level,a_FamilySize,a_IsAlone,a_Have_Ticket,...,a_Name_de,a_Name_del,a_Name_der,a_Name_hoef,a_Name_of,a_Name_the,a_Name_van,a_Name_y,a_Cabin_Word,a_Cabin_Number
499,3,18,1,24.0,1,0,1,1,1,1,...,0,0,0,0,0,0,0,0,-1,-1
884,3,22,1,25.0,1,0,1,1,1,1,...,0,0,0,0,0,0,0,0,-1,-1


499    0
884    0
Name: Survived, dtype: int64

In [25]:
features = np.array(features)
survived = np.array(survived)
x_train = np.array(x_train)
y_train = np.array(y_train)
x_val = np.array(x_val)
y_val = np.array(y_val)
x_test = np.array(data_test0)

print(features.shape)
print(survived.shape)
print(x_train.shape)
print(y_train.shape)
print(x_val.shape)
print(y_val.shape)
print(x_test.shape)

print(type(features))
print(type(survived))
print(type(x_train))
print(type(y_train))
print(type(x_val))
print(type(y_val))
print(type(x_test))

(891, 1548)
(891,)
(712, 1548)
(712,)
(179, 1548)
(179,)
(418, 1548)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


## 2. Build model

In [26]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

def my_GridSearchCV(model, parameters):
    if parameters:
        clf = GridSearchCV(model, parameters, cv=5)
    else:
        clf = model
    clf.fit(x_train, y_train)
    return clf

def my_train(models):
    clfs = {}
    for key, value in models.items():
        print('start ' + key)
        clf = my_GridSearchCV(value['model'], value['param'])
        if hasattr(clf, 'best_estimator_'):
            print(clf.best_estimator_)
        clfs[key] = clf
    return clfs

def my_predict(clfs, x_val=x_val, y_val=y_val):
    preds = {}
    for key, value in clfs.items():
        print(key, end=':  ')
        y_pred = value.predict(x_val)
        preds[key] = accuracy_score(y_val, y_pred)
        print(preds[key])
    return preds

In [40]:
import numpy as np
import math
import pdb

# 【p25，公式2.2】
def sign(x):
    if x > 0.5:
        return 1
    else:
        return 0

In [41]:
%%time
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

class Adaboost(object):
    def __init__(self, classifier_count = 10):
        self._x_train = None
        self._y_train = None
        # 【p138，算法8.1，第（2）步】这里M是分类器的数量，【140】中部，有提到
        # “步骤（3）线性组合f(x)实现M个基本分类器的加权表决”
        self._classifier_count = classifier_count
        

    def fit(self, x_train: np.array, y_train: np.array):
        self._x_train = x_train
        self._y_train = y_train
        self._m = len(self._x_train)
        # self._n = len(self._x_train[0])

        self._clf = None
        self._error = 0
        # self._weight = None
        self._alpha = 0

        self._classifiers = []
        self._errors = []
        self._weights = []
        self._alphas = []
        org_clfs = [
            DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=10),
            GradientBoostingClassifier(n_estimators=14, max_depth=5),
#             RandomForestClassifier(n_estimators=17, max_depth=17)
        ]
        # 【p138，第（1）步，初始化权值】
        self._weight = np.ones((self._m,)) / self._m
        for i in range(self._classifier_count):
#             self._clf = DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=10)
            self._clf = GradientBoostingClassifier(n_estimators=10, max_depth=10)
#             self._clf = org_clfs[i % len(org_clfs)]
#             pdb.set_trace()
            self._clf.fit(x_train, y_train, self._weight)
            err = self._clf.predict(x_train) != y_train
            err = err.astype(int)
            err = err * self._weight
            print('err: %s' % err[:5])
            self._error = sum(err) / len(y_train)
            # 【p139，公式8.2】
            self._alpha = 1. / 2 * math.log((1 - self._error) / self._error)
            self._weight = self._get_weight()

            self._classifiers.append(self._clf)
            self._errors.append(self._error)
            self._alphas.append(self._alpha)
            self._weights.append(self._weight)
            print('error: %s' % self._error)
            print('alpha: %s' % self._alpha)
            print('weight:%s' % self._weight[:5])
            print('*'*30)
            # 终止条件，没有找到书中对应的内容，属于个人添加
            if(all(self.predict(x_train) == y_train)):
                print('全部正确分类，满足终止条件：%s of %s' %(i, self._classifier_count))
                break
            pass


    def predict_prob(self, x_test):
        y_preds = []
        for i, clf in enumerate(self._classifiers):
            y_preds.append(self._alphas[i] * clf.predict(x_test))
        y_pred = sum(y_preds)
        return y_pred
    def predict(self, x_test):
        # 【p8.7，公式8.7】这里把probability和sign分开成两个函数来实现，
        # 便于需要probability的情况
        y_preds = self.predict_prob(x_test)
        result = np.ones((len(y_preds),))
        for i in range(len(y_preds)):
            result[i] = sign(y_preds[i])
        return result


    @property
    def x_fit(self):
        return self._x_train
    @property
    def y_fit(self):
        return self._y_train
    
    def _get_weight(self):
        # 【p139，公式8.3，8.4，8.5】
        weight_factors = self._weight*np.exp(-self._alpha*self._y_train*self._clf.predict(self._x_train))
        z = sum(weight_factors)
        new_wf = weight_factors / z
        return new_wf

Wall time: 0 ns


In [43]:
ada = Adaboost(5)
ada.fit(x_train, y_train)
print('用x_train数据测试整体模型：')
truth = ada.predict(x_train) == y_train
print(sum(truth) / len(y_train))
print('用x_val数据测试整体模型：')
truth = ada.predict(x_val) == y_val
pred_acc = sum(truth) / len(y_val)
print(pred_acc)

err: [ 0.  0.  0.  0.  0.]
error: 5.72055296048e-05
alpha: 4.884401392835787
weight:[ 0.002198  0.002198  0.002198  0.002198  0.002198]
******************************
err: [ 0.  0.  0.  0.  0.]
error: 5.22603311952e-05
alpha: 4.92961034937222
weight:[ 0.00224718  0.00224718  0.00224718  0.00224718  0.00224718]
******************************
err: [ 0.  0.  0.  0.  0.]
error: 2.20970304609e-05
alpha: 5.360022614738734
weight:[ 0.0022986  0.0022986  0.0022986  0.0022986  0.0022986]
******************************
err: [ 0.  0.  0.  0.  0.]
error: 6.02646275006e-06
alpha: 6.0096721507401005
weight:[ 0.00232623  0.00232623  0.00232623  0.00232623  0.00232623]
******************************
err: [ 0.  0.  0.  0.  0.]
error: 5.27538717903e-06
alpha: 6.076226603513366
weight:[ 0.00232766  0.00232766  0.00232766  0.00232766  0.00232766]
******************************
用x_train数据测试整体模型：
0.994382022472
用x_val数据测试整体模型：
0.837988826816


### Predict and Export pred.csv file

In [30]:
train_cols = data_train.columns
for col in data_test0.columns:
    if col not in train_cols:
        print(col)

In [31]:
project_name = 'Titanic'
step_name = 'StatML_adaboost'
time_str = time.strftime("%Y%m%d_%H%M%S", time.localtime())
final_acc_str = str(int(pred_acc*10000))
run_name = project_name + '_' + step_name + '_' + time_str + '_' + final_acc_str
print(run_name)
cwd = os.getcwd()
pred_file = os.path.join(cwd, 'output', run_name + '.csv')
print(pred_file)

Titanic_StatML_adaboost_20180204_172325_8603
D:\Kaggle\titanic\output\Titanic_StatML_adaboost_20180204_172325_8603.csv


In [32]:
display(data_test0.head(2))
y_data_pred = ada.predict(data_test0.as_matrix())
print(y_data_pred.shape)
y_data_pred = np.squeeze(y_data_pred)
print(y_data_pred.shape)
y_data_pred = (y_data_pred > 0.5).astype(int)
print(y_data_pred)

print(data_test['PassengerId'].shape)
passenger_id = data_test['PassengerId']
output = pd.DataFrame( { 'PassengerId': passenger_id , 'Survived': y_data_pred })

output.to_csv(pred_file , index = False)

Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_Is_Child,a_Age_Level,a_FamilySize,a_IsAlone,a_Have_Ticket,...,a_Name_de,a_Name_del,a_Name_der,a_Name_hoef,a_Name_of,a_Name_the,a_Name_van,a_Name_y,a_Cabin_Word,a_Cabin_Number
0,3,16,1,34.5,1,0,2,1,1,1,...,0,0,0,0,0,0,0,0,-1,-1
1,3,32,0,47.0,1,0,2,2,0,1,...,0,0,0,0,0,0,0,0,-1,-1


(418,)
(418,)
[0 0 0 0 1 0 1 0 1 0 0 0 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 0 1 0 0 0 1 0 1 0 0
 0 0 0 0 1 0 1 1 0 0 0 1 1 1 0 1 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 1 1 0 0 1
 1 0 0 1 0 1 1 0 0 0 0 0 1 0 1 1 1 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0
 1 1 0 1 0 0 1 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0
 0 0 1 0 0 1 0 0 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 0 0 0 1 1 1 0 1 1 0 1 1 0 1
 0 1 0 0 0 0 0 1 0 1 0 1 0 0 0 1 1 0 1 0 0 1 0 1 0 0 0 0 1 1 0 1 0 1 0 1 0
 1 0 1 1 0 1 0 0 0 1 0 0 1 0 0 0 1 1 1 1 0 0 1 0 1 0 1 1 1 0 0 0 0 0 0 0 0
 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 0
 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0
 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 0 1 1 0 1 0 0 0 1 0
 0 1 0 0 1 1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 1 0 0 0 0
 0 1 1 1 1 1 0 1 0 0 1]
(418,)


In [33]:
print(run_name)
print('Done!')

Titanic_StatML_adaboost_20180204_172325_8603
Done!
