In [334]:
import pandas as pd
import numpy as np


In [335]:
df_dev = pd.read_pickle('df_dev.pkl')

In [336]:
zero_data = np.zeros(shape=(len(df_dev),1),dtype=np.int32)
d = pd.DataFrame(zero_data)
df_dev['group_id']= d

zero_data = np.zeros(shape=(len(df_dev),1),dtype=np.float32)
d = pd.DataFrame(zero_data)
df_dev['prob']=d

In [339]:
n_sample = len(df_dev)
n_fold = 10

shuffle_idx = np.random.permutation(n_sample)
split_array=np.array_split(shuffle_idx,n_fold)

In [341]:
for i in range(n_fold):
    idx_list = split_array[i]
    df_dev.ix[split_array[i],'group_id']=i
    

In [343]:
from sklearn.model_selection import train_test_split
import chainer
from chainer import training, Chain
from chainer.training import extensions
from chainer import links as L
from chainer import functions as F
from chainer.datasets import TupleDataset
from chainer import serializers

from chainer.functions.loss.mean_squared_error import mean_squared_error

In [344]:
class MyChain(Chain):
    def __init__(self):
        super(MyChain,self).__init__(
            l1 = L.Linear(None, 10),
            l2 = L.Linear(None, 6),
            l3 = L.Linear(None,2),
            bn1 = L.BatchNormalization(size = 14),
            bn2 = L.BatchNormalization(size = 10),
            bn3 = L.BatchNormalization(size = 6)
        )
        
    def __call__(self, x, train = True):
        h = F.dropout(self.bn1(x),ratio = 0.2, train=train)
        h = F.dropout(F.relu(self.bn2(self.l1(x))), train = train)
        h = F.dropout(F.relu(self.bn3(self.l2(h))), train = train)
        y = self.l3(h)
        
        return y

In [350]:
for i in range(n_fold):
    df_train = df_dev[df_dev.group_id != i]
    df_test = df_dev[df_dev.group_id == i]
    
    X_train = df_train.drop(['Survived','group_id','prob'],axis=1).as_matrix().astype(np.float32)
    y_train = df_train.Survived.as_matrix().astype(np.int32)
    X_test = df_test.drop(['Survived','group_id','prob'],axis=1).as_matrix().astype(np.float32)
    y_test = df_test.Survived.as_matrix().astype(np.int32)
    
    train = TupleDataset(X_train,y_train.reshape(1,-1)[0])
    test = TupleDataset(X_test,y_test.reshape(1,-1)[0])

    train_iter = chainer.iterators.SerialIterator(train, 100)
    test_iter = chainer.iterators.SerialIterator(test, 100,repeat=False, shuffle=False)
    
    model = L.Classifier(MyChain())
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = training.Trainer(updater, (100, 'epoch'), out="result")
    trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
    
    trainer.run()
    
    serializers.save_npz("model/model_"+str(i)+".npz",model)
    
    df_prob = pd.DataFrame(F.softmax(model.predictor(X_test,train=False)).data[:,1],index = df_test.index,columns=['prob'])
    df_dev.loc[df_test.index,'prob']=df_prob['prob']


epoch       main/loss   validation/main/loss  main/accuracy  validation/main/accuracy
[J1           0.792499    0.737569              0.514444       0.566667                  
[J2           0.76126     0.947032              0.52           0.455556                  
[J3           0.750951    0.747129              0.51375        0.577778                  
[J4           0.740654    0.742693              0.53125        0.577778                  
[J5           0.734704    0.793532              0.51           0.522222                  
[J6           0.711681    0.791413              0.53625        0.488889                  
[J7           0.712045    0.720781              0.55375        0.533333                  
[J8           0.691236    0.801383              0.575          0.5                       
[J9           0.683808    0.671732              0.58375        0.555556                  
[J10          0.694851    0.614439              0.58375        0.611111                  
[J1

In [None]:
df_train = df_dev[df_dev.group_id != i]
df_test = df_dev[df_dev.group_id == i]

X_train = df_train.drop(['Survived','group_id'],axis=1).as_matrix().astype(np.float32)
y_train = df_train.Survived.as_matrix().astype(np.int32)
X_test = df_test.drop(['Survived','group_id'],axis=1).as_matrix().astype(np.float32)
y_test = df_test.Survived.as_matrix().astype(np.int32)

train = TupleDataset(X_train,y_train.reshape(1,-1)[0])
test = TupleDataset(X_test,y_test.reshape(1,-1)[0])

train_iter = chainer.iterators.SerialIterator(train, 100)
test_iter = chainer.iterators.SerialIterator(test, 100,repeat=False, shuffle=False)

model = L.Classifier(MyChain())
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

updater = training.StandardUpdater(train_iter, optimizer, device=-1)
trainer = training.Trainer(updater, (100, 'epoch'), out="result")
trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))

trainer.run()

serializers.save_hdf5("model/model_"+str(i)+".h5",model)

df_prob = pd.DataFrame(F.softmax(model.predictor(X_test,train=False)).data[:,1],index = df_test.index,columns=['prob'])
df_dev.loc[df_test.index,'prob']=df_prob['prob']

In [209]:

df_train = df_dev[df_dev.group_id != 0]
df_test = df_dev[df_dev.group_id == 0]

In [212]:
X_train = df_train.drop(['Survived','group_id','prob'],axis=1).as_matrix().astype(np.float32)
y_train = df_train.Survived.as_matrix().astype(np.int32)
X_test = df_test.drop(['Survived','group_id','prob'],axis=1).as_matrix().astype(np.float32)
y_test = df_test.Survived.as_matrix().astype(np.int32)


In [154]:
X_train.shape

(801, 14)

In [155]:
y_train.shape

(801,)

In [49]:
X_train.shape

(801, 14)

(90, 14)

In [329]:
train = TupleDataset(X_train,y_train)
test = TupleDataset(X_test,y_test)

train_iter = chainer.iterators.SerialIterator(train, 100)
test_iter = chainer.iterators.SerialIterator(test, 100,repeat=False, shuffle=False)

In [330]:
model = L.Classifier(MyChain())
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

updater = training.StandardUpdater(train_iter, optimizer, device=-1)
trainer = training.Trainer(updater, (100, 'epoch'), out="result")
trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport( ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))

trainer.run()

epoch       main/loss   validation/main/loss  main/accuracy  validation/main/accuracy
[J1           0.747478    0.777518              0.601111       0.511111                  
[J2           0.757082    0.70246               0.55125        0.533333                  
[J3           0.739348    0.721762              0.6175         0.588889                  
[J4           0.754367    0.825071              0.6            0.566667                  
[J5           0.732101    0.767173              0.62125        0.622222                  
[J6           0.688599    0.738887              0.61           0.555556                  
[J7           0.72148     0.67057               0.615          0.555556                  
[J8           0.712022    0.758428              0.60375        0.588889                  
[J9           0.700454    0.7882                0.61125        0.511111                  
[J10          0.710756    0.730801              0.62875        0.544444                  
[J1

In [284]:
df_prob = pd.DataFrame(F.softmax(model.predictor(X_test)).data[:,1] ,index = df_test.index,columns=["prob"])

In [285]:
df_prob

Unnamed: 0,prob
2,0.378082
19,0.570397
22,0.862391
31,0.959352
39,0.497051
55,0.486316
60,0.071700
85,0.523071
92,0.283172
99,0.296113


In [324]:
df_prob = pd.DataFrame(F.softmax(model.predictor(X_test,train=False)).data[:,1],index = df_test.index,columns=['prob'])
df_dev.loc[df_test.index,'prob']=df_prob['prob']

In [333]:
df_test.index

Int64Index([  2,  19,  22,  31,  39,  55,  60,  85,  92,  99, 101, 110, 120,
            125, 139, 148, 157, 167, 184, 187, 194, 198, 199, 201, 211, 241,
            245, 256, 266, 274, 280, 281, 289, 299, 308, 323, 325, 335, 344,
            358, 361, 373, 376, 377, 382, 408, 416, 418, 435, 444, 451, 453,
            468, 474, 478, 497, 501, 566, 580, 591, 595, 599, 645, 647, 655,
            666, 674, 681, 703, 715, 716, 720, 729, 733, 750, 776, 778, 781,
            821, 833, 834, 835, 841, 857, 860, 863, 870, 873, 876, 888],
           dtype='int64')