In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
%matplotlib inline

In [33]:
train_data = pd.read_csv("../train.csv")
imgs_data = train_data.iloc[:,1:]
labels_data = train_data.iloc[:,0]

In [34]:
label = range(10)
lb = LabelBinarizer().fit(label)
length = len(labels_data)
label_list = lb.transform(np.array(labels_data))

In [35]:
img_list = []
for i in range(length):
    img_data = np.array(np.reshape(imgs_data.iloc[i,:],(28,28,1)))
    img_list.append(img_data)
imgs = np.array(img_list) 

In [36]:
x_train,x_val,y_train,y_val = train_test_split(imgs,label_list,test_size=0.2, random_state=42)

In [37]:
y_train = y_train*2 -1
y_val = y_val*2 -1

# BinaryNet

In [7]:
import sys,os
sys.path.insert(0,os.path.abspath(os.path.join(os.getcwd(),"./nn_playground/binarynet/")))

In [12]:
from __future__ import print_function
import numpy as np
import keras.backend as K
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, BatchNormalization, MaxPooling2D
from keras.layers import Flatten
from keras.optimizers import SGD, Adam, RMSprop
from keras.callbacks import LearningRateScheduler
from keras.utils import np_utils

from binary_ops import binary_tanh as binary_tanh_op
from binary_layers import BinaryDense, BinaryConv2D


In [13]:
def binary_tanh(x):
    return binary_tanh_op(x)

In [14]:
H = 1.
kernel_lr_multiplier = 'Glorot'

# nn
batch_size = 50
epochs = 20 
channels = 1
img_rows = 28 
img_cols = 28 
filters = 32 
kernel_size = (3, 3)
pool_size = (2, 2)
hidden_units = 128
classes = 10
use_bias = False

# learning rate schedule
lr_start = 1e-3
lr_end = 1e-4
lr_decay = (lr_end / lr_start)**(1. / epochs)

# BN
epsilon = 1e-6
momentum = 0.9

# dropout
p1 = 0.25
p2 = 0.5

In [38]:
model = Sequential()
# conv1
model.add(BinaryConv2D(128, kernel_size=kernel_size, input_shape=(img_rows, img_cols,channels),
                       data_format='channels_last',
                       H=H, kernel_lr_multiplier=kernel_lr_multiplier, 
                       padding='same', use_bias=use_bias, name='conv1'))
model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn1'))
model.add(Activation(binary_tanh, name='act1'))
# conv2
model.add(BinaryConv2D(128, kernel_size=kernel_size, H=H, kernel_lr_multiplier=kernel_lr_multiplier, 
                       data_format='channels_last',
                       padding='same', use_bias=use_bias, name='conv2'))
model.add(MaxPooling2D(pool_size=pool_size, name='pool2', data_format='channels_last'))
model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn2'))
model.add(Activation(binary_tanh, name='act2'))
# conv3
model.add(BinaryConv2D(256, kernel_size=kernel_size, H=H, kernel_lr_multiplier=kernel_lr_multiplier,
                       data_format='channels_last',
                       padding='same', use_bias=use_bias, name='conv3'))
model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn3'))
model.add(Activation(binary_tanh, name='act3'))
# conv4
model.add(BinaryConv2D(256, kernel_size=kernel_size, H=H, kernel_lr_multiplier=kernel_lr_multiplier,
                       data_format='channels_last',
                       padding='same', use_bias=use_bias, name='conv4'))
model.add(MaxPooling2D(pool_size=pool_size, name='pool4', data_format='channels_last'))
model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, axis=1, name='bn4'))
model.add(Activation(binary_tanh, name='act4'))
model.add(Flatten())
# dense1
model.add(BinaryDense(1024, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, name='dense5'))
model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn5'))
model.add(Activation(binary_tanh, name='act5'))
# dense2
model.add(BinaryDense(classes, H=H, kernel_lr_multiplier=kernel_lr_multiplier, use_bias=use_bias, name='dense6'))
model.add(BatchNormalization(epsilon=epsilon, momentum=momentum, name='bn6'))

In [49]:
opt = Adam(lr=lr_start) 
model.compile(loss='squared_hinge', optimizer=opt, metrics=['acc'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (BinaryConv2D)         (None, 28, 28, 128)       1152      
_________________________________________________________________
bn1 (BatchNormalization)     (None, 28, 28, 128)       112       
_________________________________________________________________
act1 (Activation)            (None, 28, 28, 128)       0         
_________________________________________________________________
conv2 (BinaryConv2D)         (None, 28, 28, 128)       147456    
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 14, 14, 128)       0         
_________________________________________________________________
bn2 (BatchNormalization)     (None, 14, 14, 128)       56        
_________________________________________________________________
act2 (Activation)            (None, 14, 14, 128)       0         
__________

In [50]:
# lr_scheduler = LearningRateScheduler(lambda e: lr_start * lr_decay ** e)
lr_scheduler = LearningRateScheduler(lambda x: 1e-3 * 0.9 **x)
history = model.fit(x_train, y_train,
                    batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=1, validation_data=(x_val, y_val),
                    callbacks=[lr_scheduler])

Train on 33600 samples, validate on 8400 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


 # Sequential Model

In [7]:
from keras.models import  Sequential,Model
from keras.optimizers import Adam
from keras.layers import Conv2D,Dense,MaxPooling2D,Flatten,InputLayer,Input
from keras.layers import BatchNormalization, Dropout,Activation
from keras.activations import  relu

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [55]:
import keras.backend as K
?? K.stop_gradient()

In [97]:
classfier_model = Sequential()
classfier_model.add(InputLayer(input_shape=(28,28,1),name="Input"))
classfier_model.add(BatchNormalization(axis=3))
classfier_model.add(Conv2D(16,(3,3),padding="same",name="block1_conv1"))
classfier_model.add(Conv2D(16,(3,3),padding="same",activation="relu",name="block1_con2"))
classfier_model.add(MaxPooling2D((2, 2), strides=(2, 2), name='pool1'))
classfier_model.add(Conv2D(48,(3,3),padding="same",name="block2_conv1"))
classfier_model.add(Conv2D(48,(3,3),padding="same",activation="relu",name="block2_conv2"))
classfier_model.add(MaxPooling2D((2, 2), strides=(2, 2), name='pool2'))
classfier_model.add(Flatten(name="flatten"))
classfier_model.add(Dense(512, activation='relu', name='fc1'))
classfier_model.add(Dense(10,activation="softmax",name="prediction"))

In [46]:
adam = Adam(lr=0.01)
classfier_model.compile(optimizer=adam, loss="categorical_crossentropy",metrics=["accuracy"])
classfier_model.fit(x_train,y_train,validation_data=(x_val,y_val),epochs=10)

NameError: name 'classfier_model' is not defined

## Functional Model

In [36]:
inputs = Input(shape=(28,28,1))
x = BatchNormalization(axis=3)
x = Conv2D(20,(5,5),padding="valid")(inputs)
x = MaxPooling2D((2,2),strides=(2,2))(x)
x = Conv2D(50,(5,5),padding="valid")(x)
x = MaxPooling2D((2,2),strides=(2,2))(x)
x = Flatten()(x)
x = Dense(60, activation="relu")(x)
x = Dense(48, activation="relu")(x)
predictions = Dense(10, activation="softmax")(x)
model = Model(inputs=inputs, outputs=predictions)

In [44]:
model.compile(optimizer="adam", loss="categorical_crossentropy",metrics=["accuracy"])
model.fit(x_train,y_train,validation_data=(x_val,y_val),verbose=1,epochs=20)

Train on 33600 samples, validate on 8400 samples
Epoch 1/20

KeyboardInterrupt: 

In [None]:
model.save("mnist_model.h5")

In [41]:
from keras.callbacks import LearningRateScheduler,EarlyStopping,ModelCheckpoint
batch_size = 32
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.9 **x)
earlystop = EarlyStopping(patience=10)
model_save = ModelCheckpoint(
                filepath="mnist_model.h5", save_best_only=True, verbose=1
)
model.fit(x_train,y_train,batch_size=batch_size,
          epochs=20, validation_data=(x_val,y_val),
          callbacks=[annealer,earlystop,model_save]
         )

Train on 33600 samples, validate on 8400 samples
Epoch 1/20
Epoch 00001: val_loss improved from inf to 0.09565, saving model to mnist_model.h5
Epoch 2/20
Epoch 00002: val_loss did not improve
Epoch 3/20
Epoch 00003: val_loss did not improve
Epoch 4/20
Epoch 00004: val_loss did not improve
Epoch 5/20
Epoch 00005: val_loss did not improve
Epoch 6/20
Epoch 00006: val_loss did not improve
Epoch 7/20
Epoch 00007: val_loss improved from 0.09565 to 0.09252, saving model to mnist_model.h5
Epoch 8/20
Epoch 00008: val_loss did not improve
Epoch 9/20
Epoch 00009: val_loss did not improve
Epoch 10/20
Epoch 00010: val_loss did not improve
Epoch 11/20
Epoch 00011: val_loss did not improve
Epoch 12/20
Epoch 00012: val_loss improved from 0.09252 to 0.09172, saving model to mnist_model.h5
Epoch 13/20
Epoch 00013: val_loss did not improve
Epoch 14/20
Epoch 00014: val_loss did not improve
Epoch 15/20
Epoch 00015: val_loss did not improve
Epoch 16/20
Epoch 00016: val_loss did not improve
Epoch 17/20
Epoch

<keras.callbacks.History at 0x7fa2c07daf90>

In [41]:
test_data = pd.read_csv('../test.csv')
test_list = []
for i in range(test_data.shape[0]):
    test = np.array(np.reshape(test_data.iloc[i,:],(28,28,1)))
    test_list.append(test)
tests_data = np.array(test_list) 

In [51]:
predict = model.predict(tests_data)
predict_labels = lb.inverse_transform(predict)
values_data = pd.DataFrame({"ImageId":np.arange(1,28001),"Label":predict_labels})
values_data.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3


In [52]:
values_data.to_csv("submission.csv", index=False)