In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout
from keras.optimizers import SGD
from keras.utils import np_utils

# Load the training and test tables from CSV files in the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv'))

def encode(train, test):
    """
    Prepare the training and test tables for modelling.

    The `species` column of `train` is turned into integer labels with a
    LabelEncoder, and the non-feature columns are dropped from both tables.

    Arguments:
    train - training table; must contain `species` and `id` columns
    test - test table; must contain an `id` column

    Returns:
    train - `train` without the `species` and `id` columns
            (original author's note: shape (990, 192) - TODO confirm)
    labels - encoded `species` values, one per row of the input `train`
             (the original note said shape (900,), which does not match
             the 990 rows noted above - TODO confirm)
    test - `test` without the `id` column
           (original author's note: shape (594, 192) - TODO confirm)
    classes - list built from the fitted encoder's `classes_`
              (original author's note: 99 entries - TODO confirm)
    """
    # Grab the target column before it is dropped from the feature table.
    species = train.species

    features = train.drop(['species', 'id'], axis=1)
    test_features = test.drop('id', axis=1)

    encoder = LabelEncoder()
    encoder.fit(species)
    labels = encoder.transform(species)
    classes = list(encoder.classes_)

    return features, labels, test_features, classes

train, labels, test, classes = encode(train, test)

# Standardize the training features; `scaler` keeps the fitted statistics.
scaler = StandardScaler()
scaler.fit(train.values)
scaled_train = scaler.transform(train.values)

# Separate the training data into a training part and a 10% validation part,
# stratified on the labels.
# NOTE(review): n_splits is not set, so split() yields the library-default
# number of shuffles and only the LAST one is kept. Passing n_splits=1 would
# avoid the extra work but would select a different partition - confirm
# before changing.
sss = StratifiedShuffleSplit(test_size=0.1, random_state=23)
splits = list(sss.split(scaled_train, labels))
train_index, valid_index = splits[-1]
X_train, y_train = scaled_train[train_index], labels[train_index]
X_valid, y_valid = scaled_train[valid_index], labels[valid_index]
    
nb_features = 64  # number of features per feature type (shape, texture, margin)
nb_class = len(classes)


def _stack_feature_groups(X, group_size, nb_groups=3):
    """
    Rearrange a flat feature matrix into per-group channels.

    Columns [k * group_size, (k + 1) * group_size) of `X` become channel `k`
    of the result, i.e. result[:, :, k] == X[:, k * group_size:(k + 1) * group_size].

    Arguments:
    X - 2-D array with exactly group_size * nb_groups columns
    group_size - number of consecutive columns that form one group
    nb_groups - number of groups / output channels

    Returns:
    A new float64 array of shape (len(X), group_size, nb_groups).

    Raises:
    ValueError - if `X` is not 2-D or has the wrong number of columns
    """
    X = np.asarray(X)
    expected_cols = group_size * nb_groups
    if X.ndim != 2 or X.shape[1] != expected_cols:
        raise ValueError(
            "expected a 2-D array with %d columns, got shape %r"
            % (expected_cols, X.shape)
        )
    # (rows, groups, group_size) -> (rows, group_size, groups)
    grouped = X.reshape(len(X), nb_groups, group_size).transpose(0, 2, 1)
    # np.array copies, so the result never shares memory with X.
    return np.array(grouped, dtype=np.float64, order='C')


# reshape train data
X_train_r = _stack_feature_groups(X_train, nb_features)

# reshape validation data
X_valid_r = _stack_feature_groups(X_valid, nb_features)

# # Keras model with one Convolution1D layer
# # unfortunately, adding more convolutional layers, more filters, or a longer
# # filter length does not give better accuracy
# model = Sequential()
# model.add(Convolution1D(nb_filter=512, filter_length=1, input_shape=(nb_features, 3)))
# model.add(Activation('relu'))
# model.add(Flatten())
# model.add(Dropout(0.4))
# model.add(Dense(2048, activation='relu'))
# model.add(Dense(1024, activation='relu'))
# model.add(Dense(nb_class))
# model.add(Activation('softmax'))


# y_train = np_utils.to_categorical(y_train, nb_class)
# y_valid = np_utils.to_categorical(y_valid, nb_class)

# sgd = SGD(lr=0.01, nesterov=True, decay=1e-6, momentum=0.9)
# model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])

# nb_epoch = 15
# model.fit(X_train_r, y_train, nb_epoch=nb_epoch, validation_data=(X_valid_r, y_valid), batch_size=16)

Using TensorFlow backend.


float64
[ 3 49 65 94 84 40 54 78 53 89 98 16 74 50 58 31 43  4 75 44 83 84 13 66
 15  6 73 22 73 31 36 27 94 88 12 28 21 25 20 60 84 65 69 58 23 76 18 52
 54  9 48 47 64 81 83 36 58 21 81 20 62 88 34 92 79 82 20 32  4 84 36 35
 72 60 71 72 52 50 54 11 51 18 47  5  8 37 97 20 33  1 59  1 56  1  9 57
 20 79 29 16 32 54 93 10 46 59 84 76 15 10 15  0 69  4 51 51 94 36 39 62
  2 24 26 35 25 87  0 55 34 38  1 45  7 93 56 38 21 51 75 81 74 33 20 37
  9 40 60 31 83 50 71 67 30 66  1 43 61 23 65 84 87 46 57 16  2 28 12 96
 44 76 29 75 41 87 67 61 30  5 12 62  3 83 81  6 85  4 37 57 84 39 71 61
  6 76 14 31 98 40 17 51 16 42 63 86 37 69 86 71 80 78 14 35 25  5 39  8
  9 26 44 60 13 14 77 13 80 87 18 60 78 92 51 45 78 41 51 30 14 35 46 21
  8  6 92 38 40 15 32 17 93 71 92 27 78 15 19 60 21 38 36 49 74 67 95 31
 82 45 16 83 63 80 42 22 74 53 15 44 47 57 94 76 17 32 24 15 93 24 80 59
 46 12 51 77 79 70 69 16  2 63 83 55 12 53  1 67  0  2 36 42 10  9 52 59
  6 22 86 31 51 37 43 75 90 24 86 96 45 32 