In [1]:
import os
import csv
import pandas as pd
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.utils import to_categorical
from keras.callbacks import History 
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from collections import Counter

Using TensorFlow backend.


In [2]:
history1 = History()
history2 = History()
history3 = History()

LABEL_PATH = 'data/'
TRAIN_FILE_NAME = 'train.csv'
TEST_FILE_NAME = 'test.csv'

In [4]:
train_data_frame = pd.read_csv(r'train.csv')

train_data_frame = train_data_frame.drop(['id'], axis=1)

y = train_data_frame.pop('species')
classes = np.unique(y)

y = to_categorical(LabelEncoder().fit(y).transform(y))

x = StandardScaler().fit(train_data_frame).transform(train_data_frame)


In [5]:
sss = StratifiedShuffleSplit(n_splits=10, test_size=0.2,random_state=12345)

train_index, validation_index = next(iter(sss.split(x, y)))
train_x, validate_x = x[train_index], x[validation_index]
train_y, validate_y = y[train_index], y[validation_index]
print("train_x dimention: ",train_x.shape)
print("validate_x dimention:   ",validate_x.shape)


train_x dimention:  (792, 192)
validate_x dimention:    (198, 192)


In [6]:
no_of_classes = len(np.unique(train_y, axis=0))

In [7]:
model1 = Sequential()

model1.add(Dense(250, activation='relu', input_dim = train_x.shape[1]))
model1.add(Dropout(0.2))
model1.add(Dense(150, activation='relu'))
model1.add(Dropout(0.4))
model1.add(Dense(no_of_classes, activation=tf.nn.softmax))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [8]:
model2 = Sequential()

model2.add(Dense(1000, activation='tanh', input_dim = train_x.shape[1]))
model2.add(Dense(1000, activation='relu'))
model2.add(Dense(1000, activation='relu'))
model2.add(Dense(no_of_classes, activation=tf.nn.softmax))

In [9]:
model3 = Sequential()

model3.add(Dense(500, activation='relu', input_dim = train_x.shape[1]))
model3.add(Dropout(0.4))
model3.add(Dense(500, activation='relu'))
model3.add(Dropout(0.2))
model3.add(Dense(no_of_classes, activation=tf.nn.softmax))

In [10]:
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 250)               48250     
_________________________________________________________________
dropout_1 (Dropout)          (None, 250)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 150)               37650     
_________________________________________________________________
dropout_2 (Dropout)          (None, 150)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 99)                14949     
Total params: 100,849
Trainable params: 100,849
Non-trainable params: 0
_________________________________________________________________


In [11]:
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 1000)              193000    
_________________________________________________________________
dense_5 (Dense)              (None, 1000)              1001000   
_________________________________________________________________
dense_6 (Dense)              (None, 1000)              1001000   
_________________________________________________________________
dense_7 (Dense)              (None, 99)                99099     
Total params: 2,294,099
Trainable params: 2,294,099
Non-trainable params: 0
_________________________________________________________________


In [12]:
model3.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 500)               96500     
_________________________________________________________________
dropout_3 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 500)               250500    
_________________________________________________________________
dropout_4 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 99)                49599     
Total params: 396,599
Trainable params: 396,599
Non-trainable params: 0
_________________________________________________________________


In [16]:
model1.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model1.fit(train_x, train_y, epochs = 10, verbose=0, validation_data=(validate_x, validate_y), callbacks=[history1])


<keras.callbacks.History at 0x7f32a50b7080>

In [17]:
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model2.fit(train_x, train_y, epochs = 10, verbose=0, validation_data=(validate_x, validate_y), callbacks=[history2])

<keras.callbacks.History at 0x7f329c3cdc88>

In [18]:
model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model3.fit(train_x, train_y, epochs = 20, verbose=0, validation_data=(validate_x, validate_y), callbacks=[history3])


<keras.callbacks.History at 0x7f3253dab4a8>

In [27]:
model1.save('models/model_1_0.29073.h5')
model2.save('models/model_2_0.29073.h5')
model3.save('models/model_3_0.29073.h5')

OSError: Unable to create file (unable to open file: name = 'models/model_1_0.29073.h5', errno = 2, error message = 'No such file or directory', flags = 13, o_flags = 242)

In [28]:
test_data_frame = pd.read_csv(r'test.csv')

index = test_data_frame.pop('id')

test_x = StandardScaler().fit(test_data_frame).transform(test_data_frame)
#test_x = test_data_frame.get_values()


In [29]:
test_y_1 = model1.predict_classes(test_x)
test_y_2 = model2.predict_classes(test_x)
test_y_3 = model3.predict_classes(test_x)

In [30]:
test_y = [
    {
        'name': 'test_y_1',
        'predict': test_y_1,
        'loss': history1.history['loss'][-1]
    }, {
        'name': 'test_y_2',
        'predict': test_y_2,
        'loss': history2.history['loss'][-1]
    }, {
        'name': 'test_y_3',
        'predict': test_y_3,
        'loss': history3.history['loss'][-1]
    }, 
]

test_y = sorted(test_y, key=lambda k: k['loss'])

In [31]:
data_grid = np.zeros((len(test_y_1), len(classes)))

for i in range(len(test_y_1)):
    count = {}
    for test in test_y:
        if test['predict'][i] not in count:
            count[test['predict'][i]] = 1
        else:
            count[test['predict'][i]] += 1
    
    result = Counter(count)
    predicted = result.most_common(1)
    data_grid[i][predicted[0][0]] = 1

In [32]:
prediction = pd.DataFrame(data_grid, index = index, columns = classes)

In [33]:
with open('submission.csv','w') as file:
    file.write(prediction.to_csv())