In [2]:
from keras.preprocessing.image import img_to_array, load_img, array_to_img
import os
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [17]:
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def run_session(num_epochs, name, k_prob=1.0):

    with tf.Session(graph=graph) as session:
        merged = tf.merge_all_summaries()  
        writer = tf.train.SummaryWriter("/tmp/tensorflowlogs", session.graph)
        tf.initialize_all_variables().run()
        print("Initialized")
        for epoch in range(num_epochs):
            offset = (epoch * batch_size) % (y_train.shape[0] - batch_size)
            batch_data = X_train[offset:(offset + batch_size), :]
            batch_labels = y_train[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, keep_prob : k_prob}
            _, l, predictions, l_v = session.run([optimizer, loss, train_prediction, loss_v], feed_dict=feed_dict)
            
            if (epoch % 100 == 0):
                print("Minibatch loss at epoch {}: {}".format(epoch, l))
                print("Validation loss at epoch {}: {}".format(epoch, l_v))
                print("Minibatch accuracy: {:.1f}".format(accuracy(predictions, batch_labels)))
                print("Validation accuracy: {:.1f}".format(accuracy(valid_prediction.eval(), y_val)))
        
        test_prob = test_prediction.eval()
        return test_prob
    
    

split_by_half = lambda x,k : int(x/2**k)

In [35]:
train_feature_data = pd.read_csv("train.csv")
test_feature_data = pd.read_csv("test.csv")

need_species = ["Quercus_Brantii", "Quercus_Castaneifolia", "Quercus_Cerris"] 
n_specials = len(need_species)
train_feature_data = train_feature_data.loc[train_feature_data["species"].isin(need_species), : ]
test_feature_data = test_feature_data.loc[test_feature_data["id"] == 887, :]

ID = train_feature_data.pop("id")

train_labels = train_feature_data.pop('species')
le = preprocessing.LabelEncoder()
train_labels = le.fit(train_labels).transform(train_labels) 

# standardize the data by setting the mean to 0 and std to 1
scaler = StandardScaler().fit(train_feature_data)
train_feature_data = scaler.transform(train_feature_data)


test_feature_id = test_feature_data.pop("id")
test_feature_data = scaler.transform(test_feature_data)
test_feature_data = test_feature_data.astype(np.float32)

X_train, X_val, y_train, y_val = train_test_split(train_feature_data, train_labels, test_size=.1, random_state=1, stratify = train_labels)
y_train = (np.arange(n_specials) == y_train[:,None]).astype(np.float32)
y_val = (np.arange(n_specials) == y_val[:,None]).astype(np.float32)

X_train = X_train.astype(np.float32)
X_val = X_val.astype(np.float32)

print("Training Data", X_train.shape, y_train.shape)
print("Validated Data", X_val.shape, y_val.shape)
print("Test Data", test_feature_data.shape)

Training Data (27, 192) (27, 3)
Validated Data (3, 192) (3, 3)
Test Data (1, 192)


In [36]:
batch_size = 10
hidden_nodes = 100
lamb_reg = 0.0

graph = tf.Graph()
with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, 192))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, n_specials))
    tf_valid_dataset = tf.constant(X_val)
    tf_valid_labels = tf.constant(y_val)
    tf_test_dataset = tf.constant(test_feature_data)

    # Variables.
    layer1_weights = weight_variable([192, hidden_nodes])
    layer1_biases = bias_variable([hidden_nodes])
    layer4_weights = weight_variable([hidden_nodes, n_specials])
    layer4_biases = bias_variable([n_specials])

    keep_prob = tf.placeholder("float")

    # Model with dropout
    def model(data, proba=keep_prob):
        layer1 = tf.matmul(data, layer1_weights) + layer1_biases
        hidden1 = tf.nn.dropout(tf.nn.relu(layer1), proba)  # dropout on hidden layer
        return tf.matmul(hidden1, layer4_weights) + layer4_biases

    # Training computation.
    logits = model(tf_train_dataset, keep_prob)

    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))

    regularizers = (tf.nn.l2_loss(layer1_weights) + tf.nn.l2_loss(layer1_biases) + \
                    tf.nn.l2_loss(layer4_weights) + tf.nn.l2_loss(layer4_biases))

    # Add the regularization term to the loss.
    loss = tf.reduce_mean(loss + lamb_reg * regularizers)

    # Optimizer.
    optimizer = tf.train.RMSPropOptimizer(1e-4).minimize(loss)


    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset, 1.0))

    loss_v = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model(tf_valid_dataset,1.0), tf_valid_labels))

    test_prediction = tf.nn.softmax(model(tf_test_dataset, 1.0))

In [37]:
test_prob = run_session(2000, "Deep_NN", 0.5)

Instructions for updating:
Please switch to tf.summary.merge_all.
Instructions for updating:
Please switch to tf.summary.FileWriter. The interface and behavior is the same; this is just a rename.
Instructions for updating:
Use `tf.global_variables_initializer` instead.
Initialized
Minibatch loss at epoch 0: 1.0940539836883545
Validation loss at epoch 0: 1.100120186805725
Minibatch accuracy: 40.0
Validation accuracy: 0.0
Minibatch loss at epoch 100: 1.0323878526687622
Validation loss at epoch 100: 1.025712251663208
Minibatch accuracy: 100.0
Validation accuracy: 100.0
Minibatch loss at epoch 200: 0.6471344232559204
Validation loss at epoch 200: 0.6057382822036743
Minibatch accuracy: 100.0
Validation accuracy: 100.0
Minibatch loss at epoch 300: 0.24607500433921814
Validation loss at epoch 300: 0.2461690902709961
Minibatch accuracy: 100.0
Validation accuracy: 100.0
Minibatch loss at epoch 400: 0.07835769653320312
Validation loss at epoch 400: 0.06977048516273499
Minibatch accuracy: 100.0
V

In [38]:
print(test_prob)
print(le.inverse_transform(np.argmax(test_prob, 1)))

[[ 0.97682697  0.01609214  0.00708086]]
['Quercus_Brantii']
