In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

  from ._conv import register_converters as _register_converters


In [2]:
# Read the training table from the CSV file in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="train.csv")
# Display the first five rows as this cell's output.
df.head(n=5)

Unnamed: 0,Id,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,...,Soil_Type32,Soil_Type33,Soil_Type34,Soil_Type35,Soil_Type36,Soil_Type37,Soil_Type38,Soil_Type39,Soil_Type40,Cover_Type
0,1,2596,51,3,258,0,510,221,232,148,...,0,0,0,0,0,0,0,0,0,5
1,2,2590,56,2,212,-6,390,220,235,151,...,0,0,0,0,0,0,0,0,0,5
2,3,2804,139,9,268,65,3180,234,238,135,...,0,0,0,0,0,0,0,0,0,2
3,4,2785,155,18,242,118,3090,238,238,122,...,0,0,0,0,0,0,0,0,0,2
4,5,2595,45,2,153,-1,391,220,234,150,...,0,0,0,0,0,0,0,0,0,5


### Feature Engineering

In [3]:
# Combine the horizontal and vertical hydrology offsets into one Euclidean
# distance: sqrt(horizontal^2 + vertical^2).
horizontal_offset = df['Horizontal_Distance_To_Hydrology']
vertical_offset = df['Vertical_Distance_To_Hydrology']
df['Distance_To_Hydrology'] = np.sqrt(horizontal_offset ** 2 + vertical_offset ** 2)

# Arithmetic mean of the three hillshade columns (9am, noon, 3pm).
hillshade_total = df['Hillshade_9am'] + df['Hillshade_Noon'] + df['Hillshade_3pm']
df['Average_Hillshade'] = hillshade_total / 3

### Preprocessing Part I: Train/Test/Validation Split and Feature Scaling

In [4]:
# Fixed seed so that the train/test/validation partition -- and therefore every
# metric reported further down -- is the same after a kernel restart.
SPLIT_SEED = 42

# One-hot encode the target: one indicator column per distinct Cover_Type value.
y = pd.get_dummies(df['Cover_Type'])
# Every column except the target and the `Id` column is used as a feature.
x = df.drop(['Cover_Type', 'Id'], axis='columns')

# First split: 80% of the rows for training, 20% held out.
x_train_1, x_test_1, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=SPLIT_SEED)
# Second split: halve the held-out rows into a test set and a validation set
# (10% of the full data each).
x_test_1, x_valid_1, y_test, y_valid = train_test_split(
    x_test_1, y_test, test_size=0.5, random_state=SPLIT_SEED)

In [5]:
# Columns that get standardised: everything whose name does not start with
# 'Soil', 'Id' or 'Cover'. `str.startswith` accepts a tuple of prefixes.
numerical_var = [
    column for column in df.columns
    if not column.startswith(('Soil', 'Id', 'Cover'))
]
# Soil-type indicator columns, passed through unscaled.
dummy_var = [column for column in df.columns if column.startswith('Soil')]

In [6]:
# Learn the per-column mean and standard deviation from the training split only,
# then apply that same transformation to all three splits.
scaler = StandardScaler()
scaler.fit(x_train_1[numerical_var])

# Each design matrix is [scaled numerical columns | raw soil indicator columns].
x_train = np.hstack([scaler.transform(x_train_1[numerical_var]), x_train_1[dummy_var].values])
x_test = np.hstack([scaler.transform(x_test_1[numerical_var]), x_test_1[dummy_var].values])
x_valid = np.hstack([scaler.transform(x_valid_1[numerical_var]), x_valid_1[dummy_var].values])

### Preprocessing Part II: Cast to float32 NumPy Arrays

In [7]:
# Cast every feature matrix and one-hot label table to float32, the dtype used
# by the TensorFlow placeholders and constants defined in the model cells.
train_dataset, test_dataset, valid_dataset = (
    features.astype(np.float32) for features in (x_train, x_test, x_valid)
)
train_labels, test_labels, valid_labels = (
    targets.values.astype(np.float32) for targets in (y_train, y_test, y_valid)
)

# Report the (rows, columns) shape of each split as a sanity check.
for split_name, split_features, split_targets in (
        ('Training set', train_dataset, train_labels),
        ('Test set', test_dataset, test_labels),
        ('Validation set', valid_dataset, valid_labels)):
    print(split_name, split_features.shape, split_targets.shape)

Training set (12096, 56) (12096, 7)
Test set (1512, 56) (1512, 7)
Validation set (1512, 56) (1512, 7)


In [8]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class equals the labelled class.

    Both arguments are indexed as 2-D arrays with one row per sample. The class
    of a row is the column index of its largest entry (``argmax`` along axis 1),
    so ``predictions`` may hold probabilities or scores and ``labels`` may be
    one-hot encoded.

    Returns:
        A float in [0, 100]: 100 * (number of matching rows) / (number of rows
        in ``predictions``).
    """
    predicted_classes = np.argmax(predictions, axis=1)
    labelled_classes = np.argmax(labels, axis=1)
    n_correct = np.sum(predicted_classes == labelled_classes)
    return 100.0 * n_correct / predictions.shape[0]

### Three-Layer Network (One Hidden Layer)

In [9]:
# Graph definition: input -> one ReLU hidden layer (with dropout) -> softmax.
#
# NOTE(review): the "Deep Neural Network" cell below rebinds `graph` and the
# tensor names defined here, so the training cell trains whichever of the two
# graph cells was executed last.

# Input and output widths are read from the prepared arrays instead of being
# hard-coded, so the graph stays valid if the feature set or the number of
# classes changes.
n_features = train_dataset.shape[1]
layer_1 = 35                        # number of hidden units
layer_last = train_labels.shape[1]  # one logit per class
#epsilon = 1e-3

batch_size = 128

# Probability of keeping each hidden activation when dropout is applied.
keep_prob = 0.75

graph = tf.Graph()
with graph.as_default():
    # Mini-batches are fed at run time; the test and validation sets are baked
    # into the graph as constants.
    tf_train_dataset = tf.placeholder(tf.float32, shape=[batch_size, n_features])
    tf_train_labels = tf.placeholder(tf.float32, shape=([batch_size, layer_last]))
    tf_test_dataset = tf.constant(test_dataset)
    tf_valid_dataset = tf.constant(valid_dataset)

    #Variables
    weights_1 = tf.Variable(tf.truncated_normal([n_features, layer_1]))
    biases_1 = tf.Variable(tf.zeros([layer_1]))
    # beta_1 / scale_1 are only referenced by the batch-normalisation lines that
    # are commented out below; they are kept so the set of graph variables is
    # unchanged.
    beta_1 = tf.Variable(tf.zeros([layer_1]))
    scale_1 = tf.Variable(tf.ones([layer_1]))

    weights_2 = tf.Variable(tf.truncated_normal([layer_1, layer_last]))
    biases_2 = tf.Variable(tf.zeros([layer_last]))

    # Training computation: dropout is applied to the hidden activations only
    # on this path.
    activation_1 = tf.nn.dropout(tf.nn.relu(tf.matmul(tf_train_dataset, weights_1) + biases_1), keep_prob)
    #batch_mean_1, batch_var_1 = tf.nn.moments(activation_1,[0]) #get mean and variance for batch normalization
    #activation_1 = tf.nn.batch_normalization(activation_1, batch_mean_1, batch_var_1, beta_1, scale_1, epsilon)
    activation_last = tf.matmul(activation_1, weights_2) + biases_2
    # Mean softmax cross-entropy over the batch plus an L2 penalty on both
    # weight matrices.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=activation_last, labels=tf_train_labels)) \
       + 0.01*tf.nn.l2_loss(weights_1) + 0.01*tf.nn.l2_loss(weights_2)

    # Optimizer: plain gradient descent with a staircase exponential decay of
    # the learning rate (multiplied by decay_rate every decay_every_n_steps).
    starter_learning_rate = 0.01
    # The step counter is bookkeeping, not a model parameter, so it is excluded
    # from the trainable variables.
    global_step = tf.Variable(0, trainable=False)
    decay_every_n_steps = 10000
    decay_rate = 0.96
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, decay_every_n_steps, decay_rate, staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Predictions. The evaluation paths reuse the trained weights without dropout.
    train_prediction = tf.nn.softmax(activation_last)
    test_logits = tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, weights_1) + biases_1), weights_2) + biases_2
    test_prediction = tf.nn.softmax(test_logits)
    valid_logits = tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, weights_1) + biases_1), weights_2) + biases_2
    valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=valid_logits, labels=valid_labels))
    # Reuse valid_logits so the validation forward pass is built only once.
    valid_prediction = tf.nn.softmax(valid_logits)

### Deep Neural Network

In [10]:
# Graph definition: input -> two ReLU hidden layers (each with dropout) -> softmax.

# Input and output widths are read from the prepared arrays instead of being
# hard-coded, so the graph stays valid if the feature set or the number of
# classes changes.
n_features = train_dataset.shape[1]
layer_1 = 1024                      # units in the first hidden layer
layer_2 = 512                       # units in the second hidden layer
layer_last = train_labels.shape[1]  # one logit per class

batch_size = 128

# Probability of keeping each hidden activation when dropout is applied.
keep_prob = 0.5

graph = tf.Graph()
with graph.as_default():
    # Mini-batches are fed at run time; the test and validation sets are baked
    # into the graph as constants.
    tf_train_dataset = tf.placeholder(tf.float32, shape=[batch_size, n_features])
    tf_train_labels = tf.placeholder(tf.float32, shape=([batch_size, layer_last]))
    tf_test_dataset = tf.constant(test_dataset)
    tf_valid_dataset = tf.constant(valid_dataset)

    # Variables. Each weight matrix is initialised with stddev sqrt(1 / fan_in).
    weights_1 = tf.Variable(tf.truncated_normal([n_features, layer_1], stddev=np.sqrt(1.0/n_features)))
    biases_1 = tf.Variable(tf.zeros([layer_1]))
    # beta_1 / scale_1 are not used by any op in this graph; they are kept so
    # the set of graph variables is unchanged.
    beta_1 = tf.Variable(tf.zeros([layer_1]))
    scale_1 = tf.Variable(tf.ones([layer_1]))

    weights_2 = tf.Variable(tf.truncated_normal([layer_1, layer_2], stddev=np.sqrt(1.0/layer_1)))
    biases_2 = tf.Variable(tf.zeros([layer_2]))

    weights_3 = tf.Variable(tf.truncated_normal([layer_2, layer_last], stddev=np.sqrt(1.0/layer_2)))
    biases_3 = tf.Variable(tf.zeros([layer_last]))

    # Training computation: dropout follows each hidden ReLU on this path only.
    activation_1 = tf.nn.dropout(tf.nn.relu(tf.matmul(tf_train_dataset, weights_1) + biases_1), keep_prob)
    activation_2 = tf.nn.dropout(tf.nn.relu(tf.matmul(activation_1, weights_2) + biases_2), keep_prob)
    activation_last = tf.matmul(activation_2, weights_3) + biases_3
    # Mean softmax cross-entropy over the batch (no weight penalty in this graph).
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=activation_last, labels=tf_train_labels))

    # Optimizer: plain gradient descent with a staircase exponential decay of
    # the learning rate (multiplied by decay_rate every decay_every_n_steps).
    starter_learning_rate = 0.05
    # The step counter is bookkeeping, not a model parameter, so it is excluded
    # from the trainable variables.
    global_step = tf.Variable(0, trainable=False)
    decay_every_n_steps = 100000
    decay_rate = 0.96
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, decay_every_n_steps, decay_rate, staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # Predictions. The evaluation paths reuse the trained weights without dropout.
    train_prediction = tf.nn.softmax(activation_last)

    test_hidden_1 = tf.nn.relu(tf.matmul(tf_test_dataset, weights_1) + biases_1)
    test_hidden_2 = tf.nn.relu(tf.matmul(test_hidden_1, weights_2) + biases_2)
    test_logits = tf.matmul(test_hidden_2, weights_3) + biases_3
    test_prediction = tf.nn.softmax(test_logits)

    valid_hidden_1 = tf.nn.relu(tf.matmul(tf_valid_dataset, weights_1) + biases_1)
    valid_hidden_2 = tf.nn.relu(tf.matmul(valid_hidden_1, weights_2) + biases_2)
    valid_logits = tf.matmul(valid_hidden_2, weights_3) + biases_3
    valid_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=valid_logits, labels=valid_labels))
    valid_prediction = tf.nn.softmax(valid_logits)

In [11]:
# Train the graph currently bound to `graph` with mini-batch gradient descent,
# printing minibatch and validation metrics at a fixed interval and the test
# accuracy once at the end.
num_steps = 10001
report_every = 500  # print metrics every this many steps

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch of exactly batch_size consecutive rows.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        # One optimisation step; also fetch the batch loss and predictions.
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % report_every == 0:
            # Fetch both validation tensors in a single run so the validation
            # forward pass is executed once per report rather than twice.
            current_valid_loss, current_valid_prediction = session.run(
                [valid_loss, valid_prediction])
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation loss at step %d: %f" % (step, current_valid_loss))
            print("Validation accuracy: %.1f%%" % accuracy(current_valid_prediction, valid_labels))

    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 1.992085
Minibatch accuracy: 12.5%
Validation loss at step 0: 1.927446
Validation accuracy: 22.8%
Minibatch loss at step 500: 0.834488
Minibatch accuracy: 64.1%
Validation loss at step 500: 0.734411
Validation accuracy: 70.0%
Minibatch loss at step 1000: 0.776254
Minibatch accuracy: 61.7%
Validation loss at step 1000: 0.662460
Validation accuracy: 72.2%
Minibatch loss at step 1500: 0.613852
Minibatch accuracy: 75.0%
Validation loss at step 1500: 0.640668
Validation accuracy: 71.8%
Minibatch loss at step 2000: 0.639938
Minibatch accuracy: 75.0%
Validation loss at step 2000: 0.618532
Validation accuracy: 74.6%
Minibatch loss at step 2500: 0.665593
Minibatch accuracy: 75.8%
Validation loss at step 2500: 0.595699
Validation accuracy: 75.8%
Minibatch loss at step 3000: 0.742637
Minibatch accuracy: 70.3%
Validation loss at step 3000: 0.599003
Validation accuracy: 74.6%
Minibatch loss at step 3500: 0.585541
Minibatch accuracy: 75.8%
Validation loss at ste