# Légszennyezettség - neurális háló tanítása

In [47]:
import pickle
import numpy as np
import tensorflow as tf

Load data:

In [2]:
def load_data(pickle_file):
    with open(pickle_file, 'rb') as f:
        return pickle.load(f)

In [39]:
def filter_input_data(cols_to_del, train_ds, valid_ds, test_ds):
    return tuple([np.delete(i, cols_to_del, axis=1) for i in [train_ds, valid_ds, test_ds]])

In [10]:
train_file_name = '../res/input_data/train.pickle'
valid_file_name = '../res/input_data/valid.pickle'
test_file_name = '../res/input_data/test.pickle'

train_data_full = load_data(train_file_name)
valid_data_full = load_data(valid_file_name)
test_data_full = load_data(test_file_name)

In [73]:
train_data = train_data_full[:, 0:-1].astype(np.float32)
train_labels = train_data_full[:, -1].reshape((-1, 1)).astype(np.float32)
valid_data = valid_data_full[:, 0:-1].astype(np.float32)
valid_labels = valid_data_full[:, -1].reshape((-1, 1)).astype(np.float32)
test_data = test_data_full[:, 0:-1].astype(np.float32)
test_labels = test_data_full[:, -1].reshape((-1, 1)).astype(np.float32)

train_labels_one_hot = np.hstack(((1 - train_labels), train_labels))
valid_labels_one_hot = np.hstack(((1 - valid_labels), valid_labels))
test_labels_one_hot = np.hstack(((1 - test_labels), test_labels))

cols_to_del = [0,4,13]
train_d_filtered, valid_d_filtered, test_d_filtered = filter_input_data(cols_to_del, train_data, valid_data, test_data)

In [85]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

def train(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels,
          dim, num_labels=2, learning_rate=0.3, batch_size=128, num_steps=1001, hidden_nodes=46):
    graph = tf.Graph()
    with graph.as_default():

      # Input data. For the training data, we use a placeholder that will be fed
      # at run time with a training minibatch.
      tf_train_dataset = tf.placeholder(tf.float32,
                                        shape=(batch_size, dim))
      tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
      tf_valid_dataset = tf.constant(valid_dataset)
      tf_test_dataset = tf.constant(test_dataset)

      # Variables.    
      weights1 = tf.Variable(tf.truncated_normal([dim, hidden_nodes]))
      biases1 = tf.Variable(tf.zeros([hidden_nodes]))
      weights2 = tf.Variable(tf.truncated_normal([hidden_nodes, num_labels]))
      biases2 = tf.Variable(tf.zeros([num_labels]))

      # Training computation.        
      first = tf.matmul(tf_train_dataset, weights1) + biases1
      hidden = tf.nn.relu(first)
      logits = tf.matmul(hidden, weights2) + biases2
      loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

      # Optimizer.
      optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

      # Predictions for the training, validation, and test data.
      train_prediction = tf.nn.softmax(logits)
      valid_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(
          tf.matmul(tf_valid_dataset, weights1) + biases1), weights2) + biases2)
      test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(
          tf.matmul(tf_test_dataset, weights1) + biases1), weights2) + biases2)

    with tf.Session(graph=graph) as session:
      tf.global_variables_initializer().run()
      print("Initialized")
      for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 500 == 0):
          print("Minibatch loss at step %d: %f" % (step, l))
          print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
          print("Validation accuracy: %.1f%%" % accuracy(
            valid_prediction.eval(), valid_labels))
      print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

In [87]:
batch_size = train_d_filtered.shape[0] - 1
dim = train_d_filtered.shape[1]
train(train_d_filtered, train_labels_one_hot, valid_d_filtered, valid_labels_one_hot, test_d_filtered, test_labels_one_hot,
      dim, num_steps=5000, learning_rate=0.5, batch_size=batch_size, hidden_nodes=100)

Initialized
Minibatch loss at step 0: 15222.566406
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 500: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 1000: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 1500: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 2000: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 2500: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 3000: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 3500: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 4000: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Minibatch loss at step 4500: 0.693140
Minibatch accuracy: 50.0%
Validation accuracy: 50.0%
Test accuracy: 96.5%
