In [None]:
import numpy as np

In [None]:
class Dataset:
  """Named dataset tagged with the learning mode used to score it.

  Only `name` and `mode` are set by the constructor. `train_xs`, `test_xs`
  and `validate_xs` are read by `__str__` and `train_count` but are not
  created here, so they must be assigned to the instance beforehand.
  """

  def __init__(self, name, mode):
    self.name, self.mode = name, mode

  def __str__(self):
    # Rendered as name(mode, train+test+validate).
    sizes = (len(self.train_xs), len(self.test_xs), len(self.validate_xs))
    return '{}({}, {}+{}+{})'.format(self.name, self.mode, *sizes)

  @property
  def train_count(self):
    """Number of training samples currently stored in `train_xs`."""
    return len(self.train_xs)

In [None]:
def dataset_get_train_data(self, batch_size, nth):
  """Return the `nth` mini-batch of training inputs and targets.

  The batch is chosen through `self.indices`, so it follows whatever order
  that index array currently holds.

  Args:
    batch_size: number of samples per batch.
    nth: zero-based batch number.

  Returns:
    Tuple `(train_X, train_Y)` taken from `self.train_xs` / `self.train_ys`.
  """
  start = nth * batch_size
  stop = (nth + 1) * batch_size
  batch = self.indices[start:stop]
  return self.train_xs[batch], self.train_ys[batch]

def dataset_shuffle_train_data(self, size):
  """Store a fresh random permutation of `0..size-1` in `self.indices`.

  Uses NumPy's global random state, so results depend on `np.random.seed`.
  """
  order = self.indices = np.arange(size)
  # In-place shuffle; `order` and `self.indices` are the same array.
  np.random.shuffle(order)

# Attach the two functions above to Dataset so they are callable as methods.
Dataset.get_train_data = dataset_get_train_data
Dataset.shuffle_train_data = dataset_shuffle_train_data

In [None]:
def dataset_get_test_data(self):
  """Return the stored test inputs and targets as an `(xs, ys)` tuple."""
  test_pair = (self.test_xs, self.test_ys)
  return test_pair
# Attach the function above to Dataset so it is callable as a method.
Dataset.get_test_data = dataset_get_test_data

In [None]:
def dataset_get_validate_data(self, count):
  """Return up to `count` randomly chosen validation samples.

  The whole validation set is re-shuffled on every call (NumPy global random
  state) and the permutation is kept in `self.validate_indices`.

  Returns:
    Tuple `(validate_X, validate_Y)` selected with the same indices.
  """
  order = self.validate_indices = np.arange(len(self.validate_xs))
  np.random.shuffle(order)
  # Slicing clips automatically when count exceeds the validation set size.
  picked = order[0:count]
  return self.validate_xs[picked], self.validate_ys[picked]

# Attach the sampler to Dataset under two names: get_visualize_data is an
# alias of the same function, so it also draws from the validation set.
Dataset.get_validate_data = dataset_get_validate_data
Dataset.get_visualize_data = dataset_get_validate_data

In [None]:
def dataset_shuffle_data(self, xs, ys, train_ratio=0.8, validate_ratio=0.05):
  """Randomly split `xs`/`ys` into train, validation and test subsets.

  A single random permutation of the sample indices (NumPy global random
  state) is cut into three contiguous, non-overlapping slices, so every
  sample lands in exactly one subset. The results are stored on `self` as
  `train_xs/ys`, `validate_xs/ys` and `test_xs/ys`; `input_shape` and
  `output_shape` are taken from the first sample.

  Args:
    xs: indexable inputs supporting integer-array indexing (e.g. ndarray).
    ys: targets aligned with `xs`.
    train_ratio: fraction used for training; the resulting count is rounded
      down to a multiple of 10.
    validate_ratio: fraction used for validation (rounded down).

  Returns:
    Tuple `(train_indices, validate_indices, test_indices)` of positions into
    the original `xs`/`ys`. The test subset is whatever remains after the
    train and validation slices.
  """
  data_count = len(xs)

  train_cnt = int(data_count * train_ratio / 10) * 10
  validate_cnt = int(data_count * validate_ratio)
  validate_to = train_cnt + validate_cnt

  indices = np.arange(data_count)
  np.random.shuffle(indices)

  # The validation slice starts exactly where the training slice ends;
  # starting one later would silently drop a sample from every subset.
  train_idx = indices[:train_cnt]
  validate_idx = indices[train_cnt:validate_to]
  test_idx = indices[validate_to:]

  self.train_xs, self.train_ys = xs[train_idx], ys[train_idx]
  self.validate_xs, self.validate_ys = xs[validate_idx], ys[validate_idx]
  self.test_xs, self.test_ys = xs[test_idx], ys[test_idx]

  self.input_shape = xs[0].shape
  self.output_shape = ys[0].shape

  return train_idx, validate_idx, test_idx

# Attach the function above to Dataset so it is callable as a method.
Dataset.shuffle_data = dataset_shuffle_data

In [None]:
def dataset_forward_postproc(self, output, y, mode=None):
  """Compute the scalar loss for a forward pass, plus data for backprop.

  Args:
    output: raw model output.
    y: target values aligned with `output`.
    mode: 'regression', 'binary' or 'select'; defaults to `self.mode`.

  Returns:
    Tuple `(loss, aux)`. `loss` is the mean of the per-element loss and `aux`
    is the mode-specific payload to pass to the backprop step:
      regression -> `output - y`
      binary     -> `[y, output]`
      select     -> `[output, y, entropy]`

  Raises:
    ValueError: if `mode` is not one of the supported modes.

  Note:
    The 'binary' and 'select' branches call
    `sigmoid_cross_entropy_with_logits` / `softmax_cross_entropy_with_logits`,
    which are not defined in this part of the notebook and must already be
    in scope.
  """
  if mode is None:
    mode = self.mode

  if mode == 'regression':
    # Mean squared error; the raw difference is kept for the gradient.
    diff = output - y
    square = np.square(diff)
    loss = np.mean(square)
    aux = diff
  elif mode == 'binary':
    entropy = sigmoid_cross_entropy_with_logits(y, output)
    loss = np.mean(entropy)
    aux = [y, output]
  elif mode == 'select':
    entropy = softmax_cross_entropy_with_logits(y, output)
    loss = np.mean(entropy)
    aux = [output, y, entropy]
  else:
    # Fail with a clear message instead of an UnboundLocalError on return.
    raise ValueError('unknown mode: {!r}'.format(mode))

  return loss, aux

# Attach the function above to Dataset so it is callable as a method.
Dataset.forward_postproc = dataset_forward_postproc

In [None]:
def dataset_backprop_postproc(self, G_loss, aux, mode=None):
  """Propagate the loss gradient back to the model output.

  Args:
    G_loss: gradient flowing into the loss value.
    aux: the mode-specific payload; the expected layout per mode is
      regression -> `diff`, binary -> `[y, output]`,
      select -> `[output, y, entropy]`.
    mode: 'regression', 'binary' or 'select'; defaults to `self.mode`.

  Returns:
    Gradient of the loss with respect to `output`.

  Raises:
    ValueError: if `mode` is not one of the supported modes.

  Note:
    The 'binary' and 'select' branches call
    `sigmoid_cross_entropy_with_logits_derv` /
    `softmax_cross_entropy_with_logits_derv`, which are not defined in this
    part of the notebook and must already be in scope.
  """
  if mode is None:
    mode = self.mode

  if mode == 'regression':
    diff = aux
    shape = diff.shape

    # Chain rule for loss = mean(square(diff)), diff = output - y.
    g_loss_square = np.ones(shape) / np.prod(shape)
    g_square_diff = 2 * diff
    g_diff_output = 1

    G_square = g_loss_square * G_loss
    G_diff = g_square_diff * G_square
    G_output = g_diff_output * G_diff
  elif mode == 'binary':
    y, output = aux
    shape = output.shape

    # Derivative of the mean: each element contributes 1 / element count.
    g_loss_entropy = np.ones(shape) / np.prod(shape)
    g_entropy_output = sigmoid_cross_entropy_with_logits_derv(y, output)

    G_entropy = g_loss_entropy * G_loss
    G_output = g_entropy_output * G_entropy
  elif mode == 'select':
    output, y, entropy = aux

    # Scalar 1 / N with N taken from the shape of `entropy`, not `output`.
    g_loss_entropy = 1.0 / np.prod(entropy.shape)
    g_entropy_output = softmax_cross_entropy_with_logits_derv(y, output)

    G_entropy = g_loss_entropy * G_loss
    G_output = g_entropy_output * G_entropy
  else:
    # Fail with a clear message instead of an UnboundLocalError on return.
    raise ValueError('unknown mode: {!r}'.format(mode))

  return G_output
# Attach the function above to Dataset so it is callable as a method.
Dataset.backprop_postproc = dataset_backprop_postproc

In [None]:
def dataset_eval_accuracy(self, x, y, output, mode=None):
  """Score model `output` against targets `y` for the given mode.

  Args:
    x: inputs; accepted for interface compatibility but not used here.
    y: target values.
    output: raw model output aligned with `y`.
    mode: 'regression', 'binary' or 'select'; defaults to `self.mode`.

  Returns:
    regression -> `1 - RMSE / mean(y)`
    binary     -> fraction of samples where `(output > 0)` matches `(y == 1.0)`
    select     -> fraction of rows where argmax over axis 1 of `output` and
                  `y` agree

  Raises:
    ValueError: if `mode` is not one of the supported modes.
  """
  if mode is None:
    mode = self.mode

  if mode == 'regression':
    mse = np.mean(np.square(output - y))
    # NOTE(review): divides by mean(y); undefined when the targets average
    # to zero. Left as-is to keep the metric's definition unchanged.
    accuracy = 1 - np.sqrt(mse) / np.mean(y)
  elif mode == 'binary':
    # The output is compared against 0 without applying a sigmoid first.
    estimate = np.greater(output, 0)
    answer = np.equal(y, 1.0)
    correct = np.equal(estimate, answer)
    accuracy = np.mean(correct)
  elif mode == 'select':
    estimate = np.argmax(output, axis=1)
    answer = np.argmax(y, axis=1)
    correct = np.equal(estimate, answer)
    accuracy = np.mean(correct)
  else:
    # Fail with a clear message instead of an UnboundLocalError on return.
    raise ValueError('unknown mode: {!r}'.format(mode))

  return accuracy

# Attach the function above to Dataset so it is callable as a method.
Dataset.eval_accuracy = dataset_eval_accuracy

In [None]:
def dataset_get_estimate(self, output, mode=None):
  """Convert raw model output into the estimate reported for the mode.

  Args:
    output: raw model output.
    mode: 'regression', 'binary' or 'select'; defaults to `self.mode`.

  Returns:
    regression -> `output` unchanged
    binary     -> `sigmoid(output)`
    select     -> `softmax(output)`

  Raises:
    ValueError: if `mode` is not one of the supported modes.

  Note:
    `sigmoid` and `softmax` are not defined in this part of the notebook and
    must already be in scope for the 'binary' and 'select' modes.
  """
  if mode is None:
    mode = self.mode

  if mode == 'regression':
    estimate = output
  elif mode == 'binary':
    estimate = sigmoid(output)
  elif mode == 'select':
    estimate = softmax(output)
  else:
    # Fail with a clear message instead of an UnboundLocalError on return.
    raise ValueError('unknown mode: {!r}'.format(mode))

  return estimate

# Attach the function above to Dataset so it is callable as a method.
Dataset.get_estimate = dataset_get_estimate

In [None]:
def dataset_train_prt_result(self, epoch, costs, accs, acc, time1, time2):
  """Print a one-line training report for an epoch.

  Args:
    epoch: epoch label, printed as-is.
    costs: per-batch costs; their mean is printed.
    accs: per-batch accuracies; their mean is printed.
    acc: a single accuracy value printed after the mean accuracy.
    time1, time2: two durations printed as "(time1/time2 secs)".
  """
  template = 'Epoch {}: cost={:5.3f}, accuracy={:5.3f}/{:5.3f} ({}/{} secs)'
  mean_cost = np.mean(costs)
  mean_acc = np.mean(accs)
  print(template.format(epoch, mean_cost, mean_acc, acc, time1, time2))

def dataset_test_prt_result(self, name, acc, time):
  """Print the final test report for model `name`, followed by a blank line.

  Args:
    name: model name, printed as-is.
    acc: test accuracy, printed with three decimals.
    time: elapsed time, printed as-is before "secs".
  """
  template = 'Model {} test report: accuracy={:5.3f}, ({} secs)\n'
  report = template.format(name, acc, time)
  print(report)

# Attach the two report printers above to Dataset so they are callable as methods.
Dataset.train_prt_result = dataset_train_prt_result
Dataset.test_prt_result = dataset_test_prt_result