In [1]:
import json
import os

import frame_level_models
import video_level_models
import readers
import utils
import export_model
import trainer

import tensorflow as tf
from tensorflow import logging
from tensorflow import flags

# Shared handle to the flag values; the rest of the notebook reads them as
# attributes of this object (e.g. FLAGS.num_classes, FLAGS.frame_features).
FLAGS = flags.FLAGS

In [2]:
## Define all flags

# Dataset flags.
flags.DEFINE_string(
    "train_dir",
    "models/youtube_model.ckpt",
    "The directory to save the model files in.")
# NOTE(review): this default is the same path as the --train_dir default
# above, which reads like a checkpoint location rather than a dataset file
# glob -- confirm the intended value.
flags.DEFINE_string(
    "train_data_pattern",
    "models/youtube_model.ckpt",
    "File glob for the training dataset. If the files refer to Frame Level "
    "features (i.e. tensorflow.SequenceExample), then set --reader_type "
    "format. The (Sequence)Examples are expected to have 'rgb' byte array "
    "sequence feature as well as a 'labels' int64 context feature.")
flags.DEFINE_string(
    "feature_names",
    "mean_audio",
    "Name of the feature "
    "to use for training.")
flags.DEFINE_string(
    "feature_sizes",
    "128",
    "Length of the feature vectors.")
flags.DEFINE_integer(
    "num_classes",
    8192,
    "Number of classes in dataset.")

# Model flags.
# NOTE(review): per the help text below, --model and the dataset flags must
# agree with this frame-level setting -- confirm the defaults are compatible.
flags.DEFINE_bool(
    "frame_features",
    True,
    "If set, then --train_data_pattern must be frame-level features. "
    "Otherwise, --train_data_pattern must be aggregated video-level "
    "features. The model must also be set appropriately (i.e. to read 3D "
    "batches VS 4D batches.")
flags.DEFINE_string(
    "model",
    "LogisticModel",
    "Which architecture to use for the model. Models are defined "
    "in models.py.")
flags.DEFINE_bool(
    "start_new_model",
    True,
    "If set, this will not resume from a checkpoint and will instead create a"
    " new model instance.")

In [3]:
# Training flags: batch size, loss, regularization, learning-rate schedule
# and the limits on how long training runs.
flags.DEFINE_integer(
    "batch_size",
    1024,
    "How many examples to process per batch for training.")
flags.DEFINE_string(
    "label_loss",
    "CrossEntropyLoss",
    "Which loss function to use for training the model.")
flags.DEFINE_float(
    "regularization_penalty",
    1.0,
    "How much weight to give to the regularization loss (the label loss has "
    "a weight of 1).")
flags.DEFINE_float(
    "base_learning_rate",
    0.01,
    "Which learning rate to start with.")
flags.DEFINE_float(
    "learning_rate_decay",
    0.95,
    "Learning rate decay factor to be applied every "
    "learning_rate_decay_examples.")
# Declared as a float flag even though the default is an integer literal.
flags.DEFINE_float(
    "learning_rate_decay_examples",
    4000000,
    "Multiply current learning rate by learning_rate_decay "
    "every learning_rate_decay_examples.")
flags.DEFINE_integer(
    "num_epochs",
    5,
    "How many passes to make over the dataset before "
    "halting training.")
flags.DEFINE_integer(
    "max_steps",
    10,
    "The maximum number of iterations of the training loop.")
flags.DEFINE_integer(
    "export_model_steps",
    1000,
    "The period, in number of steps, with which the model "
    "is exported for batch prediction.")

In [4]:
# Other flags: input threading, optimizer choice, gradient clipping and
# device-placement logging.
flags.DEFINE_integer(
    "num_readers",
    8,
    "How many threads to use for reading input files.")
flags.DEFINE_string(
    "optimizer",
    "AdamOptimizer",
    "What optimizer class to use.")
flags.DEFINE_float(
    "clip_gradient_norm",
    1.0,
    "Norm to clip gradients to.")
flags.DEFINE_bool(
    "log_device_placement",
    False,
    "Whether to write the device on which every op will run into the "
    "logs on startup.")

In [5]:
## Helper functions that are called further down

def find_class_by_name(name, modules):
  """Searches the provided modules for the named class and returns it.

  Args:
    name: Attribute name to look up on each module.
    modules: Iterable of modules (or other objects) that are searched in
      the order given.

  Returns:
    The first truthy attribute called `name` found on one of `modules`.

  Raises:
    ValueError: If none of the modules provides a truthy attribute `name`.
      (Previously a bare StopIteration without any message escaped here.)
  """
  # Materialize once so the iterable can be used again for the error message.
  searched = list(modules)
  for module in searched:
    candidate = getattr(module, name, None)
    if candidate:
      return candidate

  searched_names = ", ".join(
      getattr(module, "__name__", repr(module)) for module in searched)
  raise ValueError(
      "Class %r was not found in any of the searched modules: [%s]"
      % (name, searched_names))

def task_as_string(task):
  """Formats a task as "/job:<type>/task:<index>".

  Args:
    task: Object exposing `type` and `index` attributes.

  Returns:
    The formatted string.
  """
  job_name, task_index = task.type, task.index
  return "/job:%s/task:%s" % (job_name, task_index)

def get_reader():
  """Builds the dataset reader selected by the flags.

  Returns:
    A `readers.YT8MFrameFeatureReader` when `FLAGS.frame_features` is set,
    otherwise a `readers.YT8MAggregatedFeatureReader`. Either one is
    constructed from `FLAGS.num_classes` and the feature names/sizes derived
    from `FLAGS.feature_names` / `FLAGS.feature_sizes`.
  """
  # Convert the feature_names and feature_sizes flag values to lists.
  names, sizes = utils.GetListOfFeatureNamesAndSizes(
      FLAGS.feature_names, FLAGS.feature_sizes)
  class_count = FLAGS.num_classes

  # Pick the reader class first; both are constructed with the same arguments.
  if FLAGS.frame_features:
    reader_cls = readers.YT8MFrameFeatureReader
  else:
    reader_cls = readers.YT8MAggregatedFeatureReader

  return reader_cls(
      num_classes=class_count,
      feature_names=names,
      feature_sizes=sizes)

In [6]:
## The lines below come from the 'main' function at the bottom

# Load the environment. TF_CONFIG may be unset, empty or whitespace-only;
# all three are treated as "no configuration" so that json.loads is never
# handed an empty document (which it rejects).
env = json.loads(os.environ.get("TF_CONFIG", "").strip() or "{}")

# Load the cluster data from the environment. Without cluster data no
# ClusterSpec is created and `cluster` stays None.
cluster_data = env.get("cluster", None)
cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

# Load the task data from the environment, falling back to master task 0.
task_data = env.get("task", None) or {"type": "master", "index": 0}
# Build a throwaway class whose class attributes are the task fields, so they
# can be read as task.type / task.index.
task = type("TaskSpec", (object,), task_data)

# Log the TensorFlow version at INFO verbosity.
logging.set_verbosity(tf.logging.INFO)
logging.info("%s: Tensorflow version: %s.",
             task_as_string(task), tf.__version__)

INFO:tensorflow:/job:master/task:0: Tensorflow version: 1.3.0.


In [8]:
# Dispatch to a master, a worker, or a parameter server.
# Only the master/worker setup is active: it builds the model, the reader and
# the model exporter. Starting the trainer and the parameter-server / invalid
# task branches are disabled and kept below as comments.
if not cluster or task.type in ("master", "worker"):
  # Look the model class up by name in both model modules and instantiate it.
  model = find_class_by_name(
      FLAGS.model, [frame_level_models, video_level_models])()

  reader = get_reader()

  model_exporter = export_model.ModelExporter(
      frame_features=FLAGS.frame_features, model=model, reader=reader)

#  trainer = trainer.Trainer(cluster, task, FLAGS.train_dir, model, reader, model_exporter,
#          FLAGS.log_device_placement, FLAGS.max_steps,
#          FLAGS.export_model_steps).run(start_new_model=FLAGS.start_new_model)

#elif task.type == "ps":
#ParameterServer(cluster, task).run()
#else:
#raise ValueError("%s: Invalid task_type: %s." %
#                 (task_as_string(task), task.type))