In [2]:
# Stretch elements with the `.container` CSS class to the full page width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
import os
import sys

# Make modules located under the current working directory importable.
# The membership check keeps the cell idempotent: re-running it does not
# append the same directory to sys.path a second time.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

In [4]:
import argparse
import easydict
from pprint import pprint
from pythia.trainers.base_trainer import BaseTrainer

In [5]:
# Copyright (c) Facebook, Inc. and its affiliates.
import glob
import importlib
import os

from pythia.common.registry import registry
from pythia.utils.build_utils import build_trainer
from pythia.utils.distributed_utils import is_main_process
from pythia.utils.flags import flags


def _module_stem(path):
    """Return the file name of ``path`` with its extension removed."""
    return os.path.splitext(os.path.basename(path))[0]


def _default_repo_root():
    """Guess the directory that contains the ``pythia`` package.

    The working directory is tried first, because this notebook puts
    ``os.getcwd()`` on ``sys.path`` to make ``pythia`` importable. The directory
    two levels above the working directory is kept as a second candidate (and
    as the final fallback) for backward compatibility.
    """
    cwd = os.getcwd()
    legacy_root = os.path.join(os.path.dirname(cwd), "..")

    for candidate in (cwd, legacy_root):
        if os.path.isdir(os.path.join(candidate, "pythia")):
            return candidate

    return legacy_root


def setup_imports():
    """Import every task, model and trainer module so they register themselves.

    The package folder is taken from the registry key ``"pythia_root"`` when it
    is set. Otherwise it is ``<root>/pythia``, where ``<root>`` is the
    ``PYTHIA_PATH`` environment variable if defined, else the directory chosen
    by ``_default_repo_root``; the resulting folder is then stored in the
    registry under ``"pythia_path"``.

    Modules are imported in this order: ``pythia.common.meter``, files below
    ``tasks/`` (only ``*task.py`` and ``*builder.py``), files below ``models/``,
    files below ``trainers/``. Each file is classified by the folder it was
    found in, not by searching the full path for a substring, so a checkout
    path or file name that happens to contain "models" or "trainer" no longer
    sends a module to the wrong package.

    Returns:
        None.

    Raises:
        Any exception raised by ``importlib.import_module`` propagates.
    """
    root_folder = registry.get("pythia_root", no_warning=True)

    if root_folder is None:
        environment_pythia_path = os.environ.get("PYTHIA_PATH")

        if environment_pythia_path is not None:
            root_folder = environment_pythia_path
        else:
            root_folder = _default_repo_root()

        root_folder = os.path.join(root_folder, "pythia")
        # NOTE(review): the value is read from "pythia_root" above but written
        # to "pythia_path" here. Kept as is because other readers of either key
        # are not visible from this notebook - confirm which one is intended.
        registry.register("pythia_path", root_folder)

    tasks_pattern = os.path.join(root_folder, "tasks", "**", "*.py")
    model_pattern = os.path.join(root_folder, "models", "**", "*.py")
    trainer_pattern = os.path.join(root_folder, "trainers", "**", "*.py")

    importlib.import_module("pythia.common.meter")

    # Collect all three file lists before importing anything.
    task_files = glob.glob(tasks_pattern, recursive=True)
    model_files = glob.glob(model_pattern, recursive=True)
    trainer_files = glob.glob(trainer_pattern, recursive=True)

    for path in task_files:
        parts = path.split(os.sep)

        if path.endswith("task.py"):
            # .../tasks/<task_name>/<module>.py
            task_name = parts[-2]
            if task_name == "tasks":
                # File sits directly in tasks/ and belongs to no task package.
                continue
            importlib.import_module(
                "pythia.tasks." + task_name + "." + _module_stem(path)
            )
        elif path.endswith("builder.py"):
            # .../tasks/<task_name>/<dataset_name>/<module>.py
            task_name = parts[-3]
            dataset_name = parts[-2]
            if task_name == "tasks" or dataset_name == "tasks":
                # Not nested deeply enough to be a per-dataset builder.
                continue
            importlib.import_module(
                "pythia.tasks."
                + task_name
                + "."
                + dataset_name
                + "."
                + _module_stem(path)
            )

    for path in model_files:
        importlib.import_module("pythia.models." + _module_stem(path))

    for path in trainer_files:
        importlib.import_module("pythia.trainers." + _module_stem(path))


# def run():
#     setup_imports()
#     parser = flags.get_parser()
#     args = parser.parse_args()
#     trainer = build_trainer(args)

#     # Log any errors that occur to log file
#     try:
#         trainer.load()
#         trainer.train()
#     except Exception as e:
#         writer = getattr(trainer, "writer", None)

#         if writer is not None:
#             writer.write(e, "error", donot_print=True)
#         if is_main_process():
#             raise


# if __name__ == "__main__":
#     run()


In [6]:
# Import the task, model and trainer modules so they register with the
# registry, then obtain the argument parser from `flags`.
setup_imports()
parser = flags.get_parser()

In [7]:
# Notebook equivalent of the command line:
#   python tools/run.py --tasks captioning --datasets coco --model butd \
#       --config configs/captioning/coco/my_butd_local.yml
args = parser.parse_args(
    args=[
        "--tasks", "captioning",
        "--datasets", "coco",
        "--model", "butd",
        "--config", "configs/captioning/coco/my_butd_local.yml",
    ]
)

In [8]:
!cat configs/captioning/coco/my_butd_local.yml

includes:
- common/defaults/configs/tasks/captioning/coco.yml
model_attributes:
  butd: &butd
    model_data_dir: data/
    metrics:
    - type: caption_bleu4
    losses:
    - type: caption_cross_entropy
    classifier:
      type: language_decoder
      params:
        dropout: 0.5
        hidden_dim: 1024
        feature_dim: 2048
        fc_bias_init: 0
    image_feature_embeddings:
    - modal_combine:
        type: top_down_attention_lstm
        params:
          dropout: 0.5
          hidden_dim: 1024
          attention_dim: 1024
      normalization: softmax
      transform:
        type: linear
        params:
          out_dim: 1
    image_feature_dim: 2048
    embedding_dim: 300
    image_feature_encodings:
    - type: finetune_faster_rcnn_fpn_fc7
      params:
        bias_file: detectron/fc6/fc7_b.pkl
        weights_file: detectron/fc6/fc7_w.pkl
    inference:
      type: greedy
optimizer_attributes:
  type: Adamax
  params:
    ep

In [9]:
# Build the trainer object from the parsed command-line arguments.
trainer = build_trainer(args)

In [10]:
# NOTE(review): in the recorded run this call logs the training parameters and
# then stops with "ValueError: num_samples should be a positive integer value,
# but got num_samples=0". Presumably a dataset resolved to zero samples -
# verify the data paths used by the config before relying on later cells.
trainer.load()

Logging to: ./save/captioning_coco_butd/logs/captioning_coco_butd_2020-03-19T10:43:03.log
2020-03-19T10:43:03 INFO: =====  Training Parameters    =====
2020-03-19T10:43:03 INFO: {
    "batch_size": 2,
    "clip_gradients": true,
    "clip_norm_mode": "all",
    "data_parallel": true,
    "device": "cuda",
    "distributed": false,
    "evalai_inference": false,
    "experiment_name": "run",
    "load_pretrained": false,
    "local_rank": null,
    "log_dir": "./logs",
    "log_interval": 100,
    "logger_level": "info",
    "lr_ratio": 0.1,
    "lr_scheduler": true,
    "lr_steps": [
        1,
        1,
        1,
        1
    ],
    "max_epochs": null,
    "max_grad_l2_norm": 0.25,
    "max_iterations": 1,
    "metric_minimize": false,
    "monitored_metric": "caption_bleu4",
    "num_workers": 0,
    "patience": 1,
    "pin_memory": false,
    "pretrained_mapping": {},
    "resume": false,
    "resume_file": null,
    "run_type": "train+inference",
    "save_dir": "./save",
    "s

ValueError: num_samples should be a positive integer value, but got num_samples=0

In [11]:
# Pretty-print the trainer's instance attributes (e.g. `args`, `config`) to
# inspect the state it holds at this point.
pprint(vars(trainer))

{'args': Namespace(batch_size=None, clip_gradients=None, config='configs/captioning/coco/my_butd_local.yml', config_override=None, config_overwrite=None, data_parallel=None, datasets='coco', device=None, distributed=None, evalai_inference=None, experiment_name=None, fast_read=None, force_restart=False, load_pretrained=None, local_rank=None, log_dir=None, log_interval=None, logger_level=None, lr_scheduler=None, max_epochs=None, max_iterations=None, model='butd', num_workers=None, opts=[], patience=None, resume=None, resume_file=None, run_type=None, save_dir='./save', seed=None, should_not_log=False, snapshot_interval=None, tasks='captioning', verbose_dump=None),
 'config': {'datasets': 'coco',
            'model': 'butd',
            'model_attributes': {'butd': {'classifier': {'params': {'dropout': 0.5,
                                                                    'fc_bias_init': 0,
                                                                    'feature_dim': 2048,
         

# trainer.train()

```python
    def train(self):
        """Run the training loop, or only inference when training is disabled.

        Writes the model to the writer, then:

        * if ``"train"`` is not in ``self.run_type``, calls ``self.inference()``
          and returns;
        * otherwise loops over epochs and over batches of ``self.train_loader``
          until the iteration limit or the epoch limit is exceeded, or
          ``self._logistics`` asks to stop, and finally calls
          ``self.finalize()``.
        """
        self.writer.write("===== Model =====")
        self.writer.write(self.model)

        if "train" not in self.run_type:
            self.inference()
            return

        should_break = False

        # Only one limit stays finite: without max_epochs the iteration limit
        # applies; with max_epochs set, the iteration limit is disabled.
        if self.max_epochs is None:
            self.max_epochs = math.inf
        else:
            self.max_iterations = math.inf

        self.model.train()
        self.train_timer = Timer()
        self.snapshot_timer = Timer()

        self.profile("Setup Time")

        # Turns on autograd anomaly detection for everything that follows.
        # NOTE(review): PyTorch documents this as a debugging aid that slows
        # execution - confirm it is meant to stay enabled.
        torch.autograd.set_detect_anomaly(True)

        self.writer.write("Starting training...")
        while self.current_iteration < self.max_iterations and not should_break:
            self.current_epoch += 1
            registry.register("current_epoch", self.current_epoch)

            # Seed the sampler in case it is distributed
            self.task_loader.seed_sampler("train", self.current_epoch)

            # The epoch counter was already incremented above, so this stops
            # before starting epoch max_epochs + 1.
            if self.current_epoch > self.max_epochs:
                break

            for batch in self.train_loader:
                self.profile("Batch load time")
                self.current_iteration += 1
                self.writer.write(self.current_iteration, "debug")

                registry.register("current_iteration", self.current_iteration)

                # Leaves the batch loop; the while condition above then ends
                # the epoch loop as well.
                if self.current_iteration > self.max_iterations:
                    break

                # One optimisation step: scheduler, forward pass, meter update,
                # loss extraction, backward pass, then bookkeeping.
                self._run_scheduler()
                report = self._forward_pass(batch)
                self._update_meter(report, self.meter)
                loss = self._extract_loss(report)
                self._backward(loss)
                # _logistics returns whether training should stop.
                should_break = self._logistics(report)

                if should_break:
                    break

        self.finalize()
```

In [12]:
# Run the training entry point whose source is quoted in the markdown cell
# above. With no model attached to the trainer, this is the call that produces
# the recorded "'BaseTrainer' object has no attribute 'model'" error.
trainer.train()

AttributeError: 'BaseTrainer' object has no attribute 'model'

## trainer.inference()

```python
def inference(self):
    """Run the inference passes selected by ``self.run_type``.

    ``"val"`` in ``run_type`` triggers a run on the ``"val"`` split;
    ``"inference"`` or ``"predict"`` triggers a run on the ``"test"`` split.
    Both can happen in one call.
    """
    # NOTE(review): presumably run_type is a string such as "train+inference",
    # making these substring checks - confirm against the trainer's setup.
    if "val" in self.run_type:
        self._inference_run("val")

    if "inference" in self.run_type or "predict" in self.run_type:
        self._inference_run("test")
        
def _inference_run(self, dataset_type):
    """Evaluate one split and summarize the result.

    Args:
        dataset_type: Split name; used to look up the ``<dataset_type>_loader``
            attribute on the trainer and in the log messages.
    """
    # With evalai_inference enabled, predictions are produced by
    # predict_for_evalai and no evaluation summary is written here.
    if self.config.training_parameters.evalai_inference is True:
        self.predict_for_evalai(dataset_type)
        return

    self.writer.write("Starting inference on {} set".format(dataset_type))

    # Resolve the loader by name, e.g. self.val_loader or self.test_loader.
    report, meter = self.evaluate(
        getattr(self, "{}_loader".format(dataset_type)), use_tqdm=True
    )
    prefix = "{}: full {}".format(report.dataset_name, dataset_type)
    self._summarize_report(meter, prefix)
```

In [60]:
# NOTE(review): the recorded output of this cell is stale - its execution count
# (60) is out of sequence and its log timestamp (10:42:28) predates the
# trainer.load() output above (10:43:03). In that run the call failed with
# FileNotFoundError on a COCO feature .npy file; re-run after a kernel restart
# once the feature files are in place.
trainer.inference()

2020-03-19T10:42:28 INFO: Starting inference on test set




  0%|          | 0/2500 [00:00<?, ?it/s][A[A

FileNotFoundError: [Errno 2] No such file or directory: '/tf/notebooks/jwhwang/vqa/pythia/pythia/data/coco/detectron_fix_100/fc6/train_val_2014/COCO_val2014_000000060623.npy'