#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
AutoDL definition
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import numpy as np
import subprocess
import paddle.fluid as fluid
from reinforce_policy_gradient import ReinforcePolicyGradient
from policy_model import PolicyModel
from autodl_agent import AutoDLAgent
import utils
import collections
class AutoDL(object):
    """
    AutoDL controller: builds the policy model, the REINFORCE algorithm
    and the agent that samples architectures and learns from their rewards.
    """
    def __init__(self):
        """
        Parse command-line arguments, prepare the log directories and
        construct the policy model, algorithm and agent.
        """
        self.parse_args = self._init_parser()
        self.bl_decay = self.parse_args.bl_decay
        self.log_dir = self.parse_args.log_dir
        self.early_stop = self.parse_args.early_stop
        self.data_path = self.parse_args.data_path
        self.num_models = self.parse_args.num_models
        self.batch_size = self.parse_args.batch_size
        self.chunk_size = self.parse_args.chunk_size
        self._init_dir_path()
        self.model = PolicyModel(self.parse_args)
        algo_hyperparas = {'lr': self.parse_args.learning_rate}
        self.algorithm = ReinforcePolicyGradient(self.model,
                                                 hyperparas=algo_hyperparas)
        self.autodl_agent = AutoDLAgent(self.algorithm, self.parse_args)
        self.total_reward = 0
    def _init_dir_path(self):
        """
        Create the log directory and its actions/rewards/checkpoints
        sub-directories.
        """
        utils.prepare(self.log_dir)
        utils.prepare(self.log_dir, "actions")
        utils.prepare(self.log_dir, "rewards")
        utils.prepare(self.log_dir, "checkpoints")
    def _init_parser(self):
        """
        Build the command-line argument parser and return the parsed arguments.
        """
        parser = argparse.ArgumentParser(description='AutoDL Parser',
                                         prog='AutoDL')
        parser.add_argument('-v', '--version', action='version',
                            version='%(prog)s 0.1')
        parser.add_argument('--num_nodes', dest="num_nodes", nargs="?",
                            type=int, const=10, default=10,
                            help="number of nodes")
        parser.add_argument('--num_tokens', dest="num_tokens", nargs="?",
                            type=int, const=10, default=10,
                            help="number of tokens")
        parser.add_argument('--learning_rate', dest="learning_rate", nargs="?",
                            type=float, default=1e-3,
                            help="learning rate")
        parser.add_argument('--batch_size', dest="batch_size", nargs="?",
                            type=int, const=10, default=10, help="batch size")
        parser.add_argument('--num_models', dest="num_models", nargs="?",
                            type=int, const=32000, default=32000,
                            help="maximum number of models sampled")
        parser.add_argument('--early_stop', dest="early_stop", nargs="?",
                            type=int, const=20, default=20, help="early stop")
        parser.add_argument('--log_dir', dest="log_dir", nargs="?", type=str,
                            const="./log", default="./log",
                            help="log directory")
        parser.add_argument('--input_size', dest="input_size", nargs="?",
                            type=int, const=10, default=10, help="input size")
        parser.add_argument('--hidden_size', dest="hidden_size", nargs="?",
                            type=int, const=64, default=64, help="hidden size")
        parser.add_argument('--num_layers', dest="num_layers", nargs="?",
                            type=int, const=2, default=2,
                            help="number of layers")
        parser.add_argument('--bl_decay', dest="bl_decay", nargs="?",
                            type=float, const=0.9, default=0.9,
                            help="baseline decay")
        # inception train config
        parser.add_argument('--data_path', dest="data_path", nargs="?",
                            type=str, default="./cifar/pickle-cifar-10",
                            help="path of data files")
        parser.add_argument('--chunk_size', dest="chunk_size", nargs="?",
                            type=int, const=100, default=100,
                            help="chunk size")
        parse_args = parser.parse_args()
        return parse_args
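    # Example of overriding the defaults above when launching the search
    # (the entry-point script name is illustrative, not from this capture):
    #   python -u main.py --num_models 1000 --batch_size 10 \
    #       --log_dir ./log --data_path ./cifar/pickle-cifar-10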
    def supervisor(self, mid):
        """
        Train CNN model #mid in a subprocess, retrying until the run
        exits successfully.
        Sample cmd: python -u inception_train/train.py --mid=9 \
            --early_stop=20 --data_path=./cifar/pickle-cifar-10
        """
        tokens, adjvec = utils.load_action(mid, self.log_dir)
        cmd = ("CUDA_VISIBLE_DEVICES=1 python -u inception_train/train.py \
            --mid=%d --early_stop=%d --logdir=%s --data_path=%s --chunk_size=%d") % \
            (mid, self.early_stop, self.log_dir, self.data_path, self.chunk_size)
        print("cmd:{}".format(cmd))
        # retry until the training subprocess finishes without an error code
        while True:
            try:
                subprocess.check_call(cmd, shell=True)
                break
            except subprocess.CalledProcessError as e:
                print("[%s] training model #%d exits with exit code %d" %
                      (utils.stime(), mid, e.returncode), file=sys.stderr)
        return
    def simple_run(self):
        """
        Toy run used as a sanity check: the reward is simply the number of
        tokens sampled as 1, so the average reward should rise toward the
        target of 20 printed below.
        """
        print("Simple run target is 20")
        mid = 0
        shadow = 0
        is_first = True
        while mid <= self.num_models:
            actions_to, actions_ad = self.autodl_agent.sample()
            rewards = np.count_nonzero(actions_to == 1, axis=1).astype("int32")
            # exponential moving average of the mean reward, used as a baseline
            current_mean_reward = np.mean(rewards)
            if is_first:
                shadow = current_mean_reward
                is_first = False
            else:
                shadow = shadow * self.bl_decay \
                    + current_mean_reward * (1 - self.bl_decay)
            self.autodl_agent.learn((np.array(actions_to).astype("int32"),
                                     np.array(actions_ad).astype("int32")),
                                    rewards - shadow)
            if mid % 10 == 0:
                print('mid=%d, average rewards=%.3f' % (mid, np.mean(rewards)))
            mid += 1
    def run(self):
        """
        Main search loop: sample a batch of architectures, train each one
        via supervisor(), use the reported accuracy as the reward and update
        the agent with a baseline-subtracted signal.
        """
        rewards = []
        mid = 0
        while mid <= self.num_models:
            actions_to, actions_ad = self.autodl_agent.sample()
            for action in zip(actions_to, actions_ad):
                utils.dump_action(mid, action, self.log_dir)
                self.supervisor(mid)
                current_reward = utils.load_reward(mid, self.log_dir)
                if not np.isnan(current_reward):
                    rewards.append(current_reward.item())
                mid += 1
            if len(rewards) % self.batch_size == 0:
                print("[%s] step = %d, average accuracy = %.3f" %
                      (utils.stime(), self.autodl_agent.global_step,
                       np.mean(rewards)))
                rewards_array = np.array(rewards).astype("float32")
                # exponential moving average of past accuracy as the baseline
                if self.total_reward == 0:
                    self.total_reward = rewards_array.mean()
                else:
                    self.total_reward = self.total_reward * self.bl_decay \
                        + (1 - self.bl_decay) * rewards_array.mean()
                rewards_array = rewards_array - self.total_reward
                # the cubed, baseline-subtracted reward is the learning signal
                self.autodl_agent.learn([actions_to.astype("int32"),
                                         actions_ad.astype("int32")],
                                        rewards_array ** 3)
                rewards = []
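# Minimal entry point, added as a sketch: the tail of the original file is
# not shown in this capture, so the actual main block may differ. It assumes
# run() drives the full architecture search (simple_run() is a sanity check).
if __name__ == "__main__":
    autodl = AutoDL()
    autodl.run()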