From 537164877e021c07f74c93040a04a622b7326230 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Tue, 6 Jun 2017 19:53:18 +0800 Subject: [PATCH 1/5] init dataset and train python file --- demo/fit_a_line/train.py | 49 +++++++++++++++++++ docker/cloud-job/build_docker.sh | 8 +++ docker/cloud-job/python/.gitignore | 3 ++ docker/cloud-job/python/paddle/__init__.py | 1 + .../cloud-job/python/paddle/cloud/__init__.py | 1 + .../python/paddle/cloud/dataset/__init__.py | 3 ++ .../python/paddle/cloud/dataset/common.py | 11 +++++ .../paddle/cloud/dataset/uci_housing.py | 30 ++++++++++++ .../cloud-job/python/pcloud.egg-info/PKG-INFO | 10 ++++ .../python/pcloud.egg-info/SOURCES.txt | 10 ++++ .../pcloud.egg-info/dependency_links.txt | 1 + .../python/pcloud.egg-info/top_level.txt | 1 + docker/cloud-job/python/setup.py | 12 +++++ k8s/prepare_dataset.yaml | 13 ++--- paddlecloud/paddlejob/paddle_job.py | 7 ++- paddlecloud/paddlejob/views.py | 5 +- 16 files changed, 157 insertions(+), 8 deletions(-) create mode 100644 demo/fit_a_line/train.py create mode 100644 docker/cloud-job/python/.gitignore create mode 100644 docker/cloud-job/python/paddle/__init__.py create mode 100644 docker/cloud-job/python/paddle/cloud/__init__.py create mode 100644 docker/cloud-job/python/paddle/cloud/dataset/__init__.py create mode 100644 docker/cloud-job/python/paddle/cloud/dataset/common.py create mode 100644 docker/cloud-job/python/paddle/cloud/dataset/uci_housing.py create mode 100644 docker/cloud-job/python/pcloud.egg-info/PKG-INFO create mode 100644 docker/cloud-job/python/pcloud.egg-info/SOURCES.txt create mode 100644 docker/cloud-job/python/pcloud.egg-info/dependency_links.txt create mode 100644 docker/cloud-job/python/pcloud.egg-info/top_level.txt create mode 100644 docker/cloud-job/python/setup.py diff --git a/demo/fit_a_line/train.py b/demo/fit_a_line/train.py new file mode 100644 index 00000000..255a57d1 --- /dev/null +++ b/demo/fit_a_line/train.py @@ -0,0 +1,49 @@ +import paddle.v2 as paddle +import paddle.cloud.dataset.uci_housing as uci_housing + +def main(): + # init + paddle.init() + + # network config + x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) + y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear()) + y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) + cost = paddle.layer.mse_cost(input=y_predict, label=y) + + # create parameters + parameters = paddle.parameters.create(cost) + + # create optimizer + optimizer = paddle.optimizer.Momentum(momentum=0) + + trainer = paddle.trainer.SGD( + cost=cost, parameters=parameters, update_equation=optimizer) + + feeding = {'x': 0, 'y': 1} + + # event_handler to print training and testing info + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f" % ( + event.pass_id, event.batch_id, event.cost) + + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.batch(uci_housing.test(), batch_size=2), + feeding=feeding) + print "Test %d, Cost %f" % (event.pass_id, result.cost) + + # training + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle(uci_housing.train(), buf_size=500), + batch_size=2), + feeding=feeding, + event_handler=event_handler, + num_passes=30) + + +if __name__ == '__main__': + main() diff --git a/docker/cloud-job/build_docker.sh b/docker/cloud-job/build_docker.sh index 00f2f1cb..ba825c29 100755 --- a/docker/cloud-job/build_docker.sh +++ b/docker/cloud-job/build_docker.sh @@ -18,12 +18,20 @@ fi echo "base_image": $base_image echo "pcloudjob_image": $pcloudjob_image +#Build Python Package +docker run --rm -it -v $PWD:/cloud $base_image \ + bash -c "cd /cloud/python && python setup.py bdist_wheel" + #Build Docker Image cat > Dockerfile < Date: Wed, 7 Jun 2017 11:47:23 +0800 Subject: [PATCH 2/5] update yaml --- k8s/prepare_dataset.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/k8s/prepare_dataset.yaml b/k8s/prepare_dataset.yaml index 01a9cd0b..a7036a7e 100644 --- a/k8s/prepare_dataset.yaml +++ b/k8s/prepare_dataset.yaml @@ -1,11 +1,11 @@ apiVersion: batch/v1 kind: Job metadata: - name: dataset-prepare + name: prepare-dataset spec: template: metadata: - name: dataset-prepare + name: prepare-dataset spec: volumes: - name: data-storage From 99647e61c1f1242d6ca8c8e86a8d51bafe968722 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Wed, 7 Jun 2017 11:59:46 +0800 Subject: [PATCH 3/5] delete unused package --- docker/{cloud-job => }/.gitignore | 1 + docker/{cloud-job => }/README.md | 0 docker/{cloud-job => }/build_docker.sh | 0 docker/{cloud-job => }/k8s_tools.py | 0 docker/{cloud-job => }/paddle_k8s | 0 docker/prepare_dataset/Dockerfile | 4 -- docker/prepare_dataset/prepare.py | 39 ------------------- docker/{cloud-job => }/python/.gitignore | 0 .../{cloud-job => }/python/paddle/__init__.py | 0 .../python/paddle/cloud/__init__.py | 0 .../python/paddle/cloud/dataset/__init__.py | 0 .../python/paddle/cloud/dataset/common.py | 0 .../paddle/cloud/dataset/uci_housing.py | 0 .../python/pcloud.egg-info/PKG-INFO | 0 .../python/pcloud.egg-info/SOURCES.txt | 0 .../pcloud.egg-info/dependency_links.txt | 0 .../python/pcloud.egg-info/top_level.txt | 0 docker/{cloud-job => }/python/setup.py | 0 18 files changed, 1 insertion(+), 43 deletions(-) rename docker/{cloud-job => }/.gitignore (64%) rename docker/{cloud-job => }/README.md (100%) rename docker/{cloud-job => }/build_docker.sh (100%) rename docker/{cloud-job => }/k8s_tools.py (100%) rename docker/{cloud-job => }/paddle_k8s (100%) delete mode 100644 docker/prepare_dataset/Dockerfile delete mode 100644 docker/prepare_dataset/prepare.py rename docker/{cloud-job => }/python/.gitignore (100%) rename docker/{cloud-job => }/python/paddle/__init__.py (100%) rename docker/{cloud-job => }/python/paddle/cloud/__init__.py (100%) rename docker/{cloud-job => }/python/paddle/cloud/dataset/__init__.py (100%) rename docker/{cloud-job => }/python/paddle/cloud/dataset/common.py (100%) rename docker/{cloud-job => }/python/paddle/cloud/dataset/uci_housing.py (100%) rename docker/{cloud-job => }/python/pcloud.egg-info/PKG-INFO (100%) rename docker/{cloud-job => }/python/pcloud.egg-info/SOURCES.txt (100%) rename docker/{cloud-job => }/python/pcloud.egg-info/dependency_links.txt (100%) rename docker/{cloud-job => }/python/pcloud.egg-info/top_level.txt (100%) rename docker/{cloud-job => }/python/setup.py (100%) diff --git a/docker/cloud-job/.gitignore b/docker/.gitignore similarity index 64% rename from docker/cloud-job/.gitignore rename to docker/.gitignore index 94143827..d7a2f354 100644 --- a/docker/cloud-job/.gitignore +++ b/docker/.gitignore @@ -1 +1,2 @@ Dockerfile +*.pyc diff --git a/docker/cloud-job/README.md b/docker/README.md similarity index 100% rename from docker/cloud-job/README.md rename to docker/README.md diff --git a/docker/cloud-job/build_docker.sh b/docker/build_docker.sh similarity index 100% rename from docker/cloud-job/build_docker.sh rename to docker/build_docker.sh diff --git a/docker/cloud-job/k8s_tools.py b/docker/k8s_tools.py similarity index 100% rename from docker/cloud-job/k8s_tools.py rename to docker/k8s_tools.py diff --git a/docker/cloud-job/paddle_k8s b/docker/paddle_k8s similarity index 100% rename from docker/cloud-job/paddle_k8s rename to docker/paddle_k8s diff --git a/docker/prepare_dataset/Dockerfile b/docker/prepare_dataset/Dockerfile deleted file mode 100644 index 18f9d51f..00000000 --- a/docker/prepare_dataset/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -FROM paddlepaddle/paddle:latest -RUN python -c "import paddle.v2.dataset.common as common; common.fetch_all()" -ADD ./prepare.py /root/ -CMD ["python", "/root/prepare.py"] diff --git a/docker/prepare_dataset/prepare.py b/docker/prepare_dataset/prepare.py deleted file mode 100644 index 2e918d59..00000000 --- a/docker/prepare_dataset/prepare.py +++ /dev/null @@ -1,39 +0,0 @@ -import paddle.v2.dataset as dataset -import shutil -import os -dataset_home = os.getenv("DATASET_HOME") -os.system("mv %s %s" % (dataset.common.DATA_HOME, dataset_home)) - -dataset.common.DATA_HOME = dataset_home -dataset.common.split(dataset.uci_housing.train(), - line_count = 500, - suffix=dataset_home + "/uci_housing/train-%05d.pickle") -dataset.common.split(dataset.mnist.train(), - line_count = 500, - suffix=dataset_home + "/mnist/train-%05d.pickle") -dataset.common.split(dataset.cifar.train10(), - line_count = 500, - suffix=dataset_home + "/cifar/train10-%05d.pickel") - -N = 5 -word_dict = dataset.imikolov.build_dict() -dict_size = len(word_dict) -dataset.common.split(dataset.imikolov.train(word_dict, 5), - line_count = 500, - suffix = dataset_home + "/imikolov/train-%05d.pickle") - -dataset.common.split(dataset.movielens.train(), - line_count = 500, - suffix = dataset_home + "/movielens/train-%05d.pickle") - -dataset.common.split(lambda: dataset.imdb.train(dataset.imdb.word_dict()), - line_count = 500, - suffix = dataset_home + "/imdb/train-%05d.pickle") - -dataset.common.split(dataset.conll05.test(), - line_count = 500, - suffix = dataset_home + "/conll05/test-%05d.pickle") - -dataset.common.split(dataset.wmt14.train(30000), - line_count = 500, - suffix = dataset_home + "wmt14/train-%05d.pickle") diff --git a/docker/cloud-job/python/.gitignore b/docker/python/.gitignore similarity index 100% rename from docker/cloud-job/python/.gitignore rename to docker/python/.gitignore diff --git a/docker/cloud-job/python/paddle/__init__.py b/docker/python/paddle/__init__.py similarity index 100% rename from docker/cloud-job/python/paddle/__init__.py rename to docker/python/paddle/__init__.py diff --git a/docker/cloud-job/python/paddle/cloud/__init__.py b/docker/python/paddle/cloud/__init__.py similarity index 100% rename from docker/cloud-job/python/paddle/cloud/__init__.py rename to docker/python/paddle/cloud/__init__.py diff --git a/docker/cloud-job/python/paddle/cloud/dataset/__init__.py b/docker/python/paddle/cloud/dataset/__init__.py similarity index 100% rename from docker/cloud-job/python/paddle/cloud/dataset/__init__.py rename to docker/python/paddle/cloud/dataset/__init__.py diff --git a/docker/cloud-job/python/paddle/cloud/dataset/common.py b/docker/python/paddle/cloud/dataset/common.py similarity index 100% rename from docker/cloud-job/python/paddle/cloud/dataset/common.py rename to docker/python/paddle/cloud/dataset/common.py diff --git a/docker/cloud-job/python/paddle/cloud/dataset/uci_housing.py b/docker/python/paddle/cloud/dataset/uci_housing.py similarity index 100% rename from docker/cloud-job/python/paddle/cloud/dataset/uci_housing.py rename to docker/python/paddle/cloud/dataset/uci_housing.py diff --git a/docker/cloud-job/python/pcloud.egg-info/PKG-INFO b/docker/python/pcloud.egg-info/PKG-INFO similarity index 100% rename from docker/cloud-job/python/pcloud.egg-info/PKG-INFO rename to docker/python/pcloud.egg-info/PKG-INFO diff --git a/docker/cloud-job/python/pcloud.egg-info/SOURCES.txt b/docker/python/pcloud.egg-info/SOURCES.txt similarity index 100% rename from docker/cloud-job/python/pcloud.egg-info/SOURCES.txt rename to docker/python/pcloud.egg-info/SOURCES.txt diff --git a/docker/cloud-job/python/pcloud.egg-info/dependency_links.txt b/docker/python/pcloud.egg-info/dependency_links.txt similarity index 100% rename from docker/cloud-job/python/pcloud.egg-info/dependency_links.txt rename to docker/python/pcloud.egg-info/dependency_links.txt diff --git a/docker/cloud-job/python/pcloud.egg-info/top_level.txt b/docker/python/pcloud.egg-info/top_level.txt similarity index 100% rename from docker/cloud-job/python/pcloud.egg-info/top_level.txt rename to docker/python/pcloud.egg-info/top_level.txt diff --git a/docker/cloud-job/python/setup.py b/docker/python/setup.py similarity index 100% rename from docker/cloud-job/python/setup.py rename to docker/python/setup.py From 3caa7460a286dcdfa416fbf9350a44a646018575 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Wed, 7 Jun 2017 18:07:55 +0800 Subject: [PATCH 4/5] update --- demo/fit_a_line/train.py | 2 +- docker/.gitignore | 1 + docker/python/paddle/__init__.py | 1 - docker/python/{paddle/cloud => pcloud}/__init__.py | 0 docker/python/{paddle/cloud => pcloud}/dataset/__init__.py | 0 docker/python/{paddle/cloud => pcloud}/dataset/common.py | 4 ++-- .../python/{paddle/cloud => pcloud}/dataset/uci_housing.py | 7 ++++--- docker/python/setup.py | 5 ++--- paddlecloud/paddlejob/views.py | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) delete mode 100644 docker/python/paddle/__init__.py rename docker/python/{paddle/cloud => pcloud}/__init__.py (100%) rename docker/python/{paddle/cloud => pcloud}/dataset/__init__.py (100%) rename docker/python/{paddle/cloud => pcloud}/dataset/common.py (61%) rename docker/python/{paddle/cloud => pcloud}/dataset/uci_housing.py (74%) diff --git a/demo/fit_a_line/train.py b/demo/fit_a_line/train.py index 255a57d1..8449e456 100644 --- a/demo/fit_a_line/train.py +++ b/demo/fit_a_line/train.py @@ -1,5 +1,5 @@ import paddle.v2 as paddle -import paddle.cloud.dataset.uci_housing as uci_housing +import pcloud.dataset.uci_housing as uci_housing def main(): # init diff --git a/docker/.gitignore b/docker/.gitignore index d7a2f354..f3c1a937 100644 --- a/docker/.gitignore +++ b/docker/.gitignore @@ -1,2 +1,3 @@ Dockerfile *.pyc +pcloud.egg-info diff --git a/docker/python/paddle/__init__.py b/docker/python/paddle/__init__.py deleted file mode 100644 index 710acbe0..00000000 --- a/docker/python/paddle/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["cloud"] diff --git a/docker/python/paddle/cloud/__init__.py b/docker/python/pcloud/__init__.py similarity index 100% rename from docker/python/paddle/cloud/__init__.py rename to docker/python/pcloud/__init__.py diff --git a/docker/python/paddle/cloud/dataset/__init__.py b/docker/python/pcloud/dataset/__init__.py similarity index 100% rename from docker/python/paddle/cloud/dataset/__init__.py rename to docker/python/pcloud/dataset/__init__.py diff --git a/docker/python/paddle/cloud/dataset/common.py b/docker/python/pcloud/dataset/common.py similarity index 61% rename from docker/python/paddle/cloud/dataset/common.py rename to docker/python/pcloud/dataset/common.py index 0ac4b171..a01e697d 100644 --- a/docker/python/paddle/cloud/dataset/common.py +++ b/docker/python/pcloud/dataset/common.py @@ -5,7 +5,7 @@ def fetch_all(): for module_name in filter(lambda x: not x.startswith("__"), dir(paddle.cloud.dataset)): if "fetch" in dir( - importlib.import_module("paddle.cloud.dataset.%s" % module_name)): + importlib.import_module("pcloud.dataset.%s" % module_name)): getattr( - importlib.import_module("paddle.cloud.dataset.%s" % module_name), + importlib.import_module("pcloud.dataset.%s" % module_name), "fetch")() diff --git a/docker/python/paddle/cloud/dataset/uci_housing.py b/docker/python/pcloud/dataset/uci_housing.py similarity index 74% rename from docker/python/paddle/cloud/dataset/uci_housing.py rename to docker/python/pcloud/dataset/uci_housing.py index 6109c20b..cded2408 100644 --- a/docker/python/paddle/cloud/dataset/uci_housing.py +++ b/docker/python/pcloud/dataset/uci_housing.py @@ -4,9 +4,10 @@ __all__=["train", "test", "fetch"] -CURRENT_DATACENTER = os.getenv("CURRENT_DATACENTER") +dc = os.getenv("PADDLE_CLOUD_CURRENT_DATACENTER") -common.DATA_HOME = "/pfs/%s/public/dataset" % CURRENT_DATACENTER +#The default public directory on PaddleCloud is /pfs/${DATACENTER}/public/ +common.DATA_HOME = "/pfs/%s/public/dataset" % dc TRAIN_FILES_PATTERN = os.path.join(common.DATA_HOME, "uci_housing/train-*.pickle") @@ -17,7 +18,7 @@ def train(): return common.cluster_files_reader( TRAIN_FILES_PATTERN, - trainer_count = int(os.getenv("TRAINERS", "1")), + trainer_count = int(os.getenv("PADDLE_INIT_NUM_GRADIENT_SERVERS", "1")), trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID", "0"))) def test(): diff --git a/docker/python/setup.py b/docker/python/setup.py index 0cd863ee..c6f8c814 100644 --- a/docker/python/setup.py +++ b/docker/python/setup.py @@ -1,9 +1,8 @@ from setuptools import setup packages=[ - 'paddle', - 'paddle.cloud', - 'paddle.cloud.dataset'] + 'pcloud', + 'pcloud.dataset'] setup(name='pcloud', version='0.1.1', diff --git a/paddlecloud/paddlejob/views.py b/paddlecloud/paddlejob/views.py index 08cc1721..cec17931 100644 --- a/paddlecloud/paddlejob/views.py +++ b/paddlecloud/paddlejob/views.py @@ -105,7 +105,7 @@ def post(self, request, format=None): host_path = settings.NVIDIA_LIB_PATH )) envs = {} - envs.update({"CURRENT_DATACENTER": dc}) + envs.update({"PADDLE_CLOUD_CURRENT_DATACENTER": dc}) paddle_job = PaddleJob( name = obj.get("name", "paddle-cluster-job"), From c69d9e3d27986887f09e945e76f4d4d350d92cdb Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Wed, 7 Jun 2017 18:09:34 +0800 Subject: [PATCH 5/5] update --- docker/python/pcloud.egg-info/PKG-INFO | 10 ---------- docker/python/pcloud.egg-info/SOURCES.txt | 10 ---------- docker/python/pcloud.egg-info/dependency_links.txt | 1 - docker/python/pcloud.egg-info/top_level.txt | 1 - 4 files changed, 22 deletions(-) delete mode 100644 docker/python/pcloud.egg-info/PKG-INFO delete mode 100644 docker/python/pcloud.egg-info/SOURCES.txt delete mode 100644 docker/python/pcloud.egg-info/dependency_links.txt delete mode 100644 docker/python/pcloud.egg-info/top_level.txt diff --git a/docker/python/pcloud.egg-info/PKG-INFO b/docker/python/pcloud.egg-info/PKG-INFO deleted file mode 100644 index 671cc494..00000000 --- a/docker/python/pcloud.egg-info/PKG-INFO +++ /dev/null @@ -1,10 +0,0 @@ -Metadata-Version: 1.0 -Name: pcloud -Version: 0.1.1 -Summary: PaddlePaddle Cloud -Home-page: UNKNOWN -Author: UNKNOWN -Author-email: UNKNOWN -License: UNKNOWN -Description: UNKNOWN -Platform: UNKNOWN diff --git a/docker/python/pcloud.egg-info/SOURCES.txt b/docker/python/pcloud.egg-info/SOURCES.txt deleted file mode 100644 index 4ba49904..00000000 --- a/docker/python/pcloud.egg-info/SOURCES.txt +++ /dev/null @@ -1,10 +0,0 @@ -setup.py -paddle/__init__.py -paddle/cloud/__init__.py -paddle/cloud/dataset/__init__.py -paddle/cloud/dataset/common.py -paddle/cloud/dataset/uci_housing.py -pcloud.egg-info/PKG-INFO -pcloud.egg-info/SOURCES.txt -pcloud.egg-info/dependency_links.txt -pcloud.egg-info/top_level.txt \ No newline at end of file diff --git a/docker/python/pcloud.egg-info/dependency_links.txt b/docker/python/pcloud.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891..00000000 --- a/docker/python/pcloud.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/docker/python/pcloud.egg-info/top_level.txt b/docker/python/pcloud.egg-info/top_level.txt deleted file mode 100644 index 8a96081c..00000000 --- a/docker/python/pcloud.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -paddle