Skip to content

Commit

Permalink
Merge pull request #130 from Yancey1989/demo_fit_a_line
Browse files Browse the repository at this point in the history
Add demo: fit a line
  • Loading branch information
Yancey1989 committed Jun 8, 2017
2 parents fdf23ac + c69d9e3 commit cb92ec3
Show file tree
Hide file tree
Showing 18 changed files with 137 additions and 52 deletions.
49 changes: 49 additions & 0 deletions demo/fit_a_line/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import paddle.v2 as paddle
import pcloud.dataset.uci_housing as uci_housing

def main():
# init
paddle.init()

# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)

# create parameters
parameters = paddle.parameters.create(cost)

# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)

trainer = paddle.trainer.SGD(
cost=cost, parameters=parameters, update_equation=optimizer)

feeding = {'x': 0, 'y': 1}

# event_handler to print training and testing info
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)

if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.batch(uci_housing.test(), batch_size=2),
feeding=feeding)
print "Test %d, Cost %f" % (event.pass_id, result.cost)

# training
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(uci_housing.train(), buf_size=500),
batch_size=2),
feeding=feeding,
event_handler=event_handler,
num_passes=30)


# Run training only when executed as a script (not on import).
if __name__ == '__main__':
    main()
3 changes: 3 additions & 0 deletions docker/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Dockerfile
*.pyc
pcloud.egg-info
File renamed without changes.
8 changes: 8 additions & 0 deletions docker/cloud-job/build_docker.sh → docker/build_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,20 @@ fi
echo "base_image": $base_image
echo "pcloudjob_image": $pcloudjob_image

# Build the pcloud Python wheel inside the base image so the host needs no
# Python toolchain; the wheel lands in ./python/dist via the bind mount and
# is ADDed into the job image below.
docker run --rm -it -v $PWD:/cloud $base_image \
    bash -c "cd /cloud/python && python setup.py bdist_wheel"

# Generate the job-image Dockerfile with the freshly built wheel baked in.
# (Removed the commented-out duplicate "pip install && rm" lines that were
# previously emitted into the Dockerfile as dead text.)
cat > Dockerfile <<EOF
FROM ${base_image}
RUN pip install -U kubernetes && apt-get install -y iputils-ping
ADD ./paddle_k8s /usr/bin
ADD ./k8s_tools.py /root/
ADD ./python/dist/pcloud-0.1.1-py2-none-any.whl /tmp/
RUN pip install /tmp/pcloud-0.1.1-py2-none-any.whl
CMD ["paddle_k8s"]
EOF
Expand Down
1 change: 0 additions & 1 deletion docker/cloud-job/.gitignore

This file was deleted.

File renamed without changes.
File renamed without changes.
4 changes: 0 additions & 4 deletions docker/prepare_dataset/Dockerfile

This file was deleted.

39 changes: 0 additions & 39 deletions docker/prepare_dataset/prepare.py

This file was deleted.

3 changes: 3 additions & 0 deletions docker/python/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
build
dist
paddlecloud.egg-info
1 change: 1 addition & 0 deletions docker/python/pcloud/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# The pcloud package exposes a single public subpackage: the cloud-aware
# dataset readers under pcloud.dataset.
__all__ = ["dataset"]
3 changes: 3 additions & 0 deletions docker/python/pcloud/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Python 2 implicit relative imports: these resolve to the sibling modules
# pcloud/dataset/uci_housing.py and pcloud/dataset/common.py.
# NOTE(review): this form breaks under Python 3 — confirm the package is
# py2-only (the modules use print statements, so presumably yes).
import uci_housing
import common
# Dataset modules re-exported as the package's public API.
__all__ = ["uci_housing", "common"]
11 changes: 11 additions & 0 deletions docker/python/pcloud/dataset/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import uci_housing
import paddle
import importlib
def fetch_all():
    """Prepare every dataset in pcloud.dataset that defines a fetch() hook.

    Scans the package's dataset modules and invokes the module-level
    ``fetch()`` function of each one that provides it.
    """
    # BUGFIX: the original scanned dir(paddle.cloud.dataset), which does not
    # match this package -- the very same loop imports modules by the name
    # "pcloud.dataset.<name>" (and setup.py ships the package as "pcloud"),
    # so scan pcloud.dataset instead.
    import pcloud.dataset
    for module_name in filter(lambda x: not x.startswith("__"),
                              dir(pcloud.dataset)):
        # Import once and reuse (the original imported the module twice).
        module = importlib.import_module("pcloud.dataset.%s" % module_name)
        if "fetch" in dir(module):
            getattr(module, "fetch")()
31 changes: 31 additions & 0 deletions docker/python/pcloud/dataset/uci_housing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import paddle.v2.dataset.uci_housing as uci_housing
import paddle.v2.dataset.common as common
import os

# Public API: cluster-aware train reader, plain test reader, and the
# fetch() hook used to pre-split the dataset on shared storage.
__all__=["train", "test", "fetch"]

# Data-center name of the current PaddleCloud deployment; selects which
# shared-filesystem tree the dataset lives under.
# NOTE(review): if the env var is unset, dc is None and DATA_HOME becomes
# "/pfs/None/public/dataset" -- confirm the variable is always set in-cluster.
dc = os.getenv("PADDLE_CLOUD_CURRENT_DATACENTER")

#The default public directory on PaddleCloud is /pfs/${DATACENTER}/public/
common.DATA_HOME = "/pfs/%s/public/dataset" % dc

# Glob matching all pre-split training pickle shards (read by train()).
TRAIN_FILES_PATTERN = os.path.join(common.DATA_HOME,
                                   "uci_housing/train-*.pickle")
# printf-style filename template used by fetch() when writing the shards.
TRAIN_FILES_SUFFIX = os.path.join(common.DATA_HOME,
                                  "uci_housing/train-%05d.pickle")


def train():
    """Reader over this trainer's shard of the pre-split training pickles.

    Shard assignment comes from the PaddlePaddle cluster environment:
    total trainer count and this trainer's id.
    """
    num_trainers = int(os.getenv("PADDLE_INIT_NUM_GRADIENT_SERVERS", "1"))
    my_trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID", "0"))
    return common.cluster_files_reader(TRAIN_FILES_PATTERN,
                                       trainer_count=num_trainers,
                                       trainer_id=my_trainer_id)

def test():
    """Return the stock UCI housing test reader (the test set is not sharded)."""
    reader = uci_housing.test()
    return reader

def fetch():
print "fetch cluster files: %s" % TRAIN_FILES_SUFFIX
common.split(uci_housing.train(),
line_count = 500,
suffix=TRAIN_FILES_SUFFIX)
11 changes: 11 additions & 0 deletions docker/python/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from setuptools import setup

# Packages shipped in the pcloud wheel.
PACKAGE_LIST = [
    'pcloud',
    'pcloud.dataset',
]

setup(
    name='pcloud',
    version='0.1.1',
    description="PaddlePaddle Cloud",
    packages=PACKAGE_LIST,
)
13 changes: 7 additions & 6 deletions k8s/prepare_dataset.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
apiVersion: batch/v1
kind: Job
metadata:
name: paddle-prepare
name: prepare-dataset
spec:
template:
metadata:
name: paddle-prepare
name: prepare-dataset
spec:
volumes:
- name: data-storage
Expand All @@ -18,11 +18,12 @@ spec:
name: ceph-secret
containers:
- name: prepare
image: yancey1989/paddlecloud-prepare
image: yancey1989/paddlecloud-job
env:
- name: DATASET_HOME
value: "/pfs/public/dataset"
- name: CURRENT_DATACENTER
value: "meiyan"
command: ["python", "-c", "\"import paddle.cloud.dataset.common as common; common.fetch_all()\""]
volumeMounts:
- name: data-storage
mountPath: /pfs/public
mountPath: /pfs/meiyan/public
restartPolicy: Never
7 changes: 6 additions & 1 deletion paddlecloud/paddlejob/paddle_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ def __init__(self,
passes,
gpu=0,
volumes=[],
registry_secret=None):
registry_secret=None,
envs = {}):

self._ports_num=1
self._ports_num_for_sparse=1
Expand All @@ -44,6 +45,7 @@ def __init__(self,
self._volumes = volumes
self._registry_secret = registry_secret
self._passes = passes
self._usr_envs = envs

@property
def pservers(self):
Expand Down Expand Up @@ -86,6 +88,9 @@ def get_env(self):
envs.append({"name":"PADDLE_INIT_USE_GPU", "value":str("0")})
envs.append({"name":"NAMESPACE", "valueFrom":{
"fieldRef":{"fieldPath":"metadata.namespace"}}})
if self._usr_envs:
for k, v in self._usr_envs.items():
envs.append({"name": k, "value": v})
return envs

def _get_pserver_container_ports(self):
Expand Down
5 changes: 4 additions & 1 deletion paddlecloud/paddlejob/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def post(self, request, format=None):
mount_path = "/usr/local/nvidia/lib64",
host_path = settings.NVIDIA_LIB_PATH
))
envs = {}
envs.update({"PADDLE_CLOUD_CURRENT_DATACENTER": dc})

paddle_job = PaddleJob(
name = job_name,
Expand All @@ -132,7 +134,8 @@ def post(self, request, format=None):
image = job_image,
passes = obj.get("passes", 1),
registry_secret = registry_secret,
volumes = volumes
volumes = volumes,
envs = envs
)
try:
ret = client.ExtensionsV1beta1Api(api_client=api_client).create_namespaced_replica_set(
Expand Down

0 comments on commit cb92ec3

Please sign in to comment.