Merge pull request #27 from dayhaha/develop

PR for recognize_digits
PaddlePaddle · Jan 19, 2017 · 2506ff1 · 2506ff1
2 parents 09d8aad + 40fd793
commit 2506ff1
Show file tree

Hide file tree

Showing 19 changed files with 842 additions and 1 deletion.
diff --git a/recognize_digits/.gitignore b/recognize_digits/.gitignore
@@ -0,0 +1,6 @@
+data/raw_data
+data/train.list
+data/test.list
+*.log
+*.pyc
+plot.png
diff --git a/recognize_digits/README.md b/recognize_digits/README.md
diff --git a/recognize_digits/data/get_mnist_data.sh b/recognize_digits/data/get_mnist_data.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env sh
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This script downloads the MNIST data and unzips it.
+set -e
+# Resolve the directory that holds this script. '&&' (rather than ';') makes
+# the command substitution fail when 'cd' fails, so 'set -e' aborts before the
+# 'rm -rf' below can run against an unintended directory.
+DIR="$( cd "$(dirname "$0")" && pwd -P )"
+# Always start from an empty raw_data directory.
+rm -rf "$DIR/raw_data"
+mkdir "$DIR/raw_data"
+cd "$DIR/raw_data"
+
+echo "Downloading..."
+
+# Fetch and decompress the four MNIST idx files (train/test images and labels).
+for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
+do
+    if [ ! -e "$fname" ]; then
+        wget --no-check-certificate "http://yann.lecun.com/exdb/mnist/${fname}.gz"
+        gunzip "${fname}.gz"
+    fi
+done
+
+# Regenerate train.list / test.list, which hold the file-name prefixes of the
+# training and test sets. Paths are quoted so the script also works when the
+# checkout lives under a directory whose name contains spaces.
+cd "$DIR"
+rm -f ./*.list
+echo "./data/raw_data/train" > "$DIR/train.list"
+echo "./data/raw_data/t10k" > "$DIR/test.list"
diff --git a/recognize_digits/evaluate.py b/recognize_digits/evaluate.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+import re
+import math
+
+
+def get_best_pass(filename):
+    """Return the test record with the lowest cost found in a training log.
+
+    The log is scanned for records made of a ``Test ... cost=<float>`` entry,
+    followed by ``classification_error_evaluator=<float>`` and ``pass-<int>``.
+
+    Args:
+        filename: path of the training log file to scan.
+
+    Returns:
+        A tuple of strings ``(cost, classification_error, pass_id)`` for the
+        record whose cost is the smallest; on ties the earliest record wins.
+
+    Raises:
+        IndexError: if the log contains no matching test record.
+    """
+    with open(filename, 'r') as f:
+        text = f.read()
+
+    # Raw string so that '\.' reaches the regex engine as an escaped dot
+    # instead of relying on Python passing unknown string escapes through.
+    pattern = re.compile(
+        r'Test.*? cost=([0-9]+\.[0-9]+).*?classification_error_evaluator=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
+        re.S)
+    results = pattern.findall(text)
+    if not results:
+        # Same exception type as indexing an empty result list, but with a
+        # message that tells the user what went wrong.
+        raise IndexError('no test results found in log file: %s' % filename)
+    # min() returns the first smallest element, which is exactly what taking
+    # element 0 of a stable sort by cost would give.
+    return min(results, key=lambda result: float(result[0]))
+
+
+# Script body: report the best pass of the log file named on the command line.
+filename = sys.argv[1]
+log = get_best_pass(filename)
+# log is (cost, classification_error, pass_id); turn the error rate into a
+# percentage accuracy.
+error_rate = float(log[1])
+classification_accuracy = (1 - error_rate) * 100
+print('Best pass is %s, testing Avgcost is %s' % (log[2], log[0]))
+print('The classification accuracy is %.2f%%' % classification_accuracy)
diff --git a/recognize_digits/image/cnn.png b/recognize_digits/image/cnn.png
diff --git a/recognize_digits/image/cnn_train_log.png b/recognize_digits/image/cnn_train_log.png
diff --git a/recognize_digits/image/conv_layer.png b/recognize_digits/image/conv_layer.png
diff --git a/recognize_digits/image/max_pooling.png b/recognize_digits/image/max_pooling.png
diff --git a/recognize_digits/image/mlp.png b/recognize_digits/image/mlp.png
diff --git a/recognize_digits/image/mlp_train_log.png b/recognize_digits/image/mlp_train_log.png
diff --git a/recognize_digits/image/mnist_example_image.png b/recognize_digits/image/mnist_example_image.png
diff --git a/recognize_digits/image/softmax_regression.png b/recognize_digits/image/softmax_regression.png
diff --git a/recognize_digits/image/softmax_train_log.png b/recognize_digits/image/softmax_train_log.png
diff --git a/recognize_digits/load_data.py b/recognize_digits/load_data.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import matplotlib.pyplot as plt
+import random
+import struct
+
+
+def read_data(path, filename):
+    """Load one pair of MNIST idx files (images and labels).
+
+    The files are located by plain string concatenation,
+    ``path + filename + "-images-idx3-ubyte"`` and
+    ``path + filename + "-labels-idx1-ubyte"``, so ``path`` must already end
+    with its separator.
+
+    Returns:
+        A tuple ``(images, labels)``. ``images`` is a float32 array shaped
+        ``(n, rows, cols)`` as given by the image file header; ``labels`` is
+        an integer array with one entry per record in the label file.
+    """
+    image_path = path + filename + "-images-idx3-ubyte"
+    label_path = path + filename + "-labels-idx1-ubyte"
+
+    with open(image_path, "rb") as image_file:
+        # 16-byte big-endian header: magic, image count, rows, cols.
+        _, num_images, num_rows, num_cols = struct.unpack(
+            ">IIII", image_file.read(16))
+        raw_pixels = np.fromfile(
+            image_file, 'ubyte', count=num_images * num_rows * num_cols)
+        images = raw_pixels.reshape(num_images, num_rows, num_cols)
+        images = images.astype('float32')
+
+    with open(label_path, "rb") as label_file:
+        # 8-byte big-endian header: magic, label count.
+        _, num_labels = struct.unpack(">II", label_file.read(8))
+        labels = np.fromfile(label_file, 'ubyte', count=num_labels)
+        labels = labels.astype("int")
+
+    return images, labels
+
+
+if __name__ == "__main__":
+    # Demo: show ten randomly picked training digits in one row and print
+    # their labels.
+    train_images, train_labels = read_data("./data/raw_data/", "train")
+    # The test set is loaded too, although only training digits are displayed.
+    test_images, test_labels = read_data("./data/raw_data/", "t10k")
+
+    num_samples = 10
+    last_index = train_images.shape[0] - 1
+    picks = [random.randint(0, last_index) for _ in range(num_samples)]
+    label_list = [train_labels[pick] for pick in picks]
+
+    # Subplot positions are 1-based.
+    for position, pick in enumerate(picks, 1):
+        plt.subplot(1, num_samples, position)
+        plt.imshow(train_images[pick], cmap="Greys_r")
+        plt.axis('off')
+    print('label: %s' % (label_list, ))
+    plt.show()
diff --git a/recognize_digits/mnist_model.py b/recognize_digits/mnist_model.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+# Config argument selecting between training (default) and prediction; for
+# example predict.py parses this config with 'is_predict=1'. In prediction
+# mode the data sources and the cost layer are left out.
+is_predict = get_config_arg("is_predict", bool, False)
+
+####################Data Configuration ##################
+
+if not is_predict:
+    # train.list / test.list are written by data/get_mnist_data.sh; the
+    # samples themselves come from process() in mnist_provider.py.
+    data_dir = './data/'
+    define_py_data_sources2(
+        train_list=data_dir + 'train.list',
+        test_list=data_dir + 'test.list',
+        module='mnist_provider',
+        obj='process')
+
+######################Algorithm Configuration #############
+# NOTE(review): the learning rate is divided by the batch size (128) and the
+# L2 coefficient multiplied by it -- presumably to compensate for how the
+# trainer scales per-batch gradients; confirm against the PaddlePaddle docs.
+settings(
+    batch_size=128,
+    learning_rate=0.1 / 128.0,
+    learning_method=MomentumOptimizer(0.9),
+    regularization=L2Regularization(0.0005 * 128))
+
+#######################Network Configuration #############
+
+# 784 input values per image and 10 output classes (one per digit).
+data_size = 1 * 28 * 28
+label_size = 10
+# Input layer; its name matches the 'pixel' key declared by the data provider.
+img = data_layer(name='pixel', size=data_size)
+
+
+def softmax_regression(img):
+    """Build a softmax regression classifier on top of ``img``.
+
+    A single fully-connected layer maps the input straight to 10 outputs with
+    a softmax activation; that layer is returned.
+    """
+    return fc_layer(input=img, size=10, act=SoftmaxActivation())
+
+
+def multilayer_perceptron(img):
+    """Build a multilayer perceptron classifier on top of ``img``.
+
+    Two ReLU fully-connected layers (128 and 64 units) are followed by a
+    10-unit softmax layer, which is returned.
+    """
+    # Hidden layers: 128 units, then 64 units, both with ReLU activation.
+    first_hidden = fc_layer(input=img, size=128, act=ReluActivation())
+    second_hidden = fc_layer(
+        input=first_hidden, size=64, act=ReluActivation())
+    # Third fully-connected layer: its size must be 10, the number of unique
+    # digits.
+    return fc_layer(input=second_hidden, size=10, act=SoftmaxActivation())
+
+
+def convolutional_neural_network(img):
+    """Build a convolutional classifier on top of ``img``.
+
+    Two convolution + pooling stages (20 filters, then 50 filters; both with
+    filter_size 5, pool_size 2, pool_stride 2 and tanh activation) feed a
+    128-unit tanh fully-connected layer and a final 10-unit softmax layer,
+    which is returned.
+    """
+    # Settings common to both convolution + pooling stages.
+    shared = dict(filter_size=5, pool_size=2, pool_stride=2)
+
+    # First stage: 1 input channel -> 20 filters.
+    stage_1 = simple_img_conv_pool(
+        input=img,
+        num_filters=20,
+        num_channel=1,
+        act=TanhActivation(),
+        **shared)
+    # Second stage: 20 input channels -> 50 filters.
+    stage_2 = simple_img_conv_pool(
+        input=stage_1,
+        num_filters=50,
+        num_channel=20,
+        act=TanhActivation(),
+        **shared)
+    # Fully-connected layer on top of the convolutional features.
+    dense = fc_layer(input=stage_2, size=128, act=TanhActivation())
+    # Softmax layer: its size must be 10, the number of unique digits.
+    return fc_layer(input=dense, size=10, act=SoftmaxActivation())
+
+
+# Network selection: keep exactly one of the following three lines enabled.
+predict = softmax_regression(img)
+#predict = multilayer_perceptron(img)
+#predict = convolutional_neural_network(img)
+
+if not is_predict:
+    # Training/testing: add the label input and output the classification
+    # cost computed between the prediction and the label.
+    lbl = data_layer(name="label", size=label_size)
+    inputs(img, lbl)
+    outputs(classification_cost(input=predict, label=lbl))
+else:
+    # Prediction: output the prediction layer itself.
+    outputs(predict)
diff --git a/recognize_digits/mnist_provider.py b/recognize_digits/mnist_provider.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer.PyDataProvider2 import *
+import numpy as np
+import struct
+
+
+# Data provider that feeds MNIST samples to the trainer.
+@provider(
+    input_types={
+        'pixel': dense_vector(28 * 28),
+        'label': integer_value(10)
+    })
+def process(settings, filename):  # settings is not used currently.
+    """Yield one ``{'pixel': ..., 'label': ...}`` dict per MNIST sample.
+
+    ``filename`` is a path prefix: the images are read from
+    ``filename + "-images-idx3-ubyte"`` and the labels from
+    ``filename + "-labels-idx1-ubyte"``. Pixel values are rescaled from
+    0..255 to [-1, 1] before being yielded.
+    """
+    image_path = filename + "-images-idx3-ubyte"
+    label_path = filename + "-labels-idx1-ubyte"
+
+    with open(image_path, "rb") as image_file:
+        # 16-byte big-endian header: magic, image count, rows, cols.
+        _, num_images, num_rows, num_cols = struct.unpack(
+            ">IIII", image_file.read(16))
+        pixels = np.fromfile(
+            image_file, 'ubyte', count=num_images * num_rows * num_cols)
+        images = pixels.reshape(num_images, num_rows, num_cols)
+        images = images.astype('float32')
+        images = images / 255.0 * 2.0 - 1.0  # normalized to [-1,1]
+
+    with open(label_path, "rb") as label_file:
+        # 8-byte big-endian header: magic, label count.
+        _, num_labels = struct.unpack(">II", label_file.read(8))
+        labels = np.fromfile(label_file, 'ubyte', count=num_labels)
+        labels = labels.astype("int")
+
+    # The number of samples yielded is the count from the label file header.
+    for i in xrange(num_labels):
+        yield {"pixel": images[i], 'label': labels[i]}
diff --git a/recognize_digits/plot_cost.py b/recognize_digits/plot_cost.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import matplotlib.pyplot as plt
+import re
+import sys
+
+
+def plot_log(filename):
+    """Plot training and test cost per pass from a training log.
+
+    The log is scanned for records made of ``AvgCost=<float>``, a following
+    ``Test ... cost=<float>`` entry and ``pass-<int>``. The training cost is
+    drawn as a solid red line and the test cost as a dashed green line, with
+    the pass number on the x axis. The figure is displayed with ``plt.show()``.
+
+    Args:
+        filename: path of the training log file to plot.
+
+    Raises:
+        ValueError: if the log contains no matching record.
+    """
+    # Read everything up front so the file is not kept open while the plot
+    # window is displayed.
+    with open(filename, 'r') as f:
+        text = f.read()
+
+    # Raw string so that '\.' reaches the regex engine as an escaped dot
+    # instead of relying on Python passing unknown string escapes through.
+    pattern = re.compile(
+        r'AvgCost=([0-9]+\.[0-9]+).*?Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
+        re.S)
+    results = pattern.findall(text)
+    if not results:
+        raise ValueError('no cost records found in log file: %s' % filename)
+
+    # Real lists (not map() results) so plotting works on Python 2 and 3.
+    train_cost_float = [float(record[0]) for record in results]
+    test_cost_float = [float(record[1]) for record in results]
+    pass_int = [int(record[2]) for record in results]
+
+    plt.plot(pass_int, train_cost_float, 'red', label='Train')
+    plt.plot(pass_int, test_cost_float, 'g--', label='Test')
+    plt.ylabel('AvgCost')
+    plt.xlabel('Epoch')
+
+    # Now add the legend with some customizations.
+    legend = plt.legend(loc='upper right', shadow=False)
+
+    # The frame is the rectangle surrounding the legend.
+    frame = legend.get_frame()
+    frame.set_facecolor('0.90')
+
+    # Set the fontsize
+    for label in legend.get_texts():
+        label.set_fontsize('large')
+
+    for label in legend.get_lines():
+        label.set_linewidth(1.5)  # the legend line width
+
+    plt.show()
+
+
+if __name__ == '__main__':
+    # The training log to plot is the first command-line argument.
+    log_path = sys.argv[1]
+    plot_log(log_path)
diff --git a/recognize_digits/predict.py b/recognize_digits/predict.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# NOTE: docopt builds the command-line parser from the docstring below. Every
+# value read in main() must appear in the usage line as an upper-case
+# argument; a literal path there would be parsed as a fixed command word.
+"""Usage: predict.py -c CONF -d DATA -m MODEL
+
+
+Arguments:
+    CONF        train conf
+    DATA        MNIST Data directory, e.g. ./data/raw_data/ (keep the trailing slash)
+    MODEL       Model
+
+Options:
+    -h      --help
+    -c      conf
+    -d      data
+    -m      model
+"""
+
+import os
+import sys
+from docopt import docopt
+import matplotlib.pyplot as plt
+import numpy as np
+
+from py_paddle import swig_paddle, DataProviderConverter
+from paddle.trainer.PyDataProvider2 import dense_vector
+from paddle.trainer.config_parser import parse_config
+
+from load_data import read_data
+
+
+class Prediction():
+    """Interactive MNIST predictor backed by a trained PaddlePaddle model.
+
+    Attributes set by ``__init__``:
+        network: gradient machine created from the parsed config, with the
+            trained parameters loaded.
+        images, labels: the "t10k" images and labels returned by read_data.
+        converter: converts a batch of 28 * 28 dense vectors into the
+            network's input format.
+    """
+
+    def __init__(self, train_conf, data_dir, model_dir):
+        """Load the network, its parameters and the test data.
+
+        Args:
+            train_conf: path of the network config; it is parsed with
+                ``is_predict=1``.
+            data_dir: directory prefix handed to read_data for the "t10k"
+                files.
+            model_dir: location the trained parameters are loaded from.
+        """
+        conf = parse_config(train_conf, 'is_predict=1')
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
+        self.network.loadParameters(model_dir)
+
+        self.images, self.labels = read_data(data_dir, "t10k")
+
+        slots = [dense_vector(28 * 28)]
+        self.converter = DataProviderConverter(slots)
+
+    def predict(self, index):
+        """Run the network on test image ``index`` and print the outcome.
+
+        Prints the predicted probability of each digit, the most probable
+        digit and the actual label of the image.
+        """
+        # read_data returns raw 0..255 pixel values, whereas the training
+        # data provider (mnist_provider.process) rescales them to [-1, 1].
+        # Apply the same scaling so the network sees inputs in the range it
+        # was trained on.
+        pixels = self.images[index] / 255.0 * 2.0 - 1.0
+        batch = self.converter([[pixels.flatten().tolist()]])
+        output = self.network.forwardTest(batch)
+        prob = output[0]["value"]
+        # Sorting the negated probabilities puts the most probable digit first.
+        ranking = np.argsort(-prob)
+        print("Predicted probability of each digit:")
+        print(prob)
+        print("Predict Number: %d" % ranking[0][0])
+        print("Actual Number: %d" % self.labels[index])
+
+
+def main():
+    """Parse the command line, load the model and predict digits on demand.
+
+    After the predictor is built, image ids are read from standard input one
+    at a time and the prediction for each is printed. Input that is not an
+    integer is reported and asked for again; end of input ends the loop.
+    """
+    arguments = docopt(__doc__)
+    train_conf = arguments['CONF']
+    data_dir = arguments['DATA']
+    model_dir = arguments['MODEL']
+    swig_paddle.initPaddle("--use_gpu=0")
+    predictor = Prediction(train_conf, data_dir, model_dir)
+    while True:
+        try:
+            index = int(raw_input("Input image_id [0~9999]: "))
+        except EOFError:
+            # End of input (e.g. Ctrl-D): leave the loop instead of crashing.
+            break
+        except ValueError:
+            print("Invalid image_id, please input an integer.")
+            continue
+        predictor.predict(index)
+
+
+# Start the interactive predictor only when this file is run as a script.
+if __name__ == '__main__':
+    main()