In [1]:
# test pgm reading
import re
import numpy as np

def read_pgm(filename, byteorder='>'):
    """Return image data from a raw PGM file as numpy array.

    Format specification: http://netpbm.sourceforge.net/doc/pgm.html

    """
    with open(filename, 'rb') as f:
        buffer = f.read()
    try:
        header, width, height, maxval = re.search(
            b"(^P5\s(?:\s*#.*[\r\n])*"
            b"(\d+)\s(?:\s*#.*[\r\n])*"
            b"(\d+)\s(?:\s*#.*[\r\n])*"
            b"(\d+)\s(?:\s*#.*[\r\n]\s)*)", buffer).groups()
    except AttributeError:
        raise ValueError("Not a raw PGM file: '%s'" % filename)
    return np.frombuffer(buffer,
                            dtype='u1' if int(maxval) < 256 else byteorder+'u2',
                            count=int(width)*int(height),
                            offset=len(header)
                            ).reshape((int(height), int(width)))


filename='./gestures/A/A_down_2.pgm'

if __name__ == "__main__":
    from matplotlib import pyplot
    image = read_pgm(filename, byteorder='<')
#     print type(image), image
#     pyplot.imshow(image, pyplot.cm.gray)
#     pyplot.show()
    
    image = image.astype('float32')
    image /= np.max(image)
#     print image
    pyplot.imshow(image, pyplot.cm.gray)
    pyplot.show()

<Figure size 640x480 with 1 Axes>

In [2]:
# definition of neural network
class SingeHiddenLayer(object):

    def __init__(self, X, y, num_hidden):
        self.data_x = np.atleast_2d(X)  # 判断输入训练集是否大于等于二维; 把x_train()取下来
        # a.flatten()把a放在一维数组中，不写参数默认是“C”，也就是先行后列的方式，也有“F”先列后行的方式； 把 y_train取下来
        self.data_y = np.array(y).flatten()
        self.num_data = len(self.data_x)  # 训练数据个数
        # shape[] 读取矩阵的长度，比如shape[0]就是读取矩阵第一维度的长度 (120行，4列，所以shape[0]==120,shapep[1]==4)
        self.num_feature = self.data_x.shape[1]
        self.num_hidden = num_hidden  # 隐藏层节点个数

        # 随机生产权重（从-1，到1，生成（num_feature行,num_hidden列））
        self.w = np.random.uniform(-0.01, 0.01, (self.num_feature, self.num_hidden))

        # 随机生成偏置，一个隐藏层节点对应一个偏置
        for i in range(self.num_hidden):
            b = np.random.uniform(-0.01, 0.01, (1, self.num_hidden))
            self.first_b = b

        # 生成偏置矩阵，以隐藏层节点个数4为行，样本数120为列
        for i in range(self.num_data - 1):
            b = np.row_stack((b, self.first_b))  # row_stack 以叠加行的方式填充数组
        self.b = b
    # 定义sigmoid函数

    def sigmoid(self, x):
        return 1.0 / (1 + np.exp(-x))

    def train(self, x_train, y_train, classes=1):
        mul = np.dot(self.data_x, self.w)  # 输入乘以权重
        print(self.data_x.shape)
        add = mul + self.b  # 加偏置
        H = self.sigmoid(add)  # 激活函数

        H_ = np.linalg.pinv(H)  # 求广义逆矩阵
        # print(type(H_.shape))

        # 将只有一列的Label矩阵转换，例如，iris的label中共有三个值，则转换为3列，以行为单位，label值对应位置标记为1，其它位置标记为0
        # self.train_y = np.zeros((self.num_data,classes))  #初始化一个120行，3列的全0矩阵
        # for i in range(0,self.num_data):
        # self.train_y[i,y_train[i]] = 1   #对应位置标记为1
        self.train_y = y_train

        self.out_w = np.dot(H_, self.train_y)  # 求输出权重

    def predict(self, x_test):
        self.t_data = np.atleast_2d(x_test)  # 测试数据集
        self.num_tdata = len(self.t_data)  # 测试集的样本数
        self.pred_Y = np.zeros((x_test.shape[0]))  # 初始化

        b = self.first_b

        # 扩充偏置矩阵，以隐藏层节点个数4为行，样本数30为列
        for i in range(self.num_tdata - 1):
            b = np.row_stack((b, self.first_b))  # 以叠加行的方式填充数组

         # 预测
        self.pred_Y = np.dot(self.sigmoid(
            np.dot(self.t_data, self.w) + b), self.out_w)

        return(self.pred_Y)

        # self.output=np.sum(self.pred_Y)

        # #取输出节点中值最大的类别作为预测值
        # self.predy = []
        # for i in self.pred_Y:
        #     L = i.tolist()
        #     self.predy.append(L.index(max(L)))

In [3]:
# get training data
train_filelist = 'downgesture_train.list'
x_train=[]
y_train=[]
with open(train_filelist, 'r') as train_fl:
    for train_fn in train_fl.readlines():
        
        image = read_pgm(train_fn[:-1], byteorder='<')
        image = image.astype('float32')
        image /= np.max(image)
        
        x_train.append(np.squeeze(image.reshape(1,-1)))
        
        y_train.append([0. if re.match(string=train_fn, pattern='.*?down.*?')==None else 1.])
        
x_train = np.array(x_train)
y_train = np.array(y_train)
print len(y_train), y_train.T
# print len(x_train), x_train

184 [[1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1.
  1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [4]:
# get testing data
test_filelist = 'downgesture_test.list'
x_test=[]
y_test=[]
with open(test_filelist, 'r') as test_fl:
    for test_fn in test_fl.readlines():
        
        image = read_pgm(test_fn[:-1], byteorder='<')
        image = image.astype('float32')
        image /= np.max(image)
        
        x_test.append(np.squeeze(image.reshape(1,-1)))
        
        y_test.append([0. if re.match(string=test_fn, pattern='.*?down.*?')==None else 1.])
        
x_test = np.array(x_test)
y_test = np.array(y_test)
print len(y_test), y_test.T

83 [[1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1.
  1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 1. 0.
  0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0.
  0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0.]]


In [5]:
# neural network training
NN = SingeHiddenLayer(x_train, y_train, 500)  # 训练数据集，隐藏层节点个数
NN.train(x_train, y_train)
y_pred = np.abs(np.round(NN.predict(x_train)))
print 'training accuracy:',1.0-np.sum(np.abs(y_pred-y_train))/len(y_pred)

(184, 960)
training accuracy: 1.0


In [6]:
# neural network testing
y_pred = np.abs(np.round(NN.predict(x_test)))
print y_pred.T
print 'testing accuracy:',1.0-np.sum(np.abs(y_pred-y_test))/len(y_pred)

[[1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 1. 2. 0. 0. 1. 0. 0. 1.
  0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1.
  1. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0.]]
testing accuracy: 0.7590361445783133
