In [4]:
import numpy as np
import neurolab as nl

# Input file containing the character samples (read as tab-separated rows).
input_file = 'letter.data'

# Training a neural network on a large amount of data takes a long time.
# To demonstrate how the system is put together, only 20 data points are used.
num_datapoints = 20

# Looking at the data, the first 20 rows contain 7 distinct characters;
# they are listed here. The position of a character in this string is also
# the index of its one-hot output unit.
orig_labels = 'omandig'

# Number of distinct characters, i.e. the number of network outputs.
num_output = len(orig_labels)

# Train/test split: 90% of the samples for training, the remainder for testing.
num_train = int(num_datapoints * 0.9)
num_test = num_datapoints - num_train

# Slice bounds applied to the tab-split fields of each row to extract the
# feature values (fields from index 6 up to, but excluding, the last one).
start_index, end_index = 6, -1
# Build the dataset: one feature vector and one one-hot label per sample.
data = []
labels = []
with open(input_file, 'r') as f:
    # Iterate over the file lazily so that reading stops as soon as enough
    # samples have been collected, instead of loading every line up front.
    for line in f:
        # Fields are separated by tabs.
        list_vals = line.split('\t')

        # Skip the row if its character is not in the label list.
        if list_vals[1] not in orig_labels:
            continue

        # One-hot encode the character as a flat row of length num_output
        # and append it to the label list.
        label = np.zeros(num_output)
        label[orig_labels.index(list_vals[1])] = 1
        labels.append(label)

        # Extract the feature values of the character and append them to
        # the data list.
        cur_char = np.array([float(x) for x in list_vals[start_index:end_index]])
        data.append(cur_char)

        # Stop once enough samples have been collected.
        if len(data) >= num_datapoints:
            break

# Fail with a clear message when the file holds too few usable samples; the
# reshape below would otherwise raise a less helpful ValueError.
if len(data) < num_datapoints:
    raise ValueError(
        f"expected {num_datapoints} samples labelled with one of "
        f"{orig_labels!r} in {input_file!r}, found only {len(data)}"
    )

# Convert to float NumPy arrays. np.asfarray was removed in NumPy 2.0;
# np.asarray with a float dtype is the equivalent that works on all versions.
data = np.asarray(data, dtype=float)
labels = np.asarray(labels, dtype=float).reshape(num_datapoints, num_output)
# Number of input features per sample.
num_dims = len(data[0])

# Feed-forward network: each input is declared with the value range [0, 1];
# the layer sizes are 128 and 16, followed by one output unit per label.
input_ranges = [[0, 1] for _ in range(num_dims)]
net = nl.net.newff(input_ranges, [128, 16, num_output])

# Train with gradient descent on the first num_train samples, for at most
# 10000 epochs; progress is reported every 500 epochs and training stops
# early once the error goal of 0.01 is reached.
net.trainf = nl.train.train_gd
error = net.train(
    data[:num_train, :],
    labels[:num_train, :],
    epochs=10000,
    show=500,
    goal=0.01,
)
# Predict the outputs for the held-out test samples, i.e. every row after
# the first num_train rows used for training.
predicted_output = net.sim(data[num_train:, :])
print("\nTesting on unknown data:")
# The ground-truth labels must be sliced exactly like the test inputs.
# Indexing labels from row 0 would print the labels of training samples next
# to predictions made for different (test) samples.
test_labels = labels[num_train:, :]
for expected, predicted in zip(test_labels, predicted_output):
    # argmax maps the one-hot / activation vector back to a character index.
    print("\nOriginal:", orig_labels[np.argmax(expected)])
    print("Predicted:", orig_labels[np.argmax(predicted)])

Epoch: 500; Error: 0.16587884059047234;
Epoch: 1000; Error: 0.04422656575701798;
Epoch: 1500; Error: 0.02610979303614146;
Epoch: 2000; Error: 0.01987000580305627;
Epoch: 2500; Error: 0.016840897458780674;
The goal of learning is reached

Testing on unknown data:

Original: o
Predicted: o

Original: m
Predicted: m
