<a href="https://colab.research.google.com/github/YapingWu/GoogleColab/blob/main/keras/CharCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 加载数据

In [None]:
import os

from keras.initializers import RandomNormal
from keras.layers import Embedding, Dense, Conv1D, MaxPool1D, AlphaDropout, Flatten
from keras.models import Sequential, load_model
from sklearn.model_selection import train_test_split

In [None]:
# Seed passed as random_state to both train_test_split calls so the
# train/validation/test partition is the same on every run.
SEED = 7

In [None]:
import numpy as np

# Read the inputs (x) and the labels (y) from their whitespace-delimited text
# files, in that order, using numpy's default float parsing.
x, y = (np.loadtxt(path) for path in ('/content/x.txt', '/content/y.txt'))

In [None]:
# Partition the data 80/10/10: first hold out 20% of the samples, then cut that
# hold-out in half to obtain the validation and test sets. Both calls share
# SEED so the partition is reproducible.
# No `logger` is defined anywhere in this notebook, so progress is reported
# with print(), matching the other cells.
print("划分训练集、验证集和测试集")
x_train, x_holdout, y_train, y_holdout = train_test_split(
    x, y, test_size=0.2, random_state=SEED)
x_val, x_test, y_val, y_test = train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=SEED)

print("x_train Shape: %s, y_train Shape: %s" % (x_train.shape, y_train.shape))
print("x_val Shape: %s, y_val Shape: %s" % (x_val.shape, y_val.shape))
print("x_test Shape: %s, y_test Shape: %s" % (x_test.shape, y_test.shape))

# 创建和训练模型

In [None]:
# Hyperparameters

# One name per data source. The commas must sit between the string literals:
# with the commas inside the quotes, Python concatenates adjacent literals and
# the list collapses to the single element 'myspace,phpbb,rockyou'.
data_names = ['myspace', 'phpbb', 'rockyou']

# Character vocabulary. Note that '-' occurs twice in this string, so
# alphabet_size counts it twice.
alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}"
alphabet_size = len(alphabet)
length = 100          # used as the Embedding layer's input_length
no_of_classes = 3     # width of the final softmax layer
batch_size = 128
epochs = 2

# One entry per convolution layer: [filters, kernel width, pool size].
# A pool size of None means no pooling layer follows that convolution.
conv_layers = [
    [256, 7, 3],
    [256, 7, 3],
    [256, 3, None],
    [256, 3, None],
    [256, 3, None],
    [256, 3, 3],
]
fully_layers = [1024, 1024]   # units of each hidden Dense layer
embedding_size = 50
th = 1e-6                     # not referenced by any cell visible in this notebook
dropout_p = 0.5               # AlphaDropout rate after each hidden Dense layer
optimizer = 'adam'
initializer_stddev = 0.05     # stddev of the RandomNormal conv kernel initializer
loss = 'categorical_crossentropy'

In [None]:
print('创建CNN模型...')
model = Sequential()
# Embedding layer: maps each character index to a dense vector.
# NOTE(review): input_dim is alphabet_size + 1, presumably reserving one extra
# index (e.g. padding/unknown) - confirm against how x.txt was encoded.
model.add(Embedding(input_dim=alphabet_size + 1, output_dim=embedding_size, input_length=length))
# Convolution layers: one Conv1D per conv_layers entry, followed by max pooling
# only when that entry specifies a pool size.
for num_filters, filter_width, pool_size in conv_layers:
    model.add(Conv1D(filters=num_filters,
                     kernel_size=filter_width,
                     kernel_initializer=RandomNormal(mean=0, stddev=initializer_stddev),
                     activation='tanh'))
    if pool_size is not None:
        model.add(MaxPool1D(pool_size))
# Fully connected layers: SELU units with lecun_normal init, each followed by
# AlphaDropout.
model.add(Flatten())
for units in fully_layers:
    model.add(Dense(units, activation='selu', kernel_initializer='lecun_normal'))
    model.add(AlphaDropout(dropout_p))
model.add(Dense(no_of_classes, activation='softmax'))

# Build and compile model
# NOTE(review): categorical_crossentropy expects one-hot labels of width
# no_of_classes - confirm y.txt is stored in that form.
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
print('创建模型：\n')
model.summary()

print('开始训练CNN模型')
model.fit(x_train, y_train,
          validation_data=(x_val, y_val),
          epochs=epochs,
          batch_size=batch_size,
          verbose=1)

# Store the metrics under their own names. Unpacking into `loss` would replace
# the loss-function hyperparameter with a float and break model.compile() the
# next time this cell is run.
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print("Model Accuracy on test: %.2f%%, Loss: %.2f" % (test_accuracy * 100, test_loss))

model_file = './model/cnn.h5'
# Saving to an .h5 file does not create missing parent directories, so make
# sure ./model exists first.
os.makedirs(os.path.dirname(model_file), exist_ok=True)
print("保存模型：%s" % model_file)
model.save(model_file)

# 其他命令

In [None]:
# Shell escape: extract cnn.zip into the current working directory.
# NOTE(review): nothing else in this notebook reads the extracted files or
# creates cnn.zip - confirm what the archive contains and whether this cell
# should run before the cells above.
!unzip cnn.zip