<a href="https://colab.research.google.com/github/YapingWu/GoogleColab/blob/main/adjust_param.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 导入需要的包

In [None]:
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras import optimizers
from keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
import time
import numpy as np

# 加载数据

注意：运行本节之前，请先执行文末“解压文件”一节中的命令，否则找不到数据文件。




In [None]:
# Dataset-specific parameters. The three lists are parallel: position k of
# each list belongs to the dataset named data_names[k].
data_names = ['myspace', 'phpbb', 'rockyou']
# Used below as the Embedding input_dim and as the width of the softmax output.
vocab_sizes = [73, 56, 251]
# The models below are built with input_length = max_length - 1.
max_lengths = [35, 21, 41]

# Index of the dataset used by every experiment in this notebook.
i = 0
data_name = data_names[i]
vocab_size = vocab_sizes[i]
max_length = max_lengths[i]

In [None]:
# Fixed seed so both splits below are reproducible across kernel restarts.
SEED = 42

# Load the feature and label matrices of the selected dataset from
# '/content/<data_name>x.txt' and '/content/<data_name>y.txt'.
print("加载数据：")
X = np.loadtxt('/content/' + data_name + 'x.txt')
y = np.loadtxt('/content/' + data_name + 'y.txt')

# Two-stage split: 80% of the rows are held out first, then that held-out part
# is halved, giving train / validation / test = 20% / 40% / 40%.
# NOTE(review): only 20% of the data is used for training - confirm that
# test_size=0.8 (rather than 0.2) is intended.
print("划分训练集、验证集和测试集：")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=SEED)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=SEED)
print("X_train Shape: %s, y_train Shape: %s" % (X_train.shape, y_train.shape))
print("X_val Shape: %s, y_val Shape: %s" % (X_val.shape, y_val.shape))
print("X_test Shape: %s, y_test Shape: %s" % (X_test.shape, y_test.shape))

# 调整网络参数

## 调整全连接层个数、lstm层个数和每层神经元个数

In [None]:
# Training settings shared by every run of the architecture sweep.
epochs, batch_size = 30, 128

# Axes of the grid that the sweep iterates over.
dense_layers = [1, 2]          # number of Dense layers (the softmax output layer included)
layer_sizes = [32, 64, 128]    # units in each LSTM / hidden Dense layer
lstm_layers = [1, 2, 3]        # number of stacked LSTM layers

# Root directory of the TensorBoard logs written by this sweep.
log_dir = '/content/logs/' + data_name +'/'

In [None]:
# Architecture sweep: build, train and evaluate one model for every
# combination of (dense_layer, layer_size, lstm_layer).
for dense_layer in dense_layers:
  for layer_size in layer_sizes:
    for lstm_layer in lstm_layers:

      # The run name is also the TensorBoard sub-directory of this configuration.
      NAME = "{}-lstm-{}-notes-{}-dense".format(lstm_layer, layer_size, dense_layer)
      tensorboard = TensorBoard(log_dir=log_dir+NAME)
      print(NAME)

      model = Sequential()
      # presumably each row of X holds max_length-1 token ids - TODO confirm
      model.add(Embedding(input_dim=vocab_size, output_dim=10, input_length=max_length-1))

      # Every LSTM except the last returns the full sequence so that the next
      # LSTM layer can consume it; the last one returns only its final output.
      for _layer in range(lstm_layer - 1):
        model.add(LSTM(layer_size, return_sequences=True))
      model.add(LSTM(layer_size, return_sequences=False))

      # dense_layer counts the output layer, hence dense_layer - 1 hidden layers.
      for _layer in range(dense_layer - 1):
        model.add(Dense(units=layer_size, activation='relu'))
      model.add(Dense(vocab_size, activation='softmax'))

      model.summary()

      # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
      adam = optimizers.Adam(learning_rate=0.01)
      # assumes y_* are one-hot rows of width vocab_size - TODO confirm
      model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

      model.fit(X_train, y_train,
                validation_data=(X_val, y_val),
                epochs=epochs,
                batch_size=batch_size,
                verbose=1,
                callbacks=[tensorboard])
      # Evaluate on the held-out test split. These are plain notebook globals;
      # there is no `self` at cell level.
      loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
      print("Model Accuracy: %.2f%%, Loss: %.2f" % (accuracy * 100, loss))

## 调整batch_size、epoch

### [1, 32, 1]
dense_layer、layer_size和lstm_layer的组合为[1, 32, 1]

In [None]:
# Settings for the batch-size sweep on the smallest architecture.
epochs = 100

# Fixed architecture: 1 Dense layer, 32 units per layer, 1 LSTM layer.
dense_layer, layer_size, lstm_layer = 1, 32, 1

# Root directory of the TensorBoard logs written by this sweep.
log_dir = '/content/logs/1-32-1/' + data_name +'/'

# Batch sizes to compare.
batch_sizes = [32, 64, 128, 256, 512]

In [None]:
# Batch-size sweep: train and evaluate the fixed architecture once per batch size.
for batch_size in batch_sizes:

  # One TensorBoard directory per batch size. batch_size is an int, so it has
  # to be converted before it can be appended to the path.
  NAME = log_dir + str(batch_size)
  tensorboard = TensorBoard(log_dir=NAME)
  print(NAME)

  model = Sequential()
  # presumably each row of X holds max_length-1 token ids - TODO confirm
  model.add(Embedding(input_dim=vocab_size, output_dim=10, input_length=max_length-1))

  # Every LSTM except the last returns the full sequence so that the next
  # LSTM layer can consume it; the last one returns only its final output.
  for _layer in range(lstm_layer - 1):
    model.add(LSTM(layer_size, return_sequences=True))
  model.add(LSTM(layer_size, return_sequences=False))

  # dense_layer counts the output layer, hence dense_layer - 1 hidden layers.
  for _layer in range(dense_layer - 1):
    model.add(Dense(units=layer_size, activation='relu'))
  model.add(Dense(vocab_size, activation='softmax'))

  model.summary()

  # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
  adam = optimizers.Adam(learning_rate=0.01)
  # assumes y_* are one-hot rows of width vocab_size - TODO confirm
  model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

  model.fit(X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            verbose=1,
            callbacks=[tensorboard])
  # Evaluate on the held-out test split. These are plain notebook globals;
  # there is no `self` at cell level.
  loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
  print("Model Accuracy: %.2f%%, Loss: %.2f" % (accuracy * 100, loss))

### [2, 128, 3]
dense_layer、layer_size和lstm_layer的组合为[2, 128, 3]

In [None]:
# Settings for the batch-size sweep on the largest architecture.
epochs = 50

# Fixed architecture: 2 Dense layers, 128 units per layer, 3 LSTM layers.
dense_layer, layer_size, lstm_layer = 2, 128, 3

# Root directory of the TensorBoard logs written by this sweep.
log_dir = '/content/logs/2-128-3/' + data_name +'/'

# Batch sizes to compare.
batch_sizes = [32, 64, 128, 256, 512]

In [None]:
# Batch-size sweep: train and evaluate the fixed architecture once per batch size.
for batch_size in batch_sizes:
  # One TensorBoard directory per batch size. batch_size is an int, so it has
  # to be converted before it can be appended to the path.
  NAME = log_dir + str(batch_size)
  tensorboard = TensorBoard(log_dir=NAME)
  print(NAME)

  model = Sequential()
  # presumably each row of X holds max_length-1 token ids - TODO confirm
  model.add(Embedding(input_dim=vocab_size, output_dim=10, input_length=max_length-1))

  # Every LSTM except the last returns the full sequence so that the next
  # LSTM layer can consume it; the last one returns only its final output.
  for _layer in range(lstm_layer - 1):
    model.add(LSTM(layer_size, return_sequences=True))
  model.add(LSTM(layer_size, return_sequences=False))

  # dense_layer counts the output layer, hence dense_layer - 1 hidden layers.
  for _layer in range(dense_layer - 1):
    model.add(Dense(units=layer_size, activation='relu'))
  model.add(Dense(vocab_size, activation='softmax'))

  model.summary()

  # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
  adam = optimizers.Adam(learning_rate=0.01)
  # assumes y_* are one-hot rows of width vocab_size - TODO confirm
  model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

  model.fit(X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            verbose=1,
            callbacks=[tensorboard])
  # Evaluate on the held-out test split. These are plain notebook globals;
  # there is no `self` at cell level.
  loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
  print("Model Accuracy: %.2f%%, Loss: %.2f" % (accuracy * 100, loss))

# 需要的命令

## 解压文件

In [1]:
!unzip /content/myspace.zip

Archive:  /content/myspace.zip
  inflating: myspacey.txt            
  inflating: myspacex.txt            


In [2]:
!unzip /content/phpbb.zip

Archive:  /content/phpbb.zip
  inflating: phpbby.txt              
  inflating: phpbbx.txt              


In [3]:
!unzip /content/rockyou.zip

Archive:  /content/rockyou.zip
  inflating: rockyouy.txt            
  inflating: rockyoux.txt            


## 使用tensorboard

In [None]:
# Load the TensorBoard notebook extension, which provides the %tensorboard magic.
%load_ext tensorboard

In [None]:
%tensorboard --logdir logs