# Подключение диска и установка keras.tunes

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
pip install -U keras-tuner

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting kt-legacy
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.3.5 kt-legacy-1.0.5


# Инициализация

In [3]:
%tensorflow_version 2.x
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, MaxPooling1D, Dropout, LSTM, Bidirectional, SpatialDropout1D, Flatten
from tensorflow.keras import utils
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import utils
from tensorflow import keras
from kerastuner.tuners import RandomSearch, Hyperband, BayesianOptimization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


  from kerastuner.tuners import RandomSearch, Hyperband, BayesianOptimization


In [4]:
# Максимальное количество слов
num_words = 10000
# Максимальная длина экшена
max_action_len = 50
#Число классов
num_classes = 15

# Загрузка тренировочных данных

In [5]:
train = pd.read_csv('drive/MyDrive/actionsNPCDataSet/trainActions.csv',header=None,
names=['class', 'text'])

In [6]:
#Получение экшенов
actions = train['text']

In [7]:
y_train = utils.to_categorical(train['class'] - 1, num_classes)

# Токенизация

In [8]:
tokenizer = Tokenizer(num_words=num_words)

In [9]:
#Обучение токенайзера
tokenizer.fit_on_texts(actions)

In [10]:
train_sequences = tokenizer.texts_to_sequences(actions)

In [11]:
x_train = pad_sequences(train_sequences, maxlen=max_action_len)

# Оптимизация гиперпараметров

In [12]:
def build_model(hp):
    activation_choice = hp.Choice('activation', values=['sigmoid', 'tanh', 'relu', 'elu', 'selu'])
    optimizer_choice = hp.Choice('optimizer', values=['SGD', 'adam', 'adagrad', 'adadelta', 'rmsprop'])
    layers_num = hp.Int('layers_num', 1, 4)
    neurons_num = hp.Int('neurons_num', min_value=128, max_value=1024, step=32)
    embedding_num = hp.Int('embedding_num', min_value=4, max_value=128, step=16)
    embeddings_regularizer = keras.regularizers.l2(
        hp.Choice('embeddings_regularizer', values=[1e-2, 1e-3, 1e-4], default=1e-3))
    kernel_regularizer = keras.regularizers.l2(hp.Choice('kernel_regularizer', values=[1e-2, 1e-3, 1e-4], default=1e-3))

    model = Sequential()
    model.add(Embedding(input_dim=num_words, output_dim=embedding_num, input_length=max_action_len,
                        embeddings_regularizer=embeddings_regularizer))
    for i in range(layers_num):
        model.add(Dense(units=neurons_num, activation=activation_choice, kernel_regularizer=kernel_regularizer))
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer=optimizer_choice, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [13]:
tuner = BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=75,
    directory='fnn_actions_dir'
)

In [14]:
tuner.search_space_summary()

Search space summary
Default search space size: 7
activation (Choice)
{'default': 'sigmoid', 'conditions': [], 'values': ['sigmoid', 'tanh', 'relu', 'elu', 'selu'], 'ordered': False}
optimizer (Choice)
{'default': 'SGD', 'conditions': [], 'values': ['SGD', 'adam', 'adagrad', 'adadelta', 'rmsprop'], 'ordered': False}
layers_num (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 4, 'step': 1, 'sampling': 'linear'}
neurons_num (Int)
{'default': None, 'conditions': [], 'min_value': 128, 'max_value': 1024, 'step': 32, 'sampling': 'linear'}
embedding_num (Int)
{'default': None, 'conditions': [], 'min_value': 4, 'max_value': 128, 'step': 16, 'sampling': 'linear'}
embeddings_regularizer (Choice)
{'default': 0.001, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}
kernel_regularizer (Choice)
{'default': 0.001, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [15]:
tuner.search(x_train, y_train, batch_size=128, epochs=5, validation_split=0.2)

Trial 75 Complete [00h 00m 04s]
val_accuracy: 0.967399001121521

Best val_accuracy So Far: 0.9808646440505981
Total elapsed time: 00h 09m 00s


In [16]:
tuner.results_summary(num_trials=74)

Results summary
Results in fnn_actions_dir/untitled_project
Showing 74 best trials
Objective(name="val_accuracy", direction="max")

Trial 26 summary
Hyperparameters:
activation: tanh
optimizer: adam
layers_num: 1
neurons_num: 288
embedding_num: 100
embeddings_regularizer: 0.0001
kernel_regularizer: 0.0001
Score: 0.9808646440505981

Trial 34 summary
Hyperparameters:
activation: tanh
optimizer: adam
layers_num: 1
neurons_num: 288
embedding_num: 116
embeddings_regularizer: 0.0001
kernel_regularizer: 0.0001
Score: 0.9808646440505981

Trial 35 summary
Hyperparameters:
activation: tanh
optimizer: adam
layers_num: 1
neurons_num: 288
embedding_num: 116
embeddings_regularizer: 0.0001
kernel_regularizer: 0.0001
Score: 0.9787384867668152

Trial 28 summary
Hyperparameters:
activation: tanh
optimizer: adam
layers_num: 1
neurons_num: 256
embedding_num: 116
embeddings_regularizer: 0.0001
kernel_regularizer: 0.0001
Score: 0.9780297875404358

Trial 12 summary
Hyperparameters:
activation: tanh
optimizer

# Загрузка тестовых данных

In [17]:
test_actions = pd.read_csv('drive/MyDrive/actionsNPCDataSet/actionsTest.csv',header=None,
names=['class', 'text'])

In [18]:
#Получение экшенов
test_actions = train['text']

In [19]:
y_test = utils.to_categorical(train['class'] - 1, num_classes)

# Токенизация

In [20]:
test_sequences = tokenizer.texts_to_sequences(test_actions)

In [21]:
x_test = pad_sequences(test_sequences, maxlen=max_action_len)

# Проверка лучшей модели

In [22]:
model = tuner.get_best_models(num_models=1)

In [23]:
model[0].evaluate(x_test, y_test)



[0.09117404371500015, 0.9958871006965637]

# Сохранение модели

In [24]:
model[0].save('drive/MyDrive/actionsNPCDataSet/actionsFNNModel.h5')