In [1]:
# ライブラリのインポート
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers, models
from keras.callbacks import TensorBoard, EarlyStopping
from keras.layers import Normalization

In [2]:
# Transformerブロック
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization (post-norm arrangement).

    Args:
        input_dim: Used as ``key_dim`` of the attention layer and as the output
            width of the feed-forward network. It must equal the last
            dimension of the inputs, otherwise the residual additions in
            ``call`` cannot line up.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden layer of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, input_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can reproduce them.
        self.input_dim = input_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=input_dim)
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(input_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Forwarded to the dropout layers. ``None`` (the default)
                leaves the decision to Keras.

        Returns:
            The layer-normalized output of the second residual connection.
        """
        # Self-attention sub-layer: attention -> dropout -> residual + norm.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Feed-forward sub-layer: FFN -> dropout -> residual + norm.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "input_dim": self.input_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

# Transformerモデル
class TransformerModel(models.Model):
    """Regression model: normalization, Transformer blocks, pooling, dense head.

    Args:
        config: Mapping of hyper-parameters. Keys read here:
            ``'input_dim'``, ``'num_heads'``, ``'ff_dim'``, ``'dropout_rate'``,
            ``'num_transformer_blocks'`` and ``'dense_units'`` (an iterable of
            layer widths for the dense head).
        **kwargs: Forwarded to ``models.Model`` (e.g. ``name``).
    """

    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.config = config
        # Feature-wise normalization; its statistics must be set with
        # ``normalizer.adapt(...)`` by the caller before training.
        self.normalizer = Normalization(axis=-1)
        self.transformer_blocks = [
            TransformerBlock(config['input_dim'], config['num_heads'], config['ff_dim'], config['dropout_rate'])
            for _ in range(config['num_transformer_blocks'])
        ]
        self.global_average_pooling = layers.GlobalAveragePooling1D()
        # A single Dropout layer instance is reused at every dropout point.
        self.dropout = layers.Dropout(config['dropout_rate'])
        self.dense_layers = [layers.Dense(units, activation="relu") for units in config['dense_units']]
        # Two linear outputs (no activation).
        self.output_layer = layers.Dense(2)

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Input tensor; it is normalized along its last axis and then
                pooled over its second axis by ``GlobalAveragePooling1D``.
            training: Forwarded explicitly to the Transformer blocks and the
                dropout layer. ``None`` (the default) leaves the decision to
                Keras.

        Returns:
            Tensor with 2 values per sample.
        """
        x = self.normalizer(inputs)
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x, training=training)
        x = self.global_average_pooling(x)
        x = self.dropout(x, training=training)
        for dense_layer in self.dense_layers:
            x = dense_layer(x)
            x = self.dropout(x, training=training)
        return self.output_layer(x)

In [3]:
# 訓練
class TransformerTrainer:
    """Builds, trains, saves and queries a ``TransformerModel``.

    Args:
        config: Mapping of settings. Besides the keys consumed by
            ``TransformerModel``, this class reads ``'input_dim'``,
            ``'initial_learning_rate'``, ``'decay_steps'``, ``'decay_rate'``,
            ``'patience'``, ``'log_dir'``, ``'batch_size'``, ``'epochs'``,
            ``'model_path'`` and, optionally, ``'metrics'``.
    """

    # Positional indices of the feature columns selected from the input frame.
    FEATURE_COLUMNS = tuple(range(4, 12)) + (14,) + tuple(range(19, 27))
    # Names of the two regression target columns.
    LABEL_COLUMNS = ('x_2', 'y_2')

    def __init__(self, config):
        self.config = config
        self.model = None

    def _extract_features(self, dataset):
        """Select the feature columns and shape them as (rows, 1, features)."""
        features = dataset.iloc[:, list(self.FEATURE_COLUMNS)]
        # Explicit row count instead of -1 so an empty frame reshapes cleanly.
        return features.values.reshape(len(features), 1, len(self.FEATURE_COLUMNS))

    def preprocess_data(self, dataset):
        """Split a DataFrame into model inputs and regression targets.

        Args:
            dataset: DataFrame containing the positional feature columns listed
                in ``FEATURE_COLUMNS`` and the named ``LABEL_COLUMNS``.

        Returns:
            Tuple ``(data, label)``: ``data`` has shape ``(rows, 1, 17)`` and
            ``label`` has shape ``(rows, 2)``.

        Raises:
            IndexError: If the frame has too few columns.
            KeyError: If a label column is missing.
        """
        data = self._extract_features(dataset)
        label = dataset[list(self.LABEL_COLUMNS)]
        return data, label.values

    def _compile_model(self):
        """(Re)compile ``self.model`` with a fresh Adam optimizer and LR schedule."""
        lr_schedule = keras.optimizers.schedules.ExponentialDecay(
            self.config['initial_learning_rate'],
            decay_steps=self.config['decay_steps'],
            decay_rate=self.config['decay_rate'],
            staircase=True)
        optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
        # This is a regression task (MSE loss), so classification accuracy is
        # not informative; default to MAE unless the config overrides it.
        metrics = list(self.config.get('metrics', ["mean_absolute_error"]))
        self.model.compile(optimizer=optimizer, loss="mean_squared_error", metrics=metrics)

    def build_model(self, input_shape):
        """Create, build, summarize and compile the model.

        Args:
            input_shape: Shape of the input data including the leading batch
                dimension, e.g. ``data.shape``. The batch dimension is ignored
                so the model is not tied to one dataset size.

        Raises:
            ValueError: If the last dimension does not match
                ``config['input_dim']``.
        """
        feature_shape = tuple(input_shape)[1:]
        if not feature_shape or feature_shape[-1] != self.config['input_dim']:
            raise ValueError(
                f"Input shape {tuple(input_shape)} does not end with "
                f"config['input_dim']={self.config['input_dim']}"
            )
        self.model = TransformerModel(self.config)
        self.model.build((None,) + feature_shape)
        self.model.summary()
        self._compile_model()

    def train(self, train_dataset, validation_split=0.2):
        """Fit the model on ``train_dataset`` and save it to ``config['model_path']``.

        Args:
            train_dataset: DataFrame accepted by ``preprocess_data``.
            validation_split: Fraction of the data passed to ``fit`` as
                ``validation_split``.

        Returns:
            The ``History`` object returned by ``fit``.
        """
        data, label = self.preprocess_data(train_dataset)

        # Build on first use; otherwise just show the existing model.
        if self.model is None:
            self.build_model(data.shape)
        else:
            self.model.summary()

        # Fit the normalization statistics to the training inputs.
        # NOTE(review): this uses all rows, including those `fit` later holds
        # out via validation_split - confirm this is intended.
        self.model.normalizer.adapt(data)
        # Recompile after adapt(): already-compiled functions keep the old
        # normalization statistics, which matters when train() is called again
        # on an existing model.
        self._compile_model()

        callbacks = [
            EarlyStopping(monitor='val_loss', patience=self.config['patience']),
            TensorBoard(log_dir=self.config['log_dir'])
        ]

        # Train the model.
        history = self.model.fit(
            data, label,
            validation_split=validation_split,
            batch_size=self.config['batch_size'],
            epochs=self.config['epochs'],
            callbacks=callbacks,
            verbose=1
        )

        # Persist the trained model.
        self.model.save(self.config['model_path'])
        return history

    def predict(self, data):
        """Return model predictions for the feature columns of ``data``.

        Only the feature columns are required; label columns may be absent.

        Raises:
            RuntimeError: If no model has been built or trained yet.
        """
        if self.model is None:
            raise RuntimeError("No model available: call train() or build_model() before predict().")
        return self.model.predict(self._extract_features(data))

In [4]:
# 設定
# Seed TensorFlow's global random generator so repeated runs are more comparable.
SEED = 42
tf.random.set_seed(SEED)

# All input/output locations derive from one base directory and one run name.
BASE_DIR = 'C:/Users/sukegawa/Desktop/study'
DATASET_DIR = f'{BASE_DIR}/datasets/tinvfp'
RUN_NAME = '2tinvfp8_1m_2'

# Model and training hyper-parameters.
config = {
    'input_dim': 17,
    'num_heads': 4,
    'ff_dim': 68,
    'num_transformer_blocks': 2,
    'dropout_rate': 0.1,
    'dense_units': [64, 32],
    'batch_size': 1024,
    'epochs': 1000,
    'patience': 20,
    'initial_learning_rate': 1e-3,
    'decay_steps': 10000,
    'decay_rate': 0.9,
    'log_dir': f'{BASE_DIR}/logs/{RUN_NAME}',
    'model_path': f'{BASE_DIR}/model/{RUN_NAME}'
}

# Train on the training set.
estimator = TransformerTrainer(config)
train_dataset = pd.read_csv(f'{DATASET_DIR}/2tinvfp8_1m.csv')
history = estimator.train(train_dataset)

# Predict on the test set.
test_data = pd.read_csv(f'{DATASET_DIR}/2tinvfp8_1m_test.csv')
predictions = estimator.predict(test_data)

Model: "transformer_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  multiple                 35        
 n)                                                              
                                                                 
 transformer_block (Transfor  multiple                 7310      
 merBlock)                                                       
                                                                 
 transformer_block_1 (Transf  multiple                 7310      
 ormerBlock)                                                     
                                                                 
 global_average_pooling1d (G  multiple                 0         
 lobalAveragePooling1D)                                          
                                                                 
 dropout_4 (Dropout)         multiple            



INFO:tensorflow:Assets written to: C:/Users/sukegawa/Desktop/study/model/2tinvfp8_1m_2\assets


INFO:tensorflow:Assets written to: C:/Users/sukegawa/Desktop/study/model/2tinvfp8_1m_2\assets


