In [1]:
# 必要なライブラリのインポート
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers, models
from keras.callbacks import TensorBoard
from keras.layers import Normalization
from keras.layers import PositionEmbedding

In [3]:
# List the devices TensorFlow reports for this machine; the returned list is
# shown as the cell output.
# NOTE(review): this import sits outside the notebook's import cell and comes
# from the tensorflow.python namespace - confirm it is available in the
# TensorFlow version in use.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 6990200809883498685
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 6750236672
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 12651911330773846834
 physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 1080, pci bus id: 0000:01:00.0, compute capability: 6.1"
 xla_global_id: 416903419]

In [5]:
# Hyperparameter settings
EMBED_DIM = 256  # embedding width handed to the Transformer layers via TRANSTrainer
NUM_HEADS = 8  # number of attention heads; EMBED_DIM must be divisible by it
FF_DIM = 4 * EMBED_DIM  # width of the first Dense layer in the feed-forward sub-network
BATCH_SIZE = 1024  # batch size passed to model.fit
STEPS_PER_EPOCH = 10  # used to derive decay_steps of the learning-rate schedule
PATIENCE = 40  # EarlyStopping patience
# NOTE(review): PATIENCE exceeds EPOCHS, so early stopping cannot trigger
# within a single run - confirm this is intended.
EPOCHS = 10  # number of epochs passed to model.fit
l2_reg = 0.001  # L2 factor read as a module-level global by the Dense layers' kernel regularizers

In [6]:
def preprocess_data(dataset):
    """Split a raw DataFrame into model inputs and regression targets.

    Args:
        dataset: pandas DataFrame with at least 30 columns, including
            columns named 'x_2' and 'y_2'.

    Returns:
        A ``(data, label)`` tuple: ``data`` holds the 17 feature columns
        selected by position, ``label`` holds the 'x_2' and 'y_2' columns.
    """
    # Feature columns are picked by position: 4-11, then 29, then 19-26.
    # The order is kept exactly as listed here.
    feature_positions = [*range(4, 12), 29, *range(19, 27)]
    target_names = ['x_2', 'y_2']

    features = dataset.iloc[:, feature_positions]
    targets = dataset[target_names]
    return features, targets

In [7]:
# Load the training dataset and define the output locations
# NOTE(review): these are absolute, machine-specific paths; the notebook will
# not run elsewhere without editing them.
train_dataset = pd.read_csv(R'C:/Users/sukegawa/Desktop/study/datasets/tinvfp/tinvfp_train0.csv')
logdir = R'C:/Users/sukegawa/Desktop/study/logs/test'  # TensorBoard log directory read by TRANSTrainer.train
modelname = R'C:/Users/sukegawa/Desktop/study/model/test'  # path handed to model.save in TRANSTrainer.train

In [8]:
# Print the current epoch number and the losses
class CustomMetricsCallback(tf.keras.callbacks.Callback):
    """Keras callback that prints a one-line loss summary after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print ``Epoch i/N - LOSS: ... - Val_LOSS: ...`` for the finished epoch.

        Args:
            epoch: zero-based index of the epoch that just ended.
            logs: metrics dict supplied by Keras; may be None, and contains
                'val_loss' only when validation data is in use.
        """
        # `logs` defaults to None, and 'val_loss' is absent without validation
        # data, so read every value defensively instead of subscripting.
        logs = logs or {}
        total_epochs = (self.params or {}).get('epochs', '?')

        message = f"Epoch {epoch + 1}/{total_epochs}"
        loss = logs.get('loss')
        if loss is not None:
            message += f" - LOSS: {loss:.4f}"
        val_loss = logs.get('val_loss')
        if val_loss is not None:
            message += f" - Val_LOSS: {val_loss:.4f}"
        print(message)

In [10]:
# MultiHeadSelfAttention class
class MultiHeadSelfAttention(layers.Layer):
    """Multi-head scaled dot-product self-attention.

    Queries, keys and values are all Dense projections of the same input.
    The projections are split into ``num_heads`` heads of size
    ``embed_dim // num_heads``, attended independently, re-joined and passed
    through a final Dense layer.

    Args:
        embed_dim: width of the projections and of the output.
        num_heads: number of attention heads; must divide ``embed_dim``.
        **kwargs: standard Keras ``Layer`` arguments (e.g. ``name``, ``dtype``).

    Raises:
        ValueError: if ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}")
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        """Return ``(output, weights)`` of scaled dot-product attention."""
        score = tf.matmul(query, key, transpose_b=True)
        # Cast the scale to the score dtype rather than a fixed float32 so the
        # division also works when the layer computes in another float type.
        dim_key = tf.cast(tf.shape(key)[-1], score.dtype)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape ``x`` to (batch, seq, heads, projection_dim) and swap the seq and heads axes."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention to ``inputs`` and return a tensor whose last axis is ``embed_dim``."""
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        attention, _ = self.attention(query, key, value)
        # Undo the head split: swap the axes back, then merge the heads into embed_dim.
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        output = self.combine_heads(concat_attention)
        return output

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({"embed_dim": self.embed_dim, "num_heads": self.num_heads})
        return config

# TransformerBlock class
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention and a feed-forward network,
    each followed by dropout, a residual connection and layer normalization.

    Args:
        embed_dim: width of the block's input and output.
        num_heads: number of attention heads.
        ff_dim: width of the first Dense layer in the feed-forward network.
        dropout_rate: rate of both Dropout layers (default 0.1).
        l2_factor: L2 kernel-regularization factor for the feed-forward
            Dense layers; when None, the module-level ``l2_reg`` is used.
        **kwargs: standard Keras ``Layer`` arguments (e.g. ``name``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout_rate=0.1, l2_factor=None, **kwargs):
        super().__init__(**kwargs)
        if l2_factor is None:
            # Fall back to the notebook-wide setting to keep the old behavior.
            l2_factor = l2_reg
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu", kernel_regularizer=keras.regularizers.l2(l2_factor)),
             layers.Dense(embed_dim, kernel_regularizer=keras.regularizers.l2(l2_factor))]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout_rate)
        self.dropout2 = layers.Dropout(dropout_rate)

    def call(self, inputs, training=None):
        """Run the block on ``inputs``.

        Args:
            inputs: tensor whose last axis has size ``embed_dim``.
            training: forwarded to the attention and Dropout layers; None
                lets Keras decide the mode.

        Returns:
            Tensor with the same shape as ``inputs``.
        """
        attn_output = self.att(inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

# PositionEmbedding class
class PositionEmbedding(layers.Layer):
    """Expand each scalar input to ``embed_dim`` channels and add a learned
    per-position embedding.

    Args:
        sequence_length: number of positions in the embedding table.
        embed_dim: number of channels produced for every position.
    """

    def __init__(self, sequence_length, embed_dim):
        super().__init__()
        self.pos_emb = layers.Embedding(input_dim=sequence_length, output_dim=embed_dim)
        self.embed_dim = embed_dim

    def call(self, x):
        """Return the input values repeated along a new last axis, plus position embeddings."""
        # One index per entry of the last input axis, looked up in the table.
        seq_len = tf.shape(x)[-1]
        position_vectors = self.pos_emb(tf.range(start=0, limit=seq_len, delta=1))

        # Append a channel axis and repeat every value embed_dim times.
        values = tf.cast(x, tf.float32)[..., tf.newaxis]
        tiled_values = tf.tile(values, [1, 1, self.embed_dim])

        # The position vectors broadcast over the leading (batch) axis.
        return tiled_values + position_vectors

# TransformerModel class
class TransformerModel(models.Model):
    """Regression model: normalization, position embedding, one Transformer
    block, average pooling over the sequence axis and a two-layer Dense head
    with two outputs.

    Args:
        sequence_length: number of positions for the position embedding.
        embed_dim: embedding width used by the embedding and Transformer block.
        num_heads: number of attention heads.
        ff_dim: feed-forward width inside the Transformer block.
        normalizer: layer applied to the raw inputs first.
    """

    def __init__(self, sequence_length, embed_dim, num_heads, ff_dim, normalizer):
        super().__init__()
        self.normalizer = normalizer
        self.pos_emb = PositionEmbedding(sequence_length, embed_dim)
        self.transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.global_average_pooling = layers.GlobalAveragePooling1D()
        # A single Dropout layer is applied at two points in call().
        self.dropout = layers.Dropout(0.1)
        self.dense1 = layers.Dense(
            20,
            activation="relu",
            kernel_regularizer=keras.regularizers.l2(l2_reg),
        )
        # Output layer: 2-D coordinates.
        self.dense2 = layers.Dense(
            2,
            kernel_regularizer=keras.regularizers.l2(l2_reg),
        )

    def call(self, inputs):
        """Map a batch of inputs to a batch of two-value predictions."""
        normalized = self.normalizer(inputs)
        embedded = self.pos_emb(normalized)
        encoded = self.transformer_block(embedded)
        pooled = self.global_average_pooling(encoded)
        hidden = self.dense1(self.dropout(pooled))
        return self.dense2(self.dropout(hidden))

In [12]:
class TRANSTrainer:
    """Builds, trains and saves the Transformer regression model.

    Hyperparameters are copied from the module-level constants at
    construction time. ``train`` also reads the module-level ``logdir`` and
    ``modelname`` for the TensorBoard log directory and the save path.
    """

    def __init__(self):
        self.embed_dim = EMBED_DIM
        self.num_heads = NUM_HEADS
        self.ff_dim = FF_DIM
        self.batch_size = BATCH_SIZE
        # Used for the learning-rate decay horizon only; it is not passed to fit().
        self.steps_per_epoch = STEPS_PER_EPOCH
        self.patience = PATIENCE
        self.epochs = EPOCHS

    def setupnormalizer(self, data):
        """Create a Normalization layer adapted to ``data`` and return it."""
        # Set up the standardization layer
        normalizer = Normalization(axis=-1)
        normalizer.adapt(np.array(data))
        return normalizer

    def build_model(self, data, normalizer):
        """Build and compile a TransformerModel sized for ``data``.

        Args:
            data: feature table; ``data.shape[1]`` is used as the sequence length.
            normalizer: adapted normalization layer placed at the model input.

        Returns:
            The compiled model (Adam optimizer, mean squared error loss).
        """
        # Learning-rate schedule. It reads the instance's steps_per_epoch so
        # that overriding the attribute on a trainer actually takes effect.
        lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
            0.001,
            decay_steps=self.steps_per_epoch*1000,
            decay_rate=0.9,
            staircase=False)

        # Sequence length = number of feature columns
        sequence_length = data.shape[1]

        # Build the model
        model = TransformerModel(sequence_length, self.embed_dim, self.num_heads, self.ff_dim, normalizer)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule), loss="mean_squared_error", metrics=["mean_squared_error"])
        return model

    def train(self, train_dataset):
        """Preprocess ``train_dataset``, train the model and save it to ``modelname``.

        Args:
            train_dataset: raw DataFrame accepted by ``preprocess_data``.
        """
        data, label = preprocess_data(train_dataset)

        normalizer = self.setupnormalizer(data)

        model = self.build_model(data, normalizer)

        # Run one sample through the model so its weights exist before summary().
        if not model.built:
            sample_input = data[:1]  # use the first sample only
            model(sample_input)

        # TensorBoard setup
        log_dir = logdir    # log directory
        tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=1)

        # Show the model summary
        model.summary()

        model.fit(
            data,
            label,
            validation_split=0.2,
            batch_size=self.batch_size,
            epochs=self.epochs,
            callbacks=[
                tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=self.patience),
                CustomMetricsCallback(),
                # Previously created but never registered, so no logs were written.
                tensorboard,
            ],
            verbose=0
        )

        # Save the model
        model.save(modelname)


In [13]:
# Create a TRANSTrainer instance and run training on the loaded dataset
trainer = TRANSTrainer()
trainer.train(train_dataset)

Model: "transformer_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  multiple                 35        
 n)                                                              
                                                                 
 position_embedding (Positio  multiple                 4352      
 nEmbedding)                                                     
                                                                 
 transformer_block (Transfor  multiple                 789760    
 merBlock)                                                       
                                                                 
 global_average_pooling1d (G  multiple                 0         
 lobalAveragePooling1D)                                          
                                                                 
 dropout_2 (Dropout)         multiple            



INFO:tensorflow:Assets written to: C:/Users/sukegawa/Desktop/study/model/test\assets


INFO:tensorflow:Assets written to: C:/Users/sukegawa/Desktop/study/model/test\assets
