In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

# エンコーダの定義
class Encoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=3):
        super(Encoder, self).__init__()

        self.layers = nn.ModuleList()
        self.layers.append(nn.Linear(input_dim, hidden_dim[0]))
        for i in range(num_layers - 2):
            self.layers.append(nn.Linear(hidden_dim[i], hidden_dim[i + 1]))
        self.layers.append(nn.Linear(hidden_dim[-1], output_dim))

    # def forward(self, x):
    #     for layer in self.layers[:-1]:
    #         x = torch.relu(layer(x))
    #     x = self.layers[-1](x)
    #     return x

    def forward(self, x):
        for layer in self.layers:
            x = torch.relu(layer(x))
        return x

# データ拡張（ノイズの追加）
def data_augmentation(x):
    noise = torch.randn_like(x) * 0.05  # ノイズの強度は調整可能
    return x + noise

# 対照損失関数（NT-Xent Loss）
def nt_xent_loss(z_i, z_j, temperature=0.5):
    z_i = F.normalize(z_i, dim=1)
    z_j = F.normalize(z_j, dim=1)
    representations = torch.cat([z_i, z_j], dim=0)
    similarity_matrix = torch.matmul(representations, representations.T) / temperature

    # ラベルの作成
    batch_size = z_i.size(0)
    labels = torch.arange(batch_size).to(z_i.device)
    labels = torch.cat([labels, labels], dim=0)

    # 対角成分を無視するマスク
    mask = ~torch.eye(2 * batch_size, dtype=torch.bool).to(z_i.device)
    similarity_matrix = similarity_matrix.masked_select(mask).view(2 * batch_size, -1)

    loss = F.cross_entropy(similarity_matrix, labels)
    return loss

# 行動の評価（例:環境でのシミュレーション）
def evaluate_behavior(agent):
    # ここでは簡単な2次元行動記述子を仮定し、ランダムに生成
    behavior_descriptor = np.random.rand(2)  # 2次元の行動記述子
    fitness = np.random.rand()  # 適応度も仮定
    return behavior_descriptor, fitness

# 新奇性の計算
def calculate_novelty(behavior, random_encoder, learned_encoder):
    with torch.no_grad():
        behavior_tensor = torch.tensor(behavior, dtype=torch.float32).unsqueeze(0)
        rand_embedding = random_encoder(behavior_tensor)
        learned_embedding = learned_encoder(behavior_tensor)
    novelty = torch.norm(rand_embedding - learned_embedding, p=2).item()  # ユークリッド距離
    return novelty

# エージェントのクラス
class Agent:
    def __init__(self):
        self.behavior = None
        self.fitness = 0
        self.novelty = 0

# 世代の進化
def evolve_population(population, random_encoder, learned_encoder, generations=50, population_size=100, offspring_size=50, novelty_threshold=1.0, learning_rate=1e-2):
    optimizer_random = optim.Adam(random_encoder.parameters(), lr=learning_rate)
    optimizer_learned = optim.Adam(learned_encoder.parameters(), lr=learning_rate)

    for generation in range(generations):
        print(f"Generation {generation + 1}")

        # 各個体の行動と新奇性を評価
        for agent in population:
            agent.behavior, agent.fitness = evaluate_behavior(agent)
            agent.novelty = calculate_novelty(agent.behavior, random_encoder, learned_encoder)

        # 新奇性に基づく選択（新奇性が高い順に選択）
        population = sorted(population, key=lambda x: x.novelty, reverse=True)
        survivors = population[:population_size]  # 上位個体を次世代に選択

        # 新奇な個体を追加
        offsprings = [Agent() for _ in range(offspring_size)]
        for offspring in offsprings:
            offspring.behavior, offspring.fitness = evaluate_behavior(offspring)
            offspring.novelty = calculate_novelty(offspring.behavior, random_encoder, learned_encoder)

        population = survivors + offsprings

        # エンコーダを訓練
        behaviors = [agent.behavior for agent in population]
        behaviors = torch.tensor(behaviors, dtype=torch.float32)

        # データ拡張（2つのビューを生成）
        x_i = data_augmentation(behaviors)
        x_j = data_augmentation(behaviors)

        # ランダムエンコーダの訓練（対照学習）
        optimizer_random.zero_grad()
        z_i = random_encoder(x_i)
        z_j = random_encoder(x_j)
        loss_random = nt_xent_loss(z_i, z_j)
        loss_random.backward()
        optimizer_random.step()

        # 学習エンコーダの訓練（MSEロスを維持）
        optimizer_learned.zero_grad()
        rand_embeddings = random_encoder(behaviors)
        learned_embeddings = learned_encoder(behaviors)
        loss_learned = torch.nn.functional.mse_loss(learned_embeddings, rand_embeddings)
        loss_learned.backward()
        optimizer_learned.step()

        print(f"Best novelty: {population[0].novelty:.4f}, Loss Random: {loss_random.item():.4f}, Loss Learned: {loss_learned.item():.4f}")

    return population

# メイン
if __name__ == "__main__":
    # 行動空間次元、隠れ層次元、エンコーダ出力次元の設定
    input_dim = 2  # 行動記述子の次元（ここでは2次元）
    hidden_dim_rand = [16, 16]
    hidden_dim_learned = [10, 12, 14, 16]
    output_dim = 2  # 埋め込み空間の次元

    # ランダムエンコーダと学習エンコーダを初期化
    random_encoder = Encoder(input_dim, hidden_dim_rand, output_dim, num_layers=3)
    learned_encoder = Encoder(input_dim, hidden_dim_learned, output_dim, num_layers=5)

    # ランダムエンコーダのパラメータを学習可能に設定（変更点）

    # 初期個体群の作成
    population_size = 100
    population = [Agent() for _ in range(population_size)]

    # BR-NSアルゴリズムの実行
    evolved_population = evolve_population(
        population, random_encoder, learned_encoder,
        generations=100, population_size=100, offspring_size=50
    )


Generation 1
Best novelty: 0.4353, Loss Random: 5.7002, Loss Learned: 0.0516
Generation 2
Best novelty: 0.3474, Loss Random: 5.6961, Loss Learned: 0.0380
Generation 3
Best novelty: 0.3057, Loss Random: 5.6820, Loss Learned: 0.0315
Generation 4
Best novelty: 0.2791, Loss Random: 5.6512, Loss Learned: 0.0264
Generation 5
Best novelty: 0.2597, Loss Random: 5.6207, Loss Learned: 0.0210
Generation 6
Best novelty: 0.2386, Loss Random: 5.5468, Loss Learned: 0.0159
Generation 7
Best novelty: 0.2274, Loss Random: 5.4794, Loss Learned: 0.0137
Generation 8
Best novelty: 0.2132, Loss Random: 5.5251, Loss Learned: 0.0126
Generation 9
Best novelty: 0.2151, Loss Random: 5.4893, Loss Learned: 0.0129
Generation 10
Best novelty: 0.2161, Loss Random: 5.5070, Loss Learned: 0.0145
Generation 11
Best novelty: 0.2323, Loss Random: 5.5838, Loss Learned: 0.0164
Generation 12
Best novelty: 0.2607, Loss Random: 5.5786, Loss Learned: 0.0162
Generation 13
Best novelty: 0.2718, Loss Random: 5.5603, Loss Learned: 0.