In [12]:
import torch
import json
import os
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import optuna
from torchsummary import summary
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import joblib  # スケーラーの保存用
import sys
import io
from torchviz import make_dot

In [13]:
# CPUスレッド数を設定
torch.set_num_threads(8)

In [14]:
# JSONデータの正規化と保存
def normalize_json_data(input_path, output_path):
    """データを正規化して保存"""
    try:
        with open(input_path, 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {input_path} not found.")
        return

    inputs = np.array(data["inputs"])
    outputs = np.array(data["outputs"])

    # スケーラーのインスタンス化
    input_scaler = MinMaxScaler()
    output_scaler = MinMaxScaler()

    # 入力と出力のスケーリング
    inputs_normalized = input_scaler.fit_transform(inputs)
    outputs_normalized = output_scaler.fit_transform(outputs)

    # 正規化されたデータを保存
    normalized_data = {
        "inputs": inputs_normalized.tolist(),
        "outputs": outputs_normalized.tolist()
    }

    try:
        with open(output_path, 'w') as f:
            json.dump(normalized_data, f)
    except IOError:
        print(f"Error: Unable to write to {output_path}.")
        return

    # スケーラーを保存
    joblib.dump(input_scaler, '../data/input_scaler.pkl')
    joblib.dump(output_scaler, '../data/output_scaler.pkl')
    print("Data normalization completed and scalers saved.")

In [15]:
# カスタムデータセットクラス
class CustomDataset(Dataset):
    def __init__(self, json_path):
        with open(json_path, 'r') as f:
            data = json.load(f)
        self.inputs = data["inputs"]
        self.outputs = data["outputs"]

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        x = torch.tensor(self.inputs[idx], dtype=torch.float32)
        y = torch.tensor(self.outputs[idx], dtype=torch.float32)
        return x, y

# データローダーの生成
def get_dataloader(json_path, batch_size=16):
    dataset = CustomDataset(json_path)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [16]:
# モデル定義
class SimpleModel(nn.Module):
    def __init__(self, input_dim=3, output_dim=2):
        super(SimpleModel, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim)
        )

    def forward(self, x):
        return self.network(x)

In [17]:
# モデルサマリーの表示
def display_model_summary(model):
    """モデルのサマリーを表示し、ファイルに保存"""
    stdout_backup = sys.stdout  # 標準出力をバックアップ
    sys.stdout = io.StringIO()  # 新しい出力先を設定

    summary(model, (1, 3))

    # サマリーの内容を取得
    summary_str = sys.stdout.getvalue()

    # 標準出力を元に戻す
    sys.stdout = stdout_backup

    # サマリーをファイルに保存
    with open("../data/model_summary.txt", "w") as f:
        f.write(summary_str)

# ダミーの入力テンソルを作成 (バッチサイズ1)
x = torch.randn(1, 3)  # 例: 入力が3次元のデータ

# モデルを通して予測を取得
model_show = SimpleModel()  # モデルインスタンスの作成
y = model_show(x)

# 計算グラフを作成
dot = make_dot(y, params=dict(model_show.named_parameters()))

# PNG形式で保存
dot.format = "png"
dot.render("../images/simple_model_graph")  # "simple_model_graph.png" として保存

'../images/simple_model_graph.png'

In [None]:
# モデルのトレーニング
def train_model(trial):
    """Optunaによるハイパーパラメータ最適化のためのトレーニング"""
    # ベイズ最適化でハイパーパラメータを探索
    lr = trial.suggest_loguniform('lr', 1e-6, 1e-1)
    batch_size = trial.suggest_int('batch_size', 4, 64)

    model = SimpleModel()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    dataloader = get_dataloader('../data/data_normalized.json', batch_size=batch_size)

    model.train()
    best_loss = float('inf')
    best_model = None

    num_epochs = 10
    progress_bar = tqdm(range(num_epochs), desc="Training Progress", unit="epoch")
    for epoch in progress_bar:
        epoch_loss = 0
        for x, y in dataloader:
            optimizer.zero_grad()
            y_pred = model(x)
            loss = criterion(y_pred, y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        avg_loss = epoch_loss / len(dataloader)
        if avg_loss < best_loss:
            best_loss = avg_loss
            best_model = model.state_dict()

        progress_bar.set_description(f"Epoch {epoch+1} | Loss: {avg_loss:.6f}")

    if not os.path.exists('../results'):
        os.makedirs('../results')
    torch.save(best_model, '../results/best_model.pth')

    return best_loss

In [19]:
# Optunaのベイズ最適化
def perform_bayesian_optimization():
    """Optunaによるベイズ最適化の実行"""
    study = optuna.create_study(direction='minimize')
    n_trials = 10
    progress_bar = tqdm(total=n_trials, desc="Bayesian Optimization Progress", unit="trial")

    def callback(study, trial):
        progress_bar.set_description(f"Trial {trial.number+1} | Best Loss: {study.best_value:.6f}")
        progress_bar.update(1)

    study.optimize(train_model, n_trials=n_trials, callbacks=[callback])

    progress_bar.close()
    print("Best Hyperparameters:", study.best_params)
    print("Best Loss:", study.best_value)

    return study.best_params

In [20]:
# 推論用関数
def generate_output(input_data, model_path='../results/best_model.pth'):
    """入力データに基づいて推論を生成"""
    model = SimpleModel()
    model.load_state_dict(torch.load(model_path))
    model.eval()

    input_scaler = joblib.load('../data/input_scaler.pkl')
    output_scaler = joblib.load('../data/output_scaler.pkl')

    input_data_scaled = input_scaler.transform([input_data])
    input_tensor = torch.tensor(input_data_scaled, dtype=torch.float32)

    with torch.no_grad():
        output_scaled = model(input_tensor)
        output = output_scaler.inverse_transform(output_scaled.numpy())

    return output[0]

In [21]:
if __name__ == '__main__':
    # JSONデータの正規化
    normalize_json_data('../data/data.json', '../data/data_normalized.json')

    # Optunaによるベイズ最適化の実行
    best_params = perform_bayesian_optimization()
    print("Optimization completed. Best parameters:", best_params)

[I 2024-11-26 19:25:48,595] A new study created in memory with name: no-name-6a2dbd1b-4078-43e8-ac34-7062fb185acd


Data normalization completed and scalers saved.


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
Epoch 10 | Loss: 0.061074: 100%|██████████| 10/10 [00:00<00:00, 150.40epoch/s]
[I 2024-11-26 19:25:48,680] Trial 0 finished with value: 0.05787527933716774 and parameters: {'lr': 0.00457259129635686, 'batch_size': 55}. Best is trial 0 with value: 0.05787527933716774.
Epoch 10 | Loss: 0.359010: 100%|██████████| 10/10 [00:00<00:00, 151.50epoch/s]
[I 2024-11-26 19:25:48,752] Trial 1 finished with value: 0.3589381277561188 and parameters: {'lr': 1.725002522690253e-05, 'batch_size': 62}. Best is trial 0 with value: 0.05787527933716774.
Epoch 10 | Loss: 0.705803: 100%|██████████| 10/10 [00:00<00:00, 151.20epoch/s]s]
[I 2024-11-26 19:25:48,826] Trial 2 finished with value: 0.10866852104663849 and parameters: {'lr': 0.05605553213501442, 'batch_size': 33}. Best is trial 0 with value: 0.05787527933716774.
Epoch 10 | Loss: 0.312286: 100%|██████████| 10/10 [00:00<00:00, 197.35epoch/s]s]
[I 2024-11-26 19:25:48,886] Trial 3 finished with value: 0.312

Best Hyperparameters: {'lr': 0.00457259129635686, 'batch_size': 55}
Best Loss: 0.05787527933716774
Optimization completed. Best parameters: {'lr': 0.00457259129635686, 'batch_size': 55}





In [22]:
if __name__ == '__main__':
    # 推論テスト
    sample_input = [73, 22, 22]
    output = generate_output(sample_input)
    print(f"Generated Output: {output}")

  model.load_state_dict(torch.load(model_path))


Generated Output: [3.5542031e-04 4.1440835e+03]
