In [None]:
import os
import gymnasium as gym

# Ask for 8 Julia threads. The variable is set before the `julia` package is
# imported and the runtime is created below (presumably Julia only reads it at
# start-up -- confirm).
os.environ["JULIA_NUM_THREADS"] = "8"
from julia import Julia
# Create the embedded Julia runtime with compiled_modules=False.
jl = Julia(compiled_modules=False)

from julia import Main
# Print the thread count Julia reports, to check that the setting above took effect.
print(Main.eval("Threads.nthreads()"))
from model.VIV_gym import JuliaEnv

"""

Lib支持

"""
import numpy as np
import torch.nn as nn
from gymnasium.wrappers import RescaleAction
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, CallbackList

# Output directory for rendered videos; created up front if it does not exist.
video_folder = "./videos/"
os.makedirs(video_folder, exist_ok=True)

In [None]:
"""

反馈reward和建立checkpoint

"""
class RewardLoggerCallback(BaseCallback):
    """Record the return and length of every finished episode during training.

    One running reward sum and one running step counter are kept per
    sub-environment of ``self.training_env``. Whenever a ``dones`` flag is set
    for a sub-environment, its totals are appended to ``episode_rewards`` /
    ``episode_steps``, printed, and then reset to zero.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        # History of completed episodes (one entry per finished episode).
        self.episode_rewards = []
        self.episode_steps = []
        # Per-environment accumulators; allocated in _on_training_start.
        self.current_rewards = None
        self.current_steps = None

    def _on_training_start(self) -> None:
        """Allocate one accumulator slot per sub-environment."""
        n_envs = self.training_env.num_envs
        self.current_rewards = np.zeros(n_envs)
        self.current_steps = np.zeros(n_envs, dtype=int)

    def _on_step(self) -> bool:
        """Accumulate this step's rewards and flush any episode that just ended.

        Returns:
            Always True.
        """
        step_rewards = self.locals["rewards"]
        step_dones = self.locals["dones"]

        self.current_rewards += step_rewards
        self.current_steps += 1  # every sub-environment advanced by one step

        for env_idx, finished in enumerate(step_dones):
            if not finished:
                continue

            ep_reward = self.current_rewards[env_idx]
            ep_length = self.current_steps[env_idx]
            self.episode_rewards.append(ep_reward)
            self.episode_steps.append(ep_length)

            print(f"Episode finished after {ep_length} steps")
            print(f"Episode reward: {ep_reward:.2f}")

            # Start a fresh tally for the next episode of this sub-environment.
            self.current_rewards[env_idx] = 0.0
            self.current_steps[env_idx] = 0

        return True

# Periodic checkpointing; this single callback instance is reused by every
# training cell in the notebook.
# NOTE(review): the "ppo_model" prefix is also applied when the callback is
# attached to SAC runs, so SAC checkpoints end up with a PPO-looking name.
# NOTE(review): save_freq is presumably counted in callback calls rather than
# environment timesteps, so with N parallel envs a checkpoint would be written
# every 1000 * N timesteps -- confirm against the Stable-Baselines3 docs.
checkpoint_callback = CheckpointCallback(
    save_freq= 1000,
    save_path="./checkpoints/",
    name_prefix="ppo_model",
    save_replay_buffer=True,
    save_vecnormalize=True
)

In [None]:
"""

训练用参数(VIV)

"""
diameter = 16


def pos_generator(d=diameter):
    """Sample a random initial position ``[x, y]``.

    ``x`` is always 0.0 and ``y`` is drawn uniformly from ``[-d/6, d/6)``.

    Args:
        d: Length scale that sets the sampling range. It defaults to the value
            ``diameter`` has when this function is defined, so later cells that
            rebind the global ``diameter`` do not silently change the range.

    Returns:
        A two-element list ``[0.0, y]``.
    """
    half_range = d / 6
    return [0.0, np.random.uniform(-half_range, half_range)]

# Static parameters for the VIV case.
# NOTE(review): this dict uses the key "action_scale", whereas the FOIL/Drag
# cells and the gym.register(...) cell use "F_scale" -- confirm which key
# VIVEnv actually reads.
statics = {
    "L_unit": diameter,
    "action_scale": 0.5,
    "size": [10, 8],
    "location": [3, 4]
}
# Variable parameters for the VIV case. The start position is a fixed value
# here (y = diameter/6); pos_generator defined above is not referenced.
variables = {
    "position":[0.0, diameter/6],
    "velocity":[0.0, 0.0]
}
# Sizes of the action space and the observation space.
spaces = {
    "action":1,
    "observation":3
}

from model.VIV_gym import VIVEnv

In [None]:
"""

训练用参数(FOIL)

"""
# FOIL case. Running this cell rebinds the notebook-level names `diameter`,
# `statics`, `variables` and `spaces`, replacing whatever another parameter
# cell assigned to them.
diameter = 16

# Static parameters for the FOIL case.
statics = {
    "L_unit": diameter,
    "F_scale": 10,
    "size": [8, 6],
    "nose": [1, 4],
    "rot_center":[0.25,0]
}
# Variable parameters for the FOIL case (all fixed values).
variables = {
    "position":[0.0, 0.0],
    "velocity":[0.0, 0.0],
    "theta":0.05 * np.pi,
    "rot_vel": 0.0,
    "rot_acc": 0.0
}
# Sizes of the action space and the observation space.
spaces = {
    "action":1,
    "observation":5
}

from model.VIV_gym import FoilEnv

In [None]:
"""

训练用参数(Drag)

"""
diameter = 48


def ksi_generator():
    """Return one `ksi` value drawn uniformly from the interval [3.0, 4.0)."""
    low, high = 3.0, 4.0
    return np.random.uniform(low, high)

# Static parameters for the Drag case. Running this cell rebinds the
# notebook-level `statics`, `variables` and `spaces`.
statics = {
    "L_unit": diameter,
    "F_scale": 8,
    "L_ratio": 0.15,
    "L_gap":0.05,
    "location": [2, 0],
    "size": [6, 2]
}
# Variable parameters for the Drag case. "ksi" holds the generator function
# itself, not a sampled value (presumably the environment calls it to draw a
# fresh value -- confirm against DragEnv).
variables = {
    "ksi": ksi_generator
}
# Sizes of the action space and the observation space.
spaces = {
    "action":1,
    "observation":2
}

from model.VIV_gym import DragEnv

In [None]:
"""

单线程环境建立，训练，保持

"""
# Single-environment SAC training on VIVEnv.
# `statics`, `variables` and `spaces` are read from the notebook globals when
# the lambda is invoked, so they hold the values of whichever parameter cell
# was executed last -- make sure that was the VIV cell.
env = DummyVecEnv([lambda: JuliaEnv(render_mode="rgb_array", env = VIVEnv, max_episode_steps=2000, statics = statics, 
                                    variables = variables, spaces = spaces, verbose=1)])

model = SAC(
    "MlpPolicy",
    env=env,
    verbose=1,
    device = 'cpu'
)
# A fresh reward logger per run; the checkpoint callback is the shared one
# (its file prefix is "ppo_model" even though this run trains SAC).
reward_callback = RewardLoggerCallback()
callback = CallbackList([checkpoint_callback, reward_callback])
model.learn(total_timesteps=150_000, callback = callback)
# Final model; the GIF/rollout cell later loads it back via SAC.load("SAC_model").
model.save("SAC_model")
# Per-episode returns collected by the logger, kept for the plotting cell.
rewards = np.array(reward_callback.episode_rewards)
env.close()

In [None]:
"""

单线程环境建立，训练，保持(注册表)

"""
# Single-environment SAC run on the registered "VIV-v0" id.
# The gym.register(...) cell for "VIV-v0" sits further down in this notebook
# and must have been executed before this cell, otherwise gym.make fails.
env = DummyVecEnv([lambda: gym.make("VIV-v0")])

model = SAC(
    "MlpPolicy",
    env=env,
    verbose=1,
    device='cpu'
)
reward_callback = RewardLoggerCallback()
callback = CallbackList([checkpoint_callback, reward_callback])
try:
    model.learn(total_timesteps=2_000, callback=callback)
    # Saved under an SAC-specific name: the model is SAC, not PPO, and a file
    # called "PPO_model" would collide with the PPO cell's "ppo_model" on
    # case-insensitive filesystems.
    model.save("SAC_VIV_v0_model")
finally:
    # Keep whatever episodes were logged and release the environment even if
    # training is interrupted or raises.
    rewards = np.array(reward_callback.episode_rewards)
    env.close()

In [None]:
"""

多线程环境建立

"""
def make_env(rank: int, seed: int = 0):
    """Build a zero-argument factory that creates one seeded VIV environment.

    The environment is created through ``JuliaEnv(env=VIVEnv, ...)``, the same
    constructor pattern the single-environment cells use. The previously
    referenced ``JuliaVIVEnv`` is never imported or defined in this notebook.

    Args:
        rank: Index of the sub-environment; added to ``seed`` so each worker
            is reset with a different seed.
        seed: Base random seed.

    Returns:
        A callable suitable for ``SubprocVecEnv`` / ``DummyVecEnv`` that
        returns the freshly reset environment.
    """
    def _init():
        # statics / variables / spaces are the notebook-level dicts, i.e. the
        # values of whichever parameter cell was run last (should be VIV here).
        env = JuliaEnv(render_mode=None, env=VIVEnv, max_episode_steps=200,
                       statics=statics, variables=variables, spaces=spaces,
                       verbose=1)
        env.reset(seed=seed + rank)
        return env
    set_random_seed(seed)
    return _init

# PPO training on 4 environments, each created in its own subprocess by
# SubprocVecEnv from the make_env factories (rank 0..3).
num_envs = 4
env = SubprocVecEnv([make_env(i) for i in range(num_envs)])
model = PPO(
    "MlpPolicy",
    env=env,
    verbose=1,
    device = 'cpu'
)
# Fresh reward logger for this run, chained with the shared checkpoint callback.
reward_callback = RewardLoggerCallback()
callback = CallbackList([checkpoint_callback, reward_callback])

model.learn(total_timesteps=20_000, callback = callback)
model.save("ppo_model")
# Per-episode returns across all sub-environments, in order of completion.
rewards = np.array(reward_callback.episode_rewards)
env.close()

In [None]:
# Re-extract the episode returns from the most recent `reward_callback`
# (repeats the assignment at the end of the training cells).
rewards = np.array(reward_callback.episode_rewards)

In [None]:
"""

加载checkpoint并继续训练

"""
def make_env(rank: int, seed: int = 0):
    """Build a zero-argument factory that creates one seeded Drag environment.

    The environment is created through ``JuliaEnv(env=DragEnv, ...)``, the
    same constructor pattern the single-environment cells use. The previously
    referenced ``JuliaDragEnv`` is never imported or defined in this notebook.

    Note that this definition replaces any earlier ``make_env`` in the kernel.

    Args:
        rank: Index of the sub-environment; added to ``seed`` so each worker
            is reset with a different seed.
        seed: Base random seed.

    Returns:
        A callable suitable for ``SubprocVecEnv`` / ``DummyVecEnv`` that
        returns the freshly reset environment.
    """
    def _init():
        # statics / variables / spaces are the notebook-level dicts, i.e. the
        # values of whichever parameter cell was run last (should be Drag here).
        env = JuliaEnv(render_mode=None, env=DragEnv, max_episode_steps=30,
                       statics=statics, variables=variables, spaces=spaces,
                       verbose=1)
        env.reset(seed=seed + rank)
        return env
    set_random_seed(seed)
    return _init

# Resume PPO training from a saved checkpoint on 4 subprocess environments.
num_envs = 4
env = SubprocVecEnv([make_env(i) for i in range(num_envs)])

reward_callback = RewardLoggerCallback()
callback = CallbackList([checkpoint_callback, reward_callback])

model = PPO.load("./checkpoints/ppo_model_40000_steps", env=env, device='cpu')
# reset_num_timesteps=False continues the step counter of the loaded model, so
# new checkpoints get new step numbers instead of reusing (and overwriting)
# the names written by the earlier run.
model.learn(total_timesteps=20_000, callback=callback, reset_num_timesteps=False)
rewards_ex = np.array(reward_callback.episode_rewards)
# Append the new episodes to the earlier history as one flat 1-D array; a
# two-element list of arrays cannot be consumed by plot_rewards.
rewards = np.concatenate([np.atleast_1d(rewards), rewards_ex])
model.save("./model_stage/ppo_model_40k")
env.close()


In [None]:
"""

绘图功能

"""
import matplotlib.pyplot as plt
import numpy as np

# Parameter: rolling-window length (in episodes) passed to plot_rewards below.
window = 10

def plot_rewards(rewards, window=100):
    """Plot the rolling mean of episode rewards with a +/-1 standard-deviation band.

    Args:
        rewards: Flat 1-D sequence of per-episode returns.
        window: Rolling-window length in episodes. It is clamped to
            ``[1, len(rewards)]`` so that short runs still produce a plot
            instead of a shape-mismatch error.

    Returns:
        None. Shows a matplotlib figure, or prints a message and returns when
        there are no episodes to plot.
    """
    rewards = np.asarray(rewards, dtype=float).ravel()
    n_episodes = rewards.size
    if n_episodes == 0:
        print("plot_rewards: no finished episodes to plot")
        return

    window = int(max(1, min(window, n_episodes)))

    # Every length-`window` slice of the series as one row (requires NumPy >= 1.20);
    # row k covers episodes k .. k + window - 1.
    windows = np.lib.stride_tricks.sliding_window_view(rewards, window)
    mean = windows.mean(axis=1)
    std = windows.std(axis=1)

    # Each rolling value is plotted at the last episode of its window.
    x = np.arange(window - 1, n_episodes)

    plt.figure(figsize=(12, 6))
    plt.plot(x, mean, label='Mean Reward')
    plt.fill_between(x, mean - std, mean + std, alpha=0.3, label='±1 Std Dev')
    plt.xlabel("Episode")
    plt.ylabel("Reward")
    plt.title("Episode Reward over Training")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Plot with the notebook-level `window` (10) rather than the function default (100).
plot_rewards(rewards,window)

In [None]:
#训练结束后出gif

import numpy as np
import matplotlib
# matplotlib.use("Agg")
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from model.VIV_gym import JuliaEnv
from gif import create_GIF

# NOTE(review): `infos` is never appended to in this cell.
infos = []

# Create an environment with rendering enabled. `statics`, `variables` and
# `spaces` come from the notebook globals (whichever parameter cell ran last).
env = JuliaEnv(render_mode="rgb_array", env = VIVEnv, max_episode_steps=2000, statics = statics, variables = variables, spaces = spaces, verbose=True)

# Load the trained model.
model = SAC.load("SAC_model", env=env)

# List of video frames.
# NOTE(review): `frames` is never filled; the GIF below is built from the
# "images" directory instead.
frames = []

obs, _ = env.reset()
done = False
truncated = False

# Roll out one episode with the deterministic policy until the environment
# reports termination or truncation.
while not done and not truncated:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, truncated, info = env.step(action)

# Save as GIF (could also be saved as MP4). create_GIF is given the "images"
# directory name (presumably the environment writes its rendered frames
# there -- confirm).
input_frame = "images"
output_gif = "train_policy_demo.gif"
create_GIF(input_frame, output_gif)
env.close()

# Persist info["info"] from the LAST step only (presumably the environment
# accumulates the per-step history under that key -- confirm).
np.save("info_SAC.npy", info["info"])


In [None]:
import matplotlib.pyplot as plt

# Load the records saved by the rollout cell. allow_pickle=True is needed
# because the entries are indexed like dicts below; only load files you
# produced yourself, since unpickling can execute arbitrary code.
info = np.load("info_SAC.npy", allow_pickle=True)
# force = info
# One series per recorded key; each element of `info` is indexed by key.
force = [f["F"] for f in info[0:]]
y_force = [f["fluid_force_y"] for f in info[0:]]
x_force = [f["fluid_force_x"] for f in info[0:]]
y_dis = [f["y_dis"] for f in info[0:]]
x_dis = [f["x_dis"] for f in info[0:]]

# info2 = np.load("info2_SAC.npy", allow_pickle=True)
# y_dis2 = [f["y_dis"] for f in info2[0:]]


# Step index used as the x axis for every curve.
x = np.arange(len(y_force))
# x2 = np.arange(len(y_dis2))

# Plot.
# NOTE(review): the "F" series is labelled "x_force", and y_force / y_dis are
# computed above but not plotted -- confirm this is intended.
plt.figure(figsize=(8, 5))
# plt.plot(x, force, label="ratio", color="red")
plt.plot(x, force, label="x_force", color="red")
plt.plot(x, x_force, label="x_fluid", color="blue")
plt.plot(x, x_dis, label="x_displacement", color="green")
# plt.plot(x2, y_dis2, label="init_displacement", color="yellow")

# Legend, labels, title.
plt.xlabel("step")
plt.ylabel("force & displacement")
plt.title("Force and Displacement in x direction")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
from gif import create_GIF
# Save as GIF (could also be saved as MP4). Standalone repeat of the GIF step
# from the rollout cell: builds "train_policy_demo.gif" from the "images" input.
input_frame = "images"
output_gif = "train_policy_demo.gif"
create_GIF(input_frame, output_gif)

In [None]:
import matplotlib.pyplot as plt

# Three pre-saved series, plotted against a shared step index.
# NOTE(review): `y_cons` is loaded from "x_cons.npy" and labelled
# "Y_Constrained" -- confirm the file name / variable name pairing.
nx_no = np.load("none_train.npy")
nx_tr = np.load("train.npy")
y_cons = np.load("x_cons.npy")

# The x axis is sized from nx_no; the other two series must have the same
# length or plt.plot raises a shape error.
x = np.arange(len(nx_no))

# Plot.
plt.figure(figsize=(8, 5))
plt.plot(x, y_cons, label="Y_Constrained", color="yellow")
plt.plot(x, nx_no, label="Init_VIV", color="blue")
plt.plot(x, nx_tr, label="SAC_Trained", color="red")

# Legend, labels, title.
plt.xlabel("Step")
plt.ylabel("X - Value")
plt.title("Comparison of Displacement in X-Direction")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Import VIVEnv from the same module path the rest of the notebook uses
# (`model.VIV_gym`); importing the file under a second name (`VIV_gym`) either
# fails or creates a distinct copy of the class.
from model.VIV_gym import VIVEnv
# ===== Register to Gym =====
# Makes gym.make("VIV-v0") construct JuliaEnv wrapping VIVEnv with the kwargs
# below. Run this cell before any cell that calls gym.make("VIV-v0").
# NOTE(review): the VIV parameter cell uses the key "action_scale" where this
# dict uses "F_scale" -- confirm which key VIVEnv reads.
gym.register(
    id="VIV-v0",
    entry_point=JuliaEnv,
    kwargs={
        "env": VIVEnv,
        "statics": {"L_unit": 16, "F_scale": 1.0, "size": (10, 8), "location": [3, 4]},  # replace with your actual parameters
        "variables": {"position":[0.0, -1.0], "velocity":[0.0, 0.0]},
        "spaces": {"action": 1, "observation": 3},
        "verbose": True
    }
)

In [None]:
import gymnasium as gym

# Smoke test for the registered environment: take at most 10 random actions
# and stop early if the episode terminates or is truncated.
env = gym.make("VIV-v0")
obs, info = env.reset()
for _ in range(10):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        break
env.close()