# install

In [2]:
!pip install stable-baselines3 gym numpy matplotlib shimmy
!pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install --upgrade stable-baselines3 gymnasium
!pip install scipy matplotlib pandas

Collecting stable-baselines3
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting shimmy
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)
  Down

# Temp / Humid

In [8]:
import gym
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# 環境（使用 CSV）
class THBLDC(gym.Env):
    def __init__(self, csv_path):
        self.data = pd.read_csv(csv_path)
        self.max_steps = len(self.data)

        # 狀態空間：2 維連續數值（標準化）
        self.observation_space = spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32)

        # 動作空間：0 to 1650 rpm
        self.action_space = spaces.Discrete(1651)
        self.current_step = 0

    def reset(self):
        self.current_step = 0
        return self._get_obs()

    def _get_obs(self):
        row = self.data.iloc[self.current_step]
        return np.array([
            row['Temperature'] / 100,
            row['Humidity'] / 100
        ], dtype=np.float32)

    def step(self, action):
        row = self.data.iloc[self.current_step]
        done = False
        reward = 0



        temp = row['Temperature']
        humid = row['Humidity']

        # RPM rawl
        kwh = ((1650 * (action / 1650) ** 3) * 0.1) / 1000
        tr_fan = 0.006
        save_rate = (kwh - tr_fan) / tr_fan


        # 節能效果（希望 kWh 越少越好）
        reward -= kwh  # 直接懲罰功耗

        # 溫度舒適度（22–28°C 為舒適範圍）
        reward -= abs(temp - 25) * 0.1  # 越接近 25°C 越好

        # 濕度舒適度（40–60% 為舒適範圍）
        reward -= abs(humid - 50) * 0.05  # 越接近 50% 越好

        # 若功耗比基準省電 30% 以上，額外加分
        if save_rate >= 0.3:
            reward += 0.5

        # 若風速在舒適風速區間（ex. 500~900 RPM），額外加分
        if 500 <= action <= 900:
            reward += 0.2

        # 安全風速 <= 1300 RPM
        if action <= 1300:
            reward += 0.2

        self.current_step += 1
        if self.current_step >= self.max_steps - 1:
            done = True

        return self._get_obs(), reward, done, {}

    def render(self, mode='human'):
        row = self.data.iloc[self.current_step]
        print(f"Step {self.current_step} - Temp: {row['temp']}°C, Humid: {row['humid']}%")

# === 訓練模型 ===
csv_path = "/content/temperature_humidity_log.csv"
# csv_path = "/content/sample_data/temperature_humidity_2000.csv"  # 替換為你的 CSV 檔路徑
env = DummyVecEnv([lambda: THBLDC(csv_path)])
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=50000)

# === 儲存模型 ===
model.save("ppo_model")
print("✅ 模型已儲存為 ppo_model.zip")



Using cpu device
-----------------------------
| time/              |      |
|    fps             | 592  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 423         |
|    iterations           | 2           |
|    time_elapsed         | 9           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.030457307 |
|    clip_fraction        | 0.328       |
|    clip_range           | 0.2         |
|    entropy_loss         | -7.4        |
|    explained_variance   | 0.000297    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.88        |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.065      |
|    value_loss           | 16.2        |
-----------------------------------------
-----------------

In [24]:
def test_model(model, env, episodes=500):
    total_rewards = []
    for ep in range(episodes):
        obs = env.reset()
        done = False
        ep_reward = 0
        while not done:
            action, _ = model.predict(obs)
            obs, reward, done, _ = env.step(action)
            ep_reward += abs(reward)
        total_rewards.append(ep_reward)
        print(f"Episode {ep+1}: Score = {abs(ep_reward)}")
    print(f"\n📊 Average Score: {np.mean(total_rewards)}")

# === 測試模型執行 ===
test_model(model, env)

Episode 1: Score = [38.79066]
Episode 2: Score = [40.368664]
Episode 3: Score = [43.55522]
Episode 4: Score = [41.380486]
Episode 5: Score = [41.456947]
Episode 6: Score = [39.39894]
Episode 7: Score = [41.37857]
Episode 8: Score = [40.042313]
Episode 9: Score = [36.746834]
Episode 10: Score = [40.385784]
Episode 11: Score = [39.08086]
Episode 12: Score = [42.406887]
Episode 13: Score = [42.829746]
Episode 14: Score = [36.438553]
Episode 15: Score = [41.181076]
Episode 16: Score = [41.56693]
Episode 17: Score = [41.987316]
Episode 18: Score = [40.3747]
Episode 19: Score = [38.032944]
Episode 20: Score = [44.949486]
Episode 21: Score = [39.888226]
Episode 22: Score = [41.351353]
Episode 23: Score = [39.60436]
Episode 24: Score = [42.872025]
Episode 25: Score = [41.38357]
Episode 26: Score = [37.01795]
Episode 27: Score = [40.106552]
Episode 28: Score = [39.089695]
Episode 29: Score = [41.850258]
Episode 30: Score = [39.27116]
Episode 31: Score = [42.340008]
Episode 32: Score = [43.64401

In [25]:
# 讀取模型
model = PPO.load("ppo_model.zip")

# 單筆資料（範例）
#  Temperature, Humidity
current_status = [26, 40]


obs = np.array([
    current_status[0] / 100,
    current_status[1] / 100
], dtype=np.float32)



  # 模型預測
action, _ = model.predict(obs, deterministic=True)


print(obs, action)

[0.26 0.4 ] 782


In [None]:
import gym
import numpy as np
import pandas as pd
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# 環境（使用 CSV）
class THBLDC(gym.Env):
    def __init__(self, csv_path):
        self.data = pd.read_csv(csv_path)
        self.max_steps = len(self.data)

        # 狀態空間：2 維連續數值（標準化）
        self.observation_space = spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32)

        # 動作空間：0 to 1650 rpm
        self.action_space = spaces.Discrete(1651)
        self.current_step = 0

    def reset(self):
        self.current_step = 0
        return self._get_obs()

    def _get_obs(self):
        row = self.data.iloc[self.current_step]
        return np.array([
            row['Temperature'] / 100,
            row['Humidity'] / 100
        ], dtype=np.float32)

    def step(self, action):
        row = self.data.iloc[self.current_step]
        done = False
        reward = 0

        temp = row['Temperature']
        humid = row['Humidity']

        # RPM 計算電力消耗
        rpm = action
        power_watt = (rpm / 1650) ** 3 * 100  # 假設最大功率為 100W
        kwh = (power_watt * 0.1) / 1000       # 0.1 小時為每步長
        tr_fan = 0.006                        # 傳統風扇能耗為 0.006 kWh
        save_rate = (tr_fan - kwh) / tr_fan

        # 節能效果（希望 kWh 越少越好）
        reward += -kwh  # 直接懲罰功耗

        # 溫度舒適度（22–28°C 為舒適範圍）
        reward += -abs(temp - 25) * 0.1  # 越接近 25°C 越好

        # 濕度舒適度（40–60% 為舒適範圍）
        reward += -abs(humid - 50) * 0.05  # 越接近 50% 越好

        # 若功耗比基準省電 30% 以上，額外加分
        if save_rate >= 0.3:
            reward += 0.5

        # 若風速在舒適風速區間（ex. 500~900 RPM），額外加分
        if 500 <= action <= 900:
            reward += 0.2


        self.current_step += 1
        if self.current_step >= self.max_steps - 1:
            done = True

        return self._get_obs(), reward, done, {}

    def render(self, mode='human'):
        row = self.data.iloc[self.current_step]
        print(f"Step {self.current_step} - Temp: {row['Temperature (°C)']} °C, Humid: {row['Humidity (%)']} %")

# === 訓練模型 ===
csv_path = "/content/temperature_humidity_log.csv"  # 替換為你的 CSV 檔路徑
env = DummyVecEnv([lambda: THBLDC(csv_path)])
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=500000)

# === 儲存模型 ===
model.save("ppo_ups_battery_model")
print("✅ 模型已儲存為 ppo_ups_battery_model.zip")

# === 測試模型 ===
def test_model(model, env, episodes=1000):
    total_rewards = []
    for ep in range(episodes):
        obs = env.reset()
        done = False
        ep_reward = 0
        while not done:
            action, _ = model.predict(obs)
            obs, reward, done, _ = env.step(action)
            ep_reward += reward
        total_rewards.append(ep_reward)
        print(f"Episode {ep+1}: Score = {ep_reward}")
    print(f"\n📊 Average Score: {np.mean(total_rewards)}")

# === 測試模型執行 ===
test_model(model, env)



[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 24          |
|    time_elapsed         | 136         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.025131298 |
|    clip_fraction        | 0.313       |
|    clip_range           | 0.2         |
|    entropy_loss         | -6.8        |
|    explained_variance   | 0.128       |
|    learning_rate        | 0.0003      |
|    loss                 | 23.1        |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.0564     |
|    value_loss           | 44.5        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 25          |