# 贪婪算法的不同变体与优化

## 最高奖励的结果

In [7]:
from typing import List

from core import RLEnv, GreedyAgent
from algorithms import greedy_normal, epsilon_greedy, epsilon_decreasing_greedy
from train import train

# Single seed handed to the environment and to every agent below.
SEED = 42

# One environment instance; the same object is passed to all three agents.
env = RLEnv(machine_count=10, seed=SEED)

# Build the agents in a fixed order from (agent name, selection algorithm)
# pairs, so each one is constructed with identical env/seed arguments.
agents: List[GreedyAgent] = [
    GreedyAgent(name=label, env=env, greedy_algorithm=algorithm, seed=SEED)
    for label, algorithm in (
        ("normal_greedy", greedy_normal),
        ("epsilon_greedy", epsilon_greedy),
        ("epsilon_decreasing_greedy", epsilon_decreasing_greedy),
    )
]

# Keep the individual module-level names bound to the same agent objects.
# NOTE(review): "nomal" looks like a typo for "normal"; the name is kept
# as-is in case other cells refer to it -- confirm before renaming.
(
    nomal_greedy_agent,
    epsilon_greedy_agent,
    epsilon_decreasing_greedy_agent,
) = agents

# Train every agent in list order, 1000 episodes each.
for agent in agents:
    train(agent, episodes=1000)

Name: normal_greedy 
Total rewards: 77 
Rewards per machine: Rewards(values=[77, 0, 0, 0, 0, 0, 0, 0, 0, 0], counts=[1000, 0, 0, 0, 0, 0, 0, 0, 0, 0])
--------------------------------------------------
Name: epsilon_greedy 
Total rewards: 134 
Rewards per machine: Rewards(values=[81, 2, 3, 6, 4, 6, 7, 10, 11, 4], counts=[916, 8, 8, 11, 8, 10, 11, 12, 12, 4])
--------------------------------------------------
当前 epsilon 已经降到 0.5 了， 回合：138
Name: epsilon_decreasing_greedy 
Total rewards: 759 
Rewards per machine: Rewards(values=[2, 5, 7, 9, 7, 12, 12, 15, 678, 12], counts=[17, 19, 18, 22, 14, 20, 18, 22, 838, 12])
Final epsilon: 0.0100
--------------------------------------------------


## 最高平均回报率的结果

In [8]:
from typing import List

from core import RLEnv, GreedyAgent
from algorithms import greedy_average, epsilon_average, epsilon_decreasing_average
from train import train

# Single seed handed to the environment and to every agent below.
SEED = 42

# One environment instance; the same object is passed to all three agents.
env = RLEnv(machine_count=10, seed=SEED)

# Build the agents in a fixed order from (agent name, selection algorithm)
# pairs, so each one is constructed with identical env/seed arguments.
agents: List[GreedyAgent] = [
    GreedyAgent(name=label, env=env, greedy_algorithm=algorithm, seed=SEED)
    for label, algorithm in (
        ("greedy_average", greedy_average),
        ("epsilon_average", epsilon_average),
        ("epsilon_decreasing_average", epsilon_decreasing_average),
    )
]

# Keep the individual module-level names bound to the same agent objects.
# NOTE(review): these variable names say "greedy" (and "nomal" looks like a
# typo for "normal") although the agents here use the *_average algorithms;
# the names are kept as-is in case other cells refer to them -- confirm
# before renaming.
(
    nomal_greedy_agent,
    epsilon_greedy_agent,
    epsilon_decreasing_greedy_agent,
) = agents

# Train every agent in list order, 1000 episodes each.
for agent in agents:
    train(agent, episodes=1000)

Name: greedy_average 
Total rewards: 77 
Rewards per machine: Rewards(values=[77, 0, 0, 0, 0, 0, 0, 0, 0, 0], counts=[1000, 0, 0, 0, 0, 0, 0, 0, 0, 0])
--------------------------------------------------
Name: epsilon_average 
Total rewards: 847 
Rewards per machine: Rewards(values=[5, 2, 3, 6, 4, 6, 7, 33, 34, 747], counts=[36, 8, 8, 11, 8, 10, 11, 44, 40, 824])
--------------------------------------------------
当前 epsilon 已经降到 0.5 了， 回合：138
Name: epsilon_decreasing_average 
Total rewards: 847 
Rewards per machine: Rewards(values=[2, 5, 7, 9, 9, 12, 13, 24, 47, 719], counts=[17, 19, 18, 22, 17, 21, 19, 31, 55, 781])
--------------------------------------------------
