In [2]:
import torch
import numpy as np
import pandas as pd
import os
from agent.ppo import PPO
from environment.env import PMSP
from cfg_local import Configure

cfg = Configure()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print('Current Device:',device)
rule_weight = {100: {"ATCS": [2.730, 1.153], "COVERT": 6.8},
               200: {"ATCS": [3.519, 1.252], "COVERT": 4.4},
               400: {"ATCS": [3.338, 1.209], "COVERT": 3.9}}

weight_tard = 0.5
weight_setup = 1 - weight_tard
optim = cfg.optim
learning_rate = cfg.lr
K_epoch = cfg.K_epoch
T_horizon = cfg.T_horizon
num_episode = cfg.n_episode
num_job = cfg.num_job
num_m = cfg.num_machine
keyword = "F4_debug"




Current Device: cuda


<All keys matched successfully>

In [7]:

env = PMSP(num_job=num_job, num_m=num_m, reward_weight=[weight_tard, weight_setup],
                   rule_weight=rule_weight[num_job])
agent = PPO(cfg, env.state_dim, env.action_dim, optimizer_name=optim, K_epoch=K_epoch).to(device)

modelpath = 'output/F4_1K_lr_0.0001_K_1_T_1_2/5_5/model/episode-500.pt'
checkpoint = torch.load(modelpath)
agent.load_state_dict(checkpoint["model_state_dict"])
state = env.reset()
r_epi = 0.0
done = False
action_list = [0, 0, 0, 0]
mapping = {0: "SSPT", 1: "ATCS", 2: "MDD", 3: "COVERT"}
while not done:
    logit = agent.pi(torch.from_numpy(state).float().to(device))
    prob = torch.softmax(logit, dim=-1)

    action = torch.argmax(prob).item()
    next_state, reward, done = env.step(action)
    action_list[action] += 1
    print(round(env.sim_env.now,3), '\t| PPO chose ', mapping[action])
    state = next_state
    if done:
        tardiness = env.monitor.tardiness / env.num_job
        setup = env.monitor.setup / env.num_job
        makespan = env.sink.makespan
        break

for i in range(4):
    print(mapping[i],':\t', action_list[i])

0.785 	| PPO chose  ATCS
5.325 	| PPO chose  ATCS
8.641 	| PPO chose  ATCS
14.375 	| PPO chose  ATCS
18.46 	| PPO chose  ATCS
21.183 	| PPO chose  ATCS
24.924 	| PPO chose  ATCS
30.624 	| PPO chose  ATCS
33.384 	| PPO chose  ATCS
34.089 	| PPO chose  ATCS
38.065 	| PPO chose  ATCS
39.831 	| PPO chose  ATCS
44.235 	| PPO chose  ATCS
45.486 	| PPO chose  ATCS
51.807 	| PPO chose  ATCS
51.958 	| PPO chose  ATCS
55.454 	| PPO chose  ATCS
60.287 	| PPO chose  ATCS
63.439 	| PPO chose  ATCS
65.72 	| PPO chose  ATCS
68.479 	| PPO chose  ATCS
78.043 	| PPO chose  ATCS
82.828 	| PPO chose  ATCS
83.396 	| PPO chose  ATCS
86.168 	| PPO chose  ATCS
88.013 	| PPO chose  ATCS
88.06 	| PPO chose  ATCS
93.905 	| PPO chose  ATCS
94.204 	| PPO chose  ATCS
97.515 	| PPO chose  ATCS
102.797 	| PPO chose  ATCS
104.746 	| PPO chose  ATCS
105.262 	| PPO chose  ATCS
105.375 	| PPO chose  ATCS
110.58 	| PPO chose  ATCS
115.941 	| PPO chose  ATCS
125.938 	| PPO chose  ATCS
126.076 	| PPO chose  ATCS
128.074 	| 

In [11]:

env = PMSP(num_job=num_job, num_m=num_m, reward_weight=[weight_tard, weight_setup],
                   rule_weight=rule_weight[num_job])
agent = PPO(cfg, env.state_dim, env.action_dim, optimizer_name=optim, K_epoch=K_epoch).to(device)

modelpath = 'output/F4_1K_lr_0.0001_K_1_T_1_2/5_5/model/episode-1.pt'
checkpoint = torch.load(modelpath)
agent.load_state_dict(checkpoint["model_state_dict"])
state = env.reset()
r_epi = 0.0
done = False
action_list = [0, 0, 0, 0]
mapping = {0: "SSPT", 1: "ATCS", 2: "MDD", 3: "COVERT"}
while not done:
    logit = agent.pi(torch.from_numpy(state).float().to(device))
    prob = torch.softmax(logit, dim=-1)

    action = torch.argmax(prob).item()
    next_state, reward, done = env.step(action)
    action_list[action] += 1
    print(round(env.sim_env.now,3) , '\t| PPO chose ', mapping[action])
    state = next_state
    if done:
        tardiness = env.monitor.tardiness / env.num_job
        setup = env.monitor.setup / env.num_job
        makespan = env.sink.makespan
        break
        
print()
print(modelpath)
for i in range(4):
    print(mapping[i],':\t', action_list[i])

0.799 	| PPO chose  MDD
1.459 	| PPO chose  MDD
4.665 	| PPO chose  MDD
4.759 	| PPO chose  MDD
15.816 	| PPO chose  MDD
16.468 	| PPO chose  MDD
17.471 	| PPO chose  MDD
17.93 	| PPO chose  MDD
18.884 	| PPO chose  MDD
34.203 	| PPO chose  MDD
34.343 	| PPO chose  MDD
37.172 	| PPO chose  MDD
39.849 	| PPO chose  MDD
40.465 	| PPO chose  MDD
48.348 	| PPO chose  MDD
52.021 	| PPO chose  MDD
52.318 	| PPO chose  MDD
55.102 	| PPO chose  MDD
59.137 	| PPO chose  MDD
65.141 	| PPO chose  MDD
68.478 	| PPO chose  MDD
68.667 	| PPO chose  MDD
70.484 	| PPO chose  MDD
76.258 	| PPO chose  MDD
80.961 	| PPO chose  MDD
87.89 	| PPO chose  MDD
88.535 	| PPO chose  MDD
89.775 	| PPO chose  MDD
91.673 	| PPO chose  MDD
94.813 	| PPO chose  MDD
98.856 	| PPO chose  MDD
105.045 	| PPO chose  MDD
107.499 	| PPO chose  MDD
109.51 	| PPO chose  MDD
110.526 	| PPO chose  MDD
113.567 	| PPO chose  MDD
115.925 	| PPO chose  MDD
117.001 	| PPO chose  MDD
121.838 	| PPO chose  MDD
126.309 	| PPO chose  MD

In [10]:

env = PMSP(num_job=num_job, num_m=num_m, reward_weight=[weight_tard, weight_setup],
                   rule_weight=rule_weight[num_job])
agent = PPO(cfg, env.state_dim, env.action_dim, optimizer_name=optim, K_epoch=K_epoch).to(device)

modelpath = 'output/F4_1K_lr_0.0001_K_1_T_1_2/5_5/model/episode-200.pt'
checkpoint = torch.load(modelpath)
agent.load_state_dict(checkpoint["model_state_dict"])
state = env.reset()
r_epi = 0.0
done = False
action_list = [0, 0, 0, 0]
mapping = {0: "SSPT", 1: "ATCS", 2: "MDD", 3: "COVERT"}
while not done:
    logit = agent.pi(torch.from_numpy(state).float().to(device))
    prob = torch.softmax(logit, dim=-1)

    action = torch.argmax(prob).item()
    next_state, reward, done = env.step(action)
    action_list[action] += 1
    print(round(env.sim_env.now,3) , '\t| PPO chose ', mapping[action])
    state = next_state
    if done:
        tardiness = env.monitor.tardiness / env.num_job
        setup = env.monitor.setup / env.num_job
        makespan = env.sink.makespan
        break
        
print()
print(modelpath)
for i in range(4):
    print(mapping[i],':\t', action_list[i])

1.794 	| PPO chose  ATCS
3.473 	| PPO chose  ATCS
4.78 	| PPO chose  ATCS
7.485 	| PPO chose  ATCS
13.085 	| PPO chose  ATCS
20.968 	| PPO chose  ATCS
22.132 	| PPO chose  ATCS
26.865 	| PPO chose  ATCS
32.201 	| PPO chose  ATCS
35.03 	| PPO chose  ATCS
36.733 	| PPO chose  ATCS
37.781 	| PPO chose  ATCS
45.348 	| PPO chose  ATCS
45.597 	| PPO chose  ATCS
48.636 	| PPO chose  ATCS
50.215 	| PPO chose  ATCS
57.453 	| PPO chose  ATCS
68.006 	| PPO chose  ATCS
75.137 	| PPO chose  ATCS
78.968 	| PPO chose  ATCS
79.085 	| PPO chose  ATCS
79.358 	| PPO chose  ATCS
83.023 	| PPO chose  ATCS
93.892 	| PPO chose  ATCS
95.784 	| PPO chose  ATCS
99.896 	| PPO chose  ATCS
101.132 	| PPO chose  ATCS
102.843 	| PPO chose  ATCS
110.311 	| PPO chose  ATCS
112.681 	| PPO chose  ATCS
113.697 	| PPO chose  ATCS
116.884 	| PPO chose  ATCS
119.896 	| PPO chose  ATCS
123.388 	| PPO chose  ATCS
125.291 	| PPO chose  ATCS
129.194 	| PPO chose  ATCS
130.66 	| PPO chose  ATCS
133.315 	| PPO chose  ATCS
134.141

In [25]:
def tround(t, decimal):
    f = t.clone()
    f = f.tolist()
    return [round(f[i],decimal) for i in range(len(f))]

tround(prob,3)

[0.002, 0.992, 0.005, 0.001]

In [14]:
import torch
import numpy as np
import pandas as pd
import os
from agent.ppo import PPO
from environment.env import PMSP
from cfg_local import Configure


def tround(t, decimal):
    f = t.clone()
    f = f.tolist()
    return [round(f[i],decimal) for i in range(len(f))]

def nround(t, decimal):
    f = t.tolist()
    return [round(f[i],decimal) for i in range(len(f))]

tround(prob,3)
cfg = Configure()
weight_tard = 0.5
weight_setup = 1 - weight_tard
optim = cfg.optim
learning_rate = cfg.lr
K_epoch = cfg.K_epoch
T_horizon = cfg.T_horizon
num_episode = cfg.n_episode
num_job = cfg.num_job
num_m = cfg.num_machine
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print('Current Device:',device)
rule_weight = {100: {"ATCS": [2.730, 1.153], "COVERT": 6.8},
               200: {"ATCS": [3.519, 1.252], "COVERT": 4.4},
               400: {"ATCS": [3.338, 1.209], "COVERT": 3.9}}


env = PMSP(num_job=num_job, num_m=num_m, reward_weight=[weight_tard, weight_setup],
                   rule_weight=rule_weight[num_job])
agent = PPO(cfg, env.state_dim, env.action_dim, optimizer_name=optim, K_epoch=K_epoch).to(device)

modelpath = 'output/F4_1K_lr_0.0001_K_1_T_1_2/5_5/model/episode-400.pt'
checkpoint = torch.load(modelpath)
agent.load_state_dict(checkpoint["model_state_dict"])
state = env.reset()
r_epi = 0.0
done = False
action_list = [0, 0, 0, 0]
mapping = {0: "SSPT", 1: "ATCS", 2: "MDD", 3: "COVERT"}
while not done:
    logit = agent.pi(torch.from_numpy(state).float().to(device))
    prob = torch.softmax(logit, dim=-1)
    """
    f_1 = np.zeros(self.num_m)  # Setup -> 현재 라인의 셋업 값과 같은 셋업인 job의 수
    f_2 = np.zeros(4)  # Due Date -> Tardiness level for non-setup
    f_3 = np.zeros(4)  # Due Date -> Tardiness level for setup
    f_4 = np.zeros(self.num_m)  # General Info -> 각 라인의 progress rate
    """
    action = torch.argmax(prob).item()
    next_state, reward, done = env.step(action)
    action_list[action] += 1
    
    
    # 남은 job이 20개 미만으로 떨어지면
    if env.n_route > 60: 
        print('\tQueue:\t',[env.input_queue[i].feature for i in range(len(env.input_queue))])
        print('\tCalling line:',env.calling_line.name, '\tSetting:\t',env.calling_setting)
        print('\t\tM0:\t',env.model['Machine 0'].setup)
        print('\t\tM1:\t',env.model['Machine 1'].setup)
        print('\t\tM2:\t',env.model['Machine 2'].setup)
        print('\t\tM3:\t',env.model['Machine 3'].setup)
        print('\t\tM4:\t',env.model['Machine 4'].setup)
        print('\tf1 : ',nround(state[:env.num_m],4))
        print('\tf2 : ',nround(state[env.num_m:env.num_m+4],4))
        print('\tf3 : ',nround(state[env.num_m+4:env.num_m+8],4))
        print('\tf4 : ',nround(state[env.num_m+8:],4))
    print(round(env.sim_env.now,3) , '\t|Probability:', tround(prob,3))
    print(round(env.sim_env.now,3) , '\t| PPO chose ', mapping[action])
    print()
    state = next_state
    if done:
        tardiness = env.monitor.tardiness / env.num_job
        setup = env.monitor.setup / env.num_job
        makespan = env.sink.makespan
        break
        
print()
print(modelpath)
for i in range(4):
    print(mapping[i],':\t', action_list[i])
print('Tardiness:', round(tardiness,3))
print('setup:', round(setup,3))
print('makespan:', round(makespan,3))

Current Device: cuda
0.37 	|Probability: [0.002, 0.993, 0.005, 0.001]
0.37 	| PPO chose  ATCS
6.021 	|Probability: [0.001, 0.995, 0.004, 0.001]
6.021 	| PPO chose  ATCS
9.333 	|Probability: [0.001, 0.994, 0.004, 0.001]
9.333 	| PPO chose  ATCS
11.644 	|Probability: [0.001, 0.994, 0.004, 0.001]
11.644 	| PPO chose  ATCS

13.957 	|Probability: [0.001, 0.995, 0.004, 0.001]
13.957 	| PPO chose  ATCS

20.696 	|Probability: [0.002, 0.993, 0.004, 0.001]
20.696 	| PPO chose  ATCS

28.369 	|Probability: [0.002, 0.993, 0.005, 0.001]
28.369 	| PPO chose  ATCS

31.606 	|Probability: [0.002, 0.993, 0.005, 0.001]
31.606 	| PPO chose  ATCS

37.452 	|Probability: [0.002, 0.992, 0.005, 0.001]
37.452 	| PPO chose  ATCS

38.647 	|Probability: [0.002, 0.993, 0.005, 0.001]
38.647 	| PPO chose  ATCS

39.077 	|Probability: [0.001, 0.995, 0.003, 0.0]
39.077 	| PPO chose  ATCS

44.845 	|Probability: [0.002, 0.992, 0.005, 0.001]
44.845 	| PPO chose  ATCS

45.195 	|Probability: [0.002, 0.992, 0.005, 0.001]
45.19

In [20]:
prob.round(decimals=3)

tensor([0.0020, 0.9930, 0.0050, 0.0010], device='cuda:0',
       grad_fn=<RoundBackward1>)