# PFSPNet
[Deep Reinforcement Learning Based Optimization Algorithm for Permutation Flow-Shop Scheduling | IEEE Journals & Magazine | IEEE Xplore](https://ieeexplore.ieee.org/document/9594768/)
1) 采纳原文的思路，使用了 Transformer 作为网络骨干;
2) 采用逐步增加 n 的训练方式;
3) Critic 部分采用了.

In [1]:
import numpy as np

import torch
import torch.optim as optim
import torch.nn.functional as F

from PFSP import pfspStep, getMinCmax
from PFSPNet import default_config, PFSPNet, PFSPDataLoader

from torch.utils.tensorboard import SummaryWriter

from datetime import datetime

In [2]:
# !cd OneDrive\Study\Code\RL\PFSPNet
# !tensorboard --logdir="tb_logs"

In [10]:
class Solver():
    """Greedy roll-out that turns a (partial) PFSP instance into a makespan.

    Without models, every call is delegated to ``getMinCmax``. With models,
    the most probable job is scheduled step by step while more than
    ``exact_threshold`` jobs remain; ``getMinCmax`` then finishes the rest.

    Args:
        models: Sequence of policy networks, used in order as the number of
            remaining jobs shrinks, or ``None`` to always use ``getMinCmax``.
        scopes: Job-count thresholds. Once the number of remaining jobs is
            ``<= scopes[i]`` the solver moves on from ``models[i]`` to
            ``models[i + 1]`` (if there is one).
        exact_threshold: Number of remaining jobs at or below which the
            instance is handed to ``getMinCmax``. Defaults to 5, the value
            that used to be hard-coded.
    """

    def __init__(self, models=None, scopes=None, exact_threshold=5):
        self.models = models
        self.scopes = scopes
        self.exact_threshold = exact_threshold

    def __call__(self, P: torch.tensor, state: torch.tensor) -> torch.tensor:
        """Return the makespan reached from ``state`` by scheduling all of ``P``.

        Args:
            P (torch.tensor): [batch_size, n, m_max] set of jobs still to be
                scheduled.
            state (torch.tensor): [batch_size, m_max] completion time of each
                operation of the previously scheduled job.

        Returns:
            Cmax (torch.tensor): [batch_size]
        """
        if self.models is None:
            return getMinCmax(P, state)

        batch_size = P.shape[0]
        N = P.shape[1]
        m_max = P.shape[-1]
        rows = range(batch_size)

        # A missing threshold list simply means "never switch model".
        scopes = self.scopes if self.scopes is not None else ()
        last_model = len(self.models) - 1
        i = 0

        with torch.no_grad():
            # Model-driven phase: only while more than `exact_threshold` jobs remain.
            for n in range(N, self.exact_threshold, -1):
                # Advance to the model responsible for the current job count.
                # `while` (not `if`) so that several thresholds can be skipped
                # at once, and the bounds checks keep the last model in use
                # instead of indexing past the end of `models` / `scopes`.
                while i < last_model and i < len(scopes) and n <= scopes[i]:
                    i += 1
                model = self.models[i]

                probs, _ = model(P, state)
                # probs: [batch_size, n]

                # Greedy choice: the job with the highest probability.
                _, idx = probs.max(-1)
                # idx: [batch_size]

                J = P[rows, idx, :]
                state = pfspStep(J, state)

                # Drop the chosen job from every instance: flatten the
                # (batch, job) axes and keep the rows whose one-hot flag is 0.
                keep = F.one_hot(idx, num_classes=n).reshape(-1) == 0
                P = P.reshape(-1, m_max)[keep, :].view(-1, n - 1, m_max)

            # Remaining jobs (possibly all of them when N is small) are
            # finished by getMinCmax; computing this after the loop also means
            # Cmax is always bound, even when the loop body never runs.
            Cmax = getMinCmax(P, state)

        return Cmax

In [11]:
def trainPartRandom(model, opt_actor, opt_critic, train_P, train_state, n_step, solver):
    """Sample ``n_step`` scheduling decisions, then update critic and actor once.

    Args:
        model: Callable returning ``(probs, baseline)`` for ``(P, state)``;
            ``probs`` is [batch_size, n] and ``baseline`` is [batch_size].
        opt_actor: Optimizer stepped on the policy-gradient objective.
        opt_critic: Optimizer stepped on the squared baseline error.
        train_P: [batch_size, n, m_max] jobs still to be scheduled.
        train_state: [batch_size, m_max] completion times of the previous job.
        n_step: Number of jobs to pick by sampling before ``solver`` takes over.
        solver: Callable ``solver(P, state) -> Cmax`` that completes the
            remaining schedule.

    Returns:
        Tuple ``(Cmax, probss, baselineN)``: the makespans returned by
        ``solver``, the list of per-step probability tensors, and the stacked
        baselines of shape [batch_size, n_step].
    """
    jobs = train_P
    prev_done = train_state

    batch_size, n = jobs.shape[0], jobs.shape[1]
    m_max = jobs.shape[-1]
    rows = range(batch_size)

    probss = []
    picked_probs = []
    step_baselines = []

    for remaining in range(n, n - n_step, -1):
        probs, baseline = model(jobs, prev_done)
        probss.append(probs)

        # Draw the next job of every instance from the predicted distribution.
        idx = torch.multinomial(probs, num_samples=1).view(-1)
        picked_probs.append(probs[rows, idx])
        step_baselines.append(baseline)

        chosen = jobs[rows, idx, :]
        next_done = pfspStep(chosen, prev_done)

        # Remove the drawn job: flatten (batch, job) and keep un-flagged rows.
        drawn_flag = F.one_hot(idx, num_classes=remaining).view(-1)
        flat_jobs = jobs.view(-1, m_max)
        jobs = flat_jobs[drawn_flag == 0, :].view(-1, remaining - 1, m_max)
        prev_done = next_done

    probN = torch.stack(picked_probs, dim=1)
    baselineN = torch.stack(step_baselines, dim=1)
    # both: [batch_size, n_step]

    Cmax = solver(jobs, prev_done)

    # Critic update: squared error between the final makespan and each
    # per-step baseline, averaged over the batch.
    target = Cmax.detach().view(-1, 1)
    critic_loss = ((target - baselineN) ** 2).sum() / batch_size

    opt_critic.zero_grad()
    # The graph is retained because the actor objective below back-propagates
    # through tensors produced by the same forward passes.
    critic_loss.backward(retain_graph=True)
    opt_critic.step()

    # Actor update: REINFORCE-style objective with the baseline as a constant.
    advantage = (Cmax.view(-1, 1) - baselineN).detach()
    actor_objective = (advantage * torch.log(probN)).sum() / batch_size

    opt_actor.zero_grad()
    actor_objective.backward()
    opt_actor.step()

    return Cmax, probss, baselineN

# Train

In [36]:
# problem parameters
# n and m are forwarded to PFSPDataLoader; presumably the number of jobs and
# the number of machines per instance -- TODO confirm against PFSPDataLoader.
n = 10
m = 20
# Number of jobs chosen by sampling in trainPartRandom before the solver
# completes the remaining schedule.
n_step = 5

# train parameters
n_epoch = 200
dataset_size = 200
batch_size = 200

# Optimizer selection ('adam' or 'sgd') and the learning rate shared by the
# actor and critic optimizers.
opt = 'adam'
lr = 1e-5

# valid parameters
# Used as both the dataset size and the batch size of the validation loader.
validset_size = 200

# model parameters
# Overrides one entry of the imported default_config before the model is built.
default_config['n_layers'] = 3

In [37]:
# Build the network from default_config (including the overrides made above).
model = PFSPNet(default_config)

In [None]:
# Data loaders: the validation loader serves the whole validation set as a
# single batch (dataset size == batch size).
trainLoader = PFSPDataLoader(dataset_size, batch_size, n, m)
validLoader = PFSPDataLoader(validset_size, validset_size, n, m)

# Move the model to the GPU *before* constructing the optimizers, as the
# torch.optim documentation recommends, so the optimizers are guaranteed to
# hold the parameters that are actually trained.
if torch.cuda.is_available():
    model = model.cuda()

# trainsolver has no models and therefore always falls back to getMinCmax;
# validsolver rolls out the current model greedily.
trainsolver = Solver()
validsolver = Solver([model], [5])

# Resolve the optimizer class once and fail fast on an unknown name instead
# of leaving optimizer_actor / optimizer_critic undefined.
if opt == 'adam':
    optimizer_cls = optim.Adam
elif opt == 'sgd':
    optimizer_cls = optim.SGD
else:
    raise ValueError(f"unsupported optimizer {opt!r}; expected 'adam' or 'sgd'")

# The actor optimizer covers every model parameter; the critic optimizer only
# the parameters of the `crtic` sub-module (attribute name as defined by PFSPNet).
optimizer_actor = optimizer_cls(model.parameters(), lr=lr)
optimizer_critic = optimizer_cls(model.crtic.parameters(), lr=lr)

In [None]:
# Run identifier built from the current local time. Note that the format spec
# itself ends with "/", so TIMESTAMP always carries a trailing slash.
TIMESTAMP = f"{datetime.now():%Y-%m-%dT%H-%M-%S/}"
# Encodes the problem size: jobs at the start and after the sampled steps,
# plus the value of m.
CONFIGSTAMP = f"n{n}to{n-n_step}_m{m}"

# TensorBoard events go to tb_logs/train_PFSPNet/<CONFIGSTAMP>/<TIMESTAMP>.
writer = SummaryWriter(log_dir="tb_logs/train_PFSPNet/" + CONFIGSTAMP + "/" + TIMESTAMP)

In [None]:
k = 0  # training counter, used as the TensorBoard global step

In [31]:
for epoch in range(n_epoch):

    # train
    model.train()
    for train_P, train_state in trainLoader:
        Cmax, probs, baselines = trainPartRandom(
            model, optimizer_actor, optimizer_critic, train_P, train_state, n_step, trainsolver
        )

    # Log once per epoch, using the statistics of the last training batch.
    k += 1
    for i, prob in enumerate(probs):
        # Mean (over the batch) of the highest probability at this step.
        writer.add_scalar(f"max prob for {n-i} step", prob.max(dim=-1)[0].mean(), k)
    for i, baseline in enumerate(baselines.T):
        # baselines is [batch_size, n_step]; its transpose iterates over steps.
        writer.add_scalar(f"baseline for {n-i} step", baseline.mean(), k)
    writer.add_scalar("Cmax", Cmax.mean(), k)

    # validation
    model.eval()
    if epoch % 20 == 0:
        # Evaluate on the held-out validation loader (previously this loop
        # iterated trainLoader, so "Cmax_valid" was measured on training data).
        for valid_P, valid_state in validLoader:
            Cmax_valid = validsolver(valid_P, valid_state)

        # Logs the value from the last validation batch.
        writer.add_scalar("Cmax_valid", Cmax_valid.mean(), k)

In [17]:
# Close the TensorBoard writer once training is finished.
writer.close()

In [None]:
# Save the trained weights to models/train_PFSPNet/<CONFIGSTAMP>/<timestamp>.pt.
# TIMESTAMP ends with "/", so using it verbatim would name a directory rather
# than a file; strip the slash and make sure the parent directory exists.
from pathlib import Path

checkpoint_dir = Path("models") / "train_PFSPNet" / CONFIGSTAMP
checkpoint_dir.mkdir(parents=True, exist_ok=True)
checkpoint_path = checkpoint_dir / (TIMESTAMP.rstrip("/") + ".pt")
torch.save(model.state_dict(), str(checkpoint_path))