In [1]:
from pypetri.elements import *
from pypetri.petri_net import *
from pypetri.example_nets import *
import math

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.distributions import Categorical
import matplotlib.pyplot as plt
import collections
import random

from tqdm import tqdm


In [2]:
class ReplayBuffer():
    def __init__(self, capacity):
        self.buffer = collections.deque(maxlen=capacity)

    def __len__(self):
        return len(self.buffer)
    
    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))
        
    def sample(self, batch_size):
        transitions = random.sample(self.buffer, batch_size)
        state, action, reward, next_state, done = zip(*transitions)
        return np.array(state), action, reward, np.array(next_state), done
    
    def size(self):
        return len(self.buffer)
        

In [3]:
class P2P(nn.Module):
    def __init__(self, dim_in, dim_out, adj_pt, f=F.leaky_relu):
        super(P2P, self).__init__()
        self.weight = nn.Parameter(torch.Tensor(dim_out, dim_in).float())
        self.bias = nn.Parameter(torch.Tensor(dim_out).float())
        self.f = f
        self.adj_pt = adj_pt
        self.pm = torch.matmul(self.adj_pt, self.adj_pt.t())
        self.batch_norm = nn.BatchNorm1d(dim_out)
        self.reset_parameters()
    
    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            nn.init.uniform_(self.bias, -bound, bound)
    
    def forward(self, x):
        x = torch.matmul(self.pm, x)
        diag = self.pm.diagonal().unsqueeze(0).unsqueeze(2)
        x = x / diag
        x = torch.matmul(x, self.weight.t()) + self.bias.unsqueeze(0)
        
        batch_size, Np, dim_out = x.size()
        x = x.view(-1, dim_out)
        x = self.batch_norm(x)
        x = x.view(batch_size, Np, dim_out) 
        
        x = self.f(x)
        return x
    
class T2T(nn.Module):
    def __init__(self, dim_in, dim_out, adj_pt, f=F.leaky_relu):
        super(T2T, self).__init__()
        self.weight = nn.Parameter(torch.Tensor(dim_out, dim_in).float())
        self.bias = nn.Parameter(torch.Tensor(dim_out).float())
        self.f = f
        self.adj_pt = adj_pt
        self.pm = torch.matmul(self.adj_pt.t(), self.adj_pt)
        self.batch_norm = nn.BatchNorm1d(dim_out)
        self.reset_parameters()
    
    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            nn.init.uniform_(self.bias, -bound, bound)
    
    def forward(self, x):
        x = torch.matmul(self.pm, x)
        
        diag = self.pm.diagonal()
        diag[diag == 0] = 1
        diag = diag.unsqueeze(0).unsqueeze(2)
        x = x / diag
        x = torch.matmul(x, self.weight.t()) + self.bias.unsqueeze(0)
        
        batch_size, Np, dim_out = x.size()
        x = x.view(-1, dim_out)
        x = self.batch_norm(x)
        x = x.view(batch_size, Np, dim_out) 
        
        x = self.f(x)
        return x
    
class T2P(nn.Module):
    def __init__(self, dim_in, dim_out, adj_pt, f=F.leaky_relu, add_bias=True):
        super(T2P, self).__init__()
        self.adj_pt = adj_pt
        self.f = f
        self.weight = nn.Parameter(torch.Tensor(dim_out, dim_in).float())
        self.bias = nn.Parameter(torch.Tensor(dim_out).float())
        self.add_bias = add_bias
        self.reset_parameters()
    
    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            nn.init.uniform_(self.bias, -bound, bound)
            
    def forward(self, x):
        # print(x)
        x = torch.matmul(self.adj_pt, x)
        # print(x)
        x = torch.matmul(x, self.weight.t())
        # print(x)
        if self.add_bias:
            x = x + self.bias.unsqueeze(0)
        x = self.f(x)
        return x
        
class P2T(nn.Module):
    def __init__(self, dim_in, dim_out, adj_pt, f=F.leaky_relu, add_bias=True):
        super(P2T, self).__init__()
        self.adj_pt = adj_pt
        self.f = f
        self.weight = nn.Parameter(torch.Tensor(dim_out, dim_in).float())
        self.bias = nn.Parameter(torch.Tensor(dim_out).float())
        self.add_bias = add_bias
        self.reset_parameters()
        
    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            nn.init.uniform_(self.bias, -bound, bound)
        
    def forward(self, x):
        x = torch.matmul(self.adj_pt.t(), x)
        x = torch.matmul(x, self.weight.t())
        if self.add_bias:
            x = x + self.bias.unsqueeze(0)
        x = self.f(x)
        return x
        

In [4]:
class GCPN_layer(torch.nn.Module):
    def __init__(self, lp_in, lt_in, lp_out, lt_out, adj_matrix):
        super(GCPN_layer, self).__init__()
        self.P2P = P2P(lp_in, lp_out, adj_matrix)
        self.T2T = T2T(lt_in, lt_out, adj_matrix)
        self.P2T = P2T(lp_out, lt_out, adj_matrix)
        self.T2P = T2P(lt_out, lp_out, adj_matrix)
        
    def forward(self, p, t):
        p1 = self.P2P(p)
        t1 = self.T2T(t)
        
        dt = self.P2T(p1)
        # print(t1)
        t2 = t1 + dt
        # print(t2)
        dp = self.T2P(t2)
        # print(dp)
        p2 = p1 + dp
        # print(p2)
        return p2, t2

In [5]:
class GPNQNet(torch.nn.Module):
    def __init__(self, lp0, lt0, adj_matrix, device):
        super(GPNQNet, self).__init__()
        self.adj_matrix = torch.from_numpy(adj_matrix).float().to(device)
        
        self.gcpn_1 = GCPN_layer(lp0, lt0, 8, 8, self.adj_matrix)
        self.gcpn_2 = GCPN_layer(8, 8, 16, 16, self.adj_matrix)
        self.gcpn_3 = GCPN_layer(16, 16, 64, 64, self.adj_matrix)
        self.gcpn_4 = GCPN_layer(64, 64, 16, 16, self.adj_matrix)
        self.gcpn_5 = GCPN_layer(16, 16, 1, 1, self.adj_matrix)
        

    def forward(self, p, t):
        if isinstance(p, np.ndarray):
            p = torch.from_numpy(p).float()
        if isinstance(t, np.ndarray):
            t = torch.from_numpy(t).float()
        p1, t1 = self.gcpn_1(p, t)
        p2, t2 = self.gcpn_2(p1, t1)
        p3, t3 = self.gcpn_3(p2, t2)
        p4, t4 = self.gcpn_4(p3, t3)
        _, tf = self.gcpn_5(p4, t4)
        
        return tf
        

In [6]:
class DQN:
    def __init__(self, lp, lt, action_dim, adj_matrix, learning_rate, gamma, epsilon, target_update, device):
        self.lp = lp
        self.lt = lt
        self.action_dim = action_dim
        self.q_net = GPNQNet(lp, lt, adj_matrix, device).to(device)
        self.target_q_net = GPNQNet(lp, lt, adj_matrix, device).to(device)
        self.optimizer = torch.optim.Adam(self.q_net.parameters(), lr=learning_rate)
        self.gamma = gamma
        self.epsilon = epsilon
        self.target_update = target_update
        self.device = device
        self.count = 0
        
    def take_action(self, state):
        if np.random.random() < self.epsilon:
            action = np.random.randint(0, self.action_dim)
        else:
            state_p = state[0]
            state_l = state[1]
            state_p = torch.tensor([state_p], dtype=torch.float).to(self.device)
            state_l = torch.tensor([state_l], dtype=torch.float).to(self.device)
            action = self.q_net(state_p, state_l).argmax().item()
        return action
        
    def update(self, transition_dict):
        # print(transition_dict['states'][:, 0])
        p_states = torch.tensor([transition_dict['states'][:, 0]], dtype=torch.float).squeeze(0).to(self.device)
        t_states = torch.tensor([transition_dict['states'][:, 1]], dtype=torch.float).squeeze(0).to(self.device)
        
        actions = torch.tensor([transition_dict['actions']]).view(-1, 1).unsqueeze(-1).to(self.device)
        rewards = torch.tensor([transition_dict['rewards']], dtype=torch.float).view(-1, 1).to(self.device)
        
        next_p_states = torch.tensor([transition_dict['next_states'][:, 0]], dtype=torch.float).squeeze(0).to(self.device)
        next_t_states = torch.tensor([transition_dict['next_states'][:, 1]], dtype=torch.float).squeeze(0).to(self.device)
        dones = torch.tensor([transition_dict['dones']], dtype=torch.float).view(-1, 1).to(self.device)

        q_values = self.q_net(p_states, t_states)
        q_values = q_values.gather(1, actions)
        max_next_q_values = self.target_q_net(next_p_states, next_t_states).max(1)[0].view(-1, 1)
        q_targets = rewards + self.gamma * max_next_q_values * (1 - dones)
        loss = torch.mean(F.mse_loss(q_values, q_targets))
        
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
        if self.count % self.target_update == 0:
            self.target_q_net.load_state_dict(self.q_net.state_dict())
        self.count += 1

In [7]:

lr = 2e-3
gamma = 0.98
epsilon = 0.05
num_episode = 500
target_update = 10
buffer_size = 10000
minimal_size = 500
batch_size = 64

efm_net = EmptyNet('efm_net')
initial_file_path = 'initial_file/neural_petri_net.csv'
# initial_file_path = 'initial_file/test_net_2.csv'
efm_net.init_by_csv(initial_file_path)
efm_net.set_dt(1)

lp = efm_net.get_state()[0].shape[1]
lt = efm_net.get_state()[1].shape[1]
action_dim = efm_net.get_action_space()

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# random.seed(0)
# np.random.seed(0)
# torch.manual_seed(0)
replay_buffer = ReplayBuffer(buffer_size)
agent = DQN(lp, lt, action_dim, efm_net.get_adj_matrix(), lr, gamma, epsilon, target_update, device)

cuda


In [8]:
state = efm_net.get_state()
action = agent.take_action(state)
state_p = state[0]
state_l = state[1]
state_p = torch.tensor([state_p], dtype=torch.float).to(device)
state_l = torch.tensor([state_l], dtype=torch.float).to(device)

  state_p = torch.tensor([state_p], dtype=torch.float).to(self.device)


In [9]:
adj_matrix = efm_net.get_adj_matrix()
adj_matrix = torch.from_numpy(adj_matrix).float().to(device)
test_layer = GCPN_layer(3, 3, 8, 8, adj_matrix).to(device)
p1, t1 = test_layer(state_p, state_l)
# print(p1, t1)

In [10]:

output = agent.q_net(state_p, state_l)
print(output, action)

tensor([[[ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [ 1.0487],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
         [-0.0206],
