In [1]:
%config Completer.use_jedi = False

In [2]:
import numpy as np
import torch.autograd
import time
import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
if torch.cuda.is_available():
    device = torch.device("cuda:0")  # you can continue going on here, like cuda:1 cuda:2....etc. 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")
import dgl
from graphenvs import HalfCheetahGraphEnv
import itertools

Running on the GPU


Using backend: pytorch


In [3]:
class Network(nn.Module):
    def __init__(
        self,
        input_size,
        output_size,
        hidden_sizes,
        with_batch_norm=False,
        activation=None
    ):
        super(Network, self).__init__()
        self.hidden_sizes = hidden_sizes
        self.input_size = input_size
        self.output_size = output_size
        
        self.layers = nn.ModuleList()

        self.layers.append(nn.Linear(self.input_size, hidden_sizes[0]))
        if with_batch_norm:
            self.layers.append(nn.LayerNorm(normalized_shape=(hidden_sizes[0])))
        self.layers.append(nn.ReLU())
        
        for i in range(len(hidden_sizes) - 1):
            self.layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i+1]))
            if with_batch_norm:
                self.layers.append(nn.LayerNorm(normalized_shape=(hidden_sizes[i+1])))
            self.layers.append(nn.ReLU())
        
        self.layers.append(nn.Linear(hidden_sizes[len(hidden_sizes) - 1], self.output_size))
        
        if activation is not None:
            self.layers.append(activation())
            
    def forward(self, x):
        out = x
        
        for layer in self.layers:
            out = layer(out)
            
        return out


In [4]:
class GraphNeuralNetwork(nn.Module):
    def __init__(
        self,
        inputNetwork,
        messageNetwork,
        updateNetwork,
        outputNetwork,
        numMessagePassingIterations,
        withInputNetwork = True
    ):
        
        super(GraphNeuralNetwork, self).__init__()
                
        self.inputNetwork = inputNetwork
        self.messageNetwork = messageNetwork
        self.updateNetwork = updateNetwork
        self.outputNetwork = outputNetwork
        
        self.numMessagePassingIterations = numMessagePassingIterations
        self.withInputNetwork = withInputNetwork
        
    def inputFunction(self, nodes):
        return {'state' : self.inputNetwork(nodes.data['input'])}
    
    def messageFunction(self, edges):
        
        batchSize = edges.src['state'].shape[1]
        edgeData = edges.data['feature'].repeat(batchSize, 1).T.unsqueeze(-1)
        nodeInput = edges.src['input']
        
        return {'m' : self.messageNetwork(torch.cat((edges.src['state'], edgeData, nodeInput), -1))}
    
    def updateFunction(self, nodes):
        return {'state': self.updateNetwork(torch.cat((nodes.data['m_hat'], nodes.data['state']), -1))}
    
    def outputFunction(self, nodes):
        
        return {'output': self.outputNetwork(nodes.data['state'])}


    def forward(self, graph, state):
        
        self.update_states_in_graph(graph, state)
        
        if self.withInputNetwork:
            graph.apply_nodes(self.inputFunction)
        
        for messagePassingIteration in range(self.numMessagePassingIterations):
            graph.update_all(self.messageFunction, dgl.function.mean('m', 'm_hat'), self.updateFunction)
        
        graph.apply_nodes(self.outputFunction)
        
        output = graph.ndata['output']
        output = output.squeeze(-1).mean(0)
                
        return output
    
    def update_states_in_graph(self, graph, state):
        if len(state.shape) == 1:
            state = state.unsqueeze(0)
        
        numGraphFeature = 6
        numGlobalStateInformation = 5
        numLocalStateInformation = 2
        numStateVar = state.shape[1] // 2
        globalInformation = torch.cat((state[:, 0:5], state[:, numStateVar:numStateVar+5]), -1)
        
        numNodes = (numStateVar - 5) // 2

        nodeData = torch.empty((numNodes, state.shape[0], numGraphFeature + 2 * numGlobalStateInformation + 2 * numLocalStateInformation)).to(device)
        for nodeIdx in range(numNodes):

            # Assign global features from graph
            nodeData[nodeIdx, :, :6] = graph.ndata['feature'][nodeIdx]
            # Assign local state information
            nodeData[nodeIdx, :, 16] = state[:, 5 + nodeIdx]
            nodeData[nodeIdx, :, 17] = state[:, 5 + numNodes + nodeIdx]
            nodeData[nodeIdx, :, 18] = state[:, numStateVar + 5 + nodeIdx]
            nodeData[nodeIdx, :, 19] = state[:, numStateVar + 5 + numNodes + nodeIdx]

        # Assdign global state information
        nodeData[:, :, 6:16] = globalInformation
        
        if self.withInputNetwork:
            graph.ndata['input'] = nodeData        
        
        else:
            graph.ndata['state'] = nodeData


In [5]:
trainingIdxs = [0, 1, 2, 3, 4, 5]

In [6]:
states = {}
actions = {}
rewards = {}
next_states = {}
dones = {}
env = {}

for morphIdx in trainingIdxs:

    prefix = '../datasets/{}/'.format(morphIdx)
    
    states[morphIdx] = np.load(prefix + 'states_array.npy')
    actions[morphIdx] = np.load(prefix + 'actions_array.npy')
    rewards[morphIdx] = np.load(prefix + 'rewards_array.npy')
    next_states[morphIdx] = np.load(prefix + 'next_states_array.npy')
    dones[morphIdx] = np.load(prefix + 'dones_array.npy')
    
    env[morphIdx] = HalfCheetahGraphEnv(None)
    env[morphIdx].set_morphology(morphIdx)

NoneType: None


None
*************************************************************************************************************


NoneType: None


None
*************************************************************************************************************


NoneType: None


None
*************************************************************************************************************


NoneType: None


None
*************************************************************************************************************


NoneType: None


None
*************************************************************************************************************
None
*************************************************************************************************************


NoneType: None


In [7]:
states_train = {}
states_test = {}
next_states_train = {}
next_states_test = {}

for morphIdx in trainingIdxs:
    permutation = np.random.permutation(states[morphIdx].shape[0])
    
    states[morphIdx] = states[morphIdx][permutation]
    next_states[morphIdx] = next_states[morphIdx][permutation]
    
    states_train[morphIdx] = torch.from_numpy(states[morphIdx][100000:]).float()
    states_test[morphIdx] = torch.from_numpy(states[morphIdx][:100000]).float()
    
    next_states_train[morphIdx] = torch.from_numpy(next_states[morphIdx][100000:]).float()
    next_states_test[morphIdx] = torch.from_numpy(next_states[morphIdx][:100000]).float()

In [8]:
hidden_sizes = [256, 256]

inputSize = 20
stateSize = 64
messageSize = 64
outputSize = 1
numMessagePassingIterations = 6
batch_size = 1024
with_batch_norm=True
numBatchesPerTrainingStep = 1

inputNetwork = Network(inputSize, stateSize, hidden_sizes, with_batch_norm)
messageNetwork = Network(stateSize + inputSize + 1, messageSize, hidden_sizes, with_batch_norm, nn.Tanh)
updateNetwork = Network(stateSize + messageSize, stateSize, hidden_sizes, with_batch_norm)
outputNetwork = Network(stateSize, outputSize, hidden_sizes, with_batch_norm, nn.Sigmoid)

gnn = GraphNeuralNetwork(inputNetwork, messageNetwork, updateNetwork, outputNetwork, numMessagePassingIterations).to(device)

In [9]:
lr = 1e-5
optimizer = optim.Adam(itertools.chain(inputNetwork.parameters(), messageNetwork.parameters(), updateNetwork.parameters(), outputNetwork.parameters())
                       , lr=lr, weight_decay=1e-5)

# lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=0, verbose=True, min_lr=1e-5)
binaryLoss = nn.BCELoss()

zeroTensor = torch.zeros([batch_size]).to(device)
oneTensor = torch.ones([batch_size]).to(device)

In [10]:
numTrainingBatches = int(np.ceil(states_train[trainingIdxs[0]].shape[0] / batch_size))
numTestingBatches = int(np.ceil(states_test[trainingIdxs[0]].shape[0] / batch_size))

trainLosses = {}
testLosses = {}

for morphIdx in trainingIdxs:
    trainLosses[morphIdx] = []
    testLosses[morphIdx] = []

In [17]:
for epoch in range(10):
    
    print('Starting Epoch {}'.format(epoch))
    # Record epoch start time to calculate per epoch time
    epoch_t0 = time.time()
    
    # Randomize the order of traininig examples
    for morphIdx in trainingIdxs:
        permutation = np.random.permutation(states_train[morphIdx].shape[0])

        states_train[morphIdx] = states_train[morphIdx][permutation]
        next_states_train[morphIdx] = next_states_train[morphIdx][permutation]

    with torch.no_grad():
        
        for morphIdx in trainingIdxs:
        
            testLosses[morphIdx].append(torch.zeros(6))
            
            for batch_ in range(0, numTestingBatches-1):
                
                # Get new graphs for each iteration
                g1 = env[morphIdx].get_graph()._get_dgl_graph()
                g2 = env[morphIdx].get_graph()._get_dgl_graph()
                g3 = env[morphIdx].get_graph()._get_dgl_graph()

                current_states = states_test[morphIdx][batch_ * batch_size:(batch_+1)*batch_size]
#                 forward_states = next_states_test[morphIdx][batch_ * batch_size:(batch_+1)*batch_size]
                forward_states = current_states - next_states_test[morphIdx][batch_ * batch_size:(batch_+1)*batch_size]

                forward_x = torch.cat((current_states, forward_states), -1).to(device)
                predicted_sigmoids = gnn(g1, forward_x)
                forwardLoss = binaryLoss(predicted_sigmoids, oneTensor)
                
                # Save Forward Loss and Accuracy
                testLosses[morphIdx][-1][0] += forwardLoss.item()
                testLosses[morphIdx][-1][1] += torch.eq(oneTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)

                choices_range = np.arange(states_test[morphIdx].shape[0])
                random_indeces = np.random.choice(choices_range, size=current_states.shape[0])
                
#                 random_states = states_test[morphIdx][random_indeces]
                random_states = current_states - states_test[morphIdx][random_indeces]
                random_x = torch.cat((current_states, random_states), -1).to(device)
                predicted_sigmoids = gnn(g2, random_x)
                randomLoss = binaryLoss(predicted_sigmoids, zeroTensor)

                testLosses[morphIdx][-1][2] += randomLoss.item()
                testLosses[morphIdx][-1][3] += torch.eq(zeroTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)
                
                random_velocities = torch.from_numpy(np.random.normal(loc=0, scale=1.0, size=batch_size))
                velcocity_changed_states = torch.zeros(batch_size, current_states.shape[-1])
                velcocity_changed_states[:, 0] = -random_velocities
                velocity_changed_x = torch.cat((current_states, velcocity_changed_states), -1).to(device)
                predicted_sigmoids = gnn(g3, velocity_changed_x)
                velocity_changed_loss = binaryLoss(predicted_sigmoids, zeroTensor)

                testLosses[morphIdx][-1][4] += velocity_changed_loss.item()
                testLosses[morphIdx][-1][5] += torch.eq(zeroTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)

            testLosses[morphIdx][-1] /= numTestingBatches
    
    print(' ** Testing ** ')
    for morphIdx in trainingIdxs:
        print('{}: F-Loss {:.3f} | F-Acc {:.3f}| R-Loss {:.3f} | R-Acc {:.3f} | V-Loss {:.3f} | V-Acc {:.3f}'.
            format(morphIdx, testLosses[morphIdx][-1][0], testLosses[morphIdx][-1][1], testLosses[morphIdx][-1][2], testLosses[morphIdx][-1][3], testLosses[morphIdx][-1][4], testLosses[morphIdx][-1][5]))

    
    for batch in range(0, numTrainingBatches-1, numBatchesPerTrainingStep):
                
        batch_t0 = time.time()
        
        for morphIdx in trainingIdxs:
            trainLosses[morphIdx].append(torch.zeros(6))

        optimizer.zero_grad()
        
        for batchOffset in range(numBatchesPerTrainingStep):

            if batch + batchOffset >= numTrainingBatches - 1:
                break
                
            for morphIdx in trainingIdxs:
                
                # Get new graphs for each iteration
                g1 = env[morphIdx].get_graph()._get_dgl_graph()
                g2 = env[morphIdx].get_graph()._get_dgl_graph()
                g3 = env[morphIdx].get_graph()._get_dgl_graph()
                
                current_states = states_train[morphIdx][(batch + batchOffset) * batch_size:(batch + batchOffset + 1)*batch_size]
#                 forward_states = next_states_train[morphIdx][(batch + batchOffset) * batch_size:(batch + batchOffset + 1)*batch_size]
                forward_states = current_states - next_states_train[morphIdx][(batch + batchOffset) * batch_size:(batch + batchOffset + 1)*batch_size]

                forward_x = torch.cat((current_states, forward_states), -1).to(device)
                
                predicted_sigmoids = gnn(g1, forward_x)
                forwardLoss = binaryLoss(predicted_sigmoids, oneTensor)
                
                # Save Forward Loss and Accuracy
                trainLosses[morphIdx][-1][0] += forwardLoss.item()
                trainLosses[morphIdx][-1][1] += torch.eq(oneTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)

                (forwardLoss / numBatchesPerTrainingStep).backward()

                choices_range = np.arange(states_train[morphIdx].shape[0])
                random_indeces = np.random.choice(choices_range, size=batch_size)

#                 random_states = states_train[morphIdx][random_indeces]
                random_states = current_states - states_train[morphIdx][random_indeces]
                
                random_x = torch.cat((current_states, random_states), -1).to(device)
                
                predicted_sigmoids = gnn(g2, random_x)

                randomLoss = binaryLoss(predicted_sigmoids, zeroTensor)
                (9 * (randomLoss / numBatchesPerTrainingStep) / 10).backward()

                trainLosses[morphIdx][-1][2] += randomLoss.item()
                trainLosses[morphIdx][-1][3] += torch.eq(zeroTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)
                
                random_velocities = torch.from_numpy(np.random.normal(loc=0, scale=1.0, size=batch_size))
                velcocity_changed_states = torch.zeros(batch_size, current_states.shape[-1])
                velcocity_changed_states[:, 0] = -random_velocities
                velocity_changed_x = torch.cat((current_states, velcocity_changed_states), -1).to(device)
                predicted_sigmoids = gnn(g3, velocity_changed_x)
                velocity_changed_loss = binaryLoss(predicted_sigmoids, zeroTensor)
                
                ((velocity_changed_loss / numBatchesPerTrainingStep) / 10).backward()
                
                trainLosses[morphIdx][-1][4] += velocity_changed_loss.item()
                trainLosses[morphIdx][-1][5] += torch.eq(zeroTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)

                        
        for morphIdx in trainingIdxs:
            trainLosses[morphIdx][-1] /= numBatchesPerTrainingStep

        optimizer.step()
        batch_time = time.time() - batch_t0

        if batch % 200 == 0:
            print('Batch {} in {:.2f}s'.format(batch, batch_time))
            
            for morphIdx in trainingIdxs:
                print('{}: F-Loss {:.3f} | F-Acc {:.3f}| R-Loss {:.3f} | R-Acc {:.3f} | V-Loss {:.3f} | V-Acc {:.3f}'.
                    format(morphIdx, trainLosses[morphIdx][-1][0], trainLosses[morphIdx][-1][1], trainLosses[morphIdx][-1][2], trainLosses[morphIdx][-1][3], trainLosses[morphIdx][-1][4], trainLosses[morphIdx][-1][5]))


    print('Epoch {} finished in {:.1f}s'.format(epoch, time.time() - epoch_t0))
    torch.save(gnn.state_dict(), 'mixed-delta-validTransition-withVelocity-rightWayAround.pt')


Starting Epoch 0
 ** Testing ** 
0: F-Loss 0.705 | F-Acc 0.163| R-Loss 0.683 | R-Acc 0.566 | V-Loss 0.660 | V-Acc 0.878
1: F-Loss 0.682 | F-Acc 0.610| R-Loss 0.695 | R-Acc 0.429 | V-Loss 0.687 | V-Acc 0.475
2: F-Loss 0.680 | F-Acc 0.666| R-Loss 0.701 | R-Acc 0.275 | V-Loss 0.685 | V-Acc 0.580
3: F-Loss 0.688 | F-Acc 0.412| R-Loss 0.694 | R-Acc 0.451 | V-Loss 0.677 | V-Acc 0.727
4: F-Loss 0.681 | F-Acc 0.565| R-Loss 0.701 | R-Acc 0.314 | V-Loss 0.680 | V-Acc 0.696
5: F-Loss 0.691 | F-Acc 0.381| R-Loss 0.691 | R-Acc 0.477 | V-Loss 0.680 | V-Acc 0.640
Batch 0 in 1.55s
0: F-Loss 0.713 | F-Acc 0.155| R-Loss 0.691 | R-Acc 0.553 | V-Loss 0.668 | V-Acc 0.879
1: F-Loss 0.689 | F-Acc 0.615| R-Loss 0.702 | R-Acc 0.440 | V-Loss 0.694 | V-Acc 0.472
2: F-Loss 0.686 | F-Acc 0.688| R-Loss 0.708 | R-Acc 0.265 | V-Loss 0.692 | V-Acc 0.590
3: F-Loss 0.695 | F-Acc 0.437| R-Loss 0.703 | R-Acc 0.442 | V-Loss 0.685 | V-Acc 0.705
4: F-Loss 0.687 | F-Acc 0.582| R-Loss 0.708 | R-Acc 0.325 | V-Loss 0.687 | V-Acc

KeyboardInterrupt: 

In [12]:
torch.save(gnn.state_dict(), 'mixed-delta-validTransition-withVelocity-rightWayAround.pt')

In [11]:
gnn.load_state_dict(torch.load('mixed-delta-validTransition.pt'))

<All keys matched successfully>

In [12]:
backwardLosses = {}

for morphIdx in trainingIdxs:
    
    backwardLosses[morphIdx] = np.zeros(2)
        
    with torch.no_grad():
        
        for batch_ in range(0, numTestingBatches-1):

            # Get new graphs for each iteration
            g1 = env[morphIdx].get_graph()._get_dgl_graph()

            current_states = states_test[morphIdx][batch_ * batch_size:(batch_+1)*batch_size]
            forward_states = next_states_test[morphIdx][batch_ * batch_size:(batch_+1)*batch_size]
            backward_x = torch.cat((forward_states, forward_states - current_states), -1).to(device)
            predicted_sigmoids = gnn(g1, backward_x)
            backward_loss = binaryLoss(predicted_sigmoids, zeroTensor)

            # Save Forward Loss and Accuracy
            backwardLosses[morphIdx][0] += backward_loss.item()
            backwardLosses[morphIdx][1] += torch.eq(zeroTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)

        backwardLosses[morphIdx] /= numTestingBatches
            
print(backwardLosses)

{0: array([0.62854996, 0.82670998]), 1: array([0.03162608, 0.98389668]), 2: array([0.02427717, 0.98458426]), 3: array([0.04707087, 0.98125598]), 4: array([0.07193574, 0.97553611]), 5: array([0.06604599, 0.97768854])}


In [13]:
velocityChangeLosses = {}
num_samples = 1024
num_states = 100

for morphIdx in trainingIdxs:
    velocityChangeLosses[morphIdx] = np.zeros(2)
        
    with torch.no_grad():
        
                    
        state_indeces = np.random.choice(np.arange(states_test[morphIdx].shape[0]), size=num_states)

        for state_idx in state_indeces:

            # Get new graphs for each iteration
            g1 = env[morphIdx].get_graph()._get_dgl_graph()

            current_states = states_test[morphIdx][state_idx].repeat(num_samples, 1)
#             velocities = torch.from_numpy(np.linspace(start=0, stop=2, num=num_samples))
            random_velocities = torch.from_numpy(np.random.normal(loc=0, scale=1.0, size=batch_size))
            velcocity_changed_states = torch.zeros(batch_size, current_states.shape[-1])
            velcocity_changed_states[:, 0] = random_velocities
            velocities_changed_x = torch.cat((current_states, velcocity_changed_states), -1).to(device)
            predicted_sigmoids = gnn(g1, velocities_changed_x)
            velocity_changed_loss = binaryLoss(predicted_sigmoids, zeroTensor)
            
#             print(predicted_sigmoids)
            
            # Save Forward Loss and Accuracy
            velocityChangeLosses[morphIdx][0] += velocity_changed_loss.item()
            velocityChangeLosses[morphIdx][1] += torch.eq(zeroTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)

        velocityChangeLosses[morphIdx] /= num_states
            
print(velocityChangeLosses)

{0: array([0.19854589, 0.9530957 ]), 1: array([0.03052252, 0.99081055]), 2: array([0.00587696, 0.99836914]), 3: array([0.00777521, 0.99623047]), 4: array([0.15561728, 0.96998047]), 5: array([0.08751803, 0.98418945])}


In [14]:
forward_losses = {}

for morphIdx in trainingIdxs:
    
    forward_losses[morphIdx] = np.zeros(2)
        
    with torch.no_grad():
        
        for batch_ in range(0, numTestingBatches-1):

            # Get new graphs for each iteration
            g1 = env[morphIdx].get_graph()._get_dgl_graph()

            current_states = states_test[morphIdx][batch_ * batch_size:(batch_+1)*batch_size]
            forward_states = next_states_test[morphIdx][batch_ * batch_size:(batch_+1)*batch_size]
            forward_x = torch.cat((current_states, current_states - forward_states), -1).to(device)
            predicted_sigmoids = gnn(g1, forward_x)
            forward_loss = binaryLoss(predicted_sigmoids, oneTensor)

            # Save Forward Loss and Accuracy
            forward_losses[morphIdx][0] += forward_loss.item()
            forward_losses[morphIdx][1] += torch.eq(oneTensor, torch.round(predicted_sigmoids)).sum().item() / float(batch_size)

        forward_losses[morphIdx] /= numTestingBatches
            
print(forward_losses)

{0: array([0.00762939, 0.98811185]), 1: array([0.00558225, 0.98852041]), 2: array([0.00624063, 0.98839086]), 3: array([0.00531584, 0.98840083]), 4: array([0.0068727 , 0.98844069]), 5: array([0.00683172, 0.98847058])}
