In [1]:
%config Completer.use_jedi = False

In [2]:
import numpy as np
import gym
import imageio
from collections import deque
import random
import torch.autograd
import os
import math
import time
from scipy.ndimage.filters import uniform_filter1d
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import sys
import pickle
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import matplotlib.markers as markers
import pybullet as p 
if torch.cuda.is_available():
    device = torch.device("cuda:0")  # you can continue going on here, like cuda:1 cuda:2....etc. 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")
import networkx as nx
from tqdm import tqdm
import dgl
import morphsim as m
from graphenvs import HalfCheetahGraphEnv
import itertools
import queue
import graph_tool as gt
from graph_tool.search import bfs_search, BFSVisitor


%matplotlib widget
torch.no_grad()

Running on the GPU


Using backend: pytorch


<torch.autograd.grad_mode.no_grad at 0x7f74045750a0>

In [3]:
class NetworkInverseDynamics(nn.Module):
    def __init__(
        self,
        input_size,
        output_size,
        hidden_sizes,
        with_batch_norm=False,
        activation=None,
        probabilistic = False
    ):
        super(NetworkInverseDynamics, self).__init__()
        self.hidden_sizes = hidden_sizes
        self.input_size = input_size
        self.output_size = output_size
        self.probabilistic = probabilistic
        self.hidden_layers = nn.ModuleList()

        self.hidden_layers.append(nn.Linear(self.input_size, hidden_sizes[0]))
        if with_batch_norm:
            self.hidden_layers.append(nn.LayerNorm(normalized_shape=(hidden_sizes[0])))
        self.hidden_layers.append(nn.ReLU())
        
        for i in range(len(hidden_sizes) - 1):
            self.hidden_layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i+1]))
            if with_batch_norm:
                self.hidden_layers.append(nn.LayerNorm(normalized_shape=(hidden_sizes[i+1])))
            self.hidden_layers.append(nn.ReLU())
        
        if probabilistic:
            self.output_size = self.output_size * 2
        self.final_layer = nn.Linear(hidden_sizes[len(hidden_sizes) - 1], self.output_size)
        
        self.final_activation = None
        if activation is not None:
            self.final_activation = activation()
            
    def forward(self, x):
        out = x
        
        for layer in self.hidden_layers:
            out = layer(out)
            
        out = self.final_layer(out)
            
        if self.final_activation:
            if self.probabilistic:
                out[:, :, 0] = self.final_activation(out[:, :, 0])

            else:
                out = self.final_activation(out)
        return out


In [4]:
class GNNInverseDynamics(nn.Module):
    def __init__(
        self,
        inputNetwork,
        messageNetwork,
        updateNetwork,
        outputNetwork,
        numMessagePassingIterations,
        withInputNetwork = True
    ):
        
        super(GNNInverseDynamics, self).__init__()
                
        self.inputNetwork = inputNetwork
        self.messageNetwork = messageNetwork
        self.updateNetwork = updateNetwork
        self.outputNetwork = outputNetwork
        
        self.numMessagePassingIterations = numMessagePassingIterations
        self.withInputNetwork = withInputNetwork
        
    def inputFunction(self, nodes):
        return {'state' : self.inputNetwork(nodes.data['input'])}
    
    def messageFunction(self, edges):
        
        batchSize = edges.src['state'].shape[1]
        edgeData = edges.data['feature'].repeat(batchSize, 1).T.unsqueeze(-1)
        nodeInput = edges.src['input']
        
        return {'m' : self.messageNetwork(torch.cat((edges.src['state'], edgeData, nodeInput), -1))}
    
    def updateFunction(self, nodes):
        return {'state': self.updateNetwork(torch.cat((nodes.data['m_hat'], nodes.data['state']), -1))}
    
    def outputFunction(self, nodes):
        
        return {'output': self.outputNetwork(nodes.data['state'])}


    def forward(self, graph, state):
        
        self.update_states_in_graph(graph, state)
        
        if self.withInputNetwork:
            graph.apply_nodes(self.inputFunction)
        
        for messagePassingIteration in range(self.numMessagePassingIterations):
            graph.update_all(self.messageFunction, dgl.function.mean('m', 'm_hat'), self.updateFunction)
        
        graph.apply_nodes(self.outputFunction)
        
        output = graph.ndata['output']
        output = torch.transpose(output, dim0=0, dim1=1).squeeze(-1)

        return output
    
    def update_states_in_graph(self, graph, state):
        if len(state.shape) == 1:
            state = state.unsqueeze(0)
        
        numGraphFeature = 6
        numGlobalStateInformation = 5
        numLocalStateInformation = 2
        numStateVar = state.shape[1] // 2
        globalInformation = torch.cat((state[:, 0:5], state[:, numStateVar:numStateVar+5]), -1)
        
        numNodes = (numStateVar - 5) // 2

        nodeData = torch.empty((numNodes, state.shape[0], numGraphFeature + 2 * numGlobalStateInformation + 2 * numLocalStateInformation)).to(device)
        for nodeIdx in range(numNodes):

            # Assign global features from graph
            nodeData[nodeIdx, :, :6] = graph.ndata['feature'][nodeIdx]
            # Assign local state information
            nodeData[nodeIdx, :, 16] = state[:, 5 + nodeIdx]
            nodeData[nodeIdx, :, 17] = state[:, 5 + numNodes + nodeIdx]
            nodeData[nodeIdx, :, 18] = state[:, numStateVar + 5 + nodeIdx]
            nodeData[nodeIdx, :, 19] = state[:, numStateVar + 5 + numNodes + nodeIdx]

        # Assdign global state information
        nodeData[:, :, 6:16] = globalInformation
        
        if self.withInputNetwork:
            graph.ndata['input'] = nodeData        
        
        else:
            graph.ndata['state'] = nodeData


In [5]:
class NetworkTransitionVerifier(nn.Module):
    def __init__(
        self,
        input_size,
        output_size,
        hidden_sizes,
        with_batch_norm=False,
        activation=None
    ):
        super(NetworkTransitionVerifier, self).__init__()
        self.hidden_sizes = hidden_sizes
        self.input_size = input_size
        self.output_size = output_size
        
        self.layers = nn.ModuleList()

        self.layers.append(nn.Linear(self.input_size, hidden_sizes[0]))
        if with_batch_norm:
            self.layers.append(nn.LayerNorm(normalized_shape=(hidden_sizes[0])))
        self.layers.append(nn.ReLU())
        
        for i in range(len(hidden_sizes) - 1):
            self.layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i+1]))
            if with_batch_norm:
                self.layers.append(nn.LayerNorm(normalized_shape=(hidden_sizes[i+1])))
            self.layers.append(nn.ReLU())
        
        self.layers.append(nn.Linear(hidden_sizes[len(hidden_sizes) - 1], self.output_size))
        
        if activation is not None:
            self.layers.append(activation())
            
    def forward(self, x):
        out = x
        
        for layer in self.layers:
            out = layer(out)
            
        return out


In [6]:
class GNNTransitionVerifier(nn.Module):
    def __init__(
        self,
        inputNetwork,
        messageNetwork,
        updateNetwork,
        outputNetwork,
        numMessagePassingIterations,
        withInputNetwork = True
    ):
        
        super(GNNTransitionVerifier, self).__init__()
                
        self.inputNetwork = inputNetwork
        self.messageNetwork = messageNetwork
        self.updateNetwork = updateNetwork
        self.outputNetwork = outputNetwork
        
        self.numMessagePassingIterations = numMessagePassingIterations
        self.withInputNetwork = withInputNetwork
        
    def inputFunction(self, nodes):
        return {'state' : self.inputNetwork(nodes.data['input'])}
    
    def messageFunction(self, edges):
        
        batchSize = edges.src['state'].shape[1]
        edgeData = edges.data['feature'].repeat(batchSize, 1).T.unsqueeze(-1)
        nodeInput = edges.src['input']
        
        return {'m' : self.messageNetwork(torch.cat((edges.src['state'], edgeData, nodeInput), -1))}
    
    def updateFunction(self, nodes):
        return {'state': self.updateNetwork(torch.cat((nodes.data['m_hat'], nodes.data['state']), -1))}
    
    def outputFunction(self, nodes):
        
        return {'output': self.outputNetwork(nodes.data['state'])}


    def forward(self, graph, state):
        
        self.update_states_in_graph(graph, state)
        
        if self.withInputNetwork:
            graph.apply_nodes(self.inputFunction)
        
        for messagePassingIteration in range(self.numMessagePassingIterations):
            graph.update_all(self.messageFunction, dgl.function.mean('m', 'm_hat'), self.updateFunction)
        
        graph.apply_nodes(self.outputFunction)
        
        output = graph.ndata['output']
        output = output.squeeze(-1).mean(0)
                
        return output
    
    def update_states_in_graph(self, graph, state):
        if len(state.shape) == 1:
            state = state.unsqueeze(0)
        
        numGraphFeature = 6
        numGlobalStateInformation = 5
        numLocalStateInformation = 2
        numStateVar = state.shape[1] // 2
        globalInformation = torch.cat((state[:, 0:5], state[:, numStateVar:numStateVar+5]), -1)
        
        numNodes = (numStateVar - 5) // 2

        nodeData = torch.empty((numNodes, state.shape[0], numGraphFeature + 2 * numGlobalStateInformation + 2 * numLocalStateInformation)).to(device)
        for nodeIdx in range(numNodes):

            # Assign global features from graph
            nodeData[nodeIdx, :, :6] = graph.ndata['feature'][nodeIdx]
            # Assign local state information
            nodeData[nodeIdx, :, 16] = state[:, 5 + nodeIdx]
            nodeData[nodeIdx, :, 17] = state[:, 5 + numNodes + nodeIdx]
            nodeData[nodeIdx, :, 18] = state[:, numStateVar + 5 + nodeIdx]
            nodeData[nodeIdx, :, 19] = state[:, numStateVar + 5 + numNodes + nodeIdx]

        # Assdign global state information
        nodeData[:, :, 6:16] = globalInformation
        
        if self.withInputNetwork:
            graph.ndata['input'] = nodeData        
        
        else:
            graph.ndata['state'] = nodeData


In [7]:
class NetworkAutoEncoder(nn.Module):
    def __init__(
        self,
        input_size,
        output_size,
        hidden_sizes,
        batch_size=256, # Needed only for batch norm
        with_batch_norm=False,
        activation=None
    ):
        super(NetworkAutoEncoder, self).__init__()
        self.hidden_sizes = hidden_sizes
        self.input_size = input_size
        self.output_size = output_size
        
        self.layers = nn.ModuleList()

        self.layers.append(nn.Linear(self.input_size, hidden_sizes[0]))
        if with_batch_norm:
#             self.layers.append(nn.BatchNorm1d(batch_size))
            self.layers.append(nn.LayerNorm(normalized_shape=(hidden_sizes[0])))
        self.layers.append(nn.ReLU())
        
        for i in range(len(hidden_sizes) - 1):
            self.layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i+1]))
            if with_batch_norm:
#                 self.layers.append(nn.BatchNorm1d(batch_size))
                self.layers.append(nn.LayerNorm(normalized_shape=(hidden_sizes[i+1])))
            self.layers.append(nn.ReLU())
        
        self.layers.append(nn.Linear(hidden_sizes[len(hidden_sizes) - 1], self.output_size))
        
        if activation is not None:
            self.layers.append(activation())
            
    def forward(self, x):
        out = x
        
        for layer in self.layers:
            out = layer(out)
            
        return out

In [8]:
class GraphNeuralNetworkAutoEncoder(nn.Module):
    def __init__(
            self,
            inputNetwork,
            messageNetwork,
            updateNetwork,
            outputNetwork,
            numMessagePassingIterations,
            encoder=True
    ):

        super(GraphNeuralNetworkAutoEncoder, self).__init__()

        self.inputNetwork = inputNetwork
        self.messageNetwork = messageNetwork
        self.updateNetwork = updateNetwork
        self.outputNetwork = outputNetwork

        self.numMessagePassingIterations = numMessagePassingIterations
        self.encoder = encoder

    def inputFunction(self, nodes):
        return {'state': self.inputNetwork(nodes.data['input'])}

    def messageFunction(self, edges):

        batchSize = edges.src['state'].shape[1]
        edgeData = edges.data['feature'].repeat(batchSize, 1).T.unsqueeze(-1)
        nodeInput = edges.src['input']

        #         print(edges.src['state'].shape)
        #         print(nodeInput.shape)
        return {'m': self.messageNetwork(torch.cat((edges.src['state'], edgeData, nodeInput), -1))}

    def updateFunction(self, nodes):
        return {'state': self.updateNetwork(torch.cat((nodes.data['m_hat'], nodes.data['state']), -1))}

    def outputFunction(self, nodes):

        #         numNodes, batchSize, stateSize = graph.ndata['state'].shape
        #         return self.outputNetwork.forward(graph.ndata['state'])
        return {'output': self.outputNetwork(nodes.data['state'])}

    def forward(self, graph, state):

        self.update_states_in_graph(graph, state)

        graph.apply_nodes(self.inputFunction)

        for messagePassingIteration in range(self.numMessagePassingIterations):
            graph.update_all(self.messageFunction, dgl.function.max('m', 'm_hat'), self.updateFunction)

        graph.apply_nodes(self.outputFunction)

        output = graph.ndata['output']

        if self.encoder:
            output = F.normalize(output, dim=-1)

        return output

    def update_states_in_graph(self, graph, state):

        if self.encoder:
            if len(state.shape) == 1:
                state = state.unsqueeze(0)

            numGraphFeature = 6
            numGlobalStateInformation = 5
            numLocalStateInformation = 2
            numStateVar = state.shape[1]
            globalInformation = state[:, 0:5]
            batch_size = state.shape[0]
            numNodes = (numStateVar - 5) // 2

            nodeData = torch.empty(
                (numNodes, batch_size, numGraphFeature + numGlobalStateInformation + numLocalStateInformation)).to(
                device)

            nodeData[:, :, 0:numGlobalStateInformation] = globalInformation
            for nodeIdx in range(numNodes):
                # Assign local state information
                nodeData[nodeIdx, :, numGlobalStateInformation] = state[:, 5 + nodeIdx]
                nodeData[nodeIdx, :, numGlobalStateInformation + 1] = state[:, 5 + numNodes + nodeIdx]
                # Assign global features from graph
                nodeData[nodeIdx, :, numGlobalStateInformation + 2: numGlobalStateInformation + 2 + numGraphFeature] = \
                graph.ndata['feature'][nodeIdx]

            graph.ndata['input'] = nodeData

        else:
            numNodes, batchSize, inputSize = state.shape
            nodeData = torch.empty((numNodes, batchSize, inputSize + 6)).to(device)
            nodeData[:, :, :inputSize] = state
            nodeData[:, :, inputSize: inputSize + 6] = graph.ndata['feature'].unsqueeze(dim=1).repeat_interleave(
                batchSize, dim=1)
            #             for nodeIdx in range(numNodes):
            #                 nodeData[nodeIdx, :, inputSize : inputSize + 6] = graph.ndata['feature'][nodeIdx]

            graph.ndata['input'] = nodeData

In [9]:
states = {}
actions = {}
rewards = {}
next_states = {}
dones = {}
env = {}

for morphIdx in [4]:

    prefix = '../datasets-old-run//{}/'.format(morphIdx)
    
    states[morphIdx] = torch.from_numpy(np.load(prefix + 'states_array.npy'))
    actions[morphIdx] = torch.from_numpy(np.load(prefix + 'actions_array.npy'))
    rewards[morphIdx] = torch.from_numpy(np.load(prefix + 'rewards_array.npy'))
    next_states[morphIdx] = torch.from_numpy(np.load(prefix + 'next_states_array.npy'))
    dones[morphIdx] = torch.from_numpy(np.load(prefix + 'dones_array.npy'))
    
    env[morphIdx] = HalfCheetahGraphEnv(None)
    env[morphIdx].set_morphology(morphIdx)
    env[morphIdx].reset()

NoneType: None


None
*************************************************************************************************************


In [108]:
mean_state = states[morphIdx].mean(0)
std_state  = states[morphIdx].mean(0)

In [11]:
def getNewGraph(env):
    return env.get_graph()._get_dgl_graph()

In [12]:
def fibonacci_sphere(samples=1):

    points = []
    phi = math.pi * (3. - math.sqrt(5.))  # golden angle in radians

    for i in range(samples):
        y = 1 - (i / float(samples - 1)) * 2  # y goes from 1 to -1
        radius = math.sqrt(1 - y * y)  # radius at y

        theta = phi * i  # golden angle increment

        x = math.cos(theta) * radius
        z = math.sin(theta) * radius

        points.append((x, y, z))

    return np.array(points)

In [13]:
hidden_sizes = [256, 256]

inputSize = 13
stateSize = 64
messageSize = 64
latentSize = 3
numMessagePassingIterations = 6
with_batch_norm = True

# # Encoder Networks
encoderInputNetwork = NetworkAutoEncoder(inputSize, stateSize, hidden_sizes, with_batch_norm=with_batch_norm)
encoderMessageNetwork = NetworkAutoEncoder(stateSize + inputSize + 1, messageSize, hidden_sizes, with_batch_norm=with_batch_norm, activation=nn.Tanh)
encoderUpdateNetwork = NetworkAutoEncoder(stateSize + messageSize, stateSize, hidden_sizes, with_batch_norm=with_batch_norm)
encoderOutputNetwork = NetworkAutoEncoder(stateSize, latentSize, hidden_sizes, with_batch_norm=with_batch_norm)
encoderGNN = GraphNeuralNetworkAutoEncoder(encoderInputNetwork, encoderMessageNetwork, encoderUpdateNetwork, encoderOutputNetwork, numMessagePassingIterations, encoder=True).to(device)

# # Decoder Networks
decoderInputNetwork = NetworkAutoEncoder(latentSize + 6, stateSize, hidden_sizes, with_batch_norm=with_batch_norm)
decoderMessageNetwork = NetworkAutoEncoder(stateSize + latentSize + 7, messageSize, hidden_sizes, with_batch_norm=with_batch_norm, activation=nn.Tanh)
decoderUpdateNetwork = NetworkAutoEncoder(stateSize + messageSize, stateSize, hidden_sizes, with_batch_norm=with_batch_norm)
decoderOutputNetwork = NetworkAutoEncoder(stateSize, 7, hidden_sizes, with_batch_norm=with_batch_norm)
decoderGNN = GraphNeuralNetworkAutoEncoder(decoderInputNetwork, decoderMessageNetwork, decoderUpdateNetwork, decoderOutputNetwork, numMessagePassingIterations, encoder=False).to(device)

# encoderGNN.load_state_dict(torch.load('../models/new/4-latent-single-GNN-AutoEncoder/5/0.0-1.5/encoderGNN.pt'))
# decoderGNN.load_state_dict(torch.load('../models/new/4-latent-single-GNN-AutoEncoder/5/0.0-1.5/decoderGNN.pt'))

print(encoderGNN.load_state_dict(torch.load('../models/new/3-latent-single-GNN-AutoEncoder/4/0.4-1.33/' + 'encoderGNN.pt')))
print(decoderGNN.load_state_dict(torch.load('../models/new/3-latent-single-GNN-AutoEncoder/4/0.4-1.33/' + 'decoderGNN.pt')))

# print(encoderGNN.load_state_dict(torch.load('../models/new/3-latent-single-GNN-AutoEncoder/5/no-contrastive/' + 'encoderGNN.pt')))
# print(decoderGNN.load_state_dict(torch.load('../models/new/3-latent-single-GNN-AutoEncoder/5/no-contrastive/' + 'decoderGNN.pt')))


# print(encoderGNN.load_state_dict(torch.load('../models/new/3-latent-single-GNN-AutoEncoder/5/no-contrastive/' + 'encoderGNN.pt')))
# print(decoderGNN.load_state_dict(torch.load('../models/new/3-latent-single-GNN-AutoEncoder/5/no-contrastive/' + 'decoderGNN.pt')))


<All keys matched successfully>
<All keys matched successfully>


In [14]:
hidden_sizes = [256, 256]

inputSize = 20
stateSize = 64
messageSize = 64
outputSize = 1
numMessagePassingIterations = 6
with_batch_norm = True

inputNetwork = NetworkTransitionVerifier(inputSize, stateSize, hidden_sizes, with_batch_norm)
messageNetwork = NetworkTransitionVerifier(stateSize + inputSize + 1, messageSize, hidden_sizes, with_batch_norm, nn.Tanh)
updateNetwork = NetworkTransitionVerifier(stateSize + messageSize, stateSize, hidden_sizes, with_batch_norm)
outputNetwork = NetworkTransitionVerifier(stateSize, outputSize, hidden_sizes, with_batch_norm, nn.Sigmoid)

transitionVerifier = GNNTransitionVerifier(inputNetwork, messageNetwork, updateNetwork, outputNetwork, numMessagePassingIterations).to(device)
transitionVerifier.load_state_dict(torch.load('mixed-delta-validTransition.pt'))

<All keys matched successfully>

In [15]:
hidden_sizes = [256, 256]

inputSize = 20
stateSize = 64
messageSize = 64
outputSize = 1
numMessagePassingIterations = 6
with_batch_norm = True

inputNetwork = NetworkInverseDynamics(inputSize, stateSize, hidden_sizes, with_batch_norm)
messageNetwork = NetworkInverseDynamics(stateSize + inputSize + 1, messageSize, hidden_sizes, with_batch_norm, nn.Tanh)
updateNetwork = NetworkInverseDynamics(stateSize + messageSize, stateSize, hidden_sizes, with_batch_norm)
outputNetwork = NetworkInverseDynamics(stateSize, outputSize, hidden_sizes, with_batch_norm, nn.Tanh, probabilistic=True)

inverseDynamics = GNNInverseDynamics(inputNetwork, messageNetwork, updateNetwork, outputNetwork, numMessagePassingIterations).to(device)
inverseDynamics.load_state_dict(torch.load('../models/new/inverseDynamics-single/4/attempt-1/de0'))

<All keys matched successfully>

In [16]:
def reconstructStateFromGraph(graph_data):
    num_nodes, batch_size, data_size = graph_data.shape
    output = torch.empty((batch_size, 5 + 2 * num_nodes))
    output[:, :5] = graph_data[:, :, :5].mean(dim=0)
    output[:, 5 : 5 + num_nodes] = graph_data[:, :, 5].T
    output[:, 5 + num_nodes:] = graph_data[:, :, 6].T
    
    return output

In [17]:
if fig:
    plt.close(fig)
fig = plt.figure()
within_range_list = torch.tensor(within_range_list)
plt.plot(np.arange(within_range_list.size(0)), uniform_filter1d(within_range_list, size=128))
plt.show()

NameError: name 'fig' is not defined

In [90]:
# name = '1e5'
# folder = 'encoding-graphs/new/4/'

# torch.save(saved_encodings, folder + 'saved_encodings_' + name)
# torch.save(saved_states, folder + 'saved_states_' + name)
# states_graph.save(folder + name + '.gt')

In [93]:
name = '1e5'
folder = 'encoding-graphs/new/4/'

saved_encodings = torch.load(folder + 'saved_encodings_' + name)
saved_states = torch.load(folder + 'saved_states_' + name)
states_graph = gt.load_graph(folder + name + '.gt')

In [34]:
all_distances = []
saved_states = saved_states[torch.randperm(saved_states.size(0))]
for new_state in tqdm(saved_states):
    
    state_distances = (states[morphIdx] - new_state) ** 2
    lowest_dst_idx = torch.argmin(state_distances.mean(-1))
    all_distances.append(state_distances[lowest_dst_idx].cpu())
# all_distances = torch.stack(all_distances).numpy()
fig, ax = plt.subplots()
ax.boxplot(np.log10(all_distances + 1e-6))
fig.show()

100%|██████████| 1000011/1000011 [1:03:32<00:00, 262.27it/s]


In [46]:
largest_deviation = []
mses = []
# saved_states = saved_states[torch.randperm(saved_states.size(0))]
for new_state in tqdm(saved_states):
    
    state_distances = (states[morphIdx] - new_state) ** 2
    lowest_dst_idx = torch.argmin(state_distances.mean(-1))
    largest_deviation.append(state_distances[lowest_dst_idx].max().cpu())
    mses.append(state_distances[lowest_dst_idx].mean(-1).cpu())
largest_deviation = np.log10(torch.stack(largest_deviation).numpy()) + 1e-6
mses = np.log10(torch.stack(mses).numpy()) + 1e-6
fig, ax = plt.subplots()
ax.scatter(mses, largest_deviation)
fig.show()

  3%|▎         | 32671/1000011 [02:05<1:01:44, 261.14it/s]


KeyboardInterrupt: 

In [37]:
fig.savefig('graph-state-differences-visualized.png')

In [39]:
# Visualize whole graph on sphere

# plt.close(fig)
fig = plt.figure()

x = saved_encodings[:, 0, 0]
y = saved_encodings[:, 0, 1]
z = saved_encodings[:, 0, 2]

ax = plt.axes(projection ='3d')
ax.scatter3D(x, y, z, alpha=0.1)
# for idx in range(100):
#     ax.scatter3D(succesful_encodings[-idx, 0, 0], succesful_encodings[-idx, 0, 1], succesful_encodings[-idx, 0, 2], c='red')

# for idx in range(100):
#     ax.scatter3D(succesful_encodings[idx, 0, 0], succesful_encodings[idx, 0, 1], succesful_encodings[idx, 0, 2], c='green')

# for idx in indices:
#     ax.scatter3D(saved_encodings[idx, 0, 0], saved_encodings[idx, 0, 1], saved_encodings[idx, 0, 2], c='green')

ax.view_init(elev=0, azim=45)
fig.savefig('1-graph-sphere.png')

ax.view_init(elev=30, azim=90)
fig.savefig('2-graph-sphere.png')


ax.view_init(elev=15, azim=0)
fig.savefig('3-graph-sphere.png')

ax.view_init(elev=90, azim=0)
fig.savefig('4-graph-sphere.png')

fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [40]:
distances = torch.cdist(saved_encodings, saved_encodings).mean(-1).mean(-1).numpy()
fig, axs = plt.subplots(2)
axs[0].plot(np.arange(distances.shape[0]), np.log10(uniform_filter1d(distances, size=4096)))
axs[1].boxplot(np.log10(distances))
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [44]:
# Visualize all dimensions of a specific encoding
try:
    plt.close(fig)
except:
    pass
fig = plt.figure()


if not torch.is_tensor(saved_encodings):
    saved_encodings = torch.stack(saved_encodings)

batch_size, num_nodes, dimensionality = saved_encodings.shape
ax = plt.axes(projection ='3d')
random_idx = saved_encodings.size(0) - 1
sphere_skeleton = fibonacci_sphere(1000)

ax.scatter3D(sphere_skeleton[:, 0], sphere_skeleton[:, 1], sphere_skeleton[:, 2], alpha=0.02)

for node in range(num_nodes):
    
    random_idx1 = saved_encodings.size(0) - 1
    x = saved_encodings[random_idx1, node, 0]
    y = saved_encodings[random_idx1, node, 1]
    z = saved_encodings[random_idx1, node, 2]
    ax.scatter3D(x, y, z, c='blue')
    
    random_idx2 = saved_encodings.size(0) - 2
    x = saved_encodings[random_idx2, node, 0]
    y = saved_encodings[random_idx2, node, 1]
    z = saved_encodings[random_idx2, node, 2]
    ax.scatter3D(x, y, z, c='red')
    
ax.view_init(elev=0, azim=45)
fig.savefig('one-state-all-dimensions/view1.png')

ax.view_init(elev=30, azim=90)
fig.savefig('one-state-all-dimensions/view2.png')


ax.view_init(elev=15, azim=0)
fig.savefig('one-state-all-dimensions/view3.png')

ax.view_init(elev=90, azim=0)
fig.savefig('one-state-all-dimensions/view4.png')

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [153]:
def addNodeToGraph(new_state, new_encoding, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, 
                   transitionVerifier, device, env, morphIdx, alpha=0.4, batch_size=2048, transition_threshold=0.9, verbose=False, dry_run=False):
    
    with torch.no_grad():
        
        if new_encoding is None:
            g = getNewGraph(env[morphIdx]).to(device)
            new_encoding = encoderGNN(g, new_state.unsqueeze(0)).squeeze(1).cpu()
        
        if saved_encodings.is_cuda:
            new_state = new_state.cuda()
            new_encoding = new_encoding.cuda()

        # Check if node already exists in the graph

#         smallest_mse_exisiting_states = ((saved_states - new_state) ** 2).mean(-1).min()
#         if smallest_mse_exisiting_states <= 1e-6:
#             print('State already exists')
#             return

        # tensor of size (len(saved_encodings),)
        encoding_distances = torch.norm(saved_encodings - new_encoding, dim=-1).mean(-1)
        boolean_indeces_within_range = encoding_distances <= alpha
        indeces_within_range = torch.arange(end=saved_encodings.size(0))[boolean_indeces_within_range]
        states_within_range = saved_states[indeces_within_range]
                
        num_batches = int(np.ceil((states_within_range.size(0) / batch_size)))
        
        out_neighbors = []
        in_neighbors = []
        
        for batch in range(num_batches):
            
            transition_states = states_within_range[batch * batch_size : (batch + 1) * batch_size]
            
            forward_transitions = new_state.repeat(transition_states.size(0), 2)
            forward_transitions [:, forward_transitions.size(1) // 2:] -= transition_states
            
            backward_transitions = transition_states.repeat(1, 2)
            backward_transitions [:, backward_transitions.size(1) // 2:] -= new_state
                
            possibile_batch_indeces = torch.arange(start=batch * batch_size, end=min((batch + 1) * batch_size, states_within_range.size(0)))

            g = getNewGraph(env[morphIdx]).to(device)
            probabilities_forward = transitionVerifier(g, forward_transitions.to(device))
            boolean_valid_forward_indeces = probabilities_forward > transition_threshold
            valid_forward_batch_indeces = possibile_batch_indeces[boolean_valid_forward_indeces]
            valid_forward_global_indeces = indeces_within_range[valid_forward_batch_indeces]
            
            g = getNewGraph(env[morphIdx]).to(device)
            probabilities_backward = transitionVerifier(g, backward_transitions.to(device))
            boolean_valid_backward_indeces = probabilities_backward > transition_threshold
            valid_backward_batch_indeces = possibile_batch_indeces[boolean_valid_backward_indeces]
            valid_backward_global_indeces = indeces_within_range[valid_backward_batch_indeces]

            out_neighbors.extend(valid_forward_global_indeces.tolist())
            in_neighbors.extend(valid_backward_global_indeces.tolist())
    
    
    if verbose:
        print('****')
        print('States-within-Range:', states_within_range.size(0))
        print('In-Degree:', len(in_neighbors))
        print('In-Succes-Rate: {:.4f}'.format(len(in_neighbors) / states_within_range.size(0)))
        print('Out-Degree:', len(out_neighbors))
        print('Out-Succes-Rate: {:.4f}'.format(len(out_neighbors) / states_within_range.size(0)))
    
    new_state_idx = states_graph.num_vertices()
    if not dry_run:
        
        saved_encodings = torch.cat((saved_encodings, new_encoding.unsqueeze(0)), dim=0)
        saved_states = torch.cat((saved_states, new_state.unsqueeze(0)), dim=0)
        states_graph.add_vertex()
        
        for neighbour_idx in out_neighbors:
            states_graph.add_edge(new_state_idx, neighbour_idx)

        for neighbour_idx in in_neighbors:
            states_graph.add_edge(neighbour_idx, new_state_idx)
    
    no_edges_in = len(in_neighbors) == 0
    no_edges_out = len(in_neighbors) == 0
    return saved_encodings, saved_states, states_graph, new_state_idx, no_edges_in, no_edges_out

In [224]:
def addNodeToGraphWithPath(new_state, new_encoding, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, 
                           transitionVerifier, device, env, morphIdx, alpha=0.4, batch_size=2048, transition_threshold=0.9, 
                           verbose=False, dry_run=False, goal_index=None, transition_threshold_modifier=0.02, lowest_allowed_threshold=0.5):
    
    with torch.no_grad():
        
        if new_encoding is None:
            g = getNewGraph(env[morphIdx]).to(device)
            new_encoding = encoderGNN(g, new_state.unsqueeze(0)).squeeze(1).cpu()
        
        if saved_encodings.is_cuda:
            new_state = new_state.cuda()
            new_encoding = new_encoding.cuda()


        # tensor of size (len(saved_encodings),)
        encoding_distances = torch.norm(saved_encodings - new_encoding, dim=-1).mean(-1)
        boolean_indeces_within_range = encoding_distances <= alpha
        indeces_within_range = torch.arange(end=saved_encodings.size(0))[boolean_indeces_within_range]
        states_within_range = saved_states[indeces_within_range]
                
        num_batches = int(np.ceil((states_within_range.size(0) / batch_size)))
        
        probabilities_forward = torch.empty(states_within_range.size(0))
        probabilities_backward = torch.empty(states_within_range.size(0))
        
        for batch in range(num_batches):
            
            transition_states = states_within_range[batch * batch_size : (batch + 1) * batch_size]
            
            forward_transitions = new_state.repeat(transition_states.size(0), 2)
            forward_transitions [:, forward_transitions.size(1) // 2:] -= transition_states
            
            backward_transitions = transition_states.repeat(1, 2)
            backward_transitions [:, backward_transitions.size(1) // 2:] -= new_state
                
            g = getNewGraph(env[morphIdx]).to(device)
            probabilities_forward[batch * batch_size : (batch+1) * batch_size] = transitionVerifier(g, forward_transitions.to(device)).cpu()

            g = getNewGraph(env[morphIdx]).to(device)
            probabilities_backward[batch * batch_size : (batch+1) * batch_size] = transitionVerifier(g, backward_transitions.to(device)).cpu()
        
        
        boolean_valid_forward_indeces = probabilities_forward > transition_threshold
        out_neighbors = indeces_within_range[boolean_valid_forward_indeces].tolist()

        boolean_valid_backward_indeces = probabilities_backward > transition_threshold
        in_neighbors = indeces_within_range[boolean_valid_backward_indeces].tolist()
        
        
    if verbose:
        print('****')
        print('States-within-Range:', states_within_range.size(0))
        print('In-Degree:', len(in_neighbors))
        print('In-Succes-Rate: {:.4f}'.format(len(in_neighbors) / states_within_range.size(0)))
        print('Out-Degree:', len(out_neighbors))
        print('Out-Succes-Rate: {:.4f}'.format(len(out_neighbors) / states_within_range.size(0)))
    
    # If found we are lookiung for a path, we will overwrite this at a later point
    path = None
    new_state_idx = states_graph.num_vertices()
    if not dry_run:
        
        saved_encodings = torch.cat((saved_encodings, new_encoding.unsqueeze(0)), dim=0)
        saved_states = torch.cat((saved_states, new_state.unsqueeze(0)), dim=0)
        states_graph.add_vertex()
        
        for neighbour_idx in out_neighbors:
            states_graph.add_edge(new_state_idx, neighbour_idx)

        for neighbour_idx in in_neighbors:
            states_graph.add_edge(neighbour_idx, new_state_idx)
        
        if goal_index is not None:
            path = getPath(states_graph, new_state_idx, goal_index)
                            
            if len(path) == 0:
                
                if verbose:
                    print('Trying with additional states')
                    
                lower_transition_threshold = transition_threshold - transition_threshold_modifier
                upper_transition_threshold = transition_threshold
                
                iterations = 1
                while True:
                                        
                    boolean_valid_forward_indeces = (probabilities_forward > lower_transition_threshold) * (probabilities_forward < upper_transition_threshold)
                    out_neighbors = indeces_within_range[boolean_valid_forward_indeces].tolist()

                    boolean_valid_backward_indeces = (probabilities_backward > lower_transition_threshold) * (probabilities_backward < upper_transition_threshold)
                    in_neighbors = indeces_within_range[boolean_valid_backward_indeces].tolist()
                    
                    for neighbour_idx in out_neighbors:
                        states_graph.add_edge(new_state_idx, neighbour_idx)

                    for neighbour_idx in in_neighbors:
                        states_graph.add_edge(neighbour_idx, new_state_idx)
                    
                    path = getPath(states_graph, new_state_idx, goal_index)
                    
                    if len(path) != 0:
                        print('Nice! Length {} in {} iterations'.format(len(path), iterations))
                        break
                    
                    if lower_transition_threshold < lowest_allowed_threshold:
                        print('Failed :()')
                        break
                    
                    iterations += 1
                    lower_transition_threshold -= transition_threshold_modifier
                    upper_transition_threshold -= transition_threshold_modifier
                    
    
    return saved_encodings, saved_states, states_graph, new_state_idx, path

In [229]:
transition_threshold = 0.9
with torch.no_grad():
    
    random_idx = np.random.choice(int(1e6))
    
    saved_states = states[morphIdx][random_idx].clone().unsqueeze(0)
    g = getNewGraph(env[morphIdx]).to(device)
    saved_encodings = encoderGNN(g, saved_states).squeeze(1).cpu().unsqueeze(0)
    states_graph = gt.Graph()
    states_graph.add_vertex()

    for i in tqdm(range(50000)):

        random_idx = np.random.choice(int(1e6))
        new_state = states[morphIdx][random_idx].clone()
        saved_encodings, saved_states, states_graph, new_state_idx, no_edges_in, no_edges_out = addNodeToGraph(new_state, None, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, transitionVerifier, device, env, morphIdx, transition_threshold=transition_threshold)

IndexError: index 707750 is out of bounds for dimension 0 with size 1000

In [175]:
class VisitorExample(BFSVisitor):

    def __init__(self, name, pred, dist):
        self.name = name
        self.pred = pred
        self.dist = dist

    def discover_vertex(self, u):
        pass
#         print("-->", self.name[u], "has been discovered!")

    def examine_vertex(self, u):
        pass
#         print(self.name[u], "has been examined...")

    def tree_edge(self, e):
        self.pred[e.target()] = int(e.source())
        self.dist[e.target()] = self.dist[e.source()] + 1

In [199]:
def getPath(states_graph, start_idx, end_idx):
    
    name = states_graph.new_vertex_property("int")
    name.a = np.arange(states_graph.num_vertices())
    dist = states_graph.new_vertex_property("float")
    dist.a = np.zeros(states_graph.num_vertices()) + float('inf')
    pred = states_graph.new_vertex_property("int")
    pred.a = np.zeros(states_graph.num_vertices()) - 1
    source_idx = start_state_idx
    dist.a[source_idx] = 0
    visitorExample = VisitorExample(name, pred, dist)
    
    bfs_search(states_graph, states_graph.vertex(start_idx), visitorExample)

    
    if pred.a[end_idx] == -1:
        return []
    
    path = []
    current_idx = end_idx

    while current_idx != start_idx:
        
        path.append(current_idx)
        current_idx = pred.a[current_idx]
    
    path.append(start_idx)
    path.reverse()
    return path

In [225]:
name = '1e5'
folder = 'encoding-graphs/new/4/'

saved_encodings = torch.load(folder + 'saved_encodings_' + name)
saved_states = torch.load(folder + 'saved_states_' + name)
states_graph = gt.load_graph(folder + name + '.gt')

In [226]:
transition_threshold = 0.95
start_state = torch.from_numpy(env[morphIdx].reset())
saved_encodings, saved_states, states_graph, start_state_idx, _= addNodeToGraphWithPath(start_state, None, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, transitionVerifier, device, env, morphIdx, verbose=True, transition_threshold=transition_threshold)

****
States-within-Range: 5841
In-Degree: 110
In-Succes-Rate: 0.0188
Out-Degree: 219
Out-Succes-Rate: 0.0375


In [227]:
indeces_to_keep = states[morphIdx][:, 0] > 3.5
all_indeces = np.arange(states[morphIdx].shape[0])
goal_state_idx_temp = np.random.choice(all_indeces[indeces_to_keep])
goal_state = states[morphIdx][goal_state_idx_temp].clone()
print('Initial speed', goal_state[0].item())
saved_encodings, saved_states, states_graph, goal_state_idx, path = addNodeToGraphWithPath(goal_state, None, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, transitionVerifier, device, env, morphIdx, verbose=True, transition_threshold=transition_threshold, goal_index=start_state_idx)

Initial speed 4.139471082004165
****
States-within-Range: 4115
In-Degree: 11
In-Succes-Rate: 0.0027
Out-Degree: 20
Out-Succes-Rate: 0.0049


In [228]:
transition_threshold = 0.95
current_state_idx = start_state_idx

divergences = []
rewards = []
travelled_indeces = []
speeds = []
im_array = []

with torch.no_grad():
    for i in range(1000):
        
        if i  % 50 == 0:
            print('Reached', i)
            
        next_state_idx = path[1]
        next_state = saved_states[next_state_idx]
        transition = saved_states[current_state_idx].repeat(1, 2)
        transition[:, transition.shape[1] //2:] -= next_state
        
        g = getNewGraph(env[morphIdx])
        action = inverseDynamics(g, transition)[0][:, 0]
        actual_next_state, reward, done, _ = env[morphIdx].step(action.squeeze(0).cpu().numpy())
        actual_next_state = torch.from_numpy(actual_next_state)

        rewards.append(reward)
        env[morphIdx]._current_env._pb_env.camera_adjust()
        im_array.append(env[morphIdx]._current_env._pb_env.render_camera_image())
        speeds.append(actual_next_state[0])
        
        divergences.append(((actual_next_state - next_state) - mean_state) / std_state)
        
        saved_encodings, saved_states, states_graph, current_state_idx, path = addNodeToGraphWithPath(actual_next_state, None, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, transitionVerifier, device, env, morphIdx, transition_threshold=transition_threshold, goal_index=goal_state_idx)

Reached 0
Reached 50
Reached 100
Reached 150
Reached 200
Reached 250
Reached 300
Reached 350
Reached 400
Reached 450
Reached 500
Reached 550
Reached 600
Reached 650
Reached 700
Reached 750
Reached 800
Reached 850
Reached 900
Reached 950


In [159]:
transition_threshold = 0.95
current_state_idx = start_state_idx

divergences = []
rewards = []
travelled_indeces = []
speeds = []
im_array = []

with torch.no_grad():
    for i in range(1000):
        
        if i  % 50 == 0:
            print('Reached', i)
            
        travelled_indeces.append(current_state_idx)
        name = states_graph.new_vertex_property("int")
        name.a = np.arange(states_graph.num_vertices())
        dist = states_graph.new_vertex_property("float")
        dist.a = np.zeros(states_graph.num_vertices()) + float('inf')
        pred = states_graph.new_vertex_property("int")
        pred.a = np.zeros(states_graph.num_vertices()) - 1
        dist.a[current_state_idx] = 0
        bfs_search(states_graph, states_graph.vertex(current_state_idx), VisitorExample(name, pred, dist))

        if pred.a[goal_state_idx] == -1:
            
            print('No Path Found')
            
            transition_threshold_modifier = 0.02
            
            while True:
                
#                 indeces_to_keep = states[morphIdx][:, 0] > 3.5
#                 all_indeces = np.arange(states[morphIdx].shape[0])
#                 goal_state_idx_temp = np.random.choice(all_indeces[indeces_to_keep])
#                 goal_state = states[morphIdx][goal_state_idx_temp].clone()
#                 saved_encodings, saved_states, states_graph, goal_state_idx, no_edges_in, no_edges_out = addNodeToGraph(goal_state, None, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, transitionVerifier, device, env, morphIdx, verbose=True, transition_threshold=transition_threshold)
                
                saved_encodings, saved_states, states_graph, current_state_idx, no_edges_in, no_edges_out = addNodeToGraph(actual_next_state, None, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, transitionVerifier, device, env, morphIdx, verbose=True, transition_threshold=transition_threshold-transition_threshold_modifier)
                
                travelled_indeces.append(current_state_idx)
                name = states_graph.new_vertex_property("int")
                name.a = np.arange(states_graph.num_vertices())
                dist = states_graph.new_vertex_property("float")
                dist.a = np.zeros(states_graph.num_vertices()) + float('inf')
                pred = states_graph.new_vertex_property("int")
                pred.a = np.zeros(states_graph.num_vertices()) - 1
                dist.a[current_state_idx] = 0
                bfs_search(states_graph, states_graph.vertex(current_state_idx), VisitorExample(name, pred, dist))

                if pred.a[goal_state_idx] != -1:
                    print('Succesfully found new connections')
                    break
                
                transition_threshold_modifier += 0.02
                print('No luck, trying again')
                
#                 print(transition_threshold - transition_threshold_modifier)
#                 if transition_threshold - transition_threshold_modifier <= 0.5:
#                     print('Failed')
#                     break
                

        path = getPath(pred.a, current_state_idx, goal_state_idx)
        next_state_idx = path[1]
        next_state = saved_states[next_state_idx]
        transition = saved_states[current_state_idx].repeat(1, 2)
        transition[:, transition.shape[1] //2:] -= next_state
        
        g = getNewGraph(env[morphIdx])
        action = inverseDynamics(g, transition)[0][:, 0]
        actual_next_state, reward, done, _ = env[morphIdx].step(action.squeeze(0).cpu().numpy())
        actual_next_state = torch.from_numpy(actual_next_state)

        rewards.append(reward)
        env[morphIdx]._current_env._pb_env.camera_adjust()
        im_array.append(env[morphIdx]._current_env._pb_env.render_camera_image())
        speeds.append(actual_next_state[0])
        
        divergences.append(((actual_next_state - next_state) - mean_state) / std_state)
        saved_encodings, saved_states, states_graph, current_state_idx, no_edges_in, no_edges_out = addNodeToGraph(actual_next_state, None, saved_encodings, saved_states, states_graph, encoderGNN, decoderGNN, transitionVerifier, device, env, morphIdx, transition_threshold=transition_threshold)

Reached 0
No Path Found
****
States-within-Range: 3285
In-Degree: 24
In-Succes-Rate: 0.0073
Out-Degree: 0
Out-Succes-Rate: 0.0000
No luck, trying again
****
States-within-Range: 3286
In-Degree: 25
In-Succes-Rate: 0.0076
Out-Degree: 0
Out-Succes-Rate: 0.0000
No luck, trying again
****
States-within-Range: 3287
In-Degree: 25
In-Succes-Rate: 0.0076
Out-Degree: 0
Out-Succes-Rate: 0.0000
No luck, trying again
****
States-within-Range: 3288
In-Degree: 25
In-Succes-Rate: 0.0076
Out-Degree: 0
Out-Succes-Rate: 0.0000
No luck, trying again
****
States-within-Range: 3289
In-Degree: 26
In-Succes-Rate: 0.0079
Out-Degree: 0
Out-Succes-Rate: 0.0000
No luck, trying again
****
States-within-Range: 3290
In-Degree: 26
In-Succes-Rate: 0.0079
Out-Degree: 0
Out-Succes-Rate: 0.0000
No luck, trying again
****
States-within-Range: 3291
In-Degree: 27
In-Succes-Rate: 0.0082
Out-Degree: 0
Out-Succes-Rate: 0.0000
No luck, trying again
****
States-within-Range: 3292
In-Degree: 27
In-Succes-Rate: 0.0082
Out-Degree: 

KeyboardInterrupt: 

In [None]:
def save_graph_and_video(speeds, rewards, im_array, name):
    fig, ax = plt.subplots(2, sharex=True)
    ax[0].set_title('Speeds')
    ax[1].set_title('Cumulative Rewards')
    ax[0].plot(uniform_filter1d(speeds, size=10))
    ax[1].plot(np.cumsum(rewards))
    ax[1].set_xlabel('Steps')
    ax[1].set_ylabel('Reward')
    ax[1].set_ylabel('Speed')
    fig.savefig(name + '.png')
    video = imageio.get_writer(name + '.mp4', mode='I', fps=24, codec='libx264', bitrate='16M')
    for img in im_array:
        video.append_data(img)
    video.close()

In [151]:
name = 'our-agent-0.95'
speeds = np.array(speeds)
im_array = (np.array(im_array) * 256).astype(np.uint8)
rewards = np.array(rewards)

In [152]:
save_graph_and_video(speeds, rewards, im_array, name)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …



In [85]:
divergences = torch.stack(divergences)

In [85]:
rewards = np.array(rewards)

In [75]:
rewards_98 = rewards.sum()

In [86]:
rewards_90 = rewards.sum()

In [87]:
rewards_90

163.56464241507734

In [88]:
rewards_98

88.65455398894879

In [88]:
divergences.mean(0).to(torch.float32)

tensor([-9.7563e-01, -2.8780e+00,  9.5641e+00, -1.0662e+00, -1.0256e+00,
        -9.1540e+01, -2.9764e+00, -1.1058e+00,  8.8603e-02,  1.0242e+02,
         5.8463e+00, -4.3454e+00, -7.8245e-01, -9.9487e-01, -9.9340e-01,
        -1.0537e+00, -4.5260e+00, -1.0100e+00, -1.1416e+00])

In [129]:
for i in range(divergences.shape[0]):
    print(divergences[i].sqrt())

tensor([0.0441, 0.9872, 0.1064, 0.0141, 0.0046, 0.3514, 0.0564, 0.0579, 0.0295,
        0.7295, 0.3154, 1.0601, 0.2295, 0.1546, 0.1554, 0.1317, 0.3169, 0.0767,
        0.0503], dtype=torch.float64)
tensor([6.1827e-01, 4.2576e-01, 5.6537e-01, 1.4168e-01, 1.7570e-02, 4.3017e-01,
        4.0350e-02, 5.9558e-01, 7.1358e-03, 7.7624e-01, 2.8824e-07, 6.0487e-03,
        3.4569e-02, 3.3446e-01, 1.1475e-01, 1.1585e-01, 6.6616e-01, 2.3250e-01,
        3.3756e-01], dtype=torch.float64)
tensor([5.8276e-02, 6.4721e-01, 5.0872e-01, 7.2219e-03, 1.7523e-02, 3.2398e-02,
        2.2284e-01, 1.1753e-04, 1.1248e-01, 5.8877e-01, 1.0352e-03, 1.4735e-03,
        1.6584e-03, 1.1285e-01, 1.6902e-01, 3.5184e-02, 5.6954e-01, 4.3935e-03,
        5.9379e-02], dtype=torch.float64)
tensor([3.6783e-02, 2.1665e-01, 6.7462e-02, 4.6786e-04, 1.2793e-02, 5.1707e-03,
        1.3633e-01, 1.1753e-04, 7.3566e-02, 4.8671e-02, 5.2234e-05, 2.3305e-03,
        2.3875e-03, 3.2580e-02, 1.6902e-01, 5.9932e-03, 9.1761e-02, 2.2646e-03