In [13]:
%matplotlib inline
import networkx as nx
from networkx.drawing.nx_agraph import write_dot, graphviz_layout
import matplotlib.pyplot as plt
import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from coolname import generate_slug as name
import yaml
from sklearn import datasets
from sklearn.utils import shuffle

In [14]:
def zero_grad(self, grad_input, grad_output):
    temp = list(grad_input)
    #Temp[2] is the gradients to the weights, zero them out so they cant change. 
    temp[2] *= torch.transpose(self.mask,0, 1)
    return tuple(temp)

class SparseNet(nn.Module):
    def __init__(self, weights, biases, masks):
        super(SparseNet, self).__init__()
        self.act_log = None
        self.err_log = None
        self.layers = nn.ModuleList()
        self.masks = masks
        self.out = None
        for w, b, m in zip(weights,biases,masks):
            temp = nn.Linear(w.shape[1],w.shape[0])
            temp.weight.data = torch.from_numpy(w.astype(np.float32))
            temp.bias.data = torch.from_numpy(b.astype(np.float32))
            temp.mask = torch.from_numpy(m.astype(np.float32))
            temp.register_backward_hook(zero_grad)
            self.layers.append(temp)

    def forward(self, x, log=False):
        self.log = log
        
        for l in self.layers[:-1]:
            y = l(x)
            x = torch.cat((x,y),1)
            x = F.relu(x)
        self.out = self.layers[-1](x)
        
        if(log):
            self.out.retain_grad()
            if self.act_log is None:
                self.act_log = x.detach()
            else:
                self.act_log = torch.cat((self.act_log,x.detach()), dim=0)
            
        return self.out
    
    def clear(self):
        self.act_log = None
        self.err_log = None

    def dumpweights(self):
        weights = []
        biases = []
        for l in self.layers:
            weights.append(l.weight.data.numpy().astype(np.float64))
            biases.append(l.bias.data.numpy().astype(np.float64))
        return weights, biases

class graph_neural_network():
    def __init__(self,n_in,n_out,h_edges=3):
        self.G = nx.DiGraph()
        self.h_edges = h_edges
        self.inputs = ["(in)"+name() for i in range(n_in)]
        self.outputs = ["(out)"+name() for i in range(n_out)]
        self.flat = None
        for i in self.inputs:
            self.G.add_node(i) 
        for o in self.outputs:
            self.G.add_node(o)
            self.G.node[o]['bias'] = np.random.normal(0,1) 
        self.hidden = []
    
    def add_hidden(self,incoming,outgoing):
        h = name()
        print(incoming,outgoing)
        edges = []
        for i in incoming:
            edges.append((i,h,np.random.normal(0,1.0/self.h_edges)))
        
        approx_xavier = len(self.outputs) * 1.0/ (1+len(self.hidden))
        
        for o in outgoing:
            edges.append((h,o,np.random.normal(0,approx_xavier)))
            
        self.G.add_weighted_edges_from(edges)
        self.G.node[h]['bias'] = random.uniform(-.1,.1)
        self.hidden.append(h)
    
    def add_random_hidden(self):
        incoming = [random.choice(self.inputs+self.hidden) for i in range(self.h_edges)]
        outgoing = [random.choice(self.outputs)]
        self.add_hidden(incoming,outgoing)
    
    def get_layers(self):
        G = self.G
        G2 = nx.topological_sort(G)
        max_layer = 0
        for n in G2:
            if n not in self.outputs:
                G.node[n]['layer'] = max([G.node[k[0]]['layer'] for k in G.in_edges(n)] + [-1]) + 1
                max_layer = max(max_layer,G.node[n]['layer'])
        for n in self.outputs:
            G.node[n]['layer'] = max_layer + 1

        layers = [[] for _ in range(max_layer + 2)]

        for n in G:
            layers[G.node[n]['layer']].append(n)

        self.flat = []
        flat_idx = 0
        for i, layer in enumerate(layers):
            for j, n in enumerate(layer):
                G.node[n]['idx'] = j
                G.node[n]['flat_idx'] = flat_idx
                self.flat.append(n)
                flat_idx += 1
        
        return layers
    
    def get_output_idxes(self):
        outputs = self.outputs
        G = self.G
        out_idxes = []
        for n in G:
            if(n in outputs):
                out_idxes.append(G.node[n]['flat_idx'])
        return out_idxes
    
    def get_weights(self):
        G = self.G
        outputs = self.outputs
        
        layers = self.get_layers()
        mask = []    
        weights = []
        biases = []

        n_nodes = 0
        for i in range(len(layers) - 1):
            n_nodes += len(layers[i])
            mask.append(np.zeros((len(layers[i+1]),n_nodes)))
            biases.append(np.zeros((len(layers[i+1]))))
            weights.append(np.zeros((len(layers[i+1]),n_nodes)))

            for j, node1 in enumerate(layers[i+1]):
                biases[i][j] = G.node[node1]['bias']
                for node0, _ in G.in_edges(node1): 
                    u = G.node[node0]['flat_idx']
                    v = G.node[node1]['idx']
                    mask[i][v,u] = 1
                    weights[i][v,u] = G[node0][node1]['weight']

        return weights, biases, mask
    
    def set_weights(self,weights,biases):
        layers = self.get_layers()
        for i in range(len(layers) - 1):
            for j, node1 in enumerate(layers[i+1]):
                self.G.node[node1]['bias'] = biases[i][j]
                for node0, _ in self.G.in_edges(node1): 
                    u = self.G.node[node0]['flat_idx']
                    v = self.G.node[node1]['idx']
                    self.G[node0][node1]['weight'] = weights[i][v,u]
                    
    def create_nn(self):
        w, b, m = self.get_weights()
        self.nn = SparseNet(w,b,m)
    
    def update_graph(self):
        weights = [l.weight.data.numpy() for l in self.nn.layers]
        biases = [l.bias.data.numpy() for l in self.nn.layers]
        self.set_weights(weights,biases)
    
    def get_err_act_vectors(self,X,y):
        gnn.nn.clear()
        for j in range(len(X)):
            out = gnn.nn(X[j],log=True) 
            loss = criterion(out,y[j])
            loss.backward()

            if gnn.nn.err_log is None:
                gnn.nn.err_log = gnn.nn.out.grad
            else:
                gnn.nn.err_log = torch.cat((gnn.nn.err_log,gnn.nn.out.grad), dim=0)

            optimizer.zero_grad()

        return gnn.nn.act_log, gnn.nn.err_log
    
    def add_cossim_hidden(self,X,y):
        act, err = self.get_err_act_vectors(X,y)
        
        out = random.randint(0,len(self.outputs)-1)
        err = err[:,out:out+1]
        
        incoming = []
        
        cossim = F.cosine_similarity(err,act,dim=0)
        _, choices = torch.topk(cossim.abs(), self.h_edges, dim=0)
        
        for choice in choices:
            incoming.append(self.flat[choice])
        outgoing = [self.outputs[out]]
        self.add_hidden(incoming,outgoing)
        
        
        

In [15]:
# import some data to play with
data = datasets.load_iris()
X = [torch.FloatTensor([d]) for d in data.data]
y = [torch.FloatTensor([[d]]) for d in data.target]

In [16]:
X, y = shuffle(X,y)

In [17]:
gnn = graph_neural_network(len(X[0][0]),len(y[0][0]),h_edges=5)
criterion = nn.MSELoss()

In [18]:
avgloss = 0
maxnodes = 50
split = int(len(X) * 0.8)
for k in range(maxnodes+1):
    gnn.create_nn()
    optimizer = optim.Adam(gnn.nn.parameters(), lr=0.001)
    
    for j in range(50000):
        i = j % split
        out = gnn.nn(X[i])
        optimizer.zero_grad()
        loss = criterion(out,y[i])
        avgloss += loss
        loss.backward()
        optimizer.step()
        if j % 5000 == 4999:
            print(avgloss/5000)
            avgloss = 0
            
    SSE = 0
    for v_X, v_y in zip(X[400:],y[400:]):
        out = gnn.nn(v_X)
        SSE += (out[0][0] - v_y[0][0])**2
    print("v_SSE:", SSE/len(X[split:]))
    if k != maxnodes:
        print("Adding node")
        gnn.update_graph()
        Xh, yh = shuffle(X[:split],y[:split])
        gnn.add_cossim_hidden(Xh[:150],yh[:150])

IndexError: list index out of range

In [None]:
gnn.choose_connectons(X,y)