In [1]:
import os

# If the kernel was started in a directory whose path ends with "notebooks",
# step up one level so the parent directory becomes the working directory.
if os.getcwd().endswith("notebooks"):
    os.chdir("..")
    print("using project root as working dir")

using project root as working dir


In [2]:
import math
import random
from dataclasses import dataclass
from typing import List, Optional, Tuple

import networkx as nx
import numpy as np
from tqdm.notebook import tqdm


@dataclass
class Args:
    """Configuration shared by the graph builder, the model and the training loop.

    Every setting carries a type annotation so that ``@dataclass`` registers it
    as a real field: without annotations the decorator sees no fields at all,
    the generated ``__init__`` accepts no arguments, and ``repr``/``==`` ignore
    every value. With them, ``Args()`` still yields the same defaults and
    individual settings can be overridden, e.g. ``Args(batch_size=32)``.
    """
    # Not read by any code visible in this notebook.
    random_seed: Optional[int] = None
    # torch
    batch_size: int = 64      # batch size passed to the train/test DataLoaders
    epochs: int = 10          # number of passes made by EmbeddingEvaluator.train
    layers: int = 10          # not read by any code visible in this notebook
    layer_size: int = 16      # width of the hidden Linear layers in NeuralNetwork
    train_size: float = 0.7   # fraction of the dataset given to the training split
    wandb: bool = False       # not read by any code visible in this notebook
    # graph
    graph_size: int = 1000    # number of nodes to generate
    graph_shape: str = 'disc' # layout name forwarded to gen_nodes
    rg_radius: float = 0.05   # radius forwarded to nx.random_geometric_graph
    # dataset manipulation
    ds_padded: bool = True    # not read by any code visible in this notebook

args = Args()

In [3]:
import torch
from torch import nn
from torch.utils.data import Dataset, TensorDataset, DataLoader


# Get cpu or gpu device for training.
# Preference order: "cuda" when torch reports CUDA as available, otherwise
# "mps" when the MPS backend is available, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
# Uncomment the next line to force CPU execution regardless of what is available.
#device = "cpu"
print(f"using {device} device")

using cuda device


In [4]:
# A single node location as a pair of floats.
NodePosition = Tuple[float, float]
# Locations of all nodes; the list index serves as the node index.
NodePositions = List[NodePosition]
# Pairs of node indices (used below both for candidate pairs and for edges).
NodeIndexPairs = List[Tuple[int, int]]

def gen_nodes(size: int, shape: str = "disc") -> NodePositions:
    """Generate ``size`` random node positions laid out in the requested shape.

    Args:
        size: number of node positions to generate.
        shape: name of the layout; only ``'disc'`` is supported.

    Returns:
        A list of ``size`` positions.

    Raises:
        ValueError: if ``shape`` is not a supported layout name.
    """
    if shape == 'disc':
        return __gen_nodes_disc(size)
    # Raising a plain string is itself an error in Python 3 (it surfaces as a
    # TypeError and hides the message), so raise a proper exception instead.
    raise ValueError(f'unsupported node shape: {shape}')


def __gen_nodes_disc(amount: int) -> NodePositions:
    """Draw ``amount`` points by rejection sampling inside a disc.

    Candidates are drawn from the unit square and kept only when they lie
    within distance 0.5 of the centre (0.5, 0.5). A progress bar advances once
    per accepted point.
    """
    accepted = []
    with tqdm(total=amount, desc="generating random-uniform nodes on disc") as progress:
        while len(accepted) < amount:
            x = random.uniform(0, 1)
            y = random.uniform(0, 1)
            dx, dy = x - 0.5, y - 0.5
            # keep the candidate only if it falls inside the disc
            if math.sqrt(dx * dx + dy * dy) <= 0.5:
                accepted.append((x, y))
                progress.update(1)
    return accepted


def get_node_pairs(n_nodes: int) -> NodeIndexPairs:
    """List every unordered pair ``(i0, i1)`` with ``0 <= i0 < i1 < n_nodes``.

    Pairs are ordered by ``i0`` first and ``i1`` second; a progress bar tracks
    the outer index.
    """
    pairs = []
    for first in tqdm(range(n_nodes), desc="generating node pairs"):
        for second in range(first + 1, n_nodes):
            pairs.append((first, second))
    return pairs


# https://stackoverflow.com/a/36460020/10619052
def list_to_dict(items: list) -> dict:
    """Map each list position to the item stored there (``{0: items[0], ...}``)."""
    return dict(enumerate(tqdm(items, desc="creating dict from list")))

In [5]:
# Define graph builder
class RandomGeometricGraphBuilder:
    """Builds a random geometric graph from the settings in the global ``args``.

    Attributes set by the constructor:
        nodes: generated node positions.
        n_nodes: number of generated nodes.
        graph: networkx graph built from those positions and ``args.rg_radius``.
        node_index_pairs: every index pair ``(i0, i1)`` with ``i0 < i1``.
        edges: the subset of ``node_index_pairs`` that are edges of ``graph``.
    """

    def __init__(self):
        # generate node positions and the graph over them
        positions = gen_nodes(args.graph_size, args.graph_shape)
        self.nodes = positions
        self.n_nodes = len(positions)
        position_by_index = list_to_dict(positions)
        self.graph = nx.random_geometric_graph(
            self.n_nodes, args.rg_radius, pos=position_by_index
        )

        # keep only those candidate pairs that the graph actually connects
        self.node_index_pairs = get_node_pairs(self.n_nodes)
        connected: NodeIndexPairs = []
        for i0, i1 in tqdm(self.node_index_pairs, desc="generating dataset labels from node pairs"):
            if self.graph.has_edge(i0, i1):
                connected.append((i0, i1))
        self.edges = connected

In [6]:
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier mapping 4 input features to 2 logits.

    The stack is Linear -> ReLU -> Linear -> ReLU -> Linear, with hidden width
    taken from ``args.layer_size``.
    NOTE(review): ``args.layers`` is not read here; the depth is fixed.
    """

    def __init__(self):
        super().__init__()
        hidden = args.layer_size
        stack = [
            nn.Linear(4, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 2),
        ]
        self.linear_relu_stack = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw logits produced by the layer stack for ``x``."""
        return self.linear_relu_stack(x)

In [7]:
# Define evaluator
class EmbeddingEvaluator:
    """Trains a small classifier to tell whether a pair of nodes is connected.

    For every index pair ``(i0, i1)`` with ``i0 < i1`` one sample is built: the
    features are the two node positions concatenated (4 floats) and the label
    is 1 when the pair appears in ``edges``, else 0. The samples are split into
    train and test parts according to ``args.train_size``.
    """

    def __init__(self, nodes: NodePositions, edges: NodeIndexPairs, embedding: NodePositions):
        """Build the network, the dataset and both data loaders.

        Args:
            nodes: node positions; the sample features are built from these.
            edges: index pairs that are connected (label 1).
            embedding: stored on the instance; not read elsewhere in this class.
        """
        self.nodes = nodes
        self.n_nodes = len(nodes)
        self.node_index_pairs = get_node_pairs(self.n_nodes)
        self.edges = edges
        self.embedding = embedding
        # generate net
        self.reset_net()
        # generate dataset
        self.ds_values = torch.tensor([
            [*self.nodes[i0], *self.nodes[i1]] # type: [float, float, float, float]
            for (i0, i1) in tqdm(self.node_index_pairs, desc="generating dataset values from node pairs")
        ])
        # Membership is tested once per pair; a set keeps each lookup O(1)
        # instead of scanning the whole edge list every time.
        edge_set = set(map(tuple, self.edges))
        self.ds_labels = torch.LongTensor([
            1 if (pair in edge_set) else 0
            for pair in tqdm(self.node_index_pairs, desc="generating dataset labels from node pairs")
        ])
        self.dataset = TensorDataset(self.ds_values, self.ds_labels)
        #? do we want to over-fit?
        self.train_dataset, self.test_dataset = torch.utils.data.random_split(self.dataset, [args.train_size, 1 - args.train_size])
        self.train_dataloader = DataLoader(self.train_dataset, batch_size=args.batch_size, num_workers=0, shuffle=True)
        self.test_dataloader = DataLoader(self.test_dataset, batch_size=args.batch_size, num_workers=0, shuffle=False)


    def reset_net(self):
        """Replace ``self.net`` with a freshly initialised network on ``device``."""
        self.net = NeuralNetwork().to(device)


    def train(self, loss_fn, optimizer):
        """Run ``args.epochs`` epochs, each a training pass followed by a test pass.

        Args:
            loss_fn: callable computing the loss from ``(predictions, labels)``.
            optimizer: optimizer stepped after every training batch; it must
                have been built over the parameters of the current ``self.net``.
        """
        for epoch in range(args.epochs):
            with tqdm(total=len(self.train_dataloader), desc="starting model...") as pbar:
                pbar.set_description(f"Epoch {epoch + 1}")
                self.__train(pbar, loss_fn, optimizer)
                self.__test(pbar, loss_fn)


    def __train(self, pbar, loss_fn, optimizer):
        """Train for one pass over the training loader, reporting on ``pbar``."""
        self.net.train()

        n_train_batches = len(self.train_dataloader)
        # Refresh the bar roughly every 1% of the batches. Clamp to 1 so that
        # fewer than 100 batches does not produce a modulo-by-zero below.
        intv = max(1, n_train_batches // 100)
        for batch, (X, y) in enumerate(self.train_dataloader):
            X, y = X.to(device), y.to(device)
            # Compute prediction error
            pred = self.net(X)
            loss = loss_fn(pred, y)
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # update progress
            if batch % intv == 0 or batch == n_train_batches - 1:
                # `batch` is zero-based, so batch + 1 batches are finished;
                # this lets the bar reach its total on the last batch.
                pbar.update(batch + 1 - pbar.n)
                pbar.set_postfix_str(f"loss: {loss.item():>6f}")


    def __test(self, pbar, loss_fn):
        """Evaluate on the test loader and show accuracy and mean loss on ``pbar``."""
        self.net.eval()

        n_test_batches = len(self.test_dataloader)
        n_test_values = len(self.test_dataloader.dataset)
        test_loss, correct = 0, 0
        pbar.set_postfix_str("evaluating epoch...")
        with torch.no_grad():
            for x, y in self.test_dataloader:
                x, y = x.to(device), y.to(device)
                pred = self.net(x)
                test_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        # Guard the averages so an empty test split reports zeros instead of
        # raising ZeroDivisionError.
        test_loss /= max(n_test_batches, 1)
        correct /= max(n_test_values, 1)
        pbar.set_postfix_str(f"epoch result: accuracy: {(100*correct):>0.1f}%, avg_loss: {test_loss:>8f}")


    def predict(self, embedding: NodePositions):
        """Run the network over the given rows and collect its raw outputs.

        Args:
            embedding: rows convertible with ``torch.tensor``.
                NOTE(review): the network's first layer takes 4 features per
                row, while ``NodePositions`` rows hold 2 floats; confirm what
                callers are expected to pass here.

        Returns:
            A list with one numpy array of logits per loader batch. The loader
            is created without ``batch_size``, so its default applies.
        """
        self.net.eval()

        data = TensorDataset(torch.tensor(embedding))
        dataloader = DataLoader(data, num_workers=0, shuffle=False)

        predictions = []
        with torch.no_grad():
            # TensorDataset yields 1-tuples, so unpack the single tensor.
            for (x,) in dataloader:
                x = x.to(device)
                pred = self.net(x)
                predictions.append(pred.detach().cpu().numpy())

        return predictions

In [8]:
# Build the random geometric graph, then the evaluator with its pair dataset.
graph_builder = RandomGeometricGraphBuilder()
evaluator = EmbeddingEvaluator(
    graph_builder.nodes,
    graph_builder.edges,
    graph_builder.nodes # for random geometric graph, the structure defines the embedding
)

# The optimizer is bound to the parameters of the evaluator's current network;
# it would have to be rebuilt after a call to evaluator.reset_net().
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(evaluator.net.parameters(), lr=1e-3)

# Runs args.epochs epochs of training, each followed by a test pass.
evaluator.train(loss_fn, optimizer)

#pred = evaluator.predict(graph_builder.nodes)
#print(pred)

print("Done!")

generating random-uniform nodes on disc:   0%|          | 0/1000 [00:00<?, ?it/s]

creating dict from list:   0%|          | 0/1000 [00:00<?, ?it/s]

generating node pairs:   0%|          | 0/1000 [00:00<?, ?it/s]

generating dataset labels from node pairs:   0%|          | 0/499500 [00:00<?, ?it/s]

generating node pairs:   0%|          | 0/1000 [00:00<?, ?it/s]

generating dataset values from node pairs:   0%|          | 0/499500 [00:00<?, ?it/s]

generating dataset labels from node pairs:   0%|          | 0/499500 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

starting model...:   0%|          | 0/5464 [00:00<?, ?it/s]

Done!


In [9]:
def predict(self, edges: NodeIndexPairs):
    """Feed ``edges`` through ``self.net`` and return the list of raw outputs.

    ``self`` is any object exposing a ``net`` attribute (this is a free
    function, not a method). ``edges`` is handed directly to a ``DataLoader``
    created without ``batch_size``, so its default applies. Outputs are left
    on ``device`` as tensors.
    """
    self.net.eval()

    loader = DataLoader(edges, num_workers=0, shuffle=False)

    with torch.no_grad():
        outputs = [
            self.net(batch.to(device))
            for batch in tqdm(loader, desc="predicting")
        ]

    return outputs

# Run the trained network over the full pair-feature tensor (train and test
# samples alike); `pred` becomes a list with one output tensor per loader batch.
pred = predict(evaluator, evaluator.ds_values)
print("prediced")

#evaluator.net(torch.tensor(graph_builder.nodes).to(device))

# Print only the container type; printing `pred` itself would dump every output.
print(type(pred))

predicting:   0%|          | 0/499500 [00:00<?, ?it/s]

prediced


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [22]:
# Inspect the first prediction: the type of one scalar entry, then the whole
# first output (displayed as the cell's result).
print(type(pred[0][0][0]))
pred[0]

<class 'torch.Tensor'>


tensor([[ 2.6341, -2.2398]], device='cuda:0')


    predictions = []
    with torch.no_grad():
        for x in dataloader:
            pred = self.net(x.to(device))
            predictions.append(pred.cpu().detatch().numpy())


# NOTE(review): this script relies on `GraphDataset`, `train` and `test`, none of
# which are defined in the visible part of this notebook — confirm they still exist.
args = Args()

graph_dataset = GraphDataset(args)
full_dataset = graph_dataset.dataset
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [args.train_size, 1 - args.train_size]) ## do we want to over-fit?

train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=0, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, num_workers=0, shuffle=False)

model = NeuralNetwork().to(device)

# train & test
# NOTE(review): no loss function or optimizer is created for `model` here; if
# `optimizer` is the one built earlier over `evaluator.net.parameters()`, its
# steps will not update `model` — confirm.
for epoch in range(args.epochs):
    with tqdm(total=len(train_dataloader), desc="starting model...") as pbar:
        pbar.set_description(f"Epoch {epoch + 1}")
        train(pbar, model, train_dataloader, loss_fn, optimizer)
        test(pbar, model, test_dataloader, loss_fn)


# predict & print
# NOTE(review): the `predict` function defined earlier reads `self.net` from its
# first argument; an `nn.Module` such as `model` has no `net` attribute unless
# one is set — confirm which `predict` this call targets.
pred = predict(model, graph_dataset)