In [2]:
# Location of the Omniglot .npz archive read by the cells below.
# NOTE(review): absolute, machine-specific path — adjust it for your environment.
filename = "/home/nbiescas/Desktop/CVC/CVC_internship/omniglot.npz"
import numpy as np
import random
import webbrowser
import time
import networkx as nx
import matplotlib.pyplot as plt
# Open the archive once and pull the three splits out by key.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only use it on
# archives from a trusted source.
load_data = np.load(filename, encoding='latin1', allow_pickle=True)
train_set, valid_set, test_set = (
    load_data[split_name] for split_name in ('train', 'valid', 'test')
)

def generate_grid():
    """Build five random 3x3 sample grids and open the SVG file after each one.

    For every grid, nine entries are drawn at random from ``train_set`` and
    paired with their ``(row, column)`` cell. The list of pairs is handed to
    ``make_grid_svg`` and its result to ``draw_strokes``. The hard-coded SVG
    path is then opened with ``webbrowser`` and execution pauses for 0.5 s.
    """
    # NOTE(review): presumably the file draw_strokes writes — confirm.
    url = r"D:\NIL\OMNIGLOT\sample.svg"
    grid_cells = [(row, col) for row in range(3) for col in range(3)]
    for _ in range(5):
        # One random training sample per cell, in row-major order.
        s_list = [(random.choice(train_set), cell) for cell in grid_cells]
        grid = make_grid_svg(s_list=s_list)
        draw_strokes(grid)
        webbrowser.open(url)
        time.sleep(0.5)

In [3]:
import dgl
import torch
def read_npz_files(filename):
    """Load the train/valid/test arrays from an ``.npz`` archive.

    Args:
        filename: Path to the archive, given as a ``str`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``). The text after
            the last ``.`` must be exactly ``npz``.

    Returns:
        Tuple ``(train, valid, test)`` with the arrays stored under the
        ``'train'``, ``'valid'`` and ``'test'`` keys of the archive.

    Raises:
        ValueError: If the file extension is not ``npz``.
        KeyError: If one of the three keys is missing from the archive.
    """
    import os  # local import so the function does not rely on another cell

    path = os.fspath(filename)  # accept pathlib.Path as well as plain strings
    if path.split('.')[-1] != 'npz':
        raise ValueError("Invalid file format")
    # NOTE(review): allow_pickle=True can execute arbitrary code while
    # unpickling; only load archives from trusted sources.
    # The context manager closes the underlying zip file once the three arrays
    # have been read, instead of leaving the handle open.
    with np.load(path, allow_pickle=True, encoding='latin1') as archive:
        return archive['train'], archive['valid'], archive['test']

def read_letter(data, self_loops = True):
    """Convert one letter (a sequence of rows) into a DGL graph.

    Each row of ``data`` is unpacked into three values: an x offset, a y
    offset and a flag that is compared against 1 (named ``previous_lift`` in
    the code; presumably a pen-lift marker — confirm against the dataset).
    Offsets are accumulated into running coordinates, with the y offset
    subtracted rather than added. Every row becomes one node. Consecutive
    nodes are linked in both directions unless the earlier row's flag is 1.

    Args:
        data: Sequence of 3-element rows as described above.
        self_loops: When True, every node also gets an edge to itself.

    Returns:
        A ``dgl`` graph with ``len(data)`` nodes whose ``ndata['feat']`` is a
        ``(len(data), 2)`` float64 tensor of accumulated (x, y) coordinates.
    """
    num_nodes = len(data)
    if self_loops:
        adj_matrix = np.identity(num_nodes)
    else:
        adj_matrix = np.zeros((num_nodes, num_nodes))
    # Node features: (num_nodes x 2), the accumulated coordinates of each node.
    node_feat = np.zeros((num_nodes, 2))

    x = 0
    y = 0
    for node_id, row in enumerate(data):
        x += row[0]
        y -= row[1]
        node_feat[node_id] = [x, y]
        if node_id != 0:
            _, _, previous_lift = data[node_id - 1]
            if previous_lift == 1:
                # Flag set on the previous row: do not connect it to this node.
                continue
            adj_matrix[node_id][node_id - 1] = 1
            adj_matrix[node_id - 1][node_id] = 1

    src, dst = np.nonzero(adj_matrix)
    # Pass num_nodes explicitly. Otherwise DGL infers the node count from the
    # largest edge endpoint, so trailing nodes without any edge (possible when
    # self_loops is False) would be dropped and the feature assignment below
    # would fail with a size mismatch.
    g = dgl.graph((src, dst), num_nodes=num_nodes)
    g.ndata['feat'] = torch.from_numpy(node_feat)

    return g

In [4]:
import networkx as nx
import matplotlib.pyplot as plt

def plot_graph(G, position, ax):
    """Draw graph ``G`` on ``ax`` with nodes, edges and node-id labels.

    Args:
        G: Graph accepted by the ``networkx`` drawing functions.
        position: Mapping passed to the drawing functions as ``pos``.
        ax: Matplotlib axes that receives the drawing, title and axis labels.
    """
    shared = dict(ax=ax, pos=position)

    # Nodes first, then edges, then one text label per node (its own id).
    nx.draw_networkx_nodes(G, **shared)
    nx.draw_networkx_edges(G, **shared)
    nx.draw_networkx_labels(G, labels={node: node for node in G.nodes()}, **shared)

    # Title and axis captions.
    ax.set(title="Graph Visualization", xlabel="X Axis", ylabel="Y Axis")

    plt.tight_layout()

In [5]:
# Pick one training letter uniformly at random and convert it to a graph.
# random.randrange excludes its upper bound, so the index is always valid.
# random.randint(0, len(train_set)) includes the bound and could index one
# past the end of train_set.
sample = train_set[random.randrange(len(train_set))]
dgl_graph = read_letter(sample, self_loops = True)

def random_remove_edges(dgl_graph, prob = 0.1):
    """Drop a random fraction of the edges of ``dgl_graph``.

    Args:
        dgl_graph: Graph exposing ``num_edges()``, accepted by ``dgl.remove_edges``.
        prob: Fraction of edges to drop; ``int(num_edges * prob)`` edge ids
            are selected, so the count is truncated toward zero.

    Returns:
        The value returned by ``dgl.remove_edges`` for the selected edge ids.
    """
    total = dgl_graph.num_edges()
    # Shuffle every edge id, then take the leading slice as the ids to drop.
    shuffled_ids = list(range(total))
    random.shuffle(shuffled_ids)
    drop_count = int(total * prob)
    return dgl.remove_edges(dgl_graph, shuffled_ids[:drop_count])


In [7]:
# Display the node count of the graph currently bound to dgl_graph.
dgl_graph.num_nodes()

27

In [None]:
# Draw a 3x3 grid of random training letters. Each letter is converted to a
# graph without self loops, 40% of its edges are removed, and the result is
# plotted at the node coordinates stored in ndata["feat"].
fig, idx = plt.subplots(3, 3, figsize=(12, 12))
for i in range(3):
    for j in range(3):
        axis = idx[i, j]
        # randrange excludes the upper bound; randint(0, len(train_set)) could
        # return len(train_set) and raise IndexError.
        sample = train_set[random.randrange(len(train_set))]
        dgl_graph = random_remove_edges(read_letter(sample, self_loops = False), prob=0.4)
        G = dgl.to_networkx(dgl_graph)
        # Map node id -> (x, y) row so networkx places each node at its coordinates.
        features = {node_id:row for node_id, row in enumerate(dgl_graph.ndata["feat"].numpy())}
        plot_graph(G, features, axis)

plt.show()

#draw_strokes(sample)
#webbrowser.open(r"D:\NIL\OMNIGLOT\sample.svg")

In [44]:
#Divide in train, test, validation.
import torch
from torch.utils.data import DataLoader

def collate(graphs):
    """Turn a list of graphs into one batched graph plus per-graph labels.

    Args:
        graphs: Graphs exposing ``adj()``, as handed over by the DataLoader.

    Returns:
        Tuple ``(batched_graph, labels)``. ``labels`` holds ``adj()`` of every
        input graph, taken before any edge is removed. ``batched_graph`` is
        ``dgl.batch`` applied to the graphs after ``random_remove_edges``
        (default probability).
    """
    labels = [graph.adj() for graph in graphs]
    corrupted = [random_remove_edges(graph) for graph in graphs]
    return dgl.batch(corrupted), labels

# Read the three splits and convert every letter into a graph with self loops.
trainset, validset, testset = read_npz_files(filename)

Train_Graphs, Valid_Graphs, Test_Graphs = (
    [read_letter(letter, self_loops=True) for letter in split]
    for split in (trainset, validset, testset)
)

# One loader per split, one graph per batch; only the training loader shuffles.
train_loader = DataLoader(
    Train_Graphs, collate_fn=collate, batch_size=1, shuffle=True
)
valid_loader = DataLoader(Valid_Graphs, collate_fn=collate, batch_size=1)
test_loader = DataLoader(Test_Graphs, collate_fn=collate, batch_size=1)

In [24]:
#Divide in train, test, validation.
import torch
from torch.utils.data import DataLoader

def collate(graphs):
    """Turn a list of graphs into one batched graph plus per-graph labels.

    The leftover debug ``print(graphs)`` was removed: it ran for every batch
    the DataLoader produced and flooded the cell output.

    Args:
        graphs: Graphs exposing ``adj()``, as handed over by the DataLoader.

    Returns:
        Tuple ``(batched_graph, labels)``. ``labels`` holds ``adj()`` of every
        input graph, taken before any edge is removed. ``batched_graph`` is
        ``dgl.batch`` applied to the graphs after ``random_remove_edges``
        (default probability).
    """
    labels = [graph.adj() for graph in graphs]
    removed_edges = [random_remove_edges(graph) for graph in graphs]
    batched_graph = dgl.batch(removed_edges)
    return batched_graph, labels

In [25]:
# Small-scale run: keep only the first 10 letters of each split and convert
# them with read_letter's default settings.
trainset, validset, testset = read_npz_files(filename)
Train_Graphs, Valid_Graphs, Test_Graphs = (
    [read_letter(letter) for letter in split[:10]]
    for split in (trainset, validset, testset)
)

# batch_size (32) exceeds the 10 graphs kept per split, so each loader yields
# at most a single batch; only the training loader shuffles.
train_loader = DataLoader(
    Train_Graphs, collate_fn=collate, batch_size=32, shuffle=True
)
val_loader = DataLoader(Valid_Graphs, collate_fn=collate, batch_size=32)
test_loader = DataLoader(Test_Graphs, collate_fn=collate, batch_size=32)



In [6]:
from VGAE import VGAE
from Data_Loaders import loaders
from pathlib import Path
# Directory that holds the dataset, and the archive path derived from it.
# NOTE(review): BASE_DIR is an absolute, machine-specific path; the trailing
# comment suggests Path().absolute() as a portable alternative.
BASE_DIR = Path("/home/nbiescas/Desktop/CVC/CVC_internship") #or Path().absolute()
DATA_PATH = BASE_DIR / "omniglot.npz"

In [16]:
# Build the model and the three data loaders from the local VGAE and
# Data_Loaders modules (not visible in this notebook).
# NOTE(review): the arguments (2, 10) are presumably the input feature size and
# the latent size, and .double() presumably casts the parameters to float64 to
# match float64 node features — confirm against VGAE and loaders.
model = VGAE(2, 10).double()
train_loader, val_loader, test_loader = loaders(DATA_PATH)

In [20]:
# Take the first item yielded by train_loader; keep its first element as
# `graph` and discard the second.
graph, _ = next(iter(train_loader))

In [23]:
# Display the edge count of the graph fetched from train_loader.
graph.number_of_edges()

35

In [24]:
# Sum the edge counts of every graph yielded by one full pass over train_loader.
# Note that the loop rebinds the notebook-level name `graph` on every iteration.
total_edges = 0
for graph, _ in train_loader:
    total_edges += graph.number_of_edges()

In [25]:
# Display the accumulated edge count.
total_edges

648

In [10]:
import torch

In [15]:
# Scratch check of element-wise tensor comparison: a and b are identical 2x3
# tensors, so a == b is True everywhere and the sum counts all 6 entries.
a = torch.tensor([[1, 0, 1], [0, 0, 1]])
b = torch.tensor([[1, 0, 1], [0, 0, 1]])
torch.sum(a == b)

tensor(6)

In [43]:
# Take the first item yielded by train_loader and unpack both elements.
# NOTE(review): the second element is presumably the adjacency label(s) built
# by the loader's collate function — confirm which train_loader is bound here.
graph, adjacenc = next(iter(train_loader))

In [None]:
# Call the model on the fetched graph, using its stored node features as input.
model(graph, graph.ndata['feat'])

In [33]:
# NOTE(review): `train` is not defined in the visible cells, and the recorded
# output of this call is a RuntimeError about a Float/Double dtype mismatch.
train(train_loader, val_loader, model)

RuntimeError: expected scalar type Float but found Double

In [38]:
# Call the model with a separately bound `features` value.
# NOTE(review): which `features` is bound here depends on cell execution order
# (execution counts are non-sequential) — confirm the input before relying on
# the recorded output.
model(graph, features)


(tensor([[1., 0., 0., 1., 1., 1., 0., 1., 0.],
         [0., 1., 0., 1., 1., 1., 1., 0., 0.],
         [0., 0., 1., 0., 0., 0., 1., 0., 1.],
         [1., 1., 0., 1., 1., 1., 0., 1., 0.],
         [1., 1., 0., 1., 1., 1., 0., 1., 0.],
         [1., 1., 0., 1., 1., 1., 0., 1., 0.],
         [0., 1., 1., 0., 0., 0., 1., 0., 1.],
         [1., 0., 0., 1., 1., 1., 0., 1., 0.],
         [0., 0., 1., 0., 0., 0., 1., 0., 1.]], dtype=torch.float64,
        grad_fn=<SigmoidBackward0>),
 tensor([[ -28.2817,  -23.6174,  -56.5341,   32.1436,   93.1186,  -60.2646,
            -3.8599,   34.5649,   21.1156,   28.5343],
         [ -59.9945,  -50.1001, -119.9269,   68.1869,  197.5345, -127.8404,
            -8.1882,   73.3232,   44.7930,   60.5305],
         [  -6.3851,  -38.4946,  -25.0240,    4.4134,    3.8723,  -16.2399,
             0.6061,   30.0871,   24.2464,  -43.6818],
         [ -16.4867,  -90.5485,  -61.3426,   12.1542,   14.5739,  -41.2296,
             1.1709,   71.7420,   57.4091,  -99.4