In [1]:
import dgl
import torch as th

Using backend: pytorch


In [2]:
# Each entry maps a canonical edge type (source type, edge type, destination type)
# to a pair of ID tensors (u, v), one element per edge.
drug_drug_edges = (th.tensor([0, 1]), th.tensor([1, 2]))
drug_gene_edges = (th.tensor([0, 1]), th.tensor([2, 3]))
drug_disease_edges = (th.tensor([1]), th.tensor([2]))

graph_data = {
    ('drug', 'interacts', 'drug'): drug_drug_edges,
    ('drug', 'interacts', 'gene'): drug_gene_edges,
    ('drug', 'treats', 'disease'): drug_disease_edges,
}

In [3]:
graph_data

{('drug', 'interacts', 'drug'): (tensor([0, 1]), tensor([1, 2])),
 ('drug', 'interacts', 'gene'): (tensor([0, 1]), tensor([2, 3])),
 ('drug', 'treats', 'disease'): (tensor([1]), tensor([2]))}

In [4]:
g = dgl.heterograph(graph_data)

In [5]:
g.ntypes

['disease', 'drug', 'gene']

In [6]:
g.etypes

['interacts', 'interacts', 'treats']

In [7]:
g.canonical_etypes

[('drug', 'interacts', 'drug'),
 ('drug', 'interacts', 'gene'),
 ('drug', 'treats', 'disease')]

**A homogeneous graph**

dgl.heterograph({('node_type', 'edge_type', 'node_type'): (u, v)})

**A bipartite graph**

dgl.heterograph({('source_type', 'edge_type', 'destination_type'): (u, v)})

In [10]:
g

Graph(num_nodes={'disease': 3, 'drug': 3, 'gene': 4},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2, ('drug', 'treats', 'disease'): 1},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts'), ('drug', 'disease', 'treats')])

In [11]:
g.num_nodes()

10

In [12]:
g.metagraph().edges()

OutMultiEdgeDataView([('drug', 'drug'), ('drug', 'gene'), ('drug', 'disease')])

In [13]:
# Get the number of all nodes in the graph
g.num_nodes()

10

In [14]:
# Get the number of drug nodes
g.num_nodes('drug')

3

In [15]:
# Node IDs are numbered separately for each node type, so "all nodes" is not
# well-defined on a graph with several node types: DGL raises a DGLError unless
# a node type is given. The error is caught and printed so that the notebook
# still runs from top to bottom.
try:
    g.nodes()
except dgl.DGLError as err:
    print('DGLError:', err)

DGLError: Node type name must be specified if there are more than one node types.

In [16]:
 g.nodes('drug')

tensor([0, 1, 2])

In [18]:
g.nodes('gene')

tensor([0, 1, 2, 3])

To set/get features for a specific node/edge type, DGL provides two new types of syntax: `g.nodes['node_type'].data['feat_name']` and `g.edges['edge_type'].data['feat_name']`.

In [19]:
# Set/get feature 'hv' for nodes of type 'drug'
g.nodes['drug'].data['hv'] = th.ones(3, 1)

In [20]:
g.nodes['drug'].data['hv']

tensor([[1.],
        [1.],
        [1.]])

Set/get feature 'he' for edges of type 'treats'.

In [21]:
g.edges['treats'].data['he'] = th.zeros(1, 1)

In [22]:
g.edges['treats'].data['he']

tensor([[0.]])

<h4> Edge type Subgraph </h4>
Retain relations ('drug', 'interacts', 'drug') and ('drug', 'treats', 'disease'). 
All nodes for 'drug' and 'disease' will be retained

In [24]:
# Relations to keep in the subgraph, given as canonical edge types.
kept_relations = [
    ('drug', 'interacts', 'drug'),
    ('drug', 'treats', 'disease'),
]
eg = dgl.edge_type_subgraph(g, kept_relations)

In [25]:
eg

Graph(num_nodes={'disease': 3, 'drug': 3},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'treats', 'disease'): 1},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'disease', 'treats')])

In [26]:
eg.nodes['drug'].data['hv']

tensor([[1.],
        [1.],
        [1.]])

Heterographs provide a clean interface for managing nodes/edges of different types and their associated features. 
This is particularly helpful when:

- The features for nodes/edges of different types have different data types or sizes.

- We want to apply different operations to nodes/edges of different types.
    
If the above conditions do not hold and one does not want to distinguish node/edge types in modeling, then DGL allows converting a heterogeneous graph to a homogeneous graph with the `dgl.to_homogeneous()` API. It proceeds as follows:

1. Relabels nodes/edges of all types using consecutive integers starting from 0.

2. Merges the features across node/edge types specified by the user.

In [27]:
# Rebuild `g` as a smaller heterograph with only the drug-drug and drug-disease
# relations; it is used by the to_homogeneous examples that follow.
# (The pasted '...' continuation prompts were removed: they only worked because
# IPython strips them, and are not valid in a plain Python script.)
g = dgl.heterograph({
    ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
    ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])),
})

In [28]:
g

Graph(num_nodes={'disease': 3, 'drug': 3},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'treats', 'disease'): 1},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'disease', 'treats')])

In [30]:
g.nodes['drug'].data['hv'] = th.zeros(3, 1)
g.nodes['disease'].data['hv'] = th.ones(3, 1)
g.edges['interacts'].data['he'] = th.zeros(2, 1)
g.edges['treats'].data['he'] = th.zeros(1, 2)

In [31]:
# By default, it does not merge any features
hg = dgl.to_homogeneous(g)
'hv' in hg.ndata

False

In [34]:
hg.ndata

{'_TYPE': tensor([0, 0, 0, 1, 1, 1]), '_ID': tensor([0, 1, 2, 0, 1, 2])}

**Copy edge features**

When copying features, `dgl.to_homogeneous` expects each feature to have the same size and dtype across node/edge types.

In [35]:
# 'he' was set with shape (2, 1) on 'interacts' edges but (1, 2) on 'treats'
# edges, so the per-type columns cannot be concatenated and DGL raises a
# DGLError. The error is caught and printed so that the notebook still runs
# from top to bottom; `hg` keeps its previous value in that case.
try:
    hg = dgl.to_homogeneous(g, edata=['he'])
except dgl.DGLError as err:
    print('DGLError:', err)

DGLError: Cannot concatenate column he with shape Scheme(shape=(2,), dtype=torch.float32) and shape Scheme(shape=(1,), dtype=torch.float32)

In [36]:
# Copy node features
hg = dgl.to_homogeneous(g, ndata=['hv'])
hg.ndata['hv']

tensor([[1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.]])

<h3> Second Example from old code </h3>

In [37]:
import dgl
import numpy as np

# Two relations, '+1' and '-1', both going from 'user' nodes to 'movie' nodes.
# Each value is a (source IDs, destination IDs) pair of NumPy arrays.
plus_one_edges = (np.array([0, 0, 1]), np.array([0, 1, 0]))
minus_one_edges = (np.array([2]), np.array([1]))

ratings = dgl.heterograph({
    ('user', '+1', 'movie'): plus_one_edges,
    ('user', '-1', 'movie'): minus_one_edges,
})

In [38]:
ratings

Graph(num_nodes={'movie': 2, 'user': 3},
      num_edges={('user', '+1', 'movie'): 3, ('user', '-1', 'movie'): 1},
      metagraph=[('user', 'movie', '+1'), ('user', 'movie', '-1')])

 Manipulating heterograph

You can create a more realistic heterograph using the ACM dataset. To do this, first 
download the dataset as follows:

In [44]:
import os
import scipy.io
import urllib.request

data_url = 'https://data.dgl.ai/dataset/ACM.mat'
data_file_path = 'ACM.mat'

# Download the dataset only when no local copy exists, so the notebook works on
# a fresh checkout without re-fetching the file on every run.
if not os.path.exists(data_file_path):
    urllib.request.urlretrieve(data_url, data_file_path)

data = scipy.io.loadmat(data_file_path)
print(list(data.keys()))

['__header__', '__version__', '__globals__', 'TvsP', 'PvsA', 'PvsV', 'AvsF', 'VvsC', 'PvsL', 'PvsC', 'A', 'C', 'F', 'L', 'P', 'T', 'V', 'PvsT', 'CNormPvsA', 'RNormPvsA', 'CNormPvsC', 'RNormPvsC', 'CNormPvsT', 'RNormPvsT', 'CNormPvsV', 'RNormPvsV', 'CNormVvsC', 'RNormVvsC', 'CNormAvsF', 'RNormAvsF', 'CNormPvsL', 'RNormPvsL', 'stopwords', 'nPvsT', 'nT', 'CNormnPvsT', 'RNormnPvsT', 'nnPvsT', 'nnT', 'CNormnnPvsT', 'RNormnnPvsT', 'PvsP', 'CNormPvsP', 'RNormPvsP']


The dataset stores node information by type: ``P`` for paper, ``A`` for author, ``C`` for conference, ``L`` for subject code, and so on. The relationships are stored as SciPy sparse matrices under keys of the form ``XvsY``, where ``X`` and ``Y`` can be any of the node type codes.

The following code prints out some statistics about the paper-author relationships.

In [45]:
# The paper-by-author matrix: rows are papers, columns are authors, and each
# stored entry is one paper-author link.
paper_author = data['PvsA']
num_papers, num_authors = paper_author.shape

print(type(paper_author))
print('#Papers:', num_papers)
print('#Authors:', num_authors)
print('#Links:', paper_author.nnz)

<class 'scipy.sparse.csc.csc_matrix'>
#Papers: 12499
#Authors: 17431
#Links: 37055


In [47]:
# Converting this SciPy matrix to a heterograph in DGL is straightforward.

pa_g = dgl.heterograph({('paper', 'written-by', 'author') : data['PvsA'].nonzero()})

In [48]:
pa_g

Graph(num_nodes={'author': 17431, 'paper': 12499},
      num_edges={('paper', 'written-by', 'author'): 37055},
      metagraph=[('paper', 'author', 'written-by')])

In [49]:
# You can easily print out the type names and other structural information.

print('Node types:', pa_g.ntypes)
print('Edge types:', pa_g.etypes)
print('Canonical edge types:', pa_g.canonical_etypes)

Node types: ['author', 'paper']
Edge types: ['written-by']
Canonical edge types: [('paper', 'written-by', 'author')]


In [52]:
# Nodes and edges are assigned integer IDs starting from zero and each type has its own counting.
# To distinguish the nodes and edges of different types, specify the type name as the argument.
print(pa_g.number_of_nodes('paper'))

12499


In [53]:
# Canonical edge type name can be shortened to only one edge type name if it is
# uniquely distinguishable.
print(pa_g.number_of_edges(('paper', 'written-by', 'author')))
print(pa_g.number_of_edges('written-by'))
print(pa_g.successors(1, etype='written-by'))  # get the authors that write paper #1

37055
37055
tensor([3532, 6421, 8516, 8560])


In [54]:
# Type name argument could be omitted whenever the behavior is unambiguous.
print(pa_g.number_of_edges())  # Only one edge type, the edge type argument could be omitted

37055


In [55]:
# A heterograph with a single node type and a single edge type is simply a
# homogeneous graph.

# The paper-citing-paper graph has 'paper' on both ends, so it is homogeneous.
citation_matrix = data['PvsP']
pp_g = dgl.heterograph({('paper', 'citing', 'paper'): citation_matrix.nonzero()})
# Equivalent, shorter constructor for a homogeneous graph.
pp_g = dgl.from_scipy(citation_matrix)

# With only one node type and one edge type, every ntype/etype argument can be
# left out.
for stat in (pp_g.number_of_nodes(), pp_g.number_of_edges(), pp_g.successors(3)):
    print(stat)

12499
30789
tensor([1361, 2624, 8670, 9845])


In [56]:
dir(pp_g)

['__class__',
 '__contains__',
 '__copy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_batch_num_edges',
 '_batch_num_nodes',
 '_canonical_etypes',
 '_dsttypes_invmap',
 '_edge_frames',
 '_etype2canonical',
 '_etypes',
 '_etypes_invmap',
 '_find_etypes',
 '_get_e_repr',
 '_get_n_repr',
 '_graph',
 '_idtype_str',
 '_init',
 '_is_unibipartite',
 '_node_frames',
 '_ntypes',
 '_pop_e_repr',
 '_pop_n_repr',
 '_reset_cached_info',
 '_set_e_repr',
 '_set_n_repr',
 '_srctypes_invmap',
 'add_edge',
 'add_edges',
 'add_nodes',
 'add_self_loop',
 'adj',
 'adjacency_matrix',
 'adjacency_matrix_scipy',
 'all_edges',
 'apply_edges',
 'apply_node

In [60]:
pp_g.srcnodes()

tensor([    0,     1,     2,  ..., 12496, 12497, 12498])

In [61]:
pp_g.edges()

(tensor([ 2193,  2250,  2256,  ...,  8351,  8362, 12497]),
 tensor([    0,     0,     0,  ..., 12498, 12498, 12498]))

In [75]:
pp_g.predecessors(0)

tensor([2193, 2250, 2256, 2710, 2778, 3111, 3114, 3853, 3889, 3937, 4204, 4242,
        5479, 5496])

In [80]:
pp_g.metagraph()

<networkx.classes.multidigraph.MultiDiGraph at 0x238771b2888>

In [76]:
# Build a subset of the ACM graph from the paper-author, paper-paper and
# paper-subject matrices. Every relation is added together with its reverse
# (taken from the transposed matrix), which the later sections rely on.
paper_author = data['PvsA']
paper_paper = data['PvsP']
paper_subject = data['PvsL']

G = dgl.heterograph({
    ('paper', 'written-by', 'author'): paper_author.nonzero(),
    ('author', 'writing', 'paper'): paper_author.transpose().nonzero(),
    ('paper', 'citing', 'paper'): paper_paper.nonzero(),
    ('paper', 'cited', 'paper'): paper_paper.transpose().nonzero(),
    ('paper', 'is-about', 'subject'): paper_subject.nonzero(),
    ('subject', 'has', 'paper'): paper_subject.transpose().nonzero(),
})
print(G)

Graph(num_nodes={'author': 17431, 'paper': 12499, 'subject': 73},
      num_edges={('author', 'writing', 'paper'): 37055, ('paper', 'cited', 'paper'): 30789, ('paper', 'citing', 'paper'): 30789, ('paper', 'is-about', 'subject'): 12499, ('paper', 'written-by', 'author'): 37055, ('subject', 'has', 'paper'): 12499},
      metagraph=[('author', 'paper', 'writing'), ('paper', 'paper', 'cited'), ('paper', 'paper', 'citing'), ('paper', 'subject', 'is-about'), ('paper', 'author', 'written-by'), ('subject', 'paper', 'has')])


In [78]:
dir(G)

['__class__',
 '__contains__',
 '__copy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_batch_num_edges',
 '_batch_num_nodes',
 '_canonical_etypes',
 '_dsttypes_invmap',
 '_edge_frames',
 '_etype2canonical',
 '_etypes',
 '_etypes_invmap',
 '_find_etypes',
 '_get_e_repr',
 '_get_n_repr',
 '_graph',
 '_idtype_str',
 '_init',
 '_is_unibipartite',
 '_node_frames',
 '_ntypes',
 '_pop_e_repr',
 '_pop_n_repr',
 '_reset_cached_info',
 '_set_e_repr',
 '_set_n_repr',
 '_srctypes_invmap',
 'add_edge',
 'add_edges',
 'add_nodes',
 'add_self_loop',
 'adj',
 'adjacency_matrix',
 'adjacency_matrix_scipy',
 'all_edges',
 'apply_edges',
 'apply_node

In [81]:
import pygraphviz as pgv

ModuleNotFoundError: No module named 'pygraphviz'

In [None]:
# Learning tasks associated with heterographs
# -------------------------------------------
# Some of the typical learning tasks that involve heterographs include:
#
# * *Node classification and regression* to predict the class of each node or
#   estimate a value associated with it.
#
# * *Link prediction* to predict if there is an edge of a certain
#   type between a pair of nodes, or predict which other nodes a particular
#   node is connected with (and optionally the edge types of such connections).
#
# * *Graph classification/regression* to assign an entire
#   heterograph into one of the target classes or to estimate a numerical
#   value associated with it.
#
# In this tutorial, we designed a simple example for the first task.
#
# A semi-supervised node classification example

# Our goal is to predict the publishing conference of a paper using the ACM
# academic graph we just created. To further simplify the task, we only focus
# on papers published in three conferences: *KDD*, *ICML*, and *VLDB*. All
# the other papers are not labeled, making it a semi-supervised setting.
#
# The following code extracts those papers from the raw dataset and prepares 
# the training, validation, testing split.

In [82]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

pvc = data['PvsC'].tocsr()
# find all papers published in KDD, ICML, VLDB
c_selected = [0, 11, 13]  # KDD, ICML, VLDB
p_selected = pvc[:, c_selected].tocoo()
# generate labels
# `pvc.indices` is the CSR matrix's own column-index array. Relabel a copy:
# writing into the original would silently change which conference column each
# stored entry of `pvc` belongs to.
# NOTE(review): using `indices` as per-paper labels assumes exactly one stored
# entry per paper row (12499 rows, 12499 stored elements) -- confirm.
labels = pvc.indices.copy()
# Map conference columns 11 and 13 to class ids 1 and 2 (column 0 is already
# class 0). Other papers keep their raw column index; they are never part of
# the index sets drawn from `p_selected`.
labels[labels == 11] = 1
labels[labels == 13] = 2
labels = torch.tensor(labels).long()

In [96]:
# generate train/val/test split
# Row indices of `p_selected` are the IDs of the papers in the selected
# conferences; they are shuffled and cut into 800 train / 100 val / rest test.
pid = p_selected.row
# A fixed seed keeps the split, and therefore the reported accuracies, the same
# on every Restart & Run All.
split_rng = np.random.RandomState(0)
shuffle = split_rng.permutation(pid)
train_idx = torch.tensor(shuffle[0:800]).long()
val_idx = torch.tensor(shuffle[800:900]).long()
test_idx = torch.tensor(shuffle[900:]).long()

In [84]:
labels.shape

torch.Size([12499])

In [85]:
dir(pvc)

['__abs__',
 '__add__',
 '__array_priority__',
 '__bool__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__idiv__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__pow__',
 '__radd__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmatmul__',
 '__rmul__',
 '__round__',
 '__rsub__',
 '__rtruediv__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '_add_dense',
 '_add_sparse',
 '_arg_min_or_max',
 '_arg_min_or_max_axis',
 '_asindices',
 '_binopt',
 '_cs_matrix__get_has_canonical_format',
 '_cs_matrix__get_sorted',
 '_cs_matrix__set_has_canonical_format',
 '_cs_matr

In [87]:
pvc

<12499x14 sparse matrix of type '<class 'numpy.float64'>'
	with 12499 stored elements in Compressed Sparse Row format>

In [88]:
p_selected1 = pvc.tocoo()

In [90]:
p_selected

<12499x3 sparse matrix of type '<class 'numpy.float64'>'
	with 2094 stored elements in COOrdinate format>

In [95]:
pvc.todok()

<12499x14 sparse matrix of type '<class 'numpy.float64'>'
	with 12499 stored elements in Dictionary Of Keys format>

In [62]:
pp_g.dstnodes()

tensor([    0,     1,     2,  ..., 12496, 12497, 12498])

In [62]:
pp_g.dstnodes()

tensor([    0,     1,     2,  ..., 12496, 12497, 12498])

In [62]:
pp_g.dstnodes()

tensor([    0,     1,     2,  ..., 12496, 12497, 12498])

In [104]:
test_idx

tensor([ 5998, 10805,  8518,  ...,  8500, 10745,  7262])

<h3>Relational-GCN on heterograph</h3>

We use [Relational-GCN](https://arxiv.org/abs/1703.06103) to learn the
representation of nodes in the graph. Its message-passing equation is as
follows:

$$h_i^{(l+1)} = \sigma\left(\sum_{r\in \mathcal{R}}
\sum_{j\in\mathcal{N}_r(i)}W_r^{(l)}h_j^{(l)}\right)$$

 Breaking down the equation, you see that there are two parts in the
 computation.

 (i) Message computation and aggregation within each relation $r$

 (ii) Reduction that merges the results from multiple relationships

 Following this intuition, perform message passing on a heterograph in
 two steps.

 (i) Per-edge-type message passing

 (ii) Type wise reduction

In [98]:
import dgl.function as fn

class HeteroRGCNLayer(nn.Module):
    """One relational-GCN layer over a heterograph.

    Every relation (edge type name) owns a linear transform ``W_r``. Messages
    are averaged within each relation and the per-relation results are summed
    for each destination node type.

    Parameters
    ----------
    in_size : int
        Size of the input node features.
    out_size : int
        Size of the output node features.
    etypes : iterable of str
        Edge type names; one ``nn.Linear(in_size, out_size)`` is created per name.
    """

    def __init__(self, in_size, out_size, etypes):
        super(HeteroRGCNLayer, self).__init__()
        # W_r for each relation
        self.weight = nn.ModuleDict({
            name: nn.Linear(in_size, out_size) for name in etypes
        })

    def forward(self, G, feat_dict):
        """Run one round of per-relation message passing.

        Parameters
        ----------
        G : heterograph
            Graph to pass messages on.
        feat_dict : mapping of str to tensor
            Input node features, indexed by node type.

        Returns
        -------
        dict of str to tensor
            Updated feature ``'h'`` for every node type in ``G.ntypes``.
        """
        # Work inside a local scope so the temporary 'Wh_*' and 'h' features
        # written below are dropped on exit instead of being left on the
        # caller's graph, where a later call could pick up stale values.
        with G.local_scope():
            funcs = {}
            for srctype, etype, dsttype in G.canonical_etypes:
                # Compute W_r * h
                Wh = self.weight[etype](feat_dict[srctype])
                # Save it in graph for message passing
                G.nodes[srctype].data['Wh_%s' % etype] = Wh
                # Per-relation (message_func, reduce_func). All relations write
                # to the same destination feature 'h', which is what the
                # type-wise reducer below combines.
                funcs[etype] = (fn.copy_u('Wh_%s' % etype, 'm'), fn.mean('m', 'h'))
            # Trigger message passing of multiple types.
            # The first argument is the message passing functions for each
            # relation. The second one is the type wise reducer, could be
            # "sum", "max", "min", "mean", "stack"
            G.multi_update_all(funcs, 'sum')
            # Read the results before the scope closes; the returned tensors
            # stay valid afterwards.
            return {ntype: G.nodes[ntype].data['h'] for ntype in G.ntypes}

In [99]:
# Create a simple GNN by stacking two ``HeteroRGCNLayer``. Since the
# nodes do not have input features, make their embeddings trainable.

class HeteroRGCN(nn.Module):
    """Two stacked ``HeteroRGCNLayer`` modules on top of learnable node embeddings.

    Parameters
    ----------
    G : heterograph
        Supplies the node types, per-type node counts and edge type names.
    in_size : int
        Size of the learnable embedding of every node.
    hidden_size : int
        Output size of the first layer.
    out_size : int
        Output size of the second layer.
    """

    def __init__(self, G, in_size, hidden_size, out_size):
        super().__init__()
        # No input features are given, so one trainable embedding table per
        # node type stands in for them.
        tables = {}
        for ntype in G.ntypes:
            tables[ntype] = nn.Parameter(torch.Tensor(G.number_of_nodes(ntype), in_size))
        for table in tables.values():
            nn.init.xavier_uniform_(table)
        self.embed = nn.ParameterDict(tables)
        # The two relational layers.
        self.layer1 = HeteroRGCNLayer(in_size, hidden_size, G.etypes)
        self.layer2 = HeteroRGCNLayer(hidden_size, out_size, G.etypes)

    def forward(self, G):
        """Return the second-layer output for nodes of type ``'paper'``."""
        hidden = self.layer1(G, self.embed)
        activated = {}
        for ntype, feat in hidden.items():
            activated[ntype] = F.leaky_relu(feat)
        output = self.layer2(G, activated)
        # Only the paper logits are needed.
        return output['paper']


In [106]:
# Train and evaluate
# ~~~~~~~~~~~~~~~~~~
# Train and evaluate this network.

# Create the model. The output has three logits for three classes.
model = HeteroRGCN(G, 10, 10, 3)

opt = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Start the running bests as 0-dim tensors rather than the int 0: the progress
# line calls `.item()` on them, which would raise AttributeError on an int if
# the validation accuracy of the first epoch were exactly 0.
best_val_acc = torch.tensor(0.0)
best_test_acc = torch.tensor(0.0)

for epoch in range(100):
    logits = model(G)
    # The loss is computed only for labeled nodes.
    loss = F.cross_entropy(logits[train_idx], labels[train_idx])

    pred = logits.argmax(1)
    train_acc = (pred[train_idx] == labels[train_idx]).float().mean()
    val_acc = (pred[val_idx] == labels[val_idx]).float().mean()
    test_acc = (pred[test_idx] == labels[test_idx]).float().mean()

    # Model selection on validation accuracy: remember the test accuracy seen
    # at the epoch with the best validation accuracy so far.
    if best_val_acc < val_acc:
        best_val_acc = val_acc
        best_test_acc = test_acc

    opt.zero_grad()
    loss.backward()
    opt.step()

    if epoch % 5 == 0:
        print('Loss %.4f, Train Acc %.4f, Val Acc %.4f (Best %.4f), Test Acc %.4f (Best %.4f)' % (
            loss.item(),
            train_acc.item(),
            val_acc.item(),
            best_val_acc.item(),
            test_acc.item(),
            best_test_acc.item(),
        ))


Loss 1.1476, Train Acc 0.2825, Val Acc 0.2600 (Best 0.2600), Test Acc 0.2764 (Best 0.2764)
Loss 0.9114, Train Acc 0.5075, Val Acc 0.4700 (Best 0.5100), Test Acc 0.5092 (Best 0.5544)
Loss 0.7546, Train Acc 0.5512, Val Acc 0.4700 (Best 0.5100), Test Acc 0.5117 (Best 0.5544)
Loss 0.5548, Train Acc 0.7550, Val Acc 0.6400 (Best 0.6400), Test Acc 0.6759 (Best 0.6759)
Loss 0.3889, Train Acc 0.9137, Val Acc 0.6600 (Best 0.6600), Test Acc 0.7127 (Best 0.7127)
Loss 0.2679, Train Acc 0.9613, Val Acc 0.7100 (Best 0.7100), Test Acc 0.7680 (Best 0.7680)
Loss 0.1778, Train Acc 0.9862, Val Acc 0.7500 (Best 0.7500), Test Acc 0.7688 (Best 0.7688)
Loss 0.1189, Train Acc 0.9937, Val Acc 0.7500 (Best 0.7600), Test Acc 0.7730 (Best 0.7714)
Loss 0.0821, Train Acc 0.9950, Val Acc 0.7400 (Best 0.7600), Test Acc 0.7596 (Best 0.7714)
Loss 0.0587, Train Acc 0.9987, Val Acc 0.7200 (Best 0.7600), Test Acc 0.7588 (Best 0.7714)
Loss 0.0441, Train Acc 1.0000, Val Acc 0.7300 (Best 0.7600), Test Acc 0.7613 (Best 0.7714)