In [1]:
import numpy as np
import scipy.sparse as sp

In [2]:
path_dimenet_data = '/home/kotobiam/dimenet/data/qm9_eV.npz'

In [3]:
data = np.load(path_dimenet_data, allow_pickle=True)

In [25]:
for k in data:
    print(k)

R
N
Z
id
A
B
C
mu
alpha
homo
lumo
gap
r2
zpve
U0
U
H
G
Cv
meta


In [85]:
N = data['N'][[0, 1, 5]]
N_cumsum = np.concatenate([[0], np.cumsum(data['N'])])
Z = np.zeros(np.sum(N), dtype=np.int32)
R = np.zeros([np.sum(N), 3], dtype=np.float32)
adj_matrices=[]

In [86]:
def bmat_fast(mats):
    new_data = np.concatenate([mat.data for mat in mats])

    ind_offset = np.zeros(1 + len(mats))

    ind_offset[1:] = np.cumsum([mat.shape[0] for mat in mats])

    new_indices = np.concatenate(
        [mats[i].indices + ind_offset[i] for i in range(len(mats))]
    )

    indptr_offset = np.zeros(1 + len(mats))
    indptr_offset[1:] = np.cumsum([mat.nnz for mat in mats])
    new_indptr = np.concatenate(
        [adj_matrices[i].indptr[i >= 1:] + indptr_offset[i] for i in range(len(mats))]
    )
  
    return sp.csr_matrix((new_data, new_indices, new_indptr))

In [87]:
nend=0
for k, i in enumerate([0, 1, 5]):
    n = N[k]
    nstart=nend
    nend=nstart+n
    
    s, e = (
       N_cumsum[i],
       N_cumsum[i+1]
    )
    
    Z[nstart:nend] = data['Z'][s:e]
    positions = data['R'][s:e]
    R[nstart:nend] = positions
    
    Dij = np.linalg.norm(positions[:, None, :] - positions[None, :, :], axis=-1)
    
    adj_matrices.append(sp.csr_matrix(Dij <= 3))

    adj_matrices[-1] -= sp.eye(n, dtype=np.bool)
    
adj_matrix = bmat_fast(adj_matrices)

idx_t, idx_s = adj_matrix.nonzero()

edge_ids = sp.csr_matrix(
    (np.arange(adj_matrix.nnz), (idx_t, idx_s)), 
    shape=adj_matrix.shape,
    dtype='int32'
)



In [92]:
edges

array([[ 0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
         4,  4,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  8,  8,  8,
         9,  9,  9, 10, 10, 10, 11, 11, 11, 12, 12, 12],
       [ 1,  2,  3,  4,  0,  2,  3,  4,  0,  1,  3,  4,  0,  1,  2,  4,
         0,  1,  2,  3,  6,  7,  8,  5,  7,  8,  5,  6,  8,  5,  6,  7,
        10, 11, 12,  9, 11, 12,  9, 10, 12,  9, 10, 11]], dtype=int32)

In [91]:
edges = np.stack([idx_t, idx_s], axis=0)

In [93]:
mask = edges[0] < edges[1]

In [94]:
edges = edges[:, mask]

In [97]:
edges

array([[ 0,  0,  0,  0,  1,  1,  1,  2,  2,  3,  5,  5,  5,  6,  6,  7,
         9,  9,  9, 10, 10, 11],
       [ 1,  2,  3,  4,  2,  3,  4,  3,  4,  4,  6,  7,  8,  7,  8,  8,
        10, 11, 12, 11, 12, 12]], dtype=int32)

In [96]:
edges[::-1]

array([[ 1,  2,  3,  4,  2,  3,  4,  3,  4,  4,  6,  7,  8,  7,  8,  8,
        10, 11, 12, 11, 12, 12],
       [ 0,  0,  0,  0,  1,  1,  1,  2,  2,  3,  5,  5,  5,  6,  6,  7,
         9,  9,  9, 10, 10, 11]], dtype=int32)

In [98]:
idx_t, idx_s = np.concatenate([edges, edges[::-1]], axis=-1)

In [99]:
idx_t

array([ 0,  0,  0,  0,  1,  1,  1,  2,  2,  3,  5,  5,  5,  6,  6,  7,  9,
        9,  9, 10, 10, 11,  1,  2,  3,  4,  2,  3,  4,  3,  4,  4,  6,  7,
        8,  7,  8,  8, 10, 11, 12, 11, 12, 12], dtype=int32)

In [100]:
idx_s

array([ 1,  2,  3,  4,  2,  3,  4,  3,  4,  4,  6,  7,  8,  7,  8,  8, 10,
       11, 12, 11, 12, 12,  0,  0,  0,  0,  1,  1,  1,  2,  2,  3,  5,  5,
        5,  6,  6,  7,  9,  9,  9, 10, 10, 11], dtype=int32)

In [45]:
N_undir_edges = int(len(idx_s) / 2)

In [47]:
ind = np.arange(N_undir_edges)

In [101]:
id_swap = np.concatenate([ind + N_undir_edges, ind])

In [102]:
id_swap

array([22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
       39, 40, 41, 42, 43,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
       12, 13, 14, 15, 16, 17, 18, 19, 20, 21])

In [23]:
edge_ids[idx_t]

<44x13 sparse matrix of type '<class 'numpy.int32'>'
	with 152 stored elements in Compressed Sparse Row format>

In [103]:
id3_ba = edge_ids[idx_s].data.astype('int32').flatten()
id3_ca = edge_ids[idx_s].tocoo().row.astype('int32').flatten()

In [104]:
id3_i = idx_t[id3_ca]
id3_k = idx_s[id3_ba]

In [105]:
mask = id3_i != id3_k

In [107]:
id3_ba = id3_ba[mask]
id3_ca = id3_ca[mask]

In [109]:
id3_ca = id_swap[id3_ca]

In [111]:
idx_sorted = np.argsort(id3_ca)

In [112]:
id3_ca[idx_sorted]

array([ 0,  0,  0,  1,  1,  1,  2,  2,  2,  3,  3,  3,  4,  4,  4,  5,  5,
        6,  6,  6,  7,  7,  7,  7,  8,  8,  9,  9,  9,  9, 10, 10, 10, 11,
       11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 15, 15, 15, 16, 16, 16,
       17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20, 21, 21, 21, 22, 22,
       22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26,
       26, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30,
       31, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36,
       36, 36, 37, 37, 37, 38, 38, 38, 40, 40, 40, 41, 41, 41, 42, 43, 43])

In [113]:
id3_ba[idx_sorted]

array([ 3,  2,  1,  0,  2,  3,  0,  1,  3,  0,  1,  2,  5,  6,  7,  4,  6,
        7,  5,  4,  8,  9, 10, 11, 10, 11, 12, 13, 14, 15, 20, 21, 22, 22,
       21, 20, 22, 21, 20, 23, 24, 25, 23, 24, 25, 26, 27, 28, 33, 32, 34,
       33, 34, 32, 32, 33, 34, 35, 36, 37, 35, 36, 37, 40, 38, 39,  4,  5,
        6,  7,  9, 11,  8, 10, 12, 13, 14, 15, 16, 17, 18, 19, 11,  9,  8,
       10, 12, 13, 14, 15, 19, 17, 16, 18, 12, 13, 14, 15, 16, 18, 17, 19,
       17, 16, 19, 18, 23, 24, 25, 26, 27, 28, 29, 30, 31, 28, 26, 27, 30,
       31, 29, 29, 30, 31, 35, 36, 37, 41, 42, 43, 39, 38, 40, 43, 42, 41],
      dtype=int32)

# Understanding sparse matrix

In [33]:
Dij <= 1

array([[ True,  True,  True],
       [ True,  True, False],
       [ True, False,  True]])

In [31]:
sp.csr_matrix(Dij <= 1).data

array([ True,  True,  True,  True,  True,  True,  True])

In [32]:
sp.csr_matrix(Dij <= 1).indices

array([0, 1, 2, 0, 1, 0, 2], dtype=int32)

In [12]:
sp.csr_matrix(Dij <= 1).indptr[i >= 1:]

array([3, 5, 7], dtype=int32)

In [11]:
sp.csr_matrix(Dij <= 1).nnz

7

In [31]:
import dgl 

edgeid_to_target, edgeid_to_source = adj_matrix.nonzero()
test_g = dgl.graph((edgeid_to_target, edgeid_to_source))

In [32]:
test_g

Graph(num_nodes=12, num_edges=38,
      ndata_schemes={}
      edata_schemes={})

In [26]:
sp.csr_matrix(np.arange(adj_matrix.nnz)).toarray()

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37]])

In [15]:
adj_matrices

new_data = np.concatenate([mat.data for mat in adj_matrices])

In [16]:
adj_matrices[0].toarray()

array([[False,  True,  True,  True,  True],
       [ True, False,  True,  True,  True],
       [ True,  True, False,  True,  True],
       [ True,  True,  True, False,  True],
       [ True,  True,  True,  True, False]])

In [17]:
test_mat = [[0. , 1.0919529 , 1.0919516 , 1.0919464], 
            [0. , 1.0919529 , 0 , 1.0919464],
           [0. , 1.0919529 , 1.0919516 , 1.0919464]]
test_mat = np.asarray(test_mat)
test_sp_mat = sp.csr_matrix(test_mat)

In [18]:
test_sp_mat.indices

array([1, 2, 3, 1, 3, 1, 2, 3], dtype=int32)

In [19]:
test_sp_mat.toarray()

array([[0.       , 1.0919529, 1.0919516, 1.0919464],
       [0.       , 1.0919529, 0.       , 1.0919464],
       [0.       , 1.0919529, 1.0919516, 1.0919464]])

In [211]:
test_sp_mat.indptr

array([0, 3, 5, 8], dtype=int32)

In [205]:
test_mat = np.asarray(test_mat)

In [207]:
test_mat.shape

(3, 4)

In [50]:
from ase import Atoms

In [45]:
Z

array([6, 1, 1, 1, 1, 7, 1, 1, 1, 8, 1, 1], dtype=int32)

In [51]:
type(Atoms(positions=R, numbers=Z))

ase.atoms.Atoms

In [63]:
np.savez('./test.npz', Z)

In [64]:
test = np.load('./test.npz')

In [65]:
for k in test:
    print(k)

arr_0


In [None]:
test_dict 

In [67]:
type(R)

numpy.ndarray