# Graph matrix subspace checking

In [1]:
import os
# Cap the number of threads MKL may use for the linear-algebra calls below.
# NOTE(review): presumably this must run before numpy is first imported for the
# limit to take effect - confirm. The two commented-out variables are the
# analogous knobs for numexpr and OpenMP.
os.environ["MKL_NUM_THREADS"] = "8" 
#os.environ["NUMEXPR_NUM_THREADS"] = "8" 
#os.environ["OMP_NUM_THREADS"] = "8" 

In [2]:
from tqdm import tqdm 
import numpy as np
import os
import datetime
from threadpoolctl import threadpool_limits

from graph_utils import *

In [3]:
def get_rank_and_nullspace(A):
    """Return the numerical rank of ``A`` and a basis of its null space.

    The rank is the number of singular values strictly greater than
    ``s_max * max(A.shape) * eps``; this is the default tolerance rule used
    by MATLAB and by ``numpy.linalg.matrix_rank``:
    https://numpy.org/doc/stable/reference/generated/numpy.linalg.matrix_rank.html

    Parameters
    ----------
    A : array_like, 2-D
        Matrix to analyse. Non-floating input (e.g. integers) is converted
        to float64 so that a machine epsilon is defined for it.

    Returns
    -------
    rank : integer
        Number of singular values above the tolerance.
    nullspace : numpy.ndarray, shape (A.shape[1], A.shape[1] - rank)
        Columns are the right singular vectors that belong to the
        singular values at or below the tolerance.
    """
    A = np.asarray(A)
    if not np.issubdtype(A.dtype, np.inexact):
        # np.finfo is undefined for integer/bool dtypes.
        A = A.astype(np.float64)

    _, singular_values, vh = np.linalg.svd(A)

    tol = singular_values.max() * max(A.shape) * np.finfo(A.dtype).eps
    # Strict '>' (as in numpy.linalg.matrix_rank): for an all-zero matrix
    # tol == 0, and '>=' would count every zero singular value as non-zero.
    rank = (singular_values > tol).sum()

    # Rows of vh from index `rank` onwards span the null space; transpose so
    # that the basis vectors are stored as columns.
    return rank, vh[rank:].T

In [4]:
def a_is_subspace_of_b(basis_a, basis_b):
    """Tell whether the column span of ``basis_a`` lies inside that of ``basis_b``.

    The check is rank based: if placing the columns of ``basis_a`` next to
    those of ``basis_b`` does not raise the numerical rank, ``basis_a``
    contributes no new direction.
    """
    rank_of_b = get_rank_and_nullspace(basis_b)[0]

    # Column-wise concatenation: both inputs store basis vectors as columns.
    side_by_side = np.hstack([basis_a, basis_b])
    rank_of_both = get_rank_and_nullspace(side_by_side)[0]

    return rank_of_b == rank_of_both

In [5]:
def find_chain_starts(basis_set):
    """Filter out entries whose subspace is contained in an already kept one.

    Parameters
    ----------
    basis_set : iterable of sequences
        Each item is indexable; ``item[1]`` is the sort key (the callers in
        this notebook store the matrix rank there) and ``item[2]`` is the
        basis matrix handed to ``a_is_subspace_of_b``.

    Returns
    -------
    list
        The kept items, in ascending order of ``item[1]``. An empty input
        yields an empty list. The input itself is not modified.
    """
    # Ascending item[1]. NOTE(review): with rank stored in item[1] and a common
    # number of columns, this visits the largest null spaces first - confirm.
    ordered = sorted(basis_set, key=lambda item: item[1])

    chain_starts = []
    for candidate in ordered:
        # Keep the candidate only if none of the entries kept so far contains it.
        # For the very first candidate there is nothing to compare against.
        contained = any(
            a_is_subspace_of_b(candidate[2], start[2]) for start in chain_starts
        )
        if not contained:
            chain_starts.append(candidate)

    return chain_starts

In [6]:
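# Online (incremental) counterpart of find_chain_starts: updates chain_starts in place.
# chain_starts must already be ordered by x[1] ascending, which the code below
# treats as descending number of columns of x[2] (the null-space dimension).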

def get_new_starts(chain_starts, new_basis):
    """Merge ``new_basis`` into ``chain_starts`` in place; returns ``None``.

    ``chain_starts`` and ``new_basis`` use the same item layout: index 2
    holds the basis matrix, and ``len(item[2][0])`` (its column count) is
    used as the subspace dimension. The list is expected to be ordered by
    that dimension, largest first.

    Steps:
      1. If some entry of at least the same dimension already contains the
         new subspace, nothing changes.
      2. Otherwise the new entry is inserted after all entries of at least
         its dimension.
      3. Every later entry that is contained in the new subspace is removed.
    """
    if not chain_starts:
        chain_starts.append(new_basis)
        return

    new_space = new_basis[2]
    new_dim = len(new_space[0])

    # Scan the prefix of entries that are large enough to contain the newcomer.
    insert_at = 0
    for start in chain_starts:
        if len(start[2][0]) < new_dim:
            break
        if a_is_subspace_of_b(new_space, start[2]):
            return
        insert_at += 1

    # Not contained anywhere: put it right after the scanned prefix.
    chain_starts.insert(insert_at, new_basis)

    # Drop the entries behind it that the newcomer swallows.
    tail_from = insert_at + 1
    chain_starts[tail_from:] = [
        start
        for start in chain_starts[tail_from:]
        if not a_is_subspace_of_b(start[2], new_space)
    ]

In [7]:
# Load the graph collection and drop the 'cycle' entry.
# NOTE(review): get_graphs is not defined in this notebook; presumably it comes
# from the `graph_utils` star-import - confirm.
ALL_GRAPHS = get_graphs()
del ALL_GRAPHS['cycle']

In [8]:
# Restrict the experiments to these five graphs, in this iteration order.
ALL_GRAPHS = {
    graph_name: ALL_GRAPHS[graph_name]
    for graph_name in ('dupl', 'zigzag', 'fork', 'big_triag', 'big_zig')
}

In [9]:
%%time

# Offline variant: for every graph, collect [index, rank, null-space basis] for
# each system produced by the iterator, then reduce the whole list once with
# find_chain_starts.
for name, g in ALL_GRAPHS.items():

    print(name)
    # find_stable_sets / build_triple_equalities / build_variance_equalities_iterator
    # are not defined in this notebook (presumably from graph_utils - confirm).
    I = find_stable_sets(g)
    A, var_matr, num_params = build_triple_equalities(I, g.shape[0])

    basis_set = []
    for i, addA in enumerate(build_variance_equalities_iterator(I, var_matr, num_params, only_nonequivalent=True)):
        # NOTE(review): A and addA[0] look like lists of rows, so `+` would stack
        # the extra equations under the common ones - confirm.
        basis_set.append([i, *get_rank_and_nullspace(np.array(A + addA[0], dtype=np.float64))])

    chain_starts = find_chain_starts(basis_set)

    # Shapes of A and of the LAST addA[0] seen; `addA` is undefined here if the
    # iterator yielded nothing.
    print("{} x {}".format(len(A), len(A[0])), "{} x {}".format(len(addA[0]), len(addA[0][0])))
    # Number of kept entries and the column count of each kept basis matrix.
    print(len(chain_starts), "dimensions:", ",".join([str(len(x[2][0])) for x in chain_starts]))

zigzag
12 x 28 18 x 28
1 dimensions: 6
fork
11 x 28 16 x 28
1 dimensions: 7
dupl
10 x 28 14 x 28
1 dimensions: 8
big_triag
16 x 36 29 x 36
1 dimensions: 7
big_zig
16 x 36 29 x 36
1 dimensions: 7
CPU times: user 4min 39s, sys: 2.58 s, total: 4min 42s
Wall time: 1min 19s


In [10]:
%%time

# Naive online variant: after every new entry the complete find_chain_starts
# reduction is re-run on the current list (kept entries plus the newcomer).
for name, g in ALL_GRAPHS.items():
    
    print(name)
    # find_stable_sets / build_triple_equalities / build_variance_equalities_iterator
    # are not defined in this notebook (presumably from graph_utils - confirm).
    I = find_stable_sets(g)
    A, var_matr, num_params = build_triple_equalities(I, g.shape[0])
    
    chain_starts = []
    for i, addA in enumerate(build_variance_equalities_iterator(I, var_matr, num_params, only_nonequivalent=True)):
        chain_starts.append([i, *get_rank_and_nullspace(np.array(A + addA[0], dtype=np.float64))])
        # Re-sorts and re-filters the whole list on every iteration.
        chain_starts = find_chain_starts(chain_starts)
    
    # Shapes of A and of the LAST addA[0] seen; `addA` is undefined here if the
    # iterator yielded nothing.
    print("{} x {}".format(len(A), len(A[0])), "{} x {}".format(len(addA[0]), len(addA[0][0])))
    # Number of kept entries and the column count of each kept basis matrix.
    print(len(chain_starts), "dimensions:", ",".join([str(len(x[2][0])) for x in chain_starts]))

zigzag
12 x 28 18 x 28
1 dimensions: 6
fork
11 x 28 16 x 28
1 dimensions: 7
dupl
10 x 28 14 x 28
1 dimensions: 8
big_triag
16 x 36 29 x 36
1 dimensions: 7
big_zig
16 x 36 29 x 36
1 dimensions: 7
CPU times: user 6min 14s, sys: 3.18 s, total: 6min 18s
Wall time: 1min 34s


In [11]:
%%time

# Incremental online variant: each new entry is merged into chain_starts in
# place by get_new_starts instead of re-running the full reduction.
for name, g in ALL_GRAPHS.items():
    
    print(name)
    # find_stable_sets / build_triple_equalities / build_variance_equalities_iterator
    # are not defined in this notebook (presumably from graph_utils - confirm).
    I = find_stable_sets(g)
    A, var_matr, num_params = build_triple_equalities(I, g.shape[0])
    
    chain_starts = []
    for i, addA in enumerate(build_variance_equalities_iterator(I, var_matr, num_params, only_nonequivalent=True)):
        # get_new_starts mutates chain_starts and returns None.
        get_new_starts(chain_starts, [i, *get_rank_and_nullspace(np.array(A + addA[0], dtype=np.float64))])
    
    # Shapes of A and of the LAST addA[0] seen; `addA` is undefined here if the
    # iterator yielded nothing.
    print("{} x {}".format(len(A), len(A[0])), "{} x {}".format(len(addA[0]), len(addA[0][0])))
    # Number of kept entries and the column count of each kept basis matrix.
    print(len(chain_starts), "dimensions:", ",".join([str(len(x[2][0])) for x in chain_starts]))

big_triag
16 x 36 29 x 36
1 dimensions: 7
big_zig
16 x 36 29 x 36
1 dimensions: 7
fork
11 x 28 16 x 28
1 dimensions: 7
dupl
10 x 28 14 x 28
1 dimensions: 8
zigzag
12 x 28 18 x 28
1 dimensions: 6
CPU times: user 5min 41s, sys: 2.85 s, total: 5min 43s
Wall time: 1min 25s


In [None]:
## no aplas optimizations

# online (stupid): 1min 32s
# offline: 1min 18s
# online (smarter): 1min 25s 

# mkl is used, 4 cores

## limiting the number of cores

#os.environ["MKL_NUM_THREADS"] = "1" 
#os.environ["NUMEXPR_NUM_THREADS"] = "1" 
#os.environ["OMP_NUM_THREADS"] = "1" 

# online (stupid): 1min 33s
# offline: 1min 20s
# online (smarter): 1min 26s

In [181]:
# NOTE(review): `res` is not defined in any visible cell, and this cell's execution
# count is out of sequence with the rest of the notebook; it will presumably
# fail with NameError on a fresh "Restart & Run All" - confirm or remove.
len(find_chain_starts(res))

1