# InClose
---

In [1]:
from collections import defaultdict, Counter
from contextlib import redirect_stdout
from IPython.display import SVG
import matplotlib.pyplot as plt
import numpy as np
import pickle
from random import random
import seaborn as sns
from scipy import sparse, special

from sknetwork.clustering import get_modularity, KMeans, Louvain
from sknetwork.data import load_netset, from_edge_list
from sknetwork.embedding import Spectral, SVD
from sknetwork.gnn import GNNClassifier
from sknetwork.ranking import top_k
from sknetwork.topology import Triangles, Cliques, get_connected_components
from sknetwork.utils import get_degrees, get_membership, get_neighbors
from sknetwork.utils.kmeans import KMeansDense
from sknetwork.utils.check import get_probs
from sknetwork.visualization import svg_graph

from tfidf import TfIdf

In [2]:
wikivitals = load_netset('wikivitals')

Parsing files...
Done.


In [3]:
adjacency = wikivitals.adjacency
biadjacency = wikivitals.biadjacency
names = wikivitals.names
words = wikivitals.names_col
labels = wikivitals.labels

In [4]:
print(adjacency.shape, adjacency.nnz)
print(biadjacency.shape, biadjacency.nnz)

(10011, 10011) 824999
(10011, 37845) 1363301


In [5]:
def plot_distrib(values, ax, xlabel='degree', log=True, title=None, color='blue', alpha=1, label='Degree distribution'):
    counter = Counter(values)
    x, y = zip(*counter.items())
    ax.scatter(x, y, marker='+', color=color, alpha=alpha, label=label)
    ax.legend()
    ax.set_title(title)
    if log:
        ax.set_xscale('log')
        ax.set_xlabel(xlabel + ' (log)')
    else:
        ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency' + ' (log)')
    ax.set_yscale('log')

In [6]:
def is_cannonical(context, extents, intents, r, y):
    global r_new

    for k in range(len(intents[r])-1, -1, -1):
        for j in range(y, intents[r][k], -1):            
            for h in range(len(extents[r_new])):
                if context[extents[r_new][h], j] == 0:
                    h -= 1 # Necessary for next test in case last interaction of h for-loop returns False
                    break
            if h == len(extents[r_new]) - 1:
                return False
        y = intents[r][k] - 1

    for j in reversed(range(y, -1, -1)):
        for h in range(len(extents[r_new])):
            if context[extents[r_new][h], j] == 0:
                h -= 1 # Necessary for next test in case last interaction of h for-loop returns False
                break
        if h == len(extents[r_new]) - 1:
            return False
    
    return True

def extension(attrs, context):
    ext = set()
    if len(attrs) == 0:
        return np.arange(0, context.shape[0])
    for a in attrs:
        nodes = set(get_neighbors(context, node=a, transpose=True))
        if len(ext) == 0:
            ext.update(nodes)
        else:
            ext &= nodes
        if len(ext) == 0:
            break
    return np.asarray(list(ext))

def intention(nodes, context):
    intent = set()
    if len(nodes) == 0:
        return np.arange(0, context.shape[1])
    for n in nodes:
        attrs = set(get_neighbors(context, node=n))
        if len(intent) == 0:
            intent.update(attrs)
        else:
            intent &= attrs
        if len(attrs) == 0:
            break
    return np.asarray(list(intent))

In [9]:
def init_inclose(context):
    extents, intents = [], []
    extents_init = np.arange(context.shape[0])
    intents_init = []
    extents.append(extents_init) # Initalize extents with all objects from context
    intents.append(intents_init) # Initialize intents with empty set attributes
    return extents, intents

def in_close(adjacency, context, extents, intents, r=0, y=0, min_support=0, max_support=np.inf, names_col=[]):
    global r_new
    r_new = r_new + 1
    
    print(f'NEW ITERATION \n --------')
    print(f'r: {r} - r_new: {r_new}')
    # ------------------------------------------------
    print(f'|extents[r]|: {len(extents[r])} - intents[r]: {names_col[intents[r]]}')
    
    for j in np.arange(context.shape[1])[y:]:

        try:
            extents[r_new] = []
        except IndexError:
            extents.append([])

        # Form a new extent by adding extension of attribute j to current concept extent
        ext_j = set(extension([j], context))
        extents[r_new] = list(sorted(set(extents[r]).intersection(ext_j)))
        len_new_extent = len(extents[r_new])
        
        print(f'|extents|: {len(extents[r])} - intents: {names_col[intents[r]]}')
        print(f'  Candidate attr: {names_col[j]} - ({j})')
        print(f'  new extent: {extents[r_new]}')
        
        if (len_new_extent >= min_support) and (len_new_extent <= max_support):
                       
            if len_new_extent - len(extents[r]) == 0:
                print(f'  Extent size did not change -> attribute {names_col[j]} is added to intent.')
                intents[r] = list(sorted(set(intents[r]).union(set([j]))))                
            else:
                print(f'  Extent size DID change. IsCannonical: {is_cannonical(context, extents, intents, r, j - 1)}')
                if is_cannonical(context, extents, intents, r, j - 1):
                    try:
                        intents[r_new] = []
                    except IndexError:
                        intents.append([])

                    intents[r_new] = list(sorted(set(intents[r]).union(set([j]))))
                    
                    print(f'  --> Enter recursion with Intent: {names_col[intents[r_new]]}...')
                    in_close(adjacency, context, extents, intents, r=r_new, y=j+1, min_support=min_support, max_support=max_support, 
                                     names_col=names_col)

    print(f'**END FUNCTION')
    print(f'**concept: ({[*zip(extents, intents)]})')
    
    return [*zip(extents, intents)]

In [12]:
adjacency = wikivitals.adjacency
biadjacency = wikivitals.biadjacency
names = wikivitals.names
words = wikivitals.names_col
labels = wikivitals.labels

# Initialization
extents, intents = init_inclose(biadjacency)
degs = get_degrees(biadjacency, transpose=True)
global r_new
r_new = 0

# Degree of attribute = # articles in which it appears
freq_attribute = get_degrees(biadjacency.astype(bool), transpose=True)
index = np.flatnonzero((freq_attribute <= 500000) & (freq_attribute >= 0))

# Filter data with index
biadjacency = biadjacency[:, index]
words = words[index]
freq_attribute = freq_attribute[index]

In [13]:
%%time

# Algorithm
with open('logs/inclose_wikivitals.txt', 'w') as f:
    with redirect_stdout(f):
        concepts = in_close(adjacency, biadjacency, extents, intents, r=0, y=0, 
                                min_support=0, max_support=np.inf,
                                names_col=words)
        print()

print(len(concepts))

UnboundLocalError: local variable 'new_intent' referenced before assignment