In [1]:
# Load generic python packages required

import pandas as pd
import pickle
import matplotlib.pylab as plt
import tensorflow as tf
import numpy as np
import networkx as nx
import seaborn as sns
import sys, os

# Load various RSMI-NE functions. In principle one can simply do "import rsmine" but then one needs to call 
# functions with syntax like rsmine.cg_optimisiers.xxxxxxxxx() etc.

sys.path.append("/home/cluster/mkochj/GitHub/RSMI-NE/rsmine/coarsegrainer")
import build_dataset as ds
import cg_optimisers as cg_opt
import plotter
from cg_utils import *
from analysis_utils import *

  from tqdm.autonotebook import tqdm


In [2]:
############# Load the definition of the graph and construct the reference graph

examples_dir = '/home/cluster/mkochj/GitHub/RSMI-NE/examples'

edges=np.loadtxt(examples_dir+'/quasiperiodic_data/edgedata.dat')
nodes=np.loadtxt(examples_dir+'/quasiperiodic_data/nodedata.dat',usecols=[0])
nodepos=np.loadtxt(examples_dir+'/quasiperiodic_data/nodedata.dat',usecols=[1,2])
order_8 = np.loadtxt(examples_dir+'/quasiperiodic_data/order_of_8vtx.dat')

G = construct_reference_graph(edges,nodes)

In [3]:
# Set the various parameters of the system required by the code
# EV_params and data_params are generic data about samples of E,V


case_no= 18 # env, buffer, ll only come into play if case is set to "0"
V_index = 389 # 389 is the centeral 8-vertex order 4 # 196 and 253 are 8-vertices of order 2 # 211 is 8-vertex of order 0
V_order = order_8[V_index][1]

sample_no = 12121

env_size = 8 # Not currently used by us
buffer_size = 2 # Not currently used by us
ll= 3 # Not currently used by us
disc=1


#We store these in a dictionary for experiment logging purposes only
EV_params = {
    'env_size' : env_size,
    'buffer_size': buffer_size,
    'block_size': ll, #assuming square block shape
    'sample_no': sample_no,
    'V_index': V_index,
    'V_order': V_order,
    'case_no':case_no
}

# These are needed by the generator
data_params = {
    'model': 'dimer_graph',
    'lattice_type': 'networkx',
    'L': 2048, # unused currently
    'N_samples': 1, # 1 is default. Set to actual value after generating or loading data.
    'dimension': 2,
}

In [4]:
############ Load the raw MC configurations (can take *a long while* for large files) #################

Xs = iter_loadtxt('/home/cluster/sobisw/data/large_size/econfig_startseed%i_nsmaples50000.dat'%(EV_params['sample_no']),dtype=np.int8)

data_params['N_samples'] = len(Xs) # Set the correct number of samples loaded.
print("Data loaded")

Data loaded


In [5]:
# Instantiate the dataset object, using the loaded samples and the definition of the graph
# This will be used to generate the E,V splits.

generator=ds.dataset(**data_params, configurations = Xs, G=G, verbose=True)
#generator=dataset(**data_params, configurations = Xs, G=G, verbose=True)
print(data_params)
print(EV_params)

{'model': 'dimer_graph', 'lattice_type': 'networkx', 'L': 2048, 'N_samples': 50000, 'dimension': 2}
{'env_size': 8, 'buffer_size': 2, 'block_size': 3, 'sample_no': 12121, 'V_index': 389, 'V_order': 4.0, 'case_no': 18}


In [6]:
# Print the shape of Xs (num_samples,num_variables_in_graph):
Xs.shape

(50000, 51800)

In [7]:
# Double check what sample parameters are currently selected:
EV_params['sample_no'],EV_params['V_index'],EV_params['V_order']

(12121, 389, 4.0)

In [8]:
# Load definitions of membranes, regions

regions_data_dir = examples_dir + '/quasiperiodic_data/regions/'

mem_edges0_ids = eids_from_edges(edges,np.loadtxt(regions_data_dir + 'reg_mem_edgelist_svtx%i_sord%i_mem0.dat'%(EV_params['V_index'],EV_params['V_order']),dtype=int))
mem_edges1_ids = eids_from_edges(edges,np.loadtxt(regions_data_dir + 'reg_mem_edgelist_svtx%i_sord%i_mem1.dat'%(EV_params['V_index'],EV_params['V_order']),dtype=int))
mem_edges2_ids = eids_from_edges(edges,np.loadtxt(regions_data_dir + 'reg_mem_edgelist_svtx%i_sord%i_mem2.dat'%(EV_params['V_index'],EV_params['V_order']),dtype=int))
reg_edges0_ids = eids_from_edges(edges,np.loadtxt(regions_data_dir + 'reg_edgelist_svtx%i_sord%i_mem0.dat'%(EV_params['V_index'],EV_params['V_order']),dtype=int))
reg_edges1_ids = eids_from_edges(edges,np.loadtxt(regions_data_dir + 'reg_edgelist_svtx%i_sord%i_mem1.dat'%(EV_params['V_index'],EV_params['V_order']),dtype=int))
reg_edges2_ids = eids_from_edges(edges,np.loadtxt(regions_data_dir + '/reg_edgelist_svtx%i_sord%i_mem2.dat'%(EV_params['V_index'],EV_params['V_order']),dtype=int))

ball5,_ = construct_VE_edgelists(G, V_index, L_B=1, ll=5, cap=8)
ball8,_ = construct_VE_edgelists(G, V_index, L_B=1, ll=8, cap=11)
ball12,_ = construct_VE_edgelists(G, V_index, L_B=1, ll=12, cap=15)
ball16,_ = construct_VE_edgelists(G, V_index, L_B=1, ll=16, cap=19)
ball32,_ = construct_VE_edgelists(G, V_index, L_B=1, ll=32, cap=35)
ball48,_ = construct_VE_edgelists(G, V_index, L_B=1, ll=48, cap=50)
ball24,_ = construct_VE_edgelists(G, V_index, L_B=1, ll=24, cap=26)

# Construct lists of edges defnining the selected E,V if ***NOT*** using the cases, but rather topological balls.
# If using the cases, it's much safer to use the "multiple case" path, even if the case list of lenght 1.

#GV_edges,GE_edges = construct_VE_edgelists(G, V_index, L_B=buffer_size, ll=ll, cap=ll+buffer_size+env_size)

In [9]:
# Define the E,V areas via "cases" instead (more appropriate for non-spherical regions, like empires etc.)

#### Be super careful!!! IF the variables  reg_.... is initialized from some previous vertex,
#### there won't be an error, ven of lower order vertex doesn't have this membrane. 


cGV_edges = {}
cGE_edges = {}

    # Case #1
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(reg_edges1_ids) -set(reg_edges0_ids) - set(mem_edges0_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[1] = auxGV_edges
#cGE_edges[1] = auxGE_edges

    # Case #2
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(reg_edges1_ids) | set(mem_edges1_ids) -set(reg_edges0_ids) - set(mem_edges0_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[2] = auxGV_edges
#cGE_edges[2] = auxGE_edges

    # Case #3
auxGV_edges = sorted(set(reg_edges0_ids))
auxGE_edges = sorted(set(reg_edges2_ids) -set(reg_edges0_ids) - set(mem_edges0_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
cGV_edges[3] = auxGV_edges
cGE_edges[3] = auxGE_edges

    # Case #4
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(reg_edges2_ids) -set(reg_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[4] = auxGV_edges
#cGE_edges[4] = auxGE_edges

    # Case #5
#auxGV_edges = sorted(set(reg_edges0_ids) | set(mem_edges0_ids))
#auxGE_edges = sorted(set(reg_edges2_ids) -set(reg_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[5] = auxGV_edges
#cGE_edges[5] = auxGE_edges

    # Case #6
#auxGV_edges = sorted(set(reg_edges0_ids) | set(mem_edges0_ids))
#auxGE_edges = sorted(set(reg_edges2_ids) -set(reg_edges1_ids) - set(mem_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[6] = auxGV_edges
#cGE_edges[6] = auxGE_edges

    # Case #7
#auxGV_edges = sorted(set(reg_edges0_ids) | set(mem_edges0_ids))
#auxGE_edges = sorted(set(ball32) -set(reg_edges2_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[7] = auxGV_edges
#cGE_edges[7] = auxGE_edges

    # Case #8
#auxGV_edges = sorted(set(reg_edges0_ids) | set(mem_edges0_ids))
#auxGE_edges = sorted(set(ball32) -set(reg_edges2_ids) - set(mem_edges2_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[8] = auxGV_edges
#cGE_edges[8] = auxGE_edges

    # Case #9
#auxGV_edges = sorted(set(reg_edges1_ids))
#auxGE_edges = sorted(set(reg_edges2_ids) -set(reg_edges1_ids) - set(mem_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[9] = auxGV_edges
#cGE_edges[9] = auxGE_edges

    # Case #10
#auxGV_edges = sorted(set(reg_edges1_ids))
#auxGE_edges = sorted(set(reg_edges2_ids) | set(mem_edges2_ids) -set(reg_edges1_ids) - set(mem_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[10] = auxGV_edges
#cGE_edges[10] = auxGE_edges

    # Case #11
#auxGV_edges = sorted(set(reg_edges1_ids))
#auxGE_edges = sorted(set(ball32) -set(reg_edges1_ids) - set(mem_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[11] = auxGV_edges
#cGE_edges[11] = auxGE_edges

    # Case #12
#auxGV_edges = sorted(set(reg_edges1_ids))
#auxGE_edges = sorted(set(ball32) -set(reg_edges2_ids) - set(mem_edges2_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[12] = auxGV_edges
#cGE_edges[12] = auxGE_edges

    # Case #13
#auxGV_edges = sorted(set(reg_edges1_ids))
#auxGE_edges = sorted(set(ball32) -set(reg_edges2_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[13] = auxGV_edges
#cGE_edges[13] = auxGE_edges

    # Case #14
#auxGV_edges = sorted(set(reg_edges1_ids) | set(mem_edges1_ids))
#auxGE_edges = sorted(set(ball32) -set(reg_edges2_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[14] = auxGV_edges
#cGE_edges[14] = auxGE_edges

    # Case #15
#auxGV_edges = sorted(set(reg_edges1_ids) | set(mem_edges1_ids))
#auxGE_edges = sorted(set(ball32) -set(reg_edges2_ids) -set(mem_edges2_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[15] = auxGV_edges
#cGE_edges[15] = auxGE_edges

    # Case #16
#auxGV_edges = sorted(set(reg_edges2_ids))
#auxGE_edges = sorted(set(ball32) -set(reg_edges2_ids) -set(mem_edges2_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[16] = auxGV_edges
#cGE_edges[16] = auxGE_edges

    # Case #17
#auxGV_edges = sorted(set(reg_edges2_ids))
#auxGE_edges = sorted(set(ball48) -set(reg_edges2_ids) -set(mem_edges2_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[17] = auxGV_edges
#cGE_edges[17] = auxGE_edges

    # Case #18 was is ball 20 or ball 24????
#auxGV_edges = sorted(set(reg_edges2_ids))
#auxGE_edges = sorted(set(ball48) -set(ball24))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[18] = auxGV_edges
#cGE_edges[18] = auxGE_edges

    # Case #19
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(ball16) -set(reg_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[19] = auxGV_edges
#cGE_edges[19] = auxGE_edges

    # Case #20
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(ball16) -set(reg_edges1_ids) -set(mem_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[20] = auxGV_edges
#cGE_edges[20] = auxGE_edges

    # Case #21
#auxGV_edges = sorted(set(reg_edges0_ids) | set(mem_edges0_ids))
#auxGE_edges = sorted(set(ball16) -set(reg_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[21] = auxGV_edges
#cGE_edges[21] = auxGE_edges

    # Case #22
#auxGV_edges = sorted(set(reg_edges0_ids) | set(mem_edges0_ids))
#auxGE_edges = sorted(set(ball16) -set(reg_edges1_ids) -set(mem_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[22] = auxGV_edges
#cGE_edges[22] = auxGE_edges

    # Case #23
#auxGV_edges = sorted(set(reg_edges1_ids))
#auxGE_edges = sorted(set(ball16) -set(reg_edges1_ids) -set(mem_edges1_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[23] = auxGV_edges
#cGE_edges[23] = auxGE_edges

    # Case #24
#auxGV_edges = sorted(set(reg_edges1_ids))
#auxGE_edges = sorted(set(ball24) -set(ball16))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[24] = auxGV_edges
#cGE_edges[24] = auxGE_edges

    # Case #25
#auxGV_edges = sorted(set(reg_edges1_ids) | set(mem_edges1_ids))
#auxGE_edges = sorted(set(ball24) -set(ball16))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[25] = auxGV_edges
#cGE_edges[25] = auxGE_edges

    # Case #26
#auxGV_edges = sorted(set(reg_edges1_ids) | set(mem_edges1_ids))
#auxGE_edges = sorted(set(ball16) -set(ball12))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[26] = auxGV_edges
#cGE_edges[26] = auxGE_edges

    # Case #27
#auxGV_edges = sorted(set(reg_edges2_ids))
#auxGE_edges = sorted(set(ball32) -set(ball24))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[27] = auxGV_edges
#cGE_edges[27] = auxGE_edges


    # Case #28
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(ball8) - set(reg_edges0_ids) - set(mem_edges0_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[28] = auxGV_edges
#cGE_edges[28] = auxGE_edges


    # Case #29
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(ball12) - set(reg_edges0_ids) - set(mem_edges0_ids))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[29] = auxGV_edges
#cGE_edges[29] = auxGE_edges


     #Case #30
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(ball12) - set(ball5))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[30] = auxGV_edges
#cGE_edges[30] = auxGE_edges

    # Case #31
#auxGV_edges = sorted(set(reg_edges0_ids))
#auxGE_edges = sorted(set(ball16) - set(ball5))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[31] = auxGV_edges
#cGE_edges[31] = auxGE_edges

    # Case #32
#auxGV_edges = sorted(set(reg_edges0_ids) | set(mem_edges0_ids))
#auxGE_edges = sorted(set(ball16) - set(ball5))#  #sorted(set(reg_edges2_ids)-(set(reg_edges0_ids) | set(mem_edges0_ids)))
#cGV_edges[32] = auxGV_edges
#cGE_edges[32] = auxGE_edges

In [10]:
# Double check which cases you selected:
list(cGV_edges.keys())

[3]

In [15]:
#####################################################
############ Generate E,V datasets ##################
#####################################################

In [36]:
############ Generate E,V datasets ***FOR A SINGLE CASE/custom definition of E,V*** 

Vs, Es = generator.rsmi_data_graph(GV_edges,GE_edges)

print(round((sys.getsizeof(np.array(Es)) + sys.getsizeof(np.array(Vs)))/ 1024 / 1024, 2), " MB")
#np.save(r"/home/cluster/mkochj/scratch/sobisw/data/EandV/Es_%i_vi%i_e%ib%ill%i" % (EV_params['sample_no'],EV_params['V_index'],EV_params['env_size'],EV_params['buffer_size'],EV_params['block_size']),Es)
#np.save(r"/home/cluster/mkochj/scratch/sobisw/data/EandV/Vs_%i_vi%i_e%ib%ill%i" % (EV_params['sample_no'],EV_params['V_index'],EV_params['env_size'],EV_params['buffer_size'],EV_params['block_size']),Vs)
np.save(r"/home/cluster/sobisw/rsmine_tutorial/Es_%i_vi%i_c%i" % (EV_params['sample_no'],EV_params['V_index'],case_no),Es)
np.save(r"/home/cluster/sobisw/rsmine_tutorial/Vs_%i_vi%i_c%i" % (EV_params['sample_no'],EV_params['V_index'],case_no),Vs)



Preparing the RSMI dataset...
RSMI dataset prepared.
137.33  MB


In [11]:
############ Generate E,V datasets ***FOR MULTIPLE CASES*** 

case_list = list(cGV_edges.keys())
for case_no in case_list:
    auxVs, auxEs = generator.rsmi_data_graph(cGV_edges[case_no],cGE_edges[case_no])
    print("Case_no: ",case_no,", Size: ",round((sys.getsizeof(np.array(auxEs)) + sys.getsizeof(np.array(auxVs)))/ 1024 / 1024, 2), " MB")
    np.save(r"/home/cluster/sobisw/rsmine_tutorial/Es_%i_vi%i_c%i" % (EV_params['sample_no'],EV_params['V_index'],case_no),auxEs)
    np.save(r"/home/cluster/sobisw/rsmine_tutorial/Vs_%i_vi%i_c%i" % (EV_params['sample_no'],EV_params['V_index'],case_no),auxVs)

Preparing the RSMI dataset...
RSMI dataset prepared.
Case_no:  3 , Size:  163.27  MB
