In [1]:
import os 
os.chdir('C:\\Users\\lucas\\Documents\\Luc_DOCS\\IMDEA\\GraphMaterials')
import networkx as nx
from pymatgen.core.structure import Structure
from GraphFunc.ZeoDataframeFunction import ZeoDataframe
from GraphFunc.ConexionFunction import Conexion
from pymatgen.io.cif import CifWriter
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
from copy import deepcopy
from networkx.relabel import relabel_nodes
from pymatgen.core.structure import Structure
import math
import itertools
from itertools import combinations, combinations_with_replacement


In [None]:
mat_dic = {}

for cif_name in os.listdir('.\\CifFiles\\'):
    
    letter_name = cif_name.split('.')[0]
    
    path_nt2 = './Nt2Files/' + letter_name + '.nt2'
    stu = Structure.from_file('./CifFiles/' + letter_name + '.cif')

    # Here, we have the lattice vector of the structure we want to apply PageRank algorithm
    # We will use the point of applicattion of the lattice vector of the cell to locate them in space 
    # once we put multiple cells togehter


    # We take coodinates of the vectors A, B and C
    stu.lattice

    # Dimensions of the cell
    box = np.array([stu.lattice.abc[0], stu.lattice.abc[1], stu.lattice.abc[2]])

    # Angles of the cell's axis
    alpha = stu.lattice.alpha
    beta = stu.lattice.beta
    gamma = stu.lattice.gamma

    # Constant to change from degrees to radians
    cte = math.pi/180

    # Vectors proyections of the cell's vector over the ortogonal axis
    x_axis = stu.lattice.a
    y_axis = stu.lattice.b
    z_axis = stu.lattice.c

    # Cell's vectors
    a, b, c = round(x_axis*math.sin(beta*cte), 3) , 0 ,round(x_axis*math.cos(beta*cte), 3)
    d, e, f = round(y_axis*math.cos(gamma*cte), 3) ,round(y_axis*math.sin(gamma*cte), 3), 0
    l, m, n = 0, round(z_axis*math.cos(alpha*cte), 3), round(z_axis*math.sin(alpha*cte), 3)
    
    # Constant that limits the maximum number of cells that could expand over every direction 
    # (axis and diagonal direction)
    r = 1

    cubic_comb = []

    # Each of the cell's vectors represent the length of the cell in that direction.
    # Independently of which length it is if you multiply the length of one of the vector by two it is clear
    # that the result vector will give the information about the location of the second cell in the direction
    # of the vector we multiply by two.

    # We generate the location of the cells for a cube of length equals to len(range(-r, r + 1))
    # r = 1, len(range(-r, r + 1)) = 27
    # The information of location of the cells are saved in real coordinates using the combination of the 
    # cell's vectors and in an orthogonal base transformation using as vectors the numbers (i,j,k) we use to multiply 
    # the cell's vectors
    for i in range(-r, r+1):
        for j in range(-r, r+1):
            for k in range(-r, r+1):
                cubic_comb.append((np.array([a,b,c])*i + np.array([d,e,f])*j + np.array([l,m,n])*k,
                                   np.array([i,j,k])))



    vector_pos = []
    vector_ort = []
    # Unpack values
    # We also choose as a center the vector 0 
    lattice_vector = stu.lattice
#     print('This is the Lattice vector: {}'.format(lattice_vector.abc))

    for vec, ort in cubic_comb:
        vector_pos.append(list(vec))
        vector_ort.append(list(ort))
        
    # Later in this Notebook we are going to create a graph for every vector we generated in the previous step.
    # These graphs will be composed by nodes whose names will be an ordered number.
    # The names will be assigned by the order the graphs were generated.
    # That means every graph will have as the name of their nodes a range of numbers whose length will be equal 
    # to the name of the last node we create in the first graph.
    # In order to keep track of the order we create we are going to establish that order by numbered the orthogonal
    # vectors

    pos_order = {}
    i = 0
    for vector in vector_ort:
        name = str(vector[0]) + str(vector[1]) + str(vector[2]) 
        pos_order[name] = i
        i += 1


    six_faces_neigh = [np.array([1,0,0]), np.array([-1, 0,0]), np.array([0,1,0]), 
              np.array([0,-1,0]), np.array([0,0,1]), np.array([0,0,-1])]
    
    # Initialize a networkx's graph. 
    # We take the information about the nodes and edges from the .nt2 file and passes it out to the graph.

    # Every node in the graph will have three attributes.
    # -Cartesian coordinate
    # -Pagerank coefficient
    # -Radius of the maximus sphere that fit in that node

    # Evey edge will have one attribute:
    # -Radius of the maximus sphere that can travel through the edge

    # We are not going to save the information about edges that connect point from outside of the cell

    G = nx.Graph()

    pagerank_init_value = 100
    with open(path_nt2) as f:
        line = ' '

        while line:
            line = f.readline()

            if line == 'Vertex table:\n':

                while line:

                    line = f.readline()

                    if line == '\n':
                        break
                    else:
                        line_list = line.split()
                        key = int(line_list[0])
                        coord_x = float(line_list[1])
                        coord_y = float(line_list[2])
                        coord_z = float(line_list[3])
                        min_dist_atom = float(line_list[4])

                        G.add_node(key, coord=np.array([coord_x, coord_y, coord_z]), rad_max_sph=min_dist_atom,
                                  pagerank=pagerank_init_value)

        #                 vertex_table = vertex_table.append({'coord_x':coord_x, 'coord_y':coord_y, 'coord_z':coord_z,
        #                                                     'min_dist_atom':min_dist_atom, 'key':key},ignore_index = True)


            x_1 = len(list(range(0,(sorted(list(G.nodes), reverse=True)[0] + 1))))



            if line == 'Edge table:\n':


                while line:

                    line = f.readline()
                    if line == '':
                        break
                    line_list = line.split()
                    origin = int(line_list[0])
                    destination = int(line_list[2])
                    larger_radius = float(line_list[3])
                    x_sim = int(line_list[4])
                    y_sim = int(line_list[5])
                    z_sim = int(line_list[6])
                    if x_sim == 8:
                        G.add_node(destination + x_1, coord=G.nodes[destination]['coord'] + np.array([box[0], 0, 0]),
                                   rad_max_sph=G.nodes[destination]['rad_max_sph'], pagerank=pagerank_init_value)

                        G.add_edge(origin, destination + x_1, rad_max_sph=larger_radius)
                        print(destination + x_1)
                    else:
                        if sum([abs(x_sim), abs(y_sim), abs(z_sim)]) == 0:
                            G.add_edge(origin, destination, rad_max_sph=larger_radius)

                        else:
                            pass
                        
#     print( sorted(G.nodes)[-1])
    total_nodes_G = len(G.nodes)
    larger_dist_atom = []
    for i in G.nodes:
        larger_dist_atom.append(G.nodes[i]['rad_max_sph'])

    larger_node = max(larger_dist_atom)
    index_larger_node = np.argmax(larger_dist_atom)
    name_larger_node = list(G.nodes)[index_larger_node]
    
    # We didn't save the connection between nodes of different cells before  we are going to save it right now 
    # but in a dictionary
    # The information will be stored in six lists since the cell has six faces. In each list will contains the
    # connections between the nodes that go through that specific face.

    conx_cell = {}
    x_1 = []
    x_minus_1 = []
    y_1 = []
    y_minus_1 = []
    z_1 = []
    z_minus_1 = []
    with open(path_nt2) as f:
        line = ' '

        while line:
            line = f.readline()

            if line == 'Edge table:\n':
                while line:

                    line = f.readline()

                    if line == '':
                        break

                    line_list = line.split()
                    origin = int(line_list[0])
                    destination = int(line_list[2])
                    larger_radius = float(line_list[3])
                    x_sim = int(line_list[4])
                    y_sim = int(line_list[5])
                    z_sim = int(line_list[6])
                    if sum([abs(x_sim), abs(y_sim), abs(z_sim)]) == 0:
                        pass
                    else:
                        if x_sim == 1:
                            x_1.append((origin,destination,larger_radius))

#                             print(origin,destination)
                        elif x_sim == -1:
                             x_minus_1.append((origin,destination,larger_radius))
                        elif y_sim == 1:
                             y_1.append((origin,destination,larger_radius))
                        elif y_sim == -1:
                             y_minus_1.append((origin,destination,larger_radius))
                        elif z_sim == 1:
                             z_1.append((origin,destination,larger_radius))
                        elif z_sim == -1:
                             z_minus_1.append((origin,destination,larger_radius))
                        else:
                            print('outside of one cell radius')



    conx_cell['100'] = x_1
    conx_cell['-100'] = x_minus_1
    conx_cell['010'] = y_1
    conx_cell['0-10'] = y_minus_1
    conx_cell['001'] = z_1
    conx_cell['00-1'] = z_minus_1
    
    # Create a dictionary which contains one graph for every cell we want to use to calculate the 
    # pagerank coefficients. 
    # These new graphs are a copy of the one that we initialized in previous step so, we 
    # change the names and the cartesian coordinates for every node

    graph_mod= {}

    # As we mentioned earlier we are goint to name the nodes wit a number.
    # The name of the last node of the first graph plus one (because we start in 0) will give as the range in every 
    # graph.

    key_plus = sorted(list(G.nodes), reverse=True)[0] + 1

    # The names used as keys in the dictionary of graph are the ortogonal base transformations we calculated before
    # They names are very useful to know the position of the graph with respect to the origin (0,0,0).

    for i in range(len(vector_pos)):
        name = str(vector_ort[i][0])+str(vector_ort[i][1])+str(vector_ort[i][2])
        graph_mod[name] = deepcopy(G)

        mapping_name = {}
        multiplier = pos_order[name]
        for node in graph_mod[name].nodes:
            graph_mod[name].nodes[node]['coord'] += vector_pos[i] #*box
            mapping_name[node] = node + key_plus*multiplier
        relabel_nodes(graph_mod[name], mapping=mapping_name, copy=False)

    # As you can see here it is really to see where the graph is if we now their name
#     print('Keys of the Dictionary of Graph: {}'.format(graph_mod.keys()))


    # List with the names of the graph
    cell_list = list(graph_mod.keys())

    # Now we add to the graph the edges that connect with nodes outside of the cell. We do this for every graph 
    # in the dictionary

    for j in range(len(vector_pos)):
        for neighbor in six_faces_neigh:
            str_neighbor = str(neighbor[0]) + str(neighbor[1]) + str(neighbor[2]) 

            neig_loc = vector_ort[j] + neighbor
            str_neig_loc = str(neig_loc[0])+str(neig_loc[1])+str(neig_loc[2])

            if str_neig_loc in cell_list:
                name = str(vector_ort[j][0])+str(vector_ort[j][1])+str(vector_ort[j][2])
                multiplier_origin = pos_order[name]
                multiplier_destination = pos_order[str_neig_loc]

                for edge in conx_cell[str_neighbor]:
                    edge_1 = edge[0] + multiplier_origin*key_plus
                    edge_2 = edge[1] + multiplier_destination*key_plus
                    rad_max_sph = edge[2]
                    graph_mod[name].add_edge(edge_1, edge_2)
                    graph_mod[name].edges[edge_1, edge_2]['rad_max_sph'] = rad_max_sph
                    
                    
    x_list = []
    x_minus_list = []
    y_list = []
    y_minus_list = []
    z_list = []
    z_minus_list = []

    allcells = list(graph_mod.keys())

    for i in range(-r, r+1):
        for j in range(-r, r+1):
            for k in range(-r, r+1):
                if abs(i) == r:
                    if i == r:
                        x_list.append(str(i)+str(j)+str(k))
                    else:
                        x_minus_list.append(str(i)+str(j)+str(k))
                if abs(j) == r:
                    if j == r:
                        y_list.append(str(i)+str(j)+str(k))
                    else:
                        y_minus_list.append(str(i)+str(j)+str(k))
                if abs(k) == r:
                    if k == r:
                        z_list.append(str(i)+str(j)+str(k))
                    else:
                        z_minus_list.append(str(i)+str(j)+str(k))
                        
    for k in range(len(x_list)):
        name_origin_x = x_list[k]
        name_destination_x = x_minus_list[k]

        name_origin_y = y_list[k]
        name_destination_y = y_minus_list[k]

        name_origin_z = z_list[k]
        name_destination_z = z_minus_list[k]

        multiplier_origin_x = pos_order[name_origin_x]
        multiplier_destination_x = pos_order[name_destination_x]

        multiplier_origin_y = pos_order[name_origin_y]
        multiplier_destination_y = pos_order[name_destination_y]

        multiplier_origin_z = pos_order[name_origin_z]
        multiplier_destination_z = pos_order[name_destination_z]

        for edge in conx_cell['100']:
            edge_1 = edge[0] + multiplier_origin_x*key_plus
            edge_2 = edge[1] + multiplier_destination_x*key_plus
            rad_max_sph = edge[2]
            graph_mod[name_origin_x].add_edge(edge_1, edge_2)
            graph_mod[name_origin_x].edges[edge_1, edge_2]['rad_max_sph'] = rad_max_sph

    #         print(name_origin_x, name_destination_x)
    #         print(edge_1, edge_2)
    #         print(edge[0], edge[1])
    #         print('_____________')

        for edge in conx_cell['010']:
            edge_1 = edge[0] + multiplier_origin_y*key_plus
            edge_2 = edge[1] + multiplier_destination_y*key_plus
            rad_max_sph = edge[2]
            graph_mod[name_origin_y].add_edge(edge_1, edge_2)
            graph_mod[name_origin_y].edges[edge_1, edge_2]['rad_max_sph'] = rad_max_sph

        for edge in conx_cell['001']:
            edge_1 = edge[0] + multiplier_origin_z*key_plus
            edge_2 = edge[1] + multiplier_destination_z*key_plus
            rad_max_sph = edge[2]
            graph_mod[name_origin_z].add_edge(edge_1, edge_2)
            graph_mod[name_origin_z].edges[edge_1, edge_2]['rad_max_sph'] = rad_max_sph
            
            
    # Here, we create a final graph and we store in it the information of all the nodes and edges of the ones
    # we generate in the previous step
    final = nx.Graph()

    for key in graph_mod:
        final.add_nodes_from(graph_mod[key])
        final.add_edges_from(graph_mod[key].edges)
        for node in graph_mod[key].nodes:
            for attr in graph_mod[key].nodes[node]:
                final.nodes[node][attr] = graph_mod[key].nodes[node][attr]

        for edge in graph_mod[key].edges:
            for attr in graph_mod[key].edges[edge[0], edge[1]]:
                final.edges[edge][attr] = graph_mod[key].edges[edge][attr] 
                
                
    # We calculate the neighbors for every node in the graph
    neigh_dic = {}
    for n, nbrs in final.adj.items():
        neigh = []
        for nbr, eattr in nbrs.items():
            neigh.append(nbr)


        neigh_dic[str(n)] = neigh
            
            
    # For every node we calculate the sum of the square radius of their edges
    sum_radius = {}
    for node in final.nodes:

        sum_edges = 0
        for nei in neigh_dic[str(node)]:

            score = final[node][nei]['rad_max_sph']
            sum_edges += score**2
        sum_radius[str(node)] = sum_edges
        
    #  conexion is a dictionary that saves information of the weights that every edge has in 
    # relationship with the total number of edges in the node.
    # Every edge has a radius as attribute so we are using the proportion of the square of the radius divide by
    # the sum of the square radius in the node

    conexion = {}
    sub = {}
    for node in final.nodes:

        sub = {}
        for nei in neigh_dic[str(node)]:
            radius_node = final[node][nei]['rad_max_sph']
            all_radius = sum_radius[str(nei)] 
            sub[str(nei)] = radius_node**2/all_radius
        conexion[str(node)] = sub
        
        
    def unit_sum(pagerank_list):
        return pagerank_list/sum(pagerank_list)

    # Loop over the number of iterations we want to run pagerank
    def pagerank(graph, iteration, conexion, neigh_dic):
        node_list = sorted(list(graph.nodes))
        for i in range(iteration):
            pagerank_list = pagerank_one_iter(graph, conexion, neigh_dic, node_list)
        return pagerank_list

    # Loop over the nodes of the graph
    def pagerank_one_iter(graph, conexion, neigh_dic, node_list):
        for selfnode in node_list:
            update_pagerank(selfnode, conexion, neigh_dic, graph)
        pagerank_list = np.asarray([graph.nodes[node]['pagerank'] for node in node_list], dtype='float32')
        return pagerank_list

    # Update of the nodes attribute pagerank following the rule we describe in connections
    def update_pagerank(selfnode, conexion, neigh_dic, graph):
        in_neighbors = neigh_dic[str(selfnode)] #self.parents
        pagerank_sum = sum(graph.nodes[node]['pagerank']*conexion[str(selfnode)][str(node)]
                           for node in in_neighbors)
        graph.nodes[selfnode]['pagerank'] = pagerank_sum


    info = pagerank(final, 50, conexion, neigh_dic)
 
    most_connected_all = list(final.nodes)[np.argmax(info)]
    most_connected_all_value = info[np.argmax(info)]
    
#     print(list(G.nodes)[-1] + 1, names_nodes_center_cell[-1])
    names_nodes_center_cell = list(np.array(G.nodes) + key_plus*pos_order['000'])
    
    
    
    pgrk_nodes = []

    for node in names_nodes_center_cell:
        pgrk_nodes.append((node, final.nodes[node]['pagerank']))

    last = 0
    for node, pgrk in pgrk_nodes:

        if pgrk > last:
            max_pr = (node, pgrk)
            last = pgrk
        else:
            pass

    

#     index_center_cell = []
    
    
#     for i in range(len(final.nodes)):
#         if list(final.nodes)[i] in names_nodes_center_cell:
#             index_center_cell.append(i)
#         else:
#             pass
#     index_most_connected = np.argmax(info[index_center_cell])
    
    most_connected_node_cell = max_pr[0]-pos_order['000']*key_plus
    most_connected_node_cell_value = max_pr[1]
    print(most_connected_node_cell, max_pr[0])
    mat_dic[letter_name] = {'LargerNode':larger_node, 'IndexLN':name_larger_node, 
                     'MostConnectedNodeCell':most_connected_node_cell, 
                            'RadiusMCNC':final.nodes[most_connected_node_cell]['rad_max_sph'],
                            'MostConnectedNodeCellValue':most_connected_node_cell_value,
                     'MostConnectedAll':most_connected_all, 'RadiusMCNA':final.nodes[most_connected_all]['rad_max_sph'],
                     'MostConnectedAllValue':most_connected_all_value}

92 8973
31 1227
200 1287
103 2703
752 2799
20 9796
641 10527
534 8867
1080 8973
99 14139
1188 15119
880 16324
559 16631
279 7546
632 7825
40 8256
704 8847
655 9807
582 9855
201 7767
896 8147
183 11831
943 12543
812 13071
371 13201
52 4875
608 5193
130 8034
273 8511
60 3609
456 3821
437 6365
928 6383
775 12839
552 12991
58 7234
533 7727
282 7211
192 7461
76 2572
358 2687
13 4667
138 5011
27 1821
935 1931
264 12419
288 13089
20 3764
174 4031
159 2421
416 2435
40 5448
516 5823
82 6790
824 7223
231 10943
374 11535
1 4863
110 5235
21 1451
1347 1539
558 18069
478 18857
298 6512
220 6691
23 2883
464 3079
226 6258
1672 6495
654 22390
1272 23407
998 17534
364 17807
26 4758
304 5095
154 4106
1904 4255
1057 25809
98 26655
8 1282
396 1371
12 5160
644 5543
139 8511
688 9015
138 9082
600 9631
147 7947
642 8399
456 8802
648 8987
22 8446
2401 9071
6 31219
523 33613
263 7062
416 7321
211 5619
1960 5823
644 26124
1704 27439
1087 23239
72 23855
14 950
438 1007
13 5707
1132 6131
1051 15767
374 15847
239 5

In [None]:

mat_df = pd.DataFrame(mat_dic).transpose()

In [None]:
equal = mat_df.loc[mat_df['RadiusMCNC'] == mat_df['RadiusMCNA']]
print(len(equal))


In [48]:
mat_dic['DAC.cif']

{'LargerNode': 2.43823,
 'IndexLN': 29,
 'MostConnectedCell': 5695,
 'MostConnectedCellValue': 0.00012427567,
 'MostConnectedAll': 96,
 'MostConnectedAllValue': 0.00027131627}