In [1]:
#Import Packages
import ase
import os
from ase.io import read
import numpy as np
import csv
from ase.io.jsonio import read_json
import json
from scipy.stats import rankdata
from ase.visualize import view
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn.functional as F
from torch_geometric.data import DataLoader, Dataset, Data, InMemoryDataset
from torch_geometric.utils import dense_to_sparse, degree, add_self_loops
import torch_geometric.transforms as T
from torch_geometric.utils import degree

import glob, os

import networkx as nx

import trimesh
import pickle

In [2]:
def threshold_sort(matrix, threshold, neighbors, reverse=False, adj=False):
    """
    Trim a pairwise matrix row by row, keeping only low-ranked entries that
    do not exceed ``threshold``; every other entry is set to 0.

    Each row is ranked independently with ordinal ranks (1 = first). An entry
    survives when it is ``<= threshold`` and its rank is ``<= neighbors + 1``.
    For a distance matrix with a zero diagonal and ``reverse=False`` the
    diagonal entry takes rank 1, which is why ``neighbors + 1`` ranks are kept.

    Parameters
    ----------
    matrix : array-like, 2-D
        Pairwise values (e.g. distances). It is not modified.
    threshold : number
        Entries strictly greater than this are always dropped.
    neighbors : int
        ``neighbors + 1`` ranks are retained per row.
    reverse : bool, optional
        If true, entries within the threshold are ranked from largest to
        smallest instead of smallest to largest.
    adj : bool, optional
        If true, additionally return a padded adjacency list and the matching
        matrix values.

    Returns
    -------
    trimmed : ndarray
        ``matrix`` with every dropped entry replaced by 0.
    adj_list, adj_attr : ndarray, shape (n_rows, neighbors + 1)
        Only when ``adj`` is true. ``adj_list[i]`` holds the column indices
        kept in row ``i`` and ``adj_attr[i]`` the values ``matrix[i, idx]``.
        Rows with fewer kept entries are right-padded with index 0, so the
        padded slots of ``adj_attr`` contain ``matrix[i, 0]``.
    """
    # Accept nested lists as well as ndarrays.
    matrix = np.asarray(matrix)

    # True where the entry lies beyond the cut-off.
    mask = matrix > threshold

    if reverse:
        # Negate only the entries within the threshold. The out-of-threshold
        # entries keep their original value, so (for a non-negative
        # threshold) they still receive the highest ranks.
        rank_input = np.where(mask, matrix, matrix * -1)
    else:
        rank_input = matrix

    # Rank matrix: each value gets an integer position, e.g. [5,1,3] --> [3,1,2]
    ranks = rankdata(rank_input, method="ordinal", axis=1)

    # Out-of-threshold entries get rank 0, i.e. "dropped".
    ranks = np.nan_to_num(np.where(mask, np.nan, ranks))

    # Drop everything ranked beyond the requested neighbourhood size.
    ranks[ranks > neighbors + 1] = 0

    # Original values where an entry was kept, 0 elsewhere.
    trimmed = np.where(ranks == 0, ranks, matrix)
    if not adj:
        return trimmed

    n_rows = matrix.shape[0]
    adj_list = np.zeros((n_rows, neighbors + 1))
    adj_attr = np.zeros((n_rows, neighbors + 1))
    for i in range(n_rows):
        kept = np.where(ranks[i] != 0)[0]
        # Right-pad with index 0 so every row has the same length.
        adj_list[i, :] = np.pad(
            kept,
            pad_width=(0, neighbors + 1 - len(kept)),
            mode="constant",
            constant_values=0,
        )
        adj_attr[i, :] = matrix[i, adj_list[i, :].astype(int)]
    return trimmed, adj_list, adj_attr


In [3]:
# file = "C:\\Users\\GillA\\Desktop\\University\\PhD\\Projects\\Generation\\Zeolites\\pcod2_new\\9000001.cif"
# structure = ase.io.read(file) ### Relaxed Structure [No need for ]
# del structure[[atom.index for atom in structure if atom.symbol=='O']] # Removes Oxygens
# view(structure)

# distance_matrix = structure.get_all_distances(mic=True) 
# # print(distance_matrix)

# # Thresholds distance matrix, all pairwise distances above threshold are set to 0, includes a max of 4 neighbours
# distance_matrix_trimmed = threshold_sort(distance_matrix,4,4,adj=False) # matrix, threshold, neighbors, reverse=False, adj=False
# distance_matrix_trimmed = torch.Tensor(distance_matrix_trimmed)

# graph_OG = ([nx.from_numpy_array(distance_matrix_trimmed.numpy())])

# # NOTE: NOT SURE WHY THEY HAVE DONE THE BELOW LINE
# distance_matrix_trimmed[distance_matrix_trimmed != 0] = 1 # If matrix value is not zero set to 1

# # graph_tmp = nx.convert_matrix.from_numpy_matrix(distance_matrix_trimmed.numpy())
# graph_ones = ([nx.from_numpy_array(distance_matrix_trimmed.numpy())])

# # nx.write_graphml(graph_OG[0],"ZeoOG.graphml")
# # nx.write_graphml(graph_ones[0],"ZeoOnes.graphml")

In [4]:
# unit_cell = []
# zeo_graph = []

# for j in range(3):
#     for k in range(3):
#         for l in range(3):
#             distance_matrix = structure.repeat((j + 1, k + 1, l + 1)).get_all_distances(mic=True)
#             distance_matrix_trimmed = threshold_sort(distance_matrix,8,12,adj=False)
#             distance_matrix_trimmed = torch.Tensor(distance_matrix_trimmed)
#             distance_matrix_trimmed[distance_matrix_trimmed != 0] = 1
#             # graph_tmp = nx.convert_matrix.from_numpy_matrix(distance_matrix_trimmed.numpy())
#             zeo_graph.extend([nx.from_numpy_array(distance_matrix_trimmed.numpy())] * 10)
#             unit_cell.extend([i] * 10)


In [5]:
# Load every zeolite CIF, strip the oxygen atoms and keep the structures that
# have at most `max_nodes` remaining atoms.

max_nodes = 9999

# os.chdir("./MOF_data")
zeo_uc = []
# glob.glob returns paths in arbitrary order; sorting makes the position of a
# structure in `zeo_uc` (used as its unit-cell index further down) the same on
# every run.
cif_files = sorted(glob.glob("data/prelim/cssr/cif/*.cif"))
for i, file in enumerate(cif_files, start=1):
    structure = ase.io.read(file)
    # Remove the oxygens in place.
    del structure[[atom.index for atom in structure if atom.symbol=='O']]
    # Number of graph nodes = number of atoms left. This equals the side
    # length of the pairwise distance matrix, without having to build it.
    num_of_nodes = len(structure)
    if num_of_nodes <= max_nodes:  # keep structures with at most max_nodes nodes
        zeo_uc.append(structure)

    # Progress report every 1000 files.
    if i % 1000 == 0:
        print("Processed", i, "files")
        print("================================================")



Processed 1000 files
Processed 2000 files
Processed 3000 files
Processed 4000 files
Processed 5000 files
Processed 6000 files
Processed 7000 files
Processed 8000 files
Processed 9000 files
Processed 10000 files
Processed 11000 files
Processed 12000 files
Processed 13000 files
Processed 14000 files
Processed 15000 files
Processed 16000 files
Processed 17000 files
Processed 18000 files
Processed 19000 files
Processed 20000 files
Processed 21000 files
Processed 22000 files
Processed 23000 files
Processed 24000 files
Processed 25000 files
Processed 26000 files
Processed 27000 files
Processed 28000 files
Processed 29000 files
Processed 30000 files
Processed 31000 files
Processed 32000 files
Processed 33000 files
Processed 34000 files
Processed 35000 files
Processed 36000 files
Processed 37000 files
Processed 38000 files
Processed 39000 files
Processed 40000 files
Processed 41000 files
Processed 42000 files
Processed 43000 files
Processed 44000 files
Processed 45000 files
Processed 46000 fil



In [6]:
unit_cell = []
zeo_graph = []
# One graph per stored unit cell; `unit_cell` records the index of the
# structure each graph was built from.
for i, s1 in enumerate(zeo_uc):
    print("Processing ",i,"-th graph", sep="")

    # Pairwise distances under the minimum-image convention.
    distance_matrix = s1.get_all_distances(mic=True)

    # Zero out every distance above 4 and everything ranked past the
    # (neighbors + 1) = 5 closest entries of each row.
    distance_matrix_trimmed = torch.Tensor(
        threshold_sort(distance_matrix, 4, 4, adj=False)
    )

    # Binarise: any surviving (non-zero) distance becomes an edge of weight 1,
    # 0 means "no interaction".
    # TODO: open question from the author - is a 0/1 weight the right choice?
    # Without this line the edge weights would be the distances themselves;
    # a Lennard-Jones potential was floated as another option.
    distance_matrix_trimmed[distance_matrix_trimmed != 0] = 1

    zeo_graph.append(nx.from_numpy_array(distance_matrix_trimmed.numpy()))
    unit_cell.append(i)

Processing 0-th graph
Processing 1-th graph
Processing 2-th graph
Processing 3-th graph
Processing 4-th graph
Processing 5-th graph
Processing 6-th graph
Processing 7-th graph
Processing 8-th graph
Processing 9-th graph
Processing 10-th graph
Processing 11-th graph
Processing 12-th graph
Processing 13-th graph
Processing 14-th graph
Processing 15-th graph
Processing 16-th graph
Processing 17-th graph
Processing 18-th graph
Processing 19-th graph
Processing 20-th graph
Processing 21-th graph
Processing 22-th graph
Processing 23-th graph
Processing 24-th graph
Processing 25-th graph
Processing 26-th graph
Processing 27-th graph
Processing 28-th graph
Processing 29-th graph
Processing 30-th graph
Processing 31-th graph
Processing 32-th graph
Processing 33-th graph
Processing 34-th graph
Processing 35-th graph
Processing 36-th graph
Processing 37-th graph
Processing 38-th graph
Processing 39-th graph
Processing 40-th graph
Processing 41-th graph
Processing 42-th graph
Processing 43-th grap

In [7]:
# Make sure the output directory exists. exist_ok=True replaces the separate
# "check, then create" steps, which could fail if the directory appeared in
# between the two calls.
os.makedirs("Data", exist_ok=True)

# Persist the graphs and the matching unit-cell indices.
with open('Data/ZeoGraphs.p', 'wb') as f:
    pickle.dump(zeo_graph, f)

with open('Data/ZeoUnitCells.p', 'wb') as f:
    pickle.dump(unit_cell, f)


In [14]:
# Track the largest node count over all graphs, printing each new maximum
# as it is found.
max_uc_nodes = 0
for graph in zeo_graph:
    node_count = graph.number_of_nodes()
    if node_count > max_uc_nodes:
        max_uc_nodes = node_count
        print(max_uc_nodes)

8
14
16


In [15]:
unit_cell = []
zeo_graph = []
# For every unit cell build one graph per supercell size, repeating the cell
# 1..3 times along each of the three axes (27 graphs per unit cell).
# `unit_cell` records the index of the originating structure for each graph.
# (An earlier, commented-out variant added every graph 10 times.)
for i, s1 in enumerate(zeo_uc):
    print("Processing ",i,"-th graph", sep="")
    # np.ndindex walks (j, k, l) with the last index varying fastest, i.e. in
    # the same order as three nested range(3) loops.
    for j, k, l in np.ndindex(3, 3, 3):
        supercell = s1.repeat((j + 1, k + 1, l + 1))
        distance_matrix = supercell.get_all_distances(mic=True)
        distance_matrix_trimmed = torch.Tensor(
            threshold_sort(distance_matrix, 4, 4, adj=False)
        )
        # Any surviving distance becomes an edge of weight 1.
        distance_matrix_trimmed[distance_matrix_trimmed != 0] = 1
        zeo_graph.append(nx.from_numpy_array(distance_matrix_trimmed.numpy()))
        unit_cell.append(i)

Processing 0-th graph
Processing 1-th graph
Processing 2-th graph
Processing 3-th graph
Processing 4-th graph
Processing 5-th graph
Processing 6-th graph
Processing 7-th graph
Processing 8-th graph
Processing 9-th graph
Processing 10-th graph
Processing 11-th graph
Processing 12-th graph
Processing 13-th graph
Processing 14-th graph
Processing 15-th graph
Processing 16-th graph
Processing 17-th graph
Processing 18-th graph
Processing 19-th graph
Processing 20-th graph
Processing 21-th graph
Processing 22-th graph
Processing 23-th graph
Processing 24-th graph
Processing 25-th graph
Processing 26-th graph
Processing 27-th graph
Processing 28-th graph
Processing 29-th graph
Processing 30-th graph
Processing 31-th graph
Processing 32-th graph
Processing 33-th graph
Processing 34-th graph
Processing 35-th graph
Processing 36-th graph
Processing 37-th graph
Processing 38-th graph
Processing 39-th graph
Processing 40-th graph
Processing 41-th graph
Processing 42-th graph
Processing 43-th grap

AttributeError: module 'os' has no attribute 'isdir'