In [1]:
import yaml
import json
import argparse

import pandas as pd
import numpy as np
import tensorflow as tf

from pathlib import Path
from pymatgen.core import Structure
from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from megnet.layers import MEGNetLayer, Set2Set

In [2]:
# Display the installed TensorFlow version so the run environment is recorded.
tf.__version__

'2.7.0'

In [3]:
# List the GPUs TensorFlow can see; nothing below runs when the list is empty.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Cap TensorFlow at 2048 MB (2 GB) of memory on the first GPU by exposing
  # it as a single logical device with that memory limit.
  try:
    tf.config.set_logical_device_configuration(
        gpus[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=2048)])
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Virtual devices must be set before GPUs have been initialized
    print(e)

1 Physical GPUs, 1 Logical GPUs


In [4]:
def read_pymatgen_dict(file):
    """Load a JSON file and rebuild a pymatgen ``Structure`` from its contents.

    Args:
        file: Path of the JSON file to read (anything ``open`` accepts).

    Returns:
        The object returned by ``Structure.from_dict`` for the parsed JSON.
    """
    with open(file, "r") as handle:
        payload = json.load(handle)
    structure = Structure.from_dict(payload)
    return structure

In [5]:
def energy_within_threshold(prediction, target):
    """Fraction of entries whose absolute error is below 0.02.

    The absolute difference between ``target`` and ``prediction`` is taken
    element-wise; the number of elements under the threshold is divided by
    the total number of elements in ``target``.

    Args:
        prediction: Predicted energies.
        target: Reference energies.

    Returns:
        A scalar tensor holding the ratio of in-threshold elements.
    """
    threshold = 0.02
    abs_error = tf.math.abs(target - prediction)

    # Count how many elements fall strictly inside the threshold.
    within = abs_error < threshold
    n_within = tf.math.count_nonzero(within)

    # count_nonzero yields int64, so the element count is cast to match.
    n_total = tf.cast(tf.size(target), tf.int64)
    return n_within / n_total

In [6]:
def prepare_dataset(dataset_path):
    """Load structures and targets from a dataset directory and split them.

    Reads ``targets.csv`` (its first column becomes the index) and every
    entry of the ``structures`` sub-directory, keying each structure by its
    file name without the extension.

    Args:
        dataset_path: Directory (str or Path) containing ``targets.csv`` and
            a ``structures`` sub-directory of JSON files.

    Returns:
        The ``(train, test)`` pair returned by ``train_test_split`` with 25%
        of the rows held out and ``random_state=666``. Each frame has a
        ``structures`` and a ``targets`` column.
    """
    dataset_path = Path(dataset_path)
    targets = pd.read_csv(dataset_path / "targets.csv", index_col=0)
    # Path.stem drops the extension. The previous str.strip(".json") removed
    # any of the characters '.', 'j', 's', 'o', 'n' from BOTH ends of the
    # name, which corrupts ids that start or end with one of those letters.
    struct = {
        item.stem: read_pymatgen_dict(item)
        for item in (dataset_path / "structures").iterdir()
    }

    # Index by structure id so the targets align on the same labels.
    # NOTE(review): assumes targets.csv has a single value column - confirm.
    data = pd.DataFrame(columns=["structures"], index=struct.keys())
    data = data.assign(structures=struct.values(), targets=targets)

    return train_test_split(data, test_size=0.25, random_state=666)

In [7]:
# Dataset root passed to prepare_dataset(), which reads targets.csv and the
# structures/ directory beneath it.
data_path = '../data/dichalcogenides_public/'

In [12]:
# Model hyper-parameters and training settings, collected in one namespace
# that prepare_model() and the training code read from.
args = argparse.Namespace(
    v_shape=[32, 16],
    e_shape=[32, 16],
    u_shape=[32, 16],
    pool_method='mean',   # 'sum'
    num_of_megnets=1,
    n_atom_feature=20,
    n_bond_feature=10,
    n_global_feature=2,
    hidden_dense=120,
    loss='mse',
    opt='adam',
    batch_size=32,
    epochs=5,
)

In [14]:
def prepare_model(args):
    """Build an uncompiled Keras model with one MEGNet layer and a Dense head.

    Args:
        args: Namespace providing ``n_atom_feature``, ``n_bond_feature``,
            ``n_global_feature``, ``v_shape``, ``e_shape``, ``u_shape`` and
            ``pool_method``. Other attributes are not read here.

    Returns:
        A ``Model`` taking the seven graph inputs and producing the output
        of a single-unit Dense layer applied to the layer's third output.
    """
    # Feature placeholders, in order: atom, bond and global features.
    feature_widths = (
        args.n_atom_feature,
        args.n_bond_feature,
        args.n_global_feature,
    )
    feature_inputs = [Input(shape=(None, width)) for width in feature_widths]

    # Integer placeholders, in order: bond index1, bond index2, atom_ind
    # and bond_ind.
    index_inputs = [Input(shape=(None,), dtype='int32') for _ in range(4)]

    xs = feature_inputs + index_inputs

    # Each shape list gives the hidden units plus the output unit of the
    # corresponding update; lists of any length such as [n1] are accepted.
    megnet_layer = MEGNetLayer(
        args.v_shape,
        args.e_shape,
        args.u_shape,
        pool_method=args.pool_method,
        activation='relu',
    )
    graph_out = megnet_layer(xs)

    # The layer returns a tuple of new graphs V, E and u. Since u is a
    # per-structure quantity it feeds the per-structure prediction directly.
    prediction = Dense(1)(graph_out[2])

    return Model(inputs=xs, outputs=prediction)

In [None]:
# Split the dataset, then build and compile the model from the settings in args.
train, test = prepare_dataset(data_path)
model = prepare_model(args)
model.compile(loss=args.loss, optimizer=args.opt)
# NOTE(review): prepare_model() returns a plain Keras Model, which has no
# `train` method; the recorded run of the same code in main() fails with
# "AttributeError: 'Functional' object has no attribute 'train'". The keyword
# names below look like megnet's GraphModel.train - TODO wrap the model in a
# graph-aware model (with a graph converter) before calling this.
model.train(
    train.structures,
    train.targets,
    validation_structures=test.structures,
    validation_targets=test.targets,
    # Each setting now goes to its own keyword; they were previously swapped.
    epochs=int(args.epochs),
    batch_size=int(args.batch_size),
)

In [15]:
def main(args):
    """Split the dataset, build and compile the model, then start training.

    The dataset location comes from the module-level ``data_path``.

    Args:
        args: Namespace providing the model settings read by
            ``prepare_model`` plus ``loss``, ``opt``, ``epochs`` and
            ``batch_size``.
    """
    train, test = prepare_dataset(data_path)
    model = prepare_model(args)
    model.compile(loss=args.loss, optimizer=args.opt)
    # NOTE(review): prepare_model() returns a plain Keras Model, which has no
    # `train` method; the recorded run fails with "AttributeError:
    # 'Functional' object has no attribute 'train'". The keyword names below
    # look like megnet's GraphModel.train - TODO wrap the model in a
    # graph-aware model (with a graph converter) before calling this.
    model.train(
        train.structures,
        train.targets,
        validation_structures=test.structures,
        validation_targets=test.targets,
        # Each setting now goes to its own keyword; they were previously swapped.
        epochs=int(args.epochs),
        batch_size=int(args.batch_size),
    )

In [16]:
# Run the training entry point with the configuration namespace `args`.
main(args)

AttributeError: 'Functional' object has no attribute 'train'