In [1]:
# Select the GPU for this kernel through CUDA environment variables:
# devices are enumerated in PCI bus order and only device index 1 is exposed.
# NOTE(review): presumably this has to run before TensorFlow is imported further
# down for it to take effect — confirm.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=1


In [2]:
import os
import pymatgen
import matplotlib.pyplot as plt
import pandas as pd
from tqdm.notebook import trange, tqdm

In [3]:
# Table of defect structures; the first CSV column becomes the index.
# Later cells read its `_id` column (key of the matching CIF file) and the
# column named by `config.target`.
DEFECTS_CSV = "datasets/dichalcogenides_innopolis_202105/defects.csv"
structures = pd.read_csv(DEFECTS_CSV, index_col=0)

In [4]:
# Descriptor table shipped next to defects.csv; the first CSV column becomes the index.
# It is loaded here but not referenced by the other cells visible in this notebook.
DESCRIPTORS_CSV = "datasets/dichalcogenides_innopolis_202105/descriptors.csv"
defects_descriptors = pd.read_csv(DESCRIPTORS_CSV, index_col=0)

In [5]:
import pymatgen.io.cif

STRUCTURES_FOLDER = "datasets/dichalcogenides_innopolis_202105/initial/"

# os.listdir returns every directory entry, including sub-directories such as
# the ".ipynb_checkpoints" folder Jupyter may create. Those cannot be opened as
# CIF files, so only regular files are handed to the parser.
structure_files = [
    entry for entry in os.listdir(STRUCTURES_FOLDER)
    if os.path.isfile(os.path.join(STRUCTURES_FOLDER, entry))
]

# Maps "<file name without extension>" -> first structure parsed from that file.
# The key is what the `_id` column of the defects table is matched against later.
initial_structures = dict()
for structure_file in tqdm(structure_files):
    this_file = pymatgen.io.cif.CifParser(os.path.join(STRUCTURES_FOLDER, structure_file))
    initial_structures[os.path.splitext(structure_file)[0]] = this_file.get_structures()[0]

  0%|          | 0/3480 [00:00<?, ?it/s]


Issues encountered while parsing CIF: Some fractional co-ordinates rounded to ideal values to avoid issues with finite precision.



In [6]:
# Attach the parsed structure to every row, looked up by the row's `_id`.
# The dict is indexed directly so that an id without a parsed CIF still raises KeyError.
structures["initial_structure"] = structures["_id"].map(
    lambda structure_id: initial_structures[structure_id]
)

In [7]:
import matplotlib
import pylab

from math import floor, ceil
from random import shuffle, seed
from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
from megnet.data.molecule import MolecularGraph
from megnet.utils.preprocessing import StandardScaler
from megnet.callbacks import ModelCheckpointMAE
from pymatgen.core import Lattice, Structure, Molecule

# 'Set1' colormap object; not referenced by any other cell visible in this notebook.
cm = pylab.get_cmap('Set1')

import tensorflow as tf
import numpy as np

In [8]:
import wandb
from wandb.keras import WandbCallback

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
# Hold out a quarter of the rows; the fixed seed makes the split reproducible.
TEST_FRACTION = 0.25
SPLIT_SEED = 42
train, test = train_test_split(
    structures,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [11]:
# Start a Weights & Biases run under the given entity and project.
# Kept as the last expression of the cell so the returned value is still displayed.
wandb.init(
    entity='kazeev',
    project='ai4material_design',
)

[34m[1mwandb[0m: Currently logged in as: [33mkazeev[0m (use `wandb login --relogin` to force relogin)


In [12]:
# Record the name of the regression target column in the run configuration;
# later cells read it back as `config.target`.
wandb.config.target = "energy"
config = wandb.config

In [13]:
# Bond lengths are expanded on Gaussians centred on an evenly spaced grid; the
# number of centres must equal the edge-feature width given to the model, so
# both are taken from one constant.
N_BOND_FEATURES = 100
bond_centers = np.linspace(0, 5, N_BOND_FEATURES)
bond_expansion = GaussianDistance(bond_centers, 0.5)

# NOTE(review): the centres stop at 5 while the graph cutoff is 6 — presumably
# both are in angstrom; confirm that the mismatch is intentional.
gc = CrystalGraph(bond_converter=bond_expansion, cutoff=6)
model = MEGNetModel(
    nfeat_edge=N_BOND_FEATURES,
    nfeat_global=2,
    graph_converter=gc,
)#.from_file('8.9e-4')

In [14]:
# Fit the target scaler on the training split only, then attach it to the model.
# NOTE(review): is_intensive=False presumably marks the target as extensive,
# i.e. normalised per atom inside the scaler — confirm against the megnet docs.
training_structures = train["initial_structure"]
training_targets = train[config.target]
scaler = StandardScaler.from_training_data(
    training_structures,
    training_targets,
    is_intensive=False,
)
model.target_scaler = scaler

In [None]:
# Fit the model on the training split and stream metrics to Weights & Biases.
# NOTE(review): the held-out `test` split doubles as the validation set, so any
# checkpoint selected on val_mae has already seen it — confirm this is intended.
# NOTE(review): patience (1000) exceeds epochs (100), so patience-based stopping
# presumably cannot trigger within this run.
target_column = config.target
model.train(
    train["initial_structure"],
    train[target_column],
    validation_structures=test["initial_structure"],
    validation_targets=test[target_column],
    epochs=100,
    patience=1000,
    verbose=1,
    callbacks=[WandbCallback()],
)

Epoch 1/100



Converting sparse IndexedSlices(IndexedSlices(indices=Tensor("gradient_tape/model/set2set_atom/Reshape_9:0", shape=(None,), dtype=int32), values=Tensor("gradient_tape/model/set2set_atom/Reshape_8:0", shape=(None,), dtype=float32), dense_shape=Tensor("gradient_tape/model/set2set_atom/Cast:0", shape=(1,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.


Converting sparse IndexedSlices(IndexedSlices(indices=Tensor("gradient_tape/model/set2set_bond/Reshape_9:0", shape=(None,), dtype=int32), values=Tensor("gradient_tape/model/set2set_bond/Reshape_8:0", shape=(None,), dtype=float32), dense_shape=Tensor("gradient_tape/model/set2set_bond/Cast:0", shape=(1,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.


Converting sparse IndexedSlices(IndexedSlices(indices=Tensor("gradient_tape/model/set2set_atom/Reshape_27:0", shape=(None,), dtype=int32), values=Tensor("gradient_tape/model/set2set_atom/Reshape_26



INFO:megnet.callbacks:
Epoch 00001: val_mae improved from inf to 19.48412, saving model to callback/val_mae_00001_19.484121.hdf5


Epoch 2/100


INFO:megnet.callbacks:
Epoch 00002: val_mae improved from 19.48412 to 5.63743, saving model to callback/val_mae_00002_5.637435.hdf5


Epoch 3/100
Epoch 4/100


INFO:megnet.callbacks:
Epoch 00004: val_mae improved from 5.63743 to 4.95525, saving model to callback/val_mae_00004_4.955250.hdf5


Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


INFO:megnet.callbacks:
Epoch 00009: val_mae improved from 4.95525 to 3.96739, saving model to callback/val_mae_00009_3.967393.hdf5


Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100


INFO:megnet.callbacks:
Epoch 00021: val_mae improved from 3.96739 to 3.73976, saving model to callback/val_mae_00021_3.739757.hdf5


Epoch 22/100


INFO:megnet.callbacks:
Epoch 00022: val_mae improved from 3.73976 to 3.50279, saving model to callback/val_mae_00022_3.502786.hdf5


Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


In [None]:
# TODO(kazeevn) speedup
# Predict the target for every held-out structure and store it next to the truth.
test_structures = test["initial_structure"].values
test.loc[:, "predicted"] = model.predict_structures(test_structures)

In [None]:
# Parity plot: true target value against the model prediction on the held-out split.
fig, ax = plt.subplots()
ax.scatter(test[config.target], test["predicted"])
# The trailing semicolon suppresses the return value's repr in the cell output.
ax.set(xlabel="True energy, eV", ylabel="Predicted energy, eV");