In [1]:
import yaml
import json

import pandas as pd
import numpy as np
import tensorflow as tf

from pathlib import Path
from pymatgen.io import cif
from pymatgen.core import Structure
from sklearn.model_selection import train_test_split
from megnet.models import MEGNetModel
from megnet.data.crystal import CrystalGraph

In [2]:
# Ask TensorFlow which GPU devices it can see; the bare `gpus` expression on
# the last line displays the resulting list as the cell output.
gpus = tf.config.list_physical_devices('GPU')
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

Public dataset import & representation

In [3]:
def read_pymatgen_dict(file):
    """Read a JSON file and rebuild a pymatgen ``Structure`` from it.

    Parameters
    ----------
    file : str or path-like
        Path of a JSON file holding the dictionary form of a structure.

    Returns
    -------
    Structure
        The object produced by ``Structure.from_dict`` for the parsed JSON.
    """
    with open(file, "r") as handle:
        payload = json.load(handle)
    structure = Structure.from_dict(payload)
    return structure

In [4]:
def prepare_dataset(dataset_path):
    """Load a dataset directory into a DataFrame of structures and targets.

    The directory is expected to contain ``targets.csv`` (its first column is
    used as the index) and a ``structures`` sub-directory holding one JSON
    file per structure, named ``<id>.json``.

    Parameters
    ----------
    dataset_path : str or pathlib.Path
        Root directory of the dataset.

    Returns
    -------
    pandas.DataFrame
        Indexed by structure id (the file name without its ``.json``
        extension), with a ``structures`` column holding the objects returned
        by ``read_pymatgen_dict`` and a ``targets`` column assigned from the
        contents of ``targets.csv``. Splitting into train/test sets is left
        to the caller.
    """
    dataset_path = Path(dataset_path)
    targets = pd.read_csv(dataset_path / "targets.csv", index_col=0)

    # Only regular ``*.json`` files are structures; this skips stray entries
    # such as an ``.ipynb_checkpoints`` directory that would break ``open``.
    # Sorting makes the row order reproducible (``iterdir`` order is arbitrary).
    structure_files = sorted(
        item
        for item in (dataset_path / "structures").iterdir()
        if item.is_file() and item.suffix == ".json"
    )

    # ``Path.stem`` removes just the extension. ``str.strip(".json")`` would
    # instead strip any of the characters ".", "j", "s", "o", "n" from both
    # ends of the name and corrupt ids that start or end with those letters.
    struct = {item.stem: read_pymatgen_dict(item) for item in structure_files}

    data = pd.DataFrame(columns=["structures"], index=struct.keys())
    data = data.assign(structures=struct.values(), targets=targets)

    return data

In [5]:
# Load the public dichalcogenides dataset (path is relative to the notebook)
# into a DataFrame with `structures` and `targets` columns.
sample_df = prepare_dataset('../data/dichalcogenides_public')

In [6]:
# Preview the first rows to check that ids, structures and targets line up.
sample_df.head()

Unnamed: 0,structures,targets
6141cf0f51c1cbd9654b8870,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,1.0843
6141cf1051c1cbd9654b8872,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,1.1102
6141cf11ae4fb853db2e3f14,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,0.36
6141cf11b842c2e72e2f2d48,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,1.8068
6141cf11cc0e69a0cf28ab35,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,1.1484


In [7]:
# Pick one structure to inspect: row position 5, column position 0
# (`structures` is the first column built by prepare_dataset).
r = sample_df.iloc[5, 0]

In [8]:
# Show the structure's text summary together with its Python type.
print(r, type(r))

Full Formula (Mo64 Se1 S126)
Reduced Formula: Mo64SeS126
abc   :  25.522526  25.522526  14.879004
angles:  90.000000  90.000000 120.000000
Sites (191)
  #  SP           a         b         c
---  ----  --------  --------  --------
  0  Mo    0.041667  0.083333  0.25
  1  Mo    0.041667  0.208333  0.25
  2  Mo    0.041667  0.333333  0.25
  3  Mo    0.041667  0.458333  0.25
  4  Mo    0.041667  0.583333  0.25
  5  Mo    0.041667  0.708333  0.25
  6  Mo    0.041667  0.833333  0.25
  7  Mo    0.041667  0.958333  0.25
  8  Mo    0.166667  0.083333  0.25
  9  Mo    0.166667  0.208333  0.25
 10  Mo    0.166667  0.333333  0.25
 11  Mo    0.166667  0.458333  0.25
 12  Mo    0.166667  0.583333  0.25
 13  Mo    0.166667  0.708333  0.25
 14  Mo    0.166667  0.833333  0.25
 15  Mo    0.166667  0.958333  0.25
 16  Mo    0.291667  0.083333  0.25
 17  Mo    0.291667  0.208333  0.25
 18  Mo    0.291667  0.333333  0.25
 19  Mo    0.291667  0.458333  0.25
 20  Mo    0.291667  0.583333  0.25
 21  Mo    0.

In [10]:
# Keep the `structures` column on its own for the checks below.
sample_structs = sample_df.structures

In [13]:
# Confirm the column is a pandas Series whose elements are pymatgen Structures.
# Use .iloc for the positional lookup: the Series is indexed by string ids, so
# `sample_structs[0]` relies on the deprecated integer-key positional fallback
# of Series.__getitem__.
print(type(sample_structs))
print(type(sample_structs.iloc[0]))

<class 'pandas.core.series.Series'>
<class 'pymatgen.core.structure.Structure'>


In [9]:
# Wrap the sample structure in a CIF writer so it can be exported to a file.
cif_obj = cif.CifWriter(r)

In [10]:
# Export the sample structure as CIF. Create the target directory first so the
# cell also runs on a fresh checkout where ../data/sample does not exist yet.
Path('../data/sample').mkdir(parents=True, exist_ok=True)
cif_obj.write_file('../data/sample/sample1.cif')

In [11]:
from pymatgen.io.cif import CifParser
import nglview as nv
# from nglview import show_structure_file



In [16]:
# Build an nglview widget from the CIF file exported above.
view = nv.show_structure_file('../data/sample/sample1.cif')
# Optional display tweaks, currently disabled:
# view._remote_call('setSize', args=['', '600px'])
# view.camera = 'orthographic'
# view.add_unitcell
# NOTE(review): the widget is only assigned here; the trailing `view` that
# would render it as the cell output is commented out.
# view

In [18]:
# Show the widget's plain-text repr (this does not render the 3D view itself).
repr(view)

'NGLWidget()'