#### create .cif file from .json file > build cif dataset from json files

In [1]:
import yaml
import json

import pandas as pd
import numpy as np
import tensorflow as tf

from pathlib import Path
from pymatgen.io import cif
from pymatgen.core import Structure

In [2]:
def read_pymatgen_dict(file):
    """Load a pymatgen ``Structure`` from a JSON file.

    Args:
        file: Path to a JSON file holding the dict representation accepted
            by ``Structure.from_dict``.

    Returns:
        The ``Structure`` built from the decoded JSON content.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # text encoding, which differs between operating systems and locales.
    with open(file, "r", encoding="utf-8") as f:
        d = json.load(f)
    return Structure.from_dict(d)

In [3]:
def prepare_dataset(dataset_path):
    """Load structures and their targets from a dataset directory.

    Reads ``targets.csv`` (its first column is used as the index) and parses
    every entry of the ``structures`` sub-directory with
    ``read_pymatgen_dict``.

    Args:
        dataset_path: Directory containing ``targets.csv`` and a
            ``structures`` sub-directory.

    Returns:
        A DataFrame indexed by structure file name without its final
        extension, with a ``structures`` column holding the parsed objects
        and a ``targets`` column taken from ``targets.csv``.
    """
    dataset_path = Path(dataset_path)
    targets = pd.read_csv(dataset_path / "targets.csv", index_col=0)
    struct = {
        # ``Path.stem`` drops only the final suffix. ``str.strip(".json")``
        # would instead remove any leading/trailing '.', 'j', 's', 'o' or 'n'
        # characters and silently corrupt identifiers that start or end
        # with one of them.
        item.stem: read_pymatgen_dict(item)
        for item in (dataset_path / "structures").iterdir()
    }

    data = pd.DataFrame(columns=["structures"], index=struct.keys())
    data = data.assign(structures=struct.values(), targets=targets)

    return data

In [4]:
# Load the dataset; edit the path below to point at a different dataset directory.
sample_df = prepare_dataset('../data/dichalcogenides_public')  ## changeable param

In [None]:
# Preview the first rows of the loaded dataset.
sample_df.head()

Unnamed: 0,structures,targets
6141cf0f51c1cbd9654b8870,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,1.0843
6141cf1051c1cbd9654b8872,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,1.1102
6141cf11ae4fb853db2e3f14,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,0.36
6141cf11b842c2e72e2f2d48,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,1.8068
6141cf11cc0e69a0cf28ab35,[[1.27612629e-07 1.84192955e+00 3.71975100e+00...,1.1484


In [10]:
# Inspect the index values; they are passed to vsave_cif below as the ids
# that name the output .cif files.
sample_df.index.values

array(['6141cf0f51c1cbd9654b8870', '6141cf1051c1cbd9654b8872',
       '6141cf11ae4fb853db2e3f14', ..., '6146e9103ac25c70a5c6cded',
       '6146ecdb3ac25c70a5c6cdef', '6147d3de31cf3ef3d4a9f846'],
      dtype=object)

In [9]:
def save_cif(struct, sid, cif_root='../data/train_cif'):
    """Write one structure to ``<cif_root>/<sid>.cif``.

    Args:
        struct: Structure object handed to ``cif.CifWriter``.
        sid: Identifier used as the output file name (without extension).
        cif_root: Output directory. Defaults to the previously hard-coded
            location, so existing two-argument calls behave as before.
    """
    out_dir = Path(cif_root)
    # Create the output directory on demand so a fresh checkout does not
    # fail with FileNotFoundError on the first write.
    out_dir.mkdir(parents=True, exist_ok=True)
    cif_obj = cif.CifWriter(struct)
    # Pass a plain string: not every writer version accepts Path objects.
    cif_obj.write_file(str(out_dir / f"{sid}.cif"))

# Element-wise wrapper around save_cif. Declaring ``otypes`` up front stops
# np.vectorize from making an extra trial call on the first element to infer
# the output type (which would write the first file twice) and lets the
# wrapper accept empty inputs instead of raising ValueError.
vsave_cif = np.vectorize(save_cif, otypes=[object])

In [11]:
# Write one .cif file per row, named after the row's index value.
vsave_cif(sample_df['structures'].values, sample_df.index.values)

array([None, None, None, ..., None, None, None], dtype=object)