In [2]:
import json
import pymatgen

import tensorflow as tf

from pymatgen.core import Structure
import pandas as pd
from pathlib import Path

In [3]:
def read_pymatgen_dict(file):
    """Read a JSON file and build a pymatgen ``Structure`` from its contents.

    Parameters
    ----------
    file : path-like
        Location of a JSON document; it is opened in text mode for reading.

    Returns
    -------
    The object produced by ``Structure.from_dict`` for the decoded JSON.
    """
    with open(file, "r") as handle:
        payload = json.loads(handle.read())
    structure = Structure.from_dict(payload)
    return structure

In [6]:
# Dataset folder, given relative to the current working directory.
dataset_path1 = Path("./data/dichalcogenides_public")

In [5]:
def to_dataframe(dataset_path):
    """Flatten every structure under ``dataset_path / 'structures'`` into a row.

    Despite the name, the result is a plain list of lists, not a DataFrame.

    Parameters
    ----------
    dataset_path : pathlib.Path
        Directory that contains a ``structures`` sub-directory. Every entry of
        that sub-directory is loaded with ``read_pymatgen_dict``.

    Returns
    -------
    list of list
        One row per structure, laid out as: the structure name, the values of
        ``lattice.abc``, the values of ``lattice.angles``, ``lattice.volume``,
        the values of ``lattice.angles`` a second time, and then, for each row
        of ``structure.as_dataframe()`` in order, the value of every column.
        Rows differ in length when the per-site tables differ in length.
    """
    def _structure_name(item):
        # str.strip('.json') removes any of the characters ".json" from BOTH
        # ends (e.g. "notes.json" -> "te"); drop only a real ".json" suffix.
        return item.stem if item.suffix == '.json' else item.name

    structures = {
        _structure_name(item): read_pymatgen_dict(item)
        for item in (dataset_path / 'structures').iterdir()
    }

    final_data = []
    for name, structure in structures.items():
        lattice = structure.lattice
        # NOTE(review): the angles are appended twice (before and after the
        # volume). This looks like a copy-paste leftover, but it is kept so the
        # positional layout consumed downstream does not shift.
        row = [
            name,
            *lattice.abc,
            *lattice.angles,
            lattice.volume,
            *lattice.angles,
        ]

        # Append the per-site table row by row, column by column.
        sites = structure.as_dataframe()
        for i in range(len(sites)):
            for column in sites.columns:
                row.append(sites[column][i])

        final_data.append(row)
    return final_data

In [7]:
def prepare_dataset(df):
    """Fill missing values and one-hot encode the text columns of ``df``.

    The input frame is left untouched; all work happens on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a column labelled ``0``, which is passed through as the
        first output column. Columns after the first one that hold text
        (``object`` or pandas string dtype) are treated as categorical; every
        column that is not text is treated as numerical.

    Returns
    -------
    pandas.DataFrame
        Column ``0``, then the one-hot encoded categorical columns, then the
        numerical columns. Missing numerical values become ``-999999`` and
        missing values elsewhere become the string ``"no"``.
    """
    def _is_text(column):
        # Text lives in ``object`` columns, or in a dedicated string dtype
        # whose ``dtype.name`` is not 'object'.
        dtype = df[column].dtype
        return dtype.name == 'object' or isinstance(dtype, pd.StringDtype)

    categorical_columns = [c for c in df.columns[1:] if _is_text(c)]
    numerical_columns = [c for c in df.columns if not _is_text(c)]

    # Work on a copy so the caller's frame does not silently receive the
    # sentinel fill values.
    filled = df.copy()
    for c in numerical_columns:
        filled[c] = filled[c].fillna(-999999)
    filled = filled.fillna("no")

    parts = [filled[0]]
    if categorical_columns:
        # Guarded: get_dummies is skipped when there is nothing to encode.
        parts.append(pd.get_dummies(filled[categorical_columns].astype("str")))
    parts.append(filled[numerical_columns])

    return pd.concat(parts, axis=1)