# Creating Dataset of Structurally Unique Materials with OQMD
---

In [1]:
import os
import sys

import pickle
import pandas as pd

from ase.visualize import view

from protosearch.build_bulk.oqmd_interface import OqmdInterface

# Read Data

In [2]:
# Locate the OQMD database file underneath the directory named by the
# PROJ_DATA environment variable (a KeyError is raised if it is not set).
proj_data_root = os.environ["PROJ_DATA"]
db_path = os.path.join(
    proj_data_root, "05_prototype_ML_2/oqmd_data_from_meng", "oqmd_ver3.db"
)

# Instantiate the interface on that file with verbose output enabled.
DB_inter = OqmdInterface(db_path, verbose=True)

# Creating Unique Structures

In [3]:
# One `create_proto_data_set` call per Al-O chemical formula. The calls run in
# the order listed here (dicts preserve insertion order).
formula_by_frame_name = {
    "df_a2b1": "Al2O",
    "df_a3b1": "Al3O",
    "df_ab1": "AlO",
    "df_ab2": "AlO2",
    "df_ab3": "AlO3",
    "df_ab4": "AlO4",
}

proto_sets = {
    frame_name: DB_inter.create_proto_data_set(chemical_formula=formula)
    for frame_name, formula in formula_by_frame_name.items()
}

# Expose each result under the variable name the following cells use.
df_a2b1 = proto_sets["df_a2b1"]
df_a3b1 = proto_sets["df_a3b1"]
df_ab1 = proto_sets["df_ab1"]
df_ab2 = proto_sets["df_ab2"]
df_ab3 = proto_sets["df_ab3"]
df_ab4 = proto_sets["df_ab4"]

Number of unique prototypes: 
393


100%|██████████| 393/393 [06:05<00:00,  1.03it/s]


Number of unique prototypes: 
195


100%|██████████| 195/195 [02:57<00:00,  1.22it/s]


Number of unique prototypes: 
289


100%|██████████| 289/289 [14:19<00:00,  1.23it/s]


Number of unique prototypes: 
393


100%|██████████| 393/393 [06:11<00:00,  1.01it/s]


Number of unique prototypes: 
195


100%|██████████| 195/195 [03:02<00:00,  1.18it/s]


Number of unique prototypes: 
89


100%|██████████| 89/89 [01:18<00:00,  1.12it/s]


# Combining Outputs into Single Dataframe

In [9]:
# Tag every per-formula frame with its stoichiometry label and stack them into
# one combined frame, `df_m`.
labelled_frames = [
    ("A2B1", df_a2b1),
    ("A3B1", df_a3b1),
    ("AB1", df_ab1),
    ("AB2", df_ab2),
    ("AB3", df_ab3),
    ("AB4", df_ab4),
]

for stoich_label, frame in labelled_frames:
    # A scalar is broadcast to every row, so no per-row list is needed.
    frame["stoich"] = stoich_label

frames = [frame for _, frame in labelled_frames]

# NOTE(review): pd.concat keeps each frame's own index by default, so index
# labels presumably repeat across stoichiometries in `df_m` -- confirm that
# downstream code does not rely on unique index labels.
df_m = pd.concat(frames)

In [19]:
# Show only the first rows instead of dumping the whole combined frame.
df_m.head(10)

Unnamed: 0,atoms,existing_structure,proto_name,stoich
0,"(Atom('Al', [1.8062224413921302, 0.0, 0.489358...",False,AB2_10_a10_a20_8,A2B1
1,"(Atom('Al', [0.0, 0.0, 15.7710626871], index=0...",False,AB2_10_a2b8_a8b2c10_156,A2B1
2,"(Atom('Al', [0.0, 0.0, 3.0208166625], index=0)...",False,AB2_10_a4b6_a6b4c10_156,A2B1
3,"(Atom('Al', [0.0, 0.0, 0.6061072583999999], in...",False,AB2_10_a9b_ab9c10_156,A2B1
4,"(Atom('Al', [0.0, 0.0, 0.0], index=0), Atom('A...",False,AB2_10_ag2_g5_58,A2B1
5,"(Atom('Al', [0.0, 0.0, 0.0], index=0), Atom('A...",False,AB2_10_ai_fij_136,A2B1
6,"(Atom('Al', [0.0, 3.071655, 3.96144587709], in...",False,AB2_10_cg_abg2_85,A2B1
7,"(Atom('Al', [4.4660282891035, 1.26889, 0.54309...",False,AB2_11_a11_a22_8,A2B1
8,"(Atom('Al', [0.0, 0.0, 0.6059060092], index=0)...",False,AB2_11_a5b6_a6b5c11_156,A2B1
9,"(Atom('Al', [1.454046630964, 1.368446040684, 0...",False,AB2_12_a12_a24_1,A2B1


# Saving Data to Pickle

In [11]:
# Serialise the combined frame `df_m` to a pickle file.
# `os` and `pickle` are already imported in the notebook's first cell.
out_path = "__outdata__/Al-O_AB1_2_3_4_data.pickle"

# Create the output directory first; open(..., "wb") raises FileNotFoundError
# if it does not exist yet (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(out_path), exist_ok=True)

with open(out_path, "wb") as fle:
    pickle.dump(df_m, fle)

# Viewing ASE Atoms Objects

In [18]:
# Optional, disabled by default: uncomment ONE of the calls below to pass a
# list of the stored atoms objects to `view` (imported from ase.visualize).
#   - first call:  every row of df_m whose "atoms" entry is not null
#   - second call: only the rows whose "stoich" label is "A3B1"
# view(df_m[df_m["atoms"].notnull()]["atoms"].tolist())
# view(df_m[df_m["stoich"] == "A3B1"]["atoms"].tolist())