In [3]:
# first attempt at multi task learning and integration with topological quantum chemistry database
import e3nn.util
import torch
import torch.nn as nn
import torch.optim as optim
import torch_geometric
import torch_scatter

import e3nn
from e3nn import o3
#from e3nn.util.datatypes import DataPeriodicNeighbors
#from e3nn.nn._gate import GatedConvParityNetwork
#from e3nn.math._linalg import Kernel

import pymatgen as mg
import pymatgen.io
from pymatgen.core.structure import Structure
import pymatgen.analysis.magnetism.analyzer as pg
from mp_api.client import MPRester
import numpy as np
import pickle
from mendeleev import element
import matplotlib.pyplot as plt

from sklearn.metrics import average_precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

import io
import random
import math
import sys
import time, os
import datetime
from pathlib import Path
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
load_dotenv()
load_dotenv(Path("/Users/abiralshakya/Documents/Research/Topological_Insulators_OnGithub/generative_nmti/Integrated_Magnetic_Topological/matprojectapi.env"))
api_key = os.getenv("MP_API_KEY")

In [10]:
# %% Process Materials Project Data
order_list_mp = []
structures_list_mp = []
formula_list_mp = []
sites_list = []
id_list_mp = []
y_values_mp = []
order_encode = {"NM": 0, "AFM": 1, "FM": 2, "FiM": 2}
topo_encode = {False: 0, True: 1}


mp_structures_dict = torch.load('/Users/abiralshakya/Documents/Research/Topological_Insulators_OnGithub/generative_nmti/Integrated_Magnetic_Topological/magnetic_order/preload_data/mp_structures_2025-04-07_12-52.pt', 
                                weights_only= False)
structures = mp_structures_dict['structures']
materials = mp_structures_dict['materials_id']
formulas = mp_structures_dict['formulas']
orders = mp_structures_dict['order']
nsites = mp_structures_dict['nsites']

structures_copy = structures.copy()
# for struc in structures_copy:
#     print(struc)
#     # if len(struc["structur"]) > 250:
#     #     structures.remove(struc)
#     #     print("MP Structure Deleted")

print(type(structures))

# If it's a dictionary:
if isinstance(structures, dict):
    print("Keys:", list(structures.keys())[:5])
    print("Sample value type:", type(list(structures.values())[0]))

# If it's something else (like a pandas DataFrame), this will help too:
try:
    print(structures.head())
except:
    pass

<class 'list'>


In [6]:
order_list = []
for i in range(len(structures)):
    order = pg.CollinearMagneticStructureAnalyzer(structures[i])
    order_list.append(order.ordering.name)
id_NM = []
id_FM = []
id_AFM = []
for i in range(len(structures)):
    if order_list[i] == 'NM':
        id_NM.append(i)
    if order_list[i] == 'AFM':
        id_AFM.append(i)
    if order_list[i] == 'FM' or order_list[i] == 'FiM':
        id_FM.append(i)
np.random.shuffle(id_FM)
np.random.shuffle(id_NM)
np.random.shuffle(id_AFM)
id_AFM, id_AFM_to_delete = np.split(id_AFM, [int(len(id_AFM))])
id_NM, id_NM_to_delete = np.split(id_NM, [int(1.2 * len(id_AFM))])
id_FM, id_FM_to_delete = np.split(id_FM, [int(1.2 * len(id_AFM))])

structures_mp = [structures[i] for i in id_NM] + [structures[j] for j in id_FM] + [structures[k] for k in id_AFM]
np.random.shuffle(structures_mp)


In [9]:
for structure in structures_mp:
    analyzed_structure = pg.CollinearMagneticStructureAnalyzer(structure)
    order_list_mp.append(analyzed_structure.ordering)
    structures_list_mp.append(structure)
    # formula_list_mp.append(structure.)
    # formula_list_mp.append(structure)
    # id_list_mp.append(structure.material_id)
    # sites_list.append(structure.nsites)



def get_topological_insulator_label(material_id):
    with MPRester(api_key = api_key) as mpr:
        try:
            data = mpr.get_data(material_id, fields=["is_topological_insulator"])
            return data[0]["is_topological_insulator"]
        except Exception as e:
            print(f"Error getting topological insulator label for {material_id}: {e}")
            return False


In [11]:
import numpy as np
import torch
import pymatgen as pg
from pymatgen.ext.matproj import MPRester
from pymatgen.analysis.magnetism import CollinearMagneticStructureAnalyzer

order_list_mp = []
structures_list_mp = []
formula_list_mp = []
sites_list = []
id_list_mp = []
y_values_mp = []

order_encode = {"NM": 0, "AFM": 1, "FM": 2, "FiM": 2}
topo_encode = {False: 0, True: 1}

# Load data
mp_structures_dict = torch.load('/Users/abiralshakya/Documents/Research/Topological_Insulators_OnGithub/generative_nmti/Integrated_Magnetic_Topological/magnetic_order/preload_data/mp_structures_2025-04-07_12-52.pt', 
                                weights_only=False)

structures = mp_structures_dict['structures']
materials = mp_structures_dict['materials_id']
formulas = mp_structures_dict['formulas']
orders = mp_structures_dict['order']
nsites = mp_structures_dict['nsites']


In [12]:
order_list = []

for struct in structures:
    analyzer = CollinearMagneticStructureAnalyzer(struct)
    order_list.append(analyzer.ordering.name)

In [13]:
id_NM = [i for i, order in enumerate(order_list) if order == 'NM']
id_AFM = [i for i, order in enumerate(order_list) if order == 'AFM']
id_FM = [i for i, order in enumerate(order_list) if order in ['FM', 'FiM']]

# Shuffle
np.random.shuffle(id_NM)
np.random.shuffle(id_FM)
np.random.shuffle(id_AFM)

# Balance dataset (keeping AFM as reference size)
id_AFM, id_AFM_to_delete = np.split(id_AFM, [int(len(id_AFM))])
id_NM, id_NM_to_delete = np.split(id_NM, [int(1.2 * len(id_AFM))])
id_FM, id_FM_to_delete = np.split(id_FM, [int(1.2 * len(id_AFM))])

# Final index list
selected_ids = np.concatenate((id_NM, id_FM, id_AFM))
np.random.shuffle(selected_ids)

In [14]:
for idx in selected_ids:
    structure = structures[idx]
    material_id = materials[idx]
    formula = formulas[idx]
    nsite = nsites[idx]

    analyzer = CollinearMagneticStructureAnalyzer(structure)
    ordering = analyzer.ordering

    structures_list_mp.append(structure)
    id_list_mp.append(material_id)
    formula_list_mp.append(formula)
    sites_list.append(nsite)
    order_list_mp.append(ordering)


In [19]:
topo_encode = {False: 0, True: 1}
topo_labels = []

from mp_api.client import MPRester
m = MPRester(api_key=api_key)

for material_id in id_list_mp:
    try:
        result = m.materials.summary.search(material_ids=[material_id])
        if result and hasattr(result[0], "is_topological"):
            label = result[0].is_topological
            topo_labels.append(topo_encode[label])
        else:
            print(f"No topological info for {material_id}")
            topo_labels.append(topo_encode[False])
    except Exception as e:
        print(f"Error retrieving TI label for {material_id}: {e}")
        topo_labels.append(topo_encode[False])


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 10837.99it/s]


No topological info for mp-1055932


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 27776.85it/s]


No topological info for mp-90


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 28532.68it/s]


No topological info for mp-11421


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 33288.13it/s]


No topological info for mp-11343


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 20763.88it/s]


No topological info for mp-20071


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 27594.11it/s]


No topological info for mp-74


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 22192.08it/s]


No topological info for mp-1184067


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 16710.37it/s]


No topological info for mp-1193227


Retrieving SummaryDoc documents: 100%|██████████| 1/1 [00:00<00:00, 7037.42it/s]

No topological info for mp-1184113





In [16]:
for material_id in id_list_mp:
    label = get_topological_insulator_label(material_id, api_key)
    y_values_mp.append(topo_encode[label])


Error getting topological insulator label for mp-1055932: 'MPRester' object has no attribute 'get_data'
Error getting topological insulator label for mp-90: 'MPRester' object has no attribute 'get_data'
Error getting topological insulator label for mp-11421: 'MPRester' object has no attribute 'get_data'
Error getting topological insulator label for mp-11343: 'MPRester' object has no attribute 'get_data'
Error getting topological insulator label for mp-20071: 'MPRester' object has no attribute 'get_data'
Error getting topological insulator label for mp-74: 'MPRester' object has no attribute 'get_data'
Error getting topological insulator label for mp-1184067: 'MPRester' object has no attribute 'get_data'
Error getting topological insulator label for mp-1193227: 'MPRester' object has no attribute 'get_data'
Error getting topological insulator label for mp-1184113: 'MPRester' object has no attribute 'get_data'
