In [5]:
import os
from dotenv import load_dotenv, find_dotenv
from mp_api.client import MPRester
import torch
import torch.nn as nn
import torch.optim as optim
import torch_geometric
import e3nn
from e3nn import o3
import e3nn.util.datatypes
import numpy as np
import pickle
from mendeleev import element
import matplotlib.pyplot as plt
from sklearn.metrics import average_precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import io
import random
import math
import sys
import time, os
import datetime
from pathlib import Path

# Load environment variables
load_dotenv()
load_dotenv(Path("/Users/abiralshakya/Documents/Research/Topological_Insulators_OnGithub/generative_nmti/Integrated_Magnetic_Topological/matprojectapi.env"))
api_key = os.getenv("MP_API_KEY")

# Initialize variables for storing data
order_list_mp = []
structures_list_mp = []
formula_list_mp = []
sites_list = []
id_list_mp = []
y_values_mp = []
order_encode = {"NM": 0, "AFM": 1, "FM": 2, "FiM": 2}

# Initialize MPRester client
m = MPRester(api_key=api_key)

# Define element batches for querying
element_batches = [
    ["Ga", "Tm", "Y", "Dy", "Nb", "Pu"],
    ["Th", "Er", "U", "Cr", "Sc", "Pr"],
    ["Re", "Ni", "Np", "Nd", "Yb", "Ce"],
    ["Ti", "Mo", "Cu", "Fe", "Sm", "Gd"],
    ["V", "Co", "Eu", "Ho", "Mn", "Os"],
    ["Tb", "Ir", "Pt", "Rh", "Ru"]
]

# Storage for results
all_materials = []

# Fetch data in batches with proper approach
for batch in element_batches:
    try:
        print(f"Querying elements: {', '.join(batch)}")
        
        # Process each element separately
        for element in batch:
            print(f"  - Querying for materials with {element}")
            
            # Using the standard query format
            docs = m.materials.summary.search(
                chemsys=element,  # Use chemsys to search for the element
                fields=[
                    "material_id", 
                    "formula_pretty", 
                    "structure", 
                    "nsites", 
                    "is_magnetic", 
                    "ordering",
                    "total_magnetization"
                ]
            )
            
            # Add results to collection
            element_docs = list(docs)
            print(f"    Found {len(element_docs)} materials")
            all_materials.extend(element_docs)
            
            # Pause briefly to avoid rate limiting
            time.sleep(0.5)
    
    except Exception as e:
        print(f"Error processing batch {batch}: {e}")

print(f"Total materials retrieved: {len(all_materials)}")

# Process retrieved data
for doc in all_materials:
    try:
        material_id = doc.material_id
        formula = doc.formula_pretty
        structure = doc.structure
        nsites = doc.nsites
        
        # Extract magnetic ordering - directly from the ordering field
        magnetic_ordering = None
        if hasattr(doc, 'ordering') and doc.ordering:
            magnetic_ordering = doc.ordering
        
        # Store data in lists
        id_list_mp.append(material_id)
        formula_list_mp.append(formula)
        structures_list_mp.append(structure)
        sites_list.append(nsites)
        
        # Handle magnetic ordering data
        if magnetic_ordering and magnetic_ordering in order_encode:
            order_list_mp.append(magnetic_ordering)
            y_values_mp.append(order_encode[magnetic_ordering])
        else:
            # If structure not magnetic or ordering unknown
            if hasattr(doc, 'is_magnetic') and not doc.is_magnetic:
                order_list_mp.append("NM")  # Non-magnetic
                y_values_mp.append(order_encode["NM"])
            else:
                order_list_mp.append("Unknown")
                y_values_mp.append(-1)  # Mark as unknown
            
    except Exception as e:
        print(f"Error processing material {doc.material_id if hasattr(doc, 'material_id') else 'unknown'}: {e}")

# Remove duplicate materials (if any)
unique_indices = []
seen_ids = set()
for i, material_id in enumerate(id_list_mp):
    if material_id not in seen_ids:
        seen_ids.add(material_id)
        unique_indices.append(i)

if len(unique_indices) < len(id_list_mp):
    print(f"Removing {len(id_list_mp) - len(unique_indices)} duplicate materials")
    id_list_mp = [id_list_mp[i] for i in unique_indices]
    formula_list_mp = [formula_list_mp[i] for i in unique_indices]
    structures_list_mp = [structures_list_mp[i] for i in unique_indices]
    sites_list = [sites_list[i] for i in unique_indices]
    order_list_mp = [order_list_mp[i] for i in unique_indices]
    y_values_mp = [y_values_mp[i] for i in unique_indices]

print(f"Processed {len(id_list_mp)} unique materials with structures")
print(f"Materials with known magnetic ordering: {sum(1 for y in y_values_mp if y >= 0)}")

# Optional: Create a summary of magnetic orders
mag_order_counts = {}
for order in order_list_mp:
    if order in mag_order_counts:
        mag_order_counts[order] += 1
    else:
        mag_order_counts[order] = 1

print("Magnetic ordering distribution:")
for order, count in mag_order_counts.items():
    print(f"  {order}: {count}")

  from .autonotebook import tqdm as notebook_tqdm
  Referenced from: <4A3195B8-9E71-3AE7-AE80-DBA66ADAC535> /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch_scatter/_scatter_cpu.so
  Expected in:     <DA215AD3-6EAE-3755-B6A5-A8EB4EF952B0> /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/lib/libtorch_cpu.dylib


Querying elements: Ga, Tm, Y, Dy, Nb, Pu
  - Querying for materials with Ga


Retrieving SummaryDoc documents: 100%|██████████| 8/8 [00:00<00:00, 76959.71it/s]


    Found 8 materials
  - Querying for materials with Tm


Retrieving SummaryDoc documents: 100%|██████████| 5/5 [00:00<00:00, 92385.55it/s]


    Found 5 materials
  - Querying for materials with Y


Retrieving SummaryDoc documents: 100%|██████████| 4/4 [00:00<00:00, 76959.71it/s]


    Found 4 materials
  - Querying for materials with Dy


Retrieving SummaryDoc documents: 100%|██████████| 5/5 [00:00<00:00, 97997.76it/s]


    Found 5 materials
  - Querying for materials with Nb


Retrieving SummaryDoc documents: 100%|██████████| 6/6 [00:00<00:00, 165564.63it/s]


    Found 6 materials
  - Querying for materials with Pu


Retrieving SummaryDoc documents: 100%|██████████| 9/9 [00:00<00:00, 199728.76it/s]


    Found 9 materials
Querying elements: Th, Er, U, Cr, Sc, Pr
  - Querying for materials with Th


Retrieving SummaryDoc documents: 100%|██████████| 2/2 [00:00<00:00, 37786.52it/s]


    Found 2 materials
  - Querying for materials with Er


Retrieving SummaryDoc documents: 100%|██████████| 5/5 [00:00<00:00, 115864.75it/s]


    Found 5 materials
  - Querying for materials with U


Retrieving SummaryDoc documents: 100%|██████████| 14/14 [00:00<00:00, 378840.36it/s]


    Found 14 materials
  - Querying for materials with Cr


Retrieving SummaryDoc documents: 100%|██████████| 6/6 [00:00<00:00, 146312.93it/s]


    Found 6 materials
  - Querying for materials with Sc


Retrieving SummaryDoc documents: 100%|██████████| 11/11 [00:00<00:00, 225060.21it/s]


    Found 11 materials
  - Querying for materials with Pr


Retrieving SummaryDoc documents: 100%|██████████| 10/10 [00:00<00:00, 135300.13it/s]


    Found 10 materials
Querying elements: Re, Ni, Np, Nd, Yb, Ce
  - Querying for materials with Re


Retrieving SummaryDoc documents: 100%|██████████| 5/5 [00:00<00:00, 90785.80it/s]


    Found 5 materials
  - Querying for materials with Ni


Retrieving SummaryDoc documents: 100%|██████████| 6/6 [00:00<00:00, 102300.10it/s]


    Found 6 materials
  - Querying for materials with Np


Retrieving SummaryDoc documents: 100%|██████████| 3/3 [00:00<00:00, 58798.65it/s]


    Found 3 materials
  - Querying for materials with Nd


Retrieving SummaryDoc documents: 100%|██████████| 4/4 [00:00<00:00, 95325.09it/s]


    Found 4 materials
  - Querying for materials with Yb


Retrieving SummaryDoc documents: 100%|██████████| 4/4 [00:00<00:00, 81442.80it/s]


    Found 4 materials
  - Querying for materials with Ce


Retrieving SummaryDoc documents: 100%|██████████| 6/6 [00:00<00:00, 103563.06it/s]


    Found 6 materials
Querying elements: Ti, Mo, Cu, Fe, Sm, Gd
  - Querying for materials with Ti


Retrieving SummaryDoc documents: 100%|██████████| 10/10 [00:00<00:00, 255750.24it/s]


    Found 10 materials
  - Querying for materials with Mo


Retrieving SummaryDoc documents: 100%|██████████| 8/8 [00:00<00:00, 177536.68it/s]


    Found 8 materials
  - Querying for materials with Cu


Retrieving SummaryDoc documents: 100%|██████████| 8/8 [00:00<00:00, 148470.94it/s]


    Found 8 materials
  - Querying for materials with Fe


Retrieving SummaryDoc documents: 100%|██████████| 10/10 [00:00<00:00, 181571.60it/s]


    Found 10 materials
  - Querying for materials with Sm


Retrieving SummaryDoc documents: 100%|██████████| 4/4 [00:00<00:00, 75573.05it/s]


    Found 4 materials
  - Querying for materials with Gd


Retrieving SummaryDoc documents: 100%|██████████| 5/5 [00:00<00:00, 113359.57it/s]


    Found 5 materials
Querying elements: V, Co, Eu, Ho, Mn, Os
  - Querying for materials with V


Retrieving SummaryDoc documents: 100%|██████████| 3/3 [00:00<00:00, 66576.25it/s]


    Found 3 materials
  - Querying for materials with Co


Retrieving SummaryDoc documents: 100%|██████████| 8/8 [00:00<00:00, 151146.09it/s]


    Found 8 materials
  - Querying for materials with Eu


In [1]:
import torch
from datetime import datetime

In [2]:
time_stamp = datetime.today().strftime('%Y-%m-%d_%H-%M')


In [3]:
struct_dict = {
    "structures": structures
}

NameError: name 'structures' is not defined

In [4]:
torch.save(struct_dict, f'mp_structures_{time_stamp}.pt')


NameError: name 'struct_dict' is not defined

In [None]:
structures_loaded = torch.load(f'mp_structures_{time_stamp}.pt')
