In [None]:
%pip install pandas numpy pymatgen

# Materials Project API Initialization

This script demonstrates how to set up and authenticate a connection to the **Materials Project API** using Python.  
It also imports the required libraries for data handling and structure processing.


In [None]:
import pandas as pd
import numpy as np
from mp_api.client import MPRester
from pymatgen.core.structure import Structure

import os

# The Materials Project key is a credential, so it is read from the environment
# instead of being stored in the notebook. Set it before starting the kernel,
# e.g. `export MP_API_KEY=<your key>`.
API_KEY = os.environ.get("MP_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Materials Project API key not found: set the MP_API_KEY environment variable."
    )

# Shared client used by the cells below.
mpr = MPRester(API_KEY)

# Querying Material Data from the Materials Project API

This example demonstrates how to:
1. Connect to the **Materials Project API**.
2. Retrieve specific fields for a small set of materials.
3. Use pagination to limit results.
4. Display retrieved material information.

In [None]:
from pymatgen.ext.matproj import MPRester


# Summary fields requested for each material.
# `symmetry` carries the space-group information (later cells read its `.symbol`).
fields = ["material_id", "formula_pretty", "total_magnetization", "ordering", "symmetry"]

with MPRester(API_KEY) as mpr:
    # One chunk of five documents keeps this a small smoke-test query.
    docs = list(
        mpr.materials.summary.search(
            fields=fields,
            num_chunks=1,
            chunk_size=5,
        )
    )
    print(f"Fetched {len(docs)} documents")
    # One line per retrieved material.
    for doc in docs:
        print(f"Material ID: {doc.material_id}, Formula: {doc.formula_pretty}")


In [None]:
# # Define fields to query (we now request "symmetry" instead of "spacegroup.symbol")
# fields = [
#     "material_id",
#     "formula_pretty",
#     "total_magnetization",
#     "ordering",
#     "symmetry"  # This will include spacegroup info as a dictionary
# ]

# # Query all compounds (in practice, you might want to restrict the query)
# docs = list(mpr.materials.summary.search(fields=fields))

# # Filter for magnetic compounds:
# # Here, a compound is considered magnetic if its "ordering" is not "Nonmagnetic" and the magnetization is > 0.
# magnetic_docs = [
#     doc for doc in docs
#     if doc.ordering not in [None, "Nonmagnetic"] and doc.total_magnetization is not None and doc.total_magnetization > 0
# ]

# # Define a helper to extract space group symbol from the symmetry field.
# def get_spacegroup_symbol(symmetry):
#     # symmetry is expected to be a dictionary with a key "spacegroup_symbol"
#     if isinstance(symmetry, dict) and "spacegroup_symbol" in symmetry:
#         return symmetry["spacegroup_symbol"]
#     return "NA"

# # Sort the magnetic compounds by space group symbol.
# magnetic_docs.sort(key=lambda x: get_spacegroup_symbol(x.symmetry))

# Magnetic Materials by Space Group — Script Overview
This script queries the **Materials Project API** for all 230 crystallographic space groups (1–230).  
For each space group:
- Retrieves basic material information (`material_id`, `formula_pretty`, `total_magnetization`, `ordering`, `symmetry`).
- Filters results to **magnetic compounds** (excluding `NM`/`Nonmagnetic` and requiring positive magnetization).
- Prints the count of magnetic compounds found per space group.
- Collects all results into a master list.

After scanning all space groups:
- Removes duplicates (based on `material_id`).
- Prints the total count of **unique magnetic compounds**.

**Use case:** Quickly identify which crystal symmetries host magnetic materials for further analysis.  
**Note:** The filter requires a positive net magnetization, so antiferromagnets with zero net magnetization are excluded — adjust the logic if needed.


In [None]:
import numpy as np
import pandas as pd
from pymatgen.core import Structure
from pymatgen.ext.matproj import MPRester

# Summary fields requested for every material.
fields = [
    "material_id",
    "formula_pretty",
    "total_magnetization",
    "ordering",
    "symmetry"
]

# Upper-cased `ordering` labels that mark a compound as non-magnetic.
NONMAGNETIC_LABELS = {"NM", "NONMAGNETIC"}

all_magnetic_docs = []

# Query the space groups one at a time (numbers 1-230) so a failure in one
# query only skips that space group instead of aborting the whole scan.
with MPRester(API_KEY) as mpr:
    for sg_num in range(1, 231):
        try:
            # Same `materials.summary` route as the sample query earlier in the notebook.
            docs = mpr.materials.summary.search(spacegroup_number=sg_num, fields=fields)
        except Exception as e:
            print(f"Error for space group number {sg_num}: {e}")
            continue

        # Keep documents with a magnetic ordering label and a strictly positive
        # total magnetization; documents missing either value are dropped.
        mag_docs = [
            doc for doc in docs
            if doc.ordering
            and doc.ordering.upper() not in NONMAGNETIC_LABELS
            and doc.total_magnetization is not None
            and doc.total_magnetization > 0
        ]

        if mag_docs:
            # The symbol is taken from the first hit of this space-group query.
            sg_symbol = mag_docs[0].symmetry.symbol if mag_docs[0].symmetry else "NA"
            print(f"Space group number {sg_num} ({sg_symbol}): found {len(mag_docs)} magnetic compounds")
            all_magnetic_docs.extend(mag_docs)

# De-duplicate by material_id, keeping the first occurrence and the original order
# (dicts preserve insertion order, setdefault never overwrites an existing key).
unique_by_id = {}
for doc in all_magnetic_docs:
    unique_by_id.setdefault(doc.material_id, doc)
unique_docs = list(unique_by_id.values())
seen_ids = set(unique_by_id)

print(f"\nTotal unique magnetic compounds across space groups: {len(unique_docs)}")

In [None]:
# Preview only the first few documents; printing the whole list floods the cell output.
print(f"{len(unique_docs)} documents; showing the first 5:")
print(unique_docs[:5])

# Export Magnetic Material Data to CSV
This script takes a list of `unique_docs` (retrieved from the Materials Project API) and:
- Extracts key fields: **Material ID**, **Formula**, **Ordering**, **Total Magnetization**, and **Space Group**.
- Stores the data in a pandas DataFrame for easy manipulation.
- Sorts the table by **Space Group** for better organization.
- Saves the data to `magnetic_materials.csv` in the current directory.
- Prints the DataFrame for a quick visual check.

**Use case:** Convert API query results into a clean, sortable CSV for further analysis or sharing.


In [None]:
import pandas as pd

# Column order of the exported table. Declaring it up front keeps the columns
# present even when `unique_docs` is empty, so the sort below cannot fail on a
# missing "Space Group" column.
CSV_COLUMNS = ["Material ID", "Formula", "Ordering", "Total Magnetization", "Space Group"]

# One record per document; the material id is stored as a plain string and the
# space group is None when the document has no symmetry data.
data = [
    {
        "Material ID": str(doc.material_id),
        "Formula": doc.formula_pretty,
        "Ordering": doc.ordering,
        "Total Magnetization": doc.total_magnetization,
        "Space Group": doc.symmetry.symbol if doc.symmetry else None,
    }
    for doc in unique_docs
]

# Build the table and order it by space-group symbol (returns a new frame
# rather than sorting in place).
df = pd.DataFrame(data, columns=CSV_COLUMNS)
df = df.sort_values("Space Group")

# Export to CSV; the file "magnetic_materials.csv" is written to the current working directory.
df.to_csv("magnetic_materials.csv", index=False)

# Print the table for a quick visual check.
print(df)


# Visualizing Total Magnetization by Space Group
This script creates a scatter plot showing how **Total Magnetization** varies across different **Space Groups**.

Steps:
1. Ensure `Total Magnetization` is numeric and `Space Group` is a categorical string.
2. Count the number of unique space groups and generate a color palette with enough distinct colors.
3. Plot a scatter chart where:
   - **X-axis:** Space Group  
   - **Y-axis:** Total Magnetization  
   - **Color:** Represents each space group (legend hidden for clarity with large counts).
4. Rotate x-axis labels for readability and adjust layout for a clean display.

**Use case:** Quickly visualize relationships or trends between magnetization and crystal symmetry classifications.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# `df` is expected to hold "Total Magnetization" and "Space Group" columns.
# To start from a file instead, load it first:
# df = pd.read_csv("your_data.csv")

# Coerce magnetization to numbers (unparseable values become NaN) and make the
# space group a plain string so it is plotted as a category.
df['Total Magnetization'] = pd.to_numeric(df['Total Magnetization'], errors='coerce')
df['Space Group'] = df['Space Group'].astype(str)

# One hue per distinct space group.
num_groups = df['Space Group'].nunique()
palette = sns.color_palette("hsv", num_groups)

fig, ax = plt.subplots(figsize=(14, 8))
# Scatter of magnetization per space group; the legend is suppressed because
# one entry per space group would swamp the figure.
sns.scatterplot(
    data=df,
    x="Space Group",
    y="Total Magnetization",
    hue="Space Group",
    palette=palette,
    legend=False,
    ax=ax,
)

ax.set_title("Total Magnetization vs. Space Group")
ax.set_xlabel("Space Group")
ax.set_ylabel("Total Magnetization")
plt.xticks(rotation=90)  # vertical tick labels stay legible with many categories
plt.tight_layout()
plt.show()


In [None]:
# import numpy as np
# import pandas as pd
# from pymatgen.core import Structure
# from pymatgen.ext.matproj import MPRester

# # Prepare an empty list to store the computed data
# data_list = []

# # Process only the first 1000 compounds in magnetic_docs
# for doc in unique_docs[:1000]:
#     material_id = doc.material_id
#     sg = doc.symmetry.symbol if doc.symmetry else "NA"
#     formula = doc.formula_pretty
#     mag_moment = doc.total_magnetization
#     ordering = doc.ordering

#     # Fetch structure data using the updated method:
#     try:
#         structure = mpr.get_structure_by_material_id(material_id)
#     except Exception as e:
#         print(f"Error fetching structure for {material_id}: {e}")
#         continue

#     # 1. Select the central atom (first site in the structure)
#     central_site = structure[0]
#     central_coords = np.array(central_site.coords)

#     # 2. Compute the Euclidean distances from the central atom to all other atoms
#     distances = []
#     for site in structure:
#         # Skip the central atom itself
#         if site == central_site:
#             continue
#         # Compute the Euclidean distance
#         dist = np.linalg.norm(np.array(site.coords) - central_coords)
#         distances.append(dist)
    
#     # 3. Sort the list of distances
#     distances.sort()
    
#     # 4. Take the 0-index element as the first nearest neighbour distance (if available)
#     nearest_distance = distances[0] if distances else None

#     # 5. Append the computed data for this material
#     data_list.append({
#         "Space Group": sg,
#         "Material ID": material_id,
#         "Formula": formula,
#         "Magnet Moment": mag_moment,
#         "Nature of Magnetic": ordering,
#         "Nearest Neighbour Distance": nearest_distance
#     })

# # Optionally, convert to a pandas DataFrame and save to CSV
# df = pd.DataFrame(data_list)
# df.to_csv("first_1000_magnetic_compounds_nn.csv", index=False)

# print(df.head())


# Analyzing Local Atomic Environments in Magnetic Compounds

This script processes a set of magnetic material documents from the Materials Project to extract detailed **neighbor shell information** for each compound.

## Key Steps:
1. **Setup**
   - Uses `pymatgen` and the Materials Project API (`MPRester`) to fetch crystal structures.
   - Defines a **distance tolerance** (`0.1 Å`) to group atoms into neighbor shells.

2. **Processing Each Material**
   - Retrieves **space group**, **material ID**, **formula**, **total magnetization**, and **magnetic ordering**.
   - Fetches the crystal structure for the material.
   - Selects the **first atom** in the structure as the central reference atom.
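   - Calculates distances and vectors from the central atom to every other site listed in the fetched structure (periodic images of the cell are not generated).
   - Sorts atoms by distance and groups them into **neighbor shells** (an atom joins the current shell when its distance is within the tolerance of that shell's closest atom).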
   - Captures data for **up to 4 shells**, including:
     - Average distance
     - Number of neighbors
     - Cartesian vectors to neighbors
     - Chemical species of neighbors

3. **Parallel Processing**
   - Uses `ThreadPoolExecutor` for multithreaded processing to speed up structure analysis.

4. **Output**
   - Compiles results into a pandas DataFrame.
   - Saves as `magnetic_compounds_4shells.csv`.
   - Prints the first few rows for quick inspection.

**Use case:**  
Ideal for studying coordination environments, structural symmetries, and atomic arrangements in magnetic compounds.


In [None]:
import numpy as np
import pandas as pd
from pymatgen.core import Structure
from pymatgen.ext.matproj import MPRester
from concurrent.futures import ThreadPoolExecutor, as_completed

# Maximum difference (in Ångstroms) between an atom's distance and the distance
# of the closest atom of a shell for the atom to be counted in that shell.
DISTANCE_TOLERANCE = 0.1


def _group_into_shells(neighbors, tolerance):
    """Split distance-sorted neighbor records into consecutive shells.

    A record joins the current shell while its distance is within `tolerance`
    of the first (closest) record of that shell; otherwise it starts a new shell.
    """
    shells = []
    for neighbor in neighbors:
        if shells and abs(neighbor['distance'] - shells[-1][0]['distance']) <= tolerance:
            shells[-1].append(neighbor)
        else:
            shells.append([neighbor])
    return shells


def process_doc(doc, rester=None):
    """Build one table row describing the neighbor shells around a material's first site.

    Parameters
    ----------
    doc : object
        Summary document exposing `material_id`, `symmetry` (with `.symbol`, or a
        falsy value), `formula_pretty`, `total_magnetization` and `ordering`.
    rester : object, optional
        Client exposing `get_structure_by_material_id`. When omitted, the
        module-level `mpr` is used, as in the original implementation.

    Returns
    -------
    dict or None
        Row with the material metadata, the central atom, and for shells 1-4 the
        "Distance", "Count", "Vectors" and "Species" entries ("NA" where a shell
        does not exist). None when the structure could not be fetched.
    """
    sg = doc.symmetry.symbol if doc.symmetry else "NA"

    # Fetch structure data; any failure is reported and the material is skipped.
    try:
        client = mpr if rester is None else rester
        structure = client.get_structure_by_material_id(doc.material_id)
    except Exception as e:
        print(f"Error fetching structure for {doc.material_id}: {e}")
        return None

    # The first site of the structure is the reference atom.
    central_site = structure[0]
    central_coords = np.array(central_site.coords)

    # NOTE(review): only the sites listed in `structure` are visited; periodic
    # images of the cell are not generated here, so a structure with a single
    # site yields no neighbors at all. Confirm this is the intended definition.
    neighbor_data = []
    for index, site in enumerate(structure):
        if index == 0:
            # Skip the central atom by position rather than by value equality.
            continue
        vector = np.array(site.coords) - central_coords
        neighbor_data.append({
            'distance': np.linalg.norm(vector),
            'vector': vector,
            'species': site.species_string
        })

    neighbor_data.sort(key=lambda item: item['distance'])
    shells = _group_into_shells(neighbor_data, DISTANCE_TOLERANCE)

    # Every shell column defaults to "NA" so all rows share the same keys.
    neighbors_info = {
        f"Shell {i} {field}": "NA"
        for i in range(1, 5)
        for field in ["Distance", "Count", "Vectors", "Species"]
    }

    # Fill in the shells that exist (at most four).
    for i, shell in enumerate(shells[:4], 1):
        vectors = [str(np.round(n['vector'], 4).tolist()) for n in shell]
        # Sorted so the species text is identical from run to run.
        species = sorted({n['species'] for n in shell})

        neighbors_info[f"Shell {i} Distance"] = round(float(np.mean([n['distance'] for n in shell])), 4)
        neighbors_info[f"Shell {i} Count"]    = len(shell)
        neighbors_info[f"Shell {i} Vectors"]  = "; ".join(vectors)
        neighbors_info[f"Shell {i} Species"]  = ", ".join(species)

    return {
        "Space Group": sg,
        # Stored as a plain string, like the "Material ID" column of the CSV export cell.
        "Material ID": str(doc.material_id),
        "Formula": doc.formula_pretty,
        "Magnet Moment": doc.total_magnetization,
        "Nature of Magnetic": doc.ordering,
        "Central Atom": central_site.species_string,
        "Central Coordinates": str(np.round(central_coords, 4).tolist()),
        **neighbors_info
    }

# Process every document in unique_docs; slice it (e.g. unique_docs[:1000])
# for a quick trial run.
docs_to_process = unique_docs
data_list = []

# process_doc reads the module-level `mpr`. The `mpr` left behind by the earlier
# cells was bound by `with` blocks that have already exited, so a fresh client is
# opened for the duration of the run. The executor is listed last so that its
# worker threads are joined before the client is closed.
with MPRester(API_KEY) as mpr, ThreadPoolExecutor(max_workers=5) as executor:
    # Submit everything up front (5 worker threads), then collect the results in
    # submission order so the rows of the CSV come out in the same order every run.
    futures = [executor.submit(process_doc, doc) for doc in docs_to_process]
    for doc, future in zip(docs_to_process, futures):
        try:
            result = future.result()
        except Exception as e:
            # One failing material is reported and skipped instead of aborting the run.
            print(f"Error processing {doc.material_id}: {e}")
            continue
        if result is not None:
            data_list.append(result)

# Convert the results into a DataFrame and save to CSV
df = pd.DataFrame(data_list)
df.to_csv("magnetic_compounds_4shells.csv", index=False)
print(df.head())
