In [1]:
import requests
from bs4 import BeautifulSoup

materials = []

url = "https://www.cryst.ehu.es/magndata/search.php?show_db=1"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")

for td in soup.find_all("td"):
    text = td.get_text(strip=True)
    if text and text[0] in "01234" and text[1] == ".":
        parts = text.split()  # split "0.1LaMnO3" → ["0.1LaMnO3"] if no space
        label = ''.join(filter(lambda c: c.isdigit() or c == '.', text[:5]))  # safe parse of index (e.g. "0.1")
        link = f"https://www.cryst.ehu.es/magndata/index.php?index={label}"
        materials.append((label, link))

print(f"✅ Found {len(materials)} magnetic materials.")
print(materials[:-5])  # Preview first 5 to verify


✅ Found 2329 magnetic materials.
[('0.1', 'https://www.cryst.ehu.es/magndata/index.php?index=0.1'), ('0.2', 'https://www.cryst.ehu.es/magndata/index.php?index=0.2'), ('0.3', 'https://www.cryst.ehu.es/magndata/index.php?index=0.3'), ('0.4', 'https://www.cryst.ehu.es/magndata/index.php?index=0.4'), ('0.5', 'https://www.cryst.ehu.es/magndata/index.php?index=0.5'), ('0.6', 'https://www.cryst.ehu.es/magndata/index.php?index=0.6'), ('0.7', 'https://www.cryst.ehu.es/magndata/index.php?index=0.7'), ('0.8', 'https://www.cryst.ehu.es/magndata/index.php?index=0.8'), ('0.9', 'https://www.cryst.ehu.es/magndata/index.php?index=0.9'), ('0.10', 'https://www.cryst.ehu.es/magndata/index.php?index=0.10'), ('0.11', 'https://www.cryst.ehu.es/magndata/index.php?index=0.11'), ('0.12', 'https://www.cryst.ehu.es/magndata/index.php?index=0.12'), ('0.13', 'https://www.cryst.ehu.es/magndata/index.php?index=0.13'), ('0.14', 'https://www.cryst.ehu.es/magndata/index.php?index=0.14'), ('0.15', 'https://www.cryst.ehu.

In [3]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

# Step 1: Request HTML page
url = "https://www.cryst.ehu.es/magndata/search.php?show_db=1"
response = requests.get(url)

# Step 2: Parse the HTML content
soup = BeautifulSoup(response.content, "html.parser")

materials = []

# Step 3: Loop through all <td> entries
for td in soup.find_all("td"):
    text = td.get_text(strip=True)

    # Extract numeric label (e.g., 0.1) from full label (e.g., 0.1LaMnO3)
    match = re.match(r"^([0-4]\.\d+)", text)
    if match:
        numeric_label = match.group(1)
        full_label = text
        link = f"https://www.cryst.ehu.es/magndata/index.php?this_label={numeric_label}"
        materials.append({
            "Full Label": full_label,
            "Numeric Label": numeric_label,
            "URL": link
        })

# Step 4: Convert to DataFrame and export to CSV
df = pd.DataFrame(materials)
csv_filename = "MAGNDATA Magnetic Materials.csv"
df.to_csv(csv_filename, index=False)

# Done
print(f"✅ Saved {len(df)} entries to '{csv_filename}'")
print(df.head())  # Show a preview


✅ Saved 2329 entries to 'MAGNDATA Magnetic Materials.csv'
     Full Label Numeric Label  \
0     0.1LaMnO3           0.1   
1   0.2Cd2Os2O7           0.2   
2  0.3Ca3LiOsO6           0.3   
3    0.4NiCr2O4           0.4   
4      0.5Cr2S3           0.5   

                                                 URL  
0  https://www.cryst.ehu.es/magndata/index.php?th...  
1  https://www.cryst.ehu.es/magndata/index.php?th...  
2  https://www.cryst.ehu.es/magndata/index.php?th...  
3  https://www.cryst.ehu.es/magndata/index.php?th...  
4  https://www.cryst.ehu.es/magndata/index.php?th...  


In [6]:
import pandas as pd

# Load the original CSV
df = pd.read_csv("MAGNDATA Magnetic Materials.csv")

# Keep only the first 2168 rows (up to index 2168, which is row 2169 in Excel/Calc)
df_trimmed = df.iloc[:2167]

# Save to new file
df_trimmed.to_csv("MAGNDATA_Commensurate_Only.csv", index=False)

print(f"✅ Saved {len(df_trimmed)} commensurate materials to 'MAGNDATA_Commensurate_Only.csv'")


✅ Saved 2167 commensurate materials to 'MAGNDATA_Commensurate_Only.csv'


In [7]:
print(df.head())  # Show a preview


     Full Label  Numeric Label  \
0     0.1LaMnO3            0.1   
1   0.2Cd2Os2O7            0.2   
2  0.3Ca3LiOsO6            0.3   
3    0.4NiCr2O4            0.4   
4      0.5Cr2S3            0.5   

                                                 URL  
0  https://www.cryst.ehu.es/magndata/index.php?th...  
1  https://www.cryst.ehu.es/magndata/index.php?th...  
2  https://www.cryst.ehu.es/magndata/index.php?th...  
3  https://www.cryst.ehu.es/magndata/index.php?th...  
4  https://www.cryst.ehu.es/magndata/index.php?th...  


In [10]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import time

# Load dataset
df = pd.read_csv("MAGNDATA_Commensurate_Only.csv")

# Function to extract propagation vector
def get_propagation_vector(url):
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, "html.parser")
        text = soup.get_text()
        
        # Look for: Propagation vector: k1 (1/2, 0, 1/2)
        match = re.search(r"Propagation vector:\s*k\d?\s*\((.*?)\)", text)
        if match:
            return match.group(1).strip()
        else:
            return "Not found"
    except Exception as e:
        return f"Error: {e}"

# Prepare new column
propagation_vectors = []

# Loop and extract with live printing
for i, (label, url) in enumerate(zip(df["Full Label"], df["URL"]), start=1):
    vector = get_propagation_vector(url)
    propagation_vectors.append(vector)
    print(f"🔍 {i:4d}/{len(df)} — {label:<20} → Propagation vector: {vector}")
    time.sleep(0.5)  # be polite

# Add new column and save result
df["Propagation Vector"] = propagation_vectors
df.to_csv("MAGNDATA_Commensurate_WithVectors.csv", index=False)
print("✅ All done! Results saved to 'MAGNDATA_Commensurate_WithVectors.csv'")


🔍    1/2167 — 0.1LaMnO3            → Propagation vector: 0, 0, 0
🔍    2/2167 — 0.2Cd2Os2O7          → Propagation vector: 0, 0, 0
🔍    3/2167 — 0.3Ca3LiOsO6         → Propagation vector: 0, 0, 0
🔍    4/2167 — 0.4NiCr2O4           → Propagation vector: 0, 0, 0
🔍    5/2167 — 0.5Cr2S3             → Propagation vector: 0, 0, 0
🔍    6/2167 — 0.6YMnO3             → Propagation vector: 0, 0, 0
🔍    7/2167 — 0.7ScMnO3            → Propagation vector: 0, 0, 0
🔍    8/2167 — 0.8ScMnO3            → Propagation vector: 0, 0, 0
🔍    9/2167 — 0.9GdB4              → Propagation vector: 0, 0, 0
🔍   10/2167 — 0.10DyFeO3           → Propagation vector: 0, 0, 0
🔍   11/2167 — 0.11DyFeO3           → Propagation vector: 0, 0, 0
🔍   12/2167 — 0.12U3Ru4Al12        → Propagation vector: 0, 0, 0
🔍   13/2167 — 0.13Ca3Co2-xMnxO6    → Propagation vector: 0, 0, 0
🔍   14/2167 — 0.14Gd5Ge4           → Propagation vector: 0, 0, 0
🔍   15/2167 — 0.15MnF2             → Propagation vector: 0, 0, 0
🔍   16/2167 — 0.16EuTiO3 

In [18]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import time
import numpy as np

df = pd.read_csv("MAGNDATA_Commensurate_Only.csv")
subset = df.copy()

def clean_float(val):
    val = val.replace("\u2212", "-")  # Unicode minus
    val = re.sub(r"\([^)]*\)", "", val)  # Remove uncertainty like (7)
    val = re.sub(r"[^\d\.\-]+", "", val)  # Remove anything non-numeric
    try:
        return float(val)
    except:
        return None

def extract_all_moments(url):
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, "html.parser")

        moments = []

        # Look for all tables with Mx, My, Mz as last 3 headers
        for table in soup.find_all("table"):
            headers = table.find_all("th")
            if len(headers) >= 3 and [h.get_text(strip=True) for h in headers[-3:]] == ["Mx", "My", "Mz"]:
                rows = table.find_all("tr")[1:]  # skip header
                for row in rows:
                    cols = row.find_all("td")
                    if len(cols) < 3:
                        continue
                    mx = clean_float(cols[-3].get_text())
                    my = clean_float(cols[-2].get_text())
                    mz = clean_float(cols[-1].get_text())
                    if None not in (mx, my, mz):
                        vec = np.array([mx, my, mz])
                        if np.linalg.norm(vec) > 0.01:
                            moments.append(vec)

        return moments

    except Exception as e:
        print(f"⚠️ Error parsing {url}: {e}")
        return None

def classify_order(moments):
    if not moments or len(moments) < 2:
        return "Unknown"

    moments = np.array(moments)
    norms = np.linalg.norm(moments, axis=1)

    if np.all(norms < 0.0005):
        return "NM"

    directions = moments / norms[:, None]
    ref_dir = directions[0]

    # Check if all directions ~ parallel to ref_dir
    all_parallel = np.allclose(directions, ref_dir, atol=0.15)

    if all_parallel:
        # Now check if magnitudes are also equal → FM
        if np.allclose(norms, norms[0], rtol=0.1):  # 10% tolerance
            return "FM"
        else:
            return "FiM"

    # If not all parallel, check if all are either aligned or anti-aligned
    if all(np.allclose(d, ref_dir, atol=0.15) or np.allclose(d, -ref_dir, atol=0.15) for d in directions):
        return "AFM"

    return "Complex"


# Run for subset
orders = []
for i, (label, url) in enumerate(zip(subset["Full Label"], subset["URL"])):
    all_moments = extract_all_moments(url)
    order = classify_order(all_moments)
    orders.append(order)
    print(f"{i+1:2d}. {label:<20} → Magnetic Order: {order}")
    time.sleep(0.5)

subset["Magnetic Order"] = orders
subset.to_csv("MAGNDATA_MagneticOrder_AllSpecies.csv", index=False)
print("✅ Saved to 'MAGNDATA_MagneticOrder_AllSpecies.csv'")


 1. 0.1LaMnO3            → Magnetic Order: AFM
 2. 0.2Cd2Os2O7          → Magnetic Order: Complex
 3. 0.3Ca3LiOsO6         → Magnetic Order: AFM
 4. 0.4NiCr2O4           → Magnetic Order: AFM
 5. 0.5Cr2S3             → Magnetic Order: AFM
 6. 0.6YMnO3             → Magnetic Order: Complex
 7. 0.7ScMnO3            → Magnetic Order: Complex
 8. 0.8ScMnO3            → Magnetic Order: Complex
 9. 0.9GdB4              → Magnetic Order: Complex
10. 0.10DyFeO3           → Magnetic Order: Complex
11. 0.11DyFeO3           → Magnetic Order: Complex
12. 0.12U3Ru4Al12        → Magnetic Order: Complex
13. 0.13Ca3Co2-xMnxO6    → Magnetic Order: AFM
14. 0.14Gd5Ge4           → Magnetic Order: AFM
15. 0.15MnF2             → Magnetic Order: AFM
16. 0.16EuTiO3           → Magnetic Order: AFM
17. 0.17FePO4            → Magnetic Order: Complex
18. 0.18BaMn2As2         → Magnetic Order: AFM
19. 0.19MnTiO3           → Magnetic Order: AFM
20. 0.20MnTe2            → Magnetic Order: Complex
21. 0.21PbNiO3      

In [1]:
import pandas as pd

# Load your classified file
df = pd.read_csv("MAGNDATA_MagneticOrder_AllSpecies.csv")

# Replace Unknown and Complex with AFM
df["Magnetic Order"] = df["Magnetic Order"].replace({"Unknown": "AFM", "Complex": "AFM"})

# Save new CSV
df.to_csv("MAGNDATA_Reclassified.csv", index=False)

print("✅ Saved updated file as 'MAGNDATA_Reclassified.csv'")


✅ Saved updated file as 'MAGNDATA_Reclassified.csv'


In [4]:
import pandas as pd

# Load both datasets
magndata = pd.read_csv("MAGNDATA_Reclassified.csv")
materials = pd.read_csv("All Data Imported.csv")

# Extract formula from MAGNDATA label (e.g. '0.123ABC' → 'ABC')
magndata["formula"] = magndata["Full Label"].str.extract(r"\d*\.\d+(.*)")[0].str.strip()
materials["formula"] = materials["formula"].str.strip()

# Left join: keep only MAGNDATA entries
merged = pd.merge(magndata, materials, on="formula", how="left")

# Save result
merged.to_csv("MAGNDATA_Enriched_WithMP.csv", index=False)
print("✅ Saved merged file: 'MAGNDATA_Enriched_WithMP.csv'")


✅ Saved merged file: 'MAGNDATA_Enriched_WithMP.csv'
