In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pltsci import whole_plot_set, half_plot_set, set_ticks, cm

whole_plot_set()

from pymatgen.core.composition import Composition
from pymatgen.entries.computed_entries import ComputedEntry
from pymatgen.analysis.phase_diagram import PhaseDiagram, PDPlotter

In [None]:
# Energies of the competing (impurity) phases come from the Materials Project.
df_E_f = pd.read_csv("data/all_formation_energies.csv")
df_E_f

# 相图计算方法

In [None]:
def get_third_element(formula_pretty: str) -> str:
    """
    Return the first element of a formula that is neither Al nor Cu.

    :param formula_pretty: chemical formula string, parsed with ``Composition``
    :return: symbol of the first non-Al, non-Cu element, or the sentinel
        string ``"no_return"`` when the formula contains only Al and/or Cu
    """
    other_symbols = (
        element.symbol
        for element in Composition(formula_pretty).elements
        if element.symbol not in ("Al", "Cu")
    )
    # The default keeps the original sentinel for pure Al/Cu formulas.
    return next(other_symbols, "no_return")
# Unique third elements in first-appearance order, with the "no_return"
# sentinel (formulas made only of Al/Cu) filtered out.
third_elements = [
    symbol
    for symbol in df_E_f["formula_pretty"].apply(get_third_element).unique().tolist()
    if symbol != "no_return"
]
print(*third_elements, sep=", ")
print(len(third_elements), "种第三元素")

In [None]:
def 由这些元素组成(comp: Composition, elements: list[str]) -> bool:
    """
    Check whether ``comp`` is made up only of elements listed in ``elements``.

    :param comp: composition whose ``elements`` expose a ``symbol`` attribute
    :param elements: allowed element symbols
    :return: True when every element symbol of ``comp`` appears in ``elements``
    """
    symbols = {element.symbol for element in comp.elements}
    # If comp had more distinct symbols than `elements` has entries, at least
    # one symbol would be missing from `elements`, so this single membership
    # test also covers the "too many elements" case.
    return all(symbol in elements for symbol in symbols)

def epm2e(row: pd.Series) -> float:
    """
    Convert a per-atom energy (eV/atom) into a total energy (eV).

    The total is taken over the number of atoms in the formula as written in
    ``row["formula_pretty"]``.

    :param row: row providing ``"formula_pretty"`` and ``"energy_per_atom"``
    :return: ``energy_per_atom`` multiplied by the atom count of the formula
    """
    atoms_in_formula = Composition(row["formula_pretty"]).num_atoms
    return row["energy_per_atom"] * atoms_in_formula
    

In [None]:
from tqdm import tqdm
import os

# Build one Al-Cu-X phase diagram per third element and cache the plot data
# (entry coordinates + hull line segments) as CSV files under 计算凸包结果/.

# Parse every formula once up front: the membership filter below only needs
# the Composition, so re-parsing all rows for each third element is wasted work.
all_compositions = [Composition(formula) for formula in df_E_f["formula_pretty"]]
energy_col = "formation_energy"

# The context manager closes the progress bar even if an iteration raises.
with tqdm(total=len(third_elements), desc="计算凸包进度") as process_bar:
    for third_element in third_elements:
        process_bar.set_description(f"计算凸包进度: {third_element}")

        # Keep only the rows whose formula lies inside the Al-Cu-X system.
        system_elements = ["Al", "Cu", third_element]
        in_system = np.asarray(
            [由这些元素组成(comp, system_elements) for comp in all_compositions], dtype=bool
        )
        filtered_df = df_E_f[in_system]

        save_root = f"计算凸包结果/Al-Cu-{third_element}"
        os.makedirs(save_root, exist_ok=True)

        # Select only the needed columns before exposing the energy column as
        # "energy_per_atom" (the name epm2e reads); this cannot collide with a
        # same-named column that might already exist in the CSV.
        filtered_df_copy = filtered_df[["material_id", "formula_pretty"]].assign(
            energy_per_atom=filtered_df[energy_col]
        )
        filtered_df_copy["energy"] = filtered_df_copy.apply(epm2e, axis=1)
        filtered_df_copy = filtered_df_copy[["material_id", "formula_pretty", "energy"]].rename(
            columns={"formula_pretty": "composition"}
        )

        entries = [
            ComputedEntry(Composition(record["composition"]), record["energy"], entry_id=record["material_id"])
            for record in filtered_df_copy.to_dict(orient="records")
        ]
        phase_diagram = PhaseDiagram(entries)

        energy_above_hulls = {}
        for entry in entries:
            energy_above_hulls[entry.entry_id] = phase_diagram.get_e_above_hull(entry)

        plotter = PDPlotter(phase_diagram)
        lines, stable_entries_plot, unstable_entries_plot = plotter.pd_plot_data

        # Note the asymmetric layouts: the stable mapping is (x, y) -> entry,
        # while the unstable mapping is entry -> (x, y).
        entries_plot = {}
        columns = "reduced_formula energy x y energy_above_hull stable".split()
        for (x, y), entry in stable_entries_plot.items():
            entries_plot[entry.entry_id] = [entry.reduced_formula, entry.energy, x, y, energy_above_hulls[entry.entry_id], True]
        for entry, (x, y) in unstable_entries_plot.items():
            entries_plot[entry.entry_id] = [entry.reduced_formula, entry.energy, x, y, energy_above_hulls[entry.entry_id], False]
        df_entries_plot = pd.DataFrame.from_dict(entries_plot, orient="index", columns=columns)
        df_entries_plot.index.name = "material_id"
        df_entries_plot.to_csv(f"{save_root}/entries_plot.csv", index=True)

        # Each line is indexed as line[0] = x coordinates, line[1] = y coordinates.
        lines_rows = [{"x1": i[0][0], "y1": i[1][0], "x2": i[0][1], "y2": i[1][1]} for i in lines]
        df_lines = pd.DataFrame(lines_rows)
        df_lines.to_csv(f"{save_root}/lines.csv", index=False)

        process_bar.update(1)

In [None]:
import re


# Regex-based conversion that also handles parenthesised groups.
def to_latex(formula_pretty: str) -> str:
    """
    Turn a chemical formula into a mathtext string with subscripted counts.

    :param formula_pretty: formula such as ``"Mg(Al2Cu)4"``
    :return: the formula wrapped in ``$\\mathrm{...}$`` with every count
        written as ``_{n}``
    """
    group_count = re.compile(r"\)(\d+)")
    element_count = re.compile(r"([A-Z][a-z]?)(\d+)(?![}])")

    # Pass 1: counts that follow a closing parenthesis, ")n" -> ")_{n}".
    body = group_count.sub(r")_{\1}", formula_pretty)
    # Pass 2: counts that follow an element symbol, "Xn" -> "X_{n}"; the
    # lookahead skips digits that are immediately followed by "}".
    body = element_count.sub(r"\1_{\2}", body)

    # \mathrm keeps the element symbols upright instead of italic.
    return "$\\mathrm{" + body + "}$"


# Quick visual check of to_latex on a few representative formulas.
test_formulas = ["Al2", "Mg(Al2Cu)4", "Ca(OH)2", "Al2O3", "Mg3(PO4)2"]
for formula in test_formulas:
    print(formula, "->", to_latex(formula))
to_latex("Mg(Al2Cu)4")

In [None]:
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Iterable

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.tri import Triangulation
from adjustText import adjust_text
from pymatgen.core.composition import Composition
from scipy.spatial import cKDTree


@dataclass(frozen=True)
class HighlightSpec:
    """Immutable description of how one formula is emphasised on a phase diagram."""

    # Reduced formula to look up in the plotted entries.
    formula: str
    # Marker colour.
    color: str
    # Marker style passed to ``scatter``.
    marker: str = "*"
    # Marker size passed to ``scatter`` as ``s``.
    size: int = 100
    # Label colour; ``None`` means "use ``color``".
    text_color: Optional[str] = None


def _energy_per_atom(series_formula: pd.Series, series_energy: pd.Series) -> np.ndarray:
    """
    Divide each energy by the number of atoms in the matching formula.

    :param series_formula: formula strings, parsed one by one with ``Composition``
    :param series_energy: energies aligned position-wise with ``series_formula``
    :return: float array of ``energy / num_atoms``
    """
    # Parsing a Composition per row is the costly part; do it in a single pass.
    atoms_per_formula = np.array(
        [Composition(formula).num_atoms for formula in series_formula], dtype=float
    )
    energies = series_energy.to_numpy(dtype=float)
    return energies / atoms_per_formula


def _reconstruct_triangles_from_lines(points: np.ndarray, df_lines: pd.DataFrame, tol: float = 1e-6) -> list[list[int]]:
    """
    用线段端点在 stable 点集中做近邻匹配，然后从邻接关系恢复三角形 (i,j,k)。
    """
    if len(points) < 3 or df_lines.empty:
        return []

    tree = cKDTree(points)
    adjacency = [set() for _ in range(len(points))]
    tol_dist = tol

    # 近邻查询：每个端点找最近的 stable 点
    p1 = df_lines[["x1", "y1"]].to_numpy(float)
    p2 = df_lines[["x2", "y2"]].to_numpy(float)

    d1, idx1 = tree.query(p1, k=1)
    d2, idx2 = tree.query(p2, k=1)

    # 建图：只接受匹配距离 < tol 的端点
    for a, b, da, db in zip(idx1, idx2, d1, d2):
        if da <= tol_dist and db <= tol_dist and a != b:
            adjacency[a].add(b)
            adjacency[b].add(a)

    # 从邻接表恢复三角形：i-j-k，且 i<j<k 去重
    triangles: list[list[int]] = []
    for i in range(len(points)):
        ni = adjacency[i]
        for j in (x for x in ni if x > i):
            common = ni.intersection(adjacency[j])
            for k in (x for x in common if x > j):
                triangles.append([i, j, k])

    return triangles


def _plot_highlights(ax, df: pd.DataFrame, specs: Iterable[HighlightSpec], zorder: int = 100):
    """
    Mark and label the formulas listed in ``specs`` that are present in ``df``.

    :param ax: matplotlib axes to draw on
    :param df: entries with ``reduced_formula``, ``x`` and ``y`` columns
    :param specs: highlight specifications; formulas absent from ``df`` are skipped
    :param zorder: z-order of the markers (the labels are drawn one level above)
    :return: list of the created text artists
    """
    present = set() if df.empty else set(df["reduced_formula"].values)
    labels = []

    for spec in specs:
        if spec.formula in present:
            # Only the first matching row is highlighted.
            hits = df[df["reduced_formula"] == spec.formula]
            x, y = hits["x"].iloc[0], hits["y"].iloc[0]

            ax.scatter([x], [y], marker=spec.marker, color=spec.color, s=spec.size, zorder=zorder)

            # The label falls back to the marker colour when no text colour is given.
            label_color = spec.color if spec.text_color is None else spec.text_color
            labels.append(
                ax.text(
                    x,
                    y,
                    to_latex(spec.formula),
                    fontsize=10,
                    ha="center",
                    va="center",
                    zorder=zorder + 1,
                    weight="regular",
                    color=label_color,
                )
            )

    return labels


def plot_phase_diagram(
    ax, third_element: str, cmap: str = "viridis", vmin: float | None = None, vmax: float | None = None
) -> int:
    """
    Draw the Al-Cu-<third_element> phase diagram on ``ax`` from the cached CSVs.

    Reads ``entries_plot.csv`` and ``lines.csv`` from
    ``计算凸包结果/Al-Cu-<third_element>``, fills the hull region with a per-atom
    energy contour, draws the hull lines and the stable/unstable entries,
    highlights the Al8Cu4X-type formulas and labels the remaining stable entries.

    :param ax: matplotlib axes to draw on
    :param third_element: symbol of the third element X
    :param cmap: colormap name for the filled contour
    :param vmin: optional lower colour limit passed to ``tricontourf``
    :param vmax: optional upper colour limit passed to ``tricontourf``
    :return: number of triangles used for the fill (reconstructed from the hull
        lines when possible, otherwise matplotlib's default Delaunay); 0 when
        fewer than 3 stable entries exist
    :raises FileNotFoundError: if either input CSV is missing
    """
    save_root = Path(f"计算凸包结果/Al-Cu-{third_element}")
    entries_path = save_root / "entries_plot.csv"
    lines_path = save_root / "lines.csv"

    if not entries_path.exists() or not lines_path.exists():
        raise FileNotFoundError(f"Missing input CSV: {entries_path} or {lines_path}")

    df_entries_plot = pd.read_csv(entries_path)
    df_lines = pd.read_csv(lines_path)

    stable_flag = df_entries_plot["stable"]
    df_stable = df_entries_plot[stable_flag.eq(True)].copy()
    df_unstable = df_entries_plot[stable_flag.eq(False)].copy()

    triangle_count = 0

    # ---------- Fill the hull region (needs >= 3 stable points to triangulate) ----------
    if len(df_stable) >= 3:
        points = df_stable[["x", "y"]].to_numpy(float)
        epa = _energy_per_atom(df_stable["reduced_formula"], df_stable["energy"])

        triangles = _reconstruct_triangles_from_lines(points, df_lines, tol=1e-6)
        if triangles:
            tri = Triangulation(points[:, 0], points[:, 1], np.asarray(triangles, dtype=int))
            triangle_count = len(triangles)
        else:
            tri = Triangulation(points[:, 0], points[:, 1])  # fallback: matplotlib's own Delaunay
            triangle_count = len(tri.triangles)

        contour = ax.tricontourf(
            tri, epa, levels=50, cmap=cmap, alpha=0.7, zorder=1, extend="both", vmin=vmin, vmax=vmax
        )
        # Attach the colorbar through the axes' own figure instead of pyplot's
        # "current figure", which may be a different figure than the one owning ax.
        cbar = ax.figure.colorbar(contour, ax=ax, shrink=0.8)
        cbar.set_label("Formation Energy (eV/atom)", fontsize=10)
        cbar.ax.tick_params(labelsize=8)
        half_plot_set(cbar.ax)

    # ---------- Hull boundary lines ----------
    if not df_lines.empty:
        ax.plot(
            np.c_[df_lines["x1"], df_lines["x2"]].T,
            np.c_[df_lines["y1"], df_lines["y2"]].T,
            c="#555555",
            linewidth=0.5,
            zorder=9,
            alpha=0.9,
        )

    # ---------- Scatter points ----------
    if not df_stable.empty:
        ax.scatter(df_stable["x"], df_stable["y"], marker="o", color="green", s=30, label="Stable", zorder=30)

    if not df_unstable.empty:
        ax.scatter(df_unstable["x"], df_unstable["y"], marker="d", c="lightgray", s=10, label="Unstable", zorder=35)

    # ---------- Highlighted phases ----------
    # The same three spellings of the Al8Cu4X formula are highlighted in red
    # when stable and in blue when unstable.
    target_formulas = (
        f"Al8Cu4{third_element}",
        f"Al8{third_element}Cu4",
        f"{third_element}(Al2Cu)4",
    )
    highlights_stable = [
        HighlightSpec(formula=formula, color="red", text_color="red") for formula in target_formulas
    ]
    highlights_unstable = [
        HighlightSpec(formula=formula, color="blue", text_color="blue") for formula in target_formulas
    ]

    texts = []
    texts += _plot_highlights(ax, df_stable, highlights_stable, zorder=100)
    texts += _plot_highlights(ax, df_unstable, highlights_unstable, zorder=100)

    # ---------- Labels for the remaining stable entries ----------
    # Highlighted formulas already carry a label, so they are skipped here.
    # (A former relabelling branch for "X(Al2Cu)4" sat after this skip and could
    # never run, because that formula is itself one of the skipped ones.)
    skip = set(target_formulas)
    for _, row in df_stable.iterrows():
        formula = row["reduced_formula"]
        if formula in skip:
            continue
        texts.append(
            ax.text(
                row["x"],
                row["y"],
                to_latex(formula),
                fontsize=10,
                ha="center",
                va="center",
                zorder=100,
                weight="regular",
                color="black",
            )
        )

    if texts:
        adjust_text(texts, ax=ax)

    # ---------- Axes styling ----------
    ax.set_xticks([])
    ax.set_yticks([])
    for side in ("top", "right", "bottom", "left"):
        ax.spines[side].set_visible(False)
    ax.set_xlim(-0.05, 1.05)
    ax.set_ylim(-0.05, 0.95)

    return triangle_count

In [None]:
from tqdm import tqdm
import os
import numpy as np

# Render one phase diagram per third element from the cached convex-hull CSVs
# and save it as SVG and JPG next to those CSVs.
third_elements="Ag Au Ba Be Bi Ca Cd Ce Co Cr Dy Er Eu Fe Ga Ge Hf Ho In Ir K La Li Lu Mg Mn Mo Na Nb Nd Ni Os Pb Pd Pr Pt Rb Re Rh Ru Sb Sc Sm Sn Sr Ta Tb Tc Te Ti Tl Tm V W Y Zn Zr".split()

# The context manager closes the progress bar even if an element fails.
with tqdm(total=len(third_elements), desc="计算凸包进度") as process_bar:
    for third_element in third_elements:
        process_bar.set_description(f"计算凸包进度: {third_element}")
        save_root = f"计算凸包结果/Al-Cu-{third_element}"

        fig, ax = plt.subplots(figsize=(cm(15), cm(10)))
        try:
            # plot_phase_diagram reads its own input from the CSVs, so no
            # per-element filtering of df_E_f is needed here.
            triangle_count = plot_phase_diagram(ax, third_element, cmap='viridis')

            fig.savefig(f"{save_root}/Al-Cu-{third_element}.svg",
                        bbox_inches="tight", dpi=500, transparent=True)
            fig.savefig(f"{save_root}/Al-Cu-{third_element}.jpg",
                        bbox_inches="tight", dpi=1200)
        finally:
            # Always release the figure so a failure does not leave it open.
            plt.close(fig)

        process_bar.update(1)
fig

In [None]:
# Four-panel overview: one Al-Cu-X phase diagram per panel, tagged (a)-(d).
fig, axs = plt.subplots(2, 2, figsize=(cm(30), cm(20)), dpi=300)
third_elements = ["Ca", "Sc", "Sr", "Y"]
abcd = ['(a)', '(b)', '(c)', '(d)']

for i, (third_element, panel_tag) in enumerate(zip(third_elements, abcd)):
    # divmod(i, 2) gives (row, column) in row-major order.
    ax = axs[divmod(i, 2)]
    plot_phase_diagram(ax, third_element, cmap='viridis')
    ax.text(0.02, 0.95, panel_tag, transform=ax.transAxes, fontsize=16, verticalalignment='top')
plt.tight_layout()

In [None]:
# Export the overview figure in every required format; only the SVG uses dpi=1200.
for export_ext, export_dpi in (("svg", 1200), ("webp", 500), ("jpg", 500), ("pdf", 500), ("png", 500)):
    fig.savefig(f"计算凸包结果/Al-Cu-第三元素.{export_ext}", bbox_inches="tight", dpi=export_dpi)