In [None]:
import re

In [None]:
# Regular expression for one "Important cfgspace INFO" block of an output file.
# It matches, in order:
#   1. the banner line "********** BEGIN PRINT Important cfgspace INFO **********"
#      (ten literal asterisks on each side),
#   2. one empty line,
#   3. the column header "ncsf | symmetry | nstates | spintwo | ncfg | energy"
#      with the exact spacing written below,
#   4. one data row: five unsigned integers and one (optionally negative)
#      decimal number separated by " | ", the decimal being followed by a comma.
# Capture groups, in order: ncsf, symmetry, nstates, spintwo, ncfg, energy.
# With re.findall every match is returned as a 6-tuple of strings.
pattern = r"\*\*\*\*\*\*\*\*\*\* BEGIN PRINT Important cfgspace INFO \*\*\*\*\*\*\*\*\*\*\n\n\s+ncsf \| symmetry \|  nstates \|  spintwo \|     ncfg \|   energy \n\s+(\d+) \| +(\d+) \| +(\d+) \| +(\d+) \| +(\d+) \| +(-?\d+\.\d+),"


In [None]:
import os

# Directory that holds the output files to parse. Defined once so the listing
# and the open() call cannot drift apart.
GFOCK_ENE_DIR = "../data/gfock_ene"

# Maps (bondlength, basis_set) -> dict with the fields of the SECOND cfgspace
# block found in the corresponding file.
results = {}

# os.listdir returns entries in arbitrary order; sorting makes the insertion
# order of `results` (and anything derived from it) reproducible across runs.
for filename in sorted(os.listdir(GFOCK_ENE_DIR)):
    # Only files named cr2_rdm1*.out are of interest.
    if not (filename.endswith(".out") and filename.startswith("cr2_rdm1")):
        continue

    # Split the file name on "_": the third field is the bond length
    # multiplied by 100, the fourth field is the basis-set name plus ".out".
    parts = filename.split("_")
    bondlength = int(parts[2]) / 100  # undo the x100 scaling used in the name
    basis_set = parts[3].replace(".out", "")

    # Read the whole file; the pattern spans several lines.
    with open(os.path.join(GFOCK_ENE_DIR, filename), "r") as file:
        content = file.read()

    matches = re.findall(pattern, content)
    if len(matches) < 2:
        # Only the second block is used, so a file with fewer than two blocks
        # contributes nothing. Say so instead of dropping it silently, since a
        # missing entry otherwise shows up only as a gap downstream.
        print(f"Skipping {filename}: found {len(matches)} cfgspace block(s), need at least 2")
        continue

    # Keep only the second match.
    ncsf, symmetry, nstates, spintwo, ncfg, energy = matches[1]
    results[(bondlength, basis_set)] = {
        "ncsf": int(ncsf),
        "symmetry": int(symmetry),
        "nstates": int(nstates),
        "spintwo": int(spintwo),
        "ncfg": int(ncfg),
        "energy": float(energy),
    }

# Show the collected results.
print(results)

In [None]:
import matplotlib.pyplot as plt

# Group the parsed energies by basis set:
# grouped_results[basis] = {"bondlengths": [...], "energies": [...]}
grouped_results = {}
for (bondlength, basis), data in results.items():
    series = grouped_results.setdefault(basis, {"bondlengths": [], "energies": []})
    series["bondlengths"].append(bondlength)
    series["energies"].append(data["energy"])

# Sort every series by increasing bond length. This runs once, after all points
# have been collected; doing it inside the loop above would re-sort every
# series for each added point and rebind that loop's variables.
for series in grouped_results.values():
    points = sorted(
        zip(series["bondlengths"], series["energies"]),
        key=lambda point: point[0],
    )
    series["bondlengths"] = [bondlength for bondlength, _ in points]
    series["energies"] = [energy for _, energy in points]

# Draw one energy-vs-bondlength curve per basis set.
plt.figure(figsize=(10, 6))
for basis, series in grouped_results.items():
    plt.plot(series["bondlengths"], series["energies"], marker='o', label=basis)

plt.xlabel("Bondlength (Å)")
plt.ylabel("Energy (Hartree)")
plt.title("Energy vs Bondlength for Different Basis Sets")
plt.legend()
plt.grid(True)
plt.show()

# The result is correct!