In [42]:
import json
import os
import numpy as np
from typing import Any, Dict, List

# Type alias: a dataset is a dictionary with string keys and arbitrary values.
Dataset = Dict[str, Any]

# Dataset error class
class DatasetError(Exception):
    """Raised when a dataset fails validation."""

# Recursive map helper for arbitrarily nested lists
def recursive_map(f, x):
    """Apply ``f`` to every non-list leaf of ``x`` and keep the list nesting.

    Only ``list`` instances are descended into; any other object (including
    tuples) is treated as a leaf and passed to ``f`` as a whole.
    """
    if not isinstance(x, list):
        return f(x)
    return [recursive_map(f, item) for item in x]

# Dataset validation
def check_dataset(dataset: Dataset):
    """Validate the solver section and the shape of ``values`` in a dataset.

    Two checks are performed:

    * If a solver section is present, it must contain
      ``sublattice_configurations`` and ``sublattice_site_ratios``; when any
      configuration contains a mixed (list-valued) sublattice,
      ``sublattice_occupancies`` must be given as well.
    * If the conditions contain both ``P`` and ``T``, the shape of ``values``
      must equal ``(number of pressures, number of temperatures, number of
      sublattice configurations)``.

    Missing ``values`` / ``conditions`` are treated as empty, and a solver
    that is absent or ``None`` is treated as "no solver".

    Args:
        dataset: The dataset dictionary to validate. It is not modified.

    Raises:
        DatasetError: If occupancies are missing for mixed sublattices, if
            ``P``/``T`` conditions are given without a solver section, or if
            the shape of ``values`` does not match the conditions.
        KeyError: If the solver section lacks ``sublattice_configurations``
            or ``sublattice_site_ratios``.
    """
    # Both fields are optional (e.g. input files that only describe phases).
    values = dataset.get('values', [])
    conditions = dataset.get('conditions', {})

    solver = dataset.get('solver')
    sublattice_configurations = None
    if solver is not None:
        sublattice_configurations = solver['sublattice_configurations']
        # The site ratios are not inspected here, but they are a required key.
        if 'sublattice_site_ratios' not in solver:
            raise KeyError('sublattice_site_ratios')

        has_mixing = any(
            isinstance(subl, list)
            for config in sublattice_configurations
            for subl in config
        )
        if has_mixing and solver.get('sublattice_occupancies') is None:
            raise DatasetError('混合的子晶格需要定义占位率！')

    # Without both P and T there is no condition grid to compare against.
    if 'P' not in conditions or 'T' not in conditions:
        return

    if sublattice_configurations is None:
        # The expected shape depends on the number of configurations, so a
        # solver section is mandatory here; report it as a dataset problem
        # instead of failing with a bare KeyError.
        raise DatasetError('包含 P 和 T 条件的数据集需要定义 solver！')

    num_pressure = np.atleast_1d(conditions['P']).size
    num_temperature = np.atleast_1d(conditions['T']).size
    conditions_shape = (num_pressure, num_temperature, len(sublattice_configurations))
    # An empty ``values`` list is reported with the fixed shape (0,).
    values_shape = np.array(values).shape if values else (0,)

    if conditions_shape != values_shape:
        raise DatasetError(f'条件和数值的形状不匹配：条件形状 {conditions_shape}，值形状 {values_shape}')

# Dataset clean-up: make sure numeric fields are floats
def clean_dataset(dataset: Dataset) -> Dataset:
    """Convert the numeric fields of ``dataset`` to ``float`` in place.

    The condition values, the solver's site ratios, the solver's occupancies
    (when present and not ``None``) and ``values`` are converted, keeping
    their list nesting. ``values`` is always set afterwards; it becomes ``[]``
    when the dataset had none.

    Returns:
        The same ``dataset`` object that was passed in.
    """
    if 'conditions' in dataset:
        converted = {}
        for name, raw in dataset["conditions"].items():
            converted[name] = recursive_map(float, raw)
        dataset["conditions"] = converted

    solver = dataset.get("solver")
    if solver is not None:
        solver["sublattice_site_ratios"] = recursive_map(float, solver["sublattice_site_ratios"])
        occupancies = solver.get("sublattice_occupancies")
        if occupancies is not None:
            solver["sublattice_occupancies"] = recursive_map(float, occupancies)

    # A missing "values" entry defaults to an empty list.
    raw_values = dataset.get("values", [])
    dataset["values"] = recursive_map(float, raw_values)

    return dataset

# Load a dataset from disk and validate it
def load_and_check_dataset(file_path: str) -> Dataset:
    """Read a JSON dataset, validate it, and normalise its numeric fields.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed dataset after ``check_dataset`` accepted it and
        ``clean_dataset`` converted its numeric fields.

    Raises:
        DatasetError: If ``check_dataset`` rejects the dataset.
    """
    # JSON is UTF-8 by specification; without an explicit encoding the file
    # would be decoded with the platform's locale code page.
    with open(file_path, 'r', encoding='utf-8') as file:
        dataset = json.load(file)
    check_dataset(dataset)  # validate before any conversion happens
    return clean_dataset(dataset)

# Build occupancies that mirror the layout of sublattice_configurations
def replace_occupancies_with_values(sublattice_configurations, occupancy_values, sublattice_site_ratios):
    """Lay ``occupancy_values`` out in the shape of ``sublattice_configurations``.

    ``occupancy_values`` is consumed from left to right across all
    configurations. For each entry of a configuration:

    * a list entry takes as many consecutive values as it has members
      (as one slice);
    * the string ``"VA"`` takes no value and is replaced by the site ratio
      found at the entry's position within its configuration;
    * any other entry takes the next single value.

    Returns:
        A list with one inner list per configuration.
    """
    result = []
    cursor = 0  # index of the next unused occupancy value

    for configuration in sublattice_configurations:
        row = []
        for position, species in enumerate(configuration):
            if isinstance(species, list):
                stop = cursor + len(species)
                row.append(occupancy_values[cursor:stop])
                cursor = stop
                continue
            if species == "VA":
                row.append(sublattice_site_ratios[position])
                continue
            row.append(occupancy_values[cursor])
            cursor += 1
        result.append(row)

    return result

# Hand-rolled JSON layout: one line per field, no extra line breaks
def custom_json_formatter(data):
    """Render ``data`` as JSON text with exactly one line per field.

    Top-level fields are indented by two spaces and the solver fields by
    four. Occupancies, configurations, conditions and values are written
    without a space after commas; the remaining fields use the default
    ``json.dumps`` spacing. A missing ``conditions`` entry is written as
    ``{}``; every other field is required. The text ends with a newline.
    """
    def dense(obj):
        # Compact form: no space after item separators.
        return json.dumps(obj, separators=(",", ": "))

    lines = [
        "{",
        f'  "components": {json.dumps(data["components"])},',
        f'  "phases": {json.dumps(data["phases"])},',
        '  "solver": {',
    ]

    solver = data["solver"]
    lines += [
        f'    "mode": {json.dumps(solver["mode"])},',
        f'    "sublattice_site_ratios": {json.dumps(solver["sublattice_site_ratios"])},',
        f'    "sublattice_occupancies": {dense(solver["sublattice_occupancies"])},',
        f'    "sublattice_configurations": {dense(solver["sublattice_configurations"])}',
        '  },',
    ]

    lines += [
        f'  "conditions": {dense(data.get("conditions", {}))},',
        f'  "output": {json.dumps(data["output"])},',
        f'  "values": {dense(data["values"])},',
        f'  "reference": {json.dumps(data["reference"])},',
        f'  "comment": {json.dumps(data["comment"])}',
        "}",
    ]

    return "\n".join(lines) + "\n"

# Build the per-phase variable part of a dataset
def generate_variable_data(phase_name, sublattice_model=None, sublattice_site_ratios=None, occupancy_values_list=None, conditions=None, output="", values=None, reference="", comment=""):
    """Assemble the phase-specific fields of a dataset.

    For every entry of ``occupancy_values_list`` one occupancy structure is
    produced by ``replace_occupancies_with_values`` and one reference to
    ``sublattice_model`` is recorded as the matching configuration, so both
    result lists always have the same length.

    Args:
        phase_name: Accepted for the caller's convenience; not used here.
        sublattice_model: Sublattice layout shared by all occupancy sets.
        sublattice_site_ratios: Site ratios handed to
            ``replace_occupancies_with_values``.
        occupancy_values_list: Iterable of flat occupancy value sequences.
            ``None`` (the default) means "no occupancy sets" and yields
            empty result lists.
        conditions, output, values, reference, comment: Passed through
            unchanged into the result.

    Returns:
        A dict with the keys ``sublattice_occupancies``,
        ``sublattice_configurations``, ``conditions``, ``output``,
        ``values``, ``reference`` and ``comment``.
    """
    # The default of None used to be iterated directly and raised TypeError.
    occupancy_sets = [] if occupancy_values_list is None else occupancy_values_list

    sublattice_occupancies = [
        replace_occupancies_with_values(sublattice_model, occupancy_values, sublattice_site_ratios)
        for occupancy_values in occupancy_sets
    ]
    # One configuration entry per occupancy set; all refer to the same model.
    sublattice_configs = [sublattice_model for _ in sublattice_occupancies]

    return {
        "sublattice_occupancies": sublattice_occupancies,
        "sublattice_configurations": sublattice_configs,
        "conditions": conditions,
        "output": output,
        "values": values,
        "reference": reference,
        "comment": comment,
    }

# Write one JSON dataset per phase, each in its own folder
def create_phase_files(input_data, output_dir, variable_data_func):
    """Write ``<output_dir>/<phase>/<phase>.json`` for every phase in ``input_data``.

    ``variable_data_func(phase_name, sublattice_model, sublattice_site_ratios)``
    is called once per phase and must return a mapping with the keys
    ``sublattice_occupancies``, ``sublattice_configurations``, ``output``,
    ``values``, ``reference`` and ``comment``; ``conditions`` is optional.
    The output directory and the phase folders are created when missing, and
    a message is printed for every file written.
    """
    os.makedirs(output_dir, exist_ok=True)
    components = input_data['components']

    for phase_name, phase_data in input_data['phases'].items():
        site_ratios = phase_data['sublattice_site_ratios']
        model = phase_data['sublattice_model']

        # Phase-specific part of the dataset, supplied by the caller.
        generated = variable_data_func(phase_name, model, site_ratios)

        phase_folder = os.path.join(output_dir, phase_name)
        os.makedirs(phase_folder, exist_ok=True)

        solver_section = {
            "mode": "manual",
            "sublattice_site_ratios": site_ratios,
            "sublattice_occupancies": generated["sublattice_occupancies"],
            "sublattice_configurations": generated["sublattice_configurations"],
        }
        document = {
            "components": components,
            "phases": [phase_name],
            "solver": solver_section,
            "conditions": generated.get("conditions", {}),
            "output": generated["output"],
            "values": generated["values"],
            "reference": generated["reference"],
            "comment": generated["comment"],
        }

        output_file_path = os.path.join(phase_folder, f'{phase_name}.json')
        rendered = custom_json_formatter(document)
        with open(output_file_path, 'w') as json_file:
            json_file.write(rendered)

        print(f'{phase_name} 相的文件已生成：{output_file_path}')

# Entry point: run the whole workflow
def main():
    """Load ``Ag-Cu-input.json`` and write one dataset file per phase to ``./output-json``."""
    # The input file is validated and normalised while loading.
    input_data = load_and_check_dataset('Ag-Cu-input.json')

    def variable_data_func(phase_name, sublattice_model, sublattice_site_ratios):
        """Return the hard-coded data for ``phase_name`` (empty data for unknown phases)."""
        # The table is rebuilt on every call so each result owns fresh lists.
        known_phases = {
            "FCC_A1": {
                "occupancy_values_list": [
                    [0.95, 0.05],
                    [0.9, 0.1],
                    [0.859, 0.141],
                    [0.049, 0.951],
                    [0.02, 0.98],
                ],
                "conditions": {"P": 101325, "T": 1052},
                "output": "HM_MIX",
                "values": [[[1200, 2250, 3000, 1740, 760]]],
                "reference": "Expt., Hultgren et al. (1052 K)",
                "comment": "calculations Table 7",
            },
            # NOTE(review): two occupancy sets but three values are listed
            # for LIQUID - confirm which of the two is intended.
            "LIQUID": {
                "occupancy_values_list": [
                    [0.95, 0.05],
                    [0.02, 0.98],
                ],
                "conditions": {"T": 1373, "P": 101325},
                "output": "HM_MIX",
                "values": [[[1500, 2500, 3200]]],
                "reference": "Expt., Smith et al.",
                "comment": "LIQUID phase data",
            },
        }

        spec = known_phases.get(phase_name)
        if spec is None:
            # Phases without tabulated data get empty placeholders.
            spec = {
                "occupancy_values_list": [],
                "conditions": {},
                "output": "",
                "values": [],
                "reference": "",
                "comment": "",
            }

        return generate_variable_data(
            phase_name,
            sublattice_model=sublattice_model,
            sublattice_site_ratios=sublattice_site_ratios,
            **spec,
        )

    # Write one JSON file per phase into the output folder.
    create_phase_files(input_data, './output-json', variable_data_func)


if __name__ == "__main__":
    main()


LIQUID 相的文件已生成：./output-json\LIQUID\LIQUID.json
FCC_A1 相的文件已生成：./output-json\FCC_A1\FCC_A1.json
