In [1]:
import json
# Load the combined elasticity records (indexed and iterated as a list of dicts below).
# The encoding is pinned to UTF-8, the JSON interchange encoding, so decoding does
# not depend on the platform's locale default.
with open('../temp_data/combined_temp_data.json', encoding='utf-8') as f:
    temp_data = json.load(f)

In [2]:
# Show the first record to inspect the fields available on each entry.
first_record = temp_data[0]
print(first_record)

{'material composition': 'Aluminum', 'formula': 'Al', 'crystal system': 'cubic', 'temperature': 300, 'pressure': 1, 'elastic constant': [1.0824, 0.6216, 0.2841], 'elastic tensor voigt notation': [[108.24, 62.16, 62.16, 0.0, 0.0, 0.0], [62.16, 108.24, 62.16, 0.0, 0.0, 0.0], [62.16, 62.16, 108.24, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 28.41, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 28.41, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 28.41]]}


In [3]:
from pymatgen.core.periodic_table import Element
import math
def _none_if_nan(value):
    """Map a missing numeric value (None or NaN) to None; return anything else unchanged."""
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return None
    return value


def _rounded_or_none(value, ndigits):
    """Round ``value`` to ``ndigits`` decimals, or return None when it is None/NaN."""
    value = _none_if_nan(value)
    return None if value is None else round(value, ndigits)


# Electronegativity of every element, keyed by symbol. pymatgen documents that
# Element.X is NaN when no Pauling value exists; NaN is truthy and would be printed
# as "nan" downstream, so it is normalised to None like the ionization energy below.
electronegativities = {element.symbol: _none_if_nan(element.X) for element in Element}

# Ionization energy of every element, rounded to 3 decimals (None when unavailable).
ionization_energy = {element.symbol: _rounded_or_none(element.ionization_energy, 3) for element in Element}

# Elastic moduli and Poisson's ratio of every element.
bulk_modulus = {element.symbol: element.bulk_modulus for element in Element}
youngs_modulus = {element.symbol: element.youngs_modulus for element in Element}
poissons_ratio = {element.symbol: element.poissons_ratio for element in Element}

# Calculated atomic radius of every element.
atomic_radius = {element.symbol: element.atomic_radius_calculated for element in Element}



In [4]:
def append_property_info(element, property_value, property_name, unit=""):
    """Format one property of an element as a sentence fragment.

    Args:
        element: Key to look up in ``property_value`` (an element symbol in this notebook).
        property_value: Mapping from element key to the property's value.
        property_name: Phrase placed before the value, e.g. "an electronegativity".
        unit: Text appended directly after the value (include any leading space).

    Returns:
        ``"<property_name> of <value><unit>, "``, or ``""`` when the element has no
        usable value (missing key, a falsy value such as None, or NaN).
    """
    try:
        value = property_value[element]
    except KeyError:
        # An element absent from the table is treated like one without data.
        return ""
    # NaN is truthy, so it must be rejected explicitly or "nan" ends up in the text.
    if not value or (isinstance(value, float) and math.isnan(value)):
        return ""
    return f"{property_name} of {value}{unit}, "

from pymatgen.core.composition import Composition
def get_element_info(composition: str) -> str:
    """Describe the tabulated properties of each element in a formula.

    Args:
        composition: Chemical formula string, parsed with ``Composition``.

    Returns:
        One sentence per element of the form ``"<symbol> has <property>, ... . "``,
        concatenated in the order of ``Composition.elements``. Elements for which no
        property is available are omitted; the result is ``""`` if none has any.
    """
    # (phrase, lookup table, unit suffix) in the order the properties are listed.
    properties = (
        ("an electronegativity", electronegativities, ""),
        ("an ionization energy", ionization_energy, " eV"),
        ("a bulk modulus", bulk_modulus, ""),
        ("a Young's modulus", youngs_modulus, ""),
        ("a Poisson's ratio", poissons_ratio, ""),
        ("an atomic radius", atomic_radius, " Å"),
    )
    sentences = []
    for element in Composition(composition).elements:
        symbol = element.symbol
        fragments = "".join(
            append_property_info(symbol, table, phrase, unit)
            for phrase, table, unit in properties
        )
        # Each element's sentence is built on its own so that an element without any
        # data is skipped cleanly instead of leaving a dangling "<symbol> has " behind
        # the sentences of earlier elements.
        if not fragments:
            continue
        if fragments.endswith(", "):
            fragments = fragments[:-2]
        sentences.append(f"{symbol} has {fragments}. ")
    return "".join(sentences)

In [5]:
# Attach a natural-language 'description' to every record whose pressure is 1 and
# whose temperature is non-zero; all other records are left without that key.
for record in temp_data:
    if record['pressure'] != 1 or record['temperature'] == 0:
        continue

    formula = record['formula']
    comp = Composition(formula)

    # Share of each element in the reduced formula, as a percentage with 2 decimals.
    reduced_amounts = comp.to_reduced_dict
    total_amount = sum(reduced_amounts.values())
    ratio_dict = {}
    for symbol, amount in reduced_amounts.items():
        ratio_dict[symbol] = round((amount / total_amount) * 100, 2)

    # Collapse the numbered trigonal/tetragonal variants onto the plain family name.
    cs = record['crystal system']
    for family in ('trigonal', 'tetragonal'):
        if cs in (f'{family} 1', f'{family} 2'):
            cs = family
            break

    intro = f"The material is {comp.reduced_formula} with crystal system {cs} and composition ratio {ratio_dict} (total is 100%). "
    interval = (
        "The information about the material is as follows. "
        if len(ratio_dict) == 1
        else "The information about the elements contained in the material is as follows. "
    )

    # The lead-in sentence is only used when there is element information to follow it.
    elem_info = get_element_info(formula)
    record['description'] = intro + (interval + elem_info if elem_info != "" else elem_info)

In [6]:
def build_alpaca_dataset(datapoint):
    """Turn one described record into an Alpaca-style training example.

    Reads ``datapoint['temperature']``, ``datapoint['description']`` and
    ``datapoint['elastic tensor voigt notation']`` and returns a dict with the keys
    'instruction', 'input' and 'output', each holding a string.
    """
    instruction = (
        "Given a material description, predict its elastic tensor at "
        f"{datapoint['temperature']}K temperature accurately and directly using scientific logic. "
        "Provide the answer as a 6x6 Python matrix without additional comments, descriptions, or explanations. "
    )
    return {
        'instruction': instruction,
        'input': str(datapoint['description']),
        'output': str(datapoint['elastic tensor voigt notation']),
    }

In [7]:
# Convert every record that received a 'description' into an Alpaca example.
described_records = (datapoint for datapoint in temp_data if 'description' in datapoint)
temp_formula_data = list(map(build_alpaca_dataset, described_records))

In [8]:
# Report how many examples were built, then save them as a JSON array with one
# serialized record per line.
print(len(temp_formula_data))
with open('temp_formula_data.json', 'w') as out_file:
    serialized_records = ',\n'.join(map(json.dumps, temp_formula_data))
    out_file.write(f"[{serialized_records}]\n")

1266


### 构建预测体模量，剪切模量数据

In [9]:
import json
# Reload the combined elasticity records so this section starts from records that
# have no 'description' key yet. The encoding is pinned to UTF-8, the JSON
# interchange encoding, so decoding does not depend on the platform's locale default.
with open('../temp_data/combined_temp_data.json', encoding='utf-8') as f:
    temp_data = json.load(f)

In [11]:
from pymatgen.core.periodic_table import Element
import math
def _none_if_nan(value):
    """Map a missing numeric value (None or NaN) to None; return anything else unchanged."""
    if value is None or (isinstance(value, float) and math.isnan(value)):
        return None
    return value


def _rounded_or_none(value, ndigits):
    """Round ``value`` to ``ndigits`` decimals, or return None when it is None/NaN."""
    value = _none_if_nan(value)
    return None if value is None else round(value, ndigits)


# Electronegativity of every element, keyed by symbol. pymatgen documents that
# Element.X is NaN when no Pauling value exists; NaN is truthy and would be printed
# as "nan" downstream, so it is normalised to None like the ionization energy below.
electronegativities = {element.symbol: _none_if_nan(element.X) for element in Element}

# Ionization energy of every element, rounded to 3 decimals (None when unavailable).
ionization_energy = {element.symbol: _rounded_or_none(element.ionization_energy, 3) for element in Element}

# Elastic moduli and Poisson's ratio of every element.
bulk_modulus = {element.symbol: element.bulk_modulus for element in Element}
youngs_modulus = {element.symbol: element.youngs_modulus for element in Element}
poissons_ratio = {element.symbol: element.poissons_ratio for element in Element}

# Calculated atomic radius of every element.
atomic_radius = {element.symbol: element.atomic_radius_calculated for element in Element}

In [12]:
def append_property_info(element, property_value, property_name, unit=""):
    """Format one property of an element as a sentence fragment.

    Args:
        element: Key to look up in ``property_value`` (an element symbol in this notebook).
        property_value: Mapping from element key to the property's value.
        property_name: Phrase placed before the value, e.g. "an electronegativity".
        unit: Text appended directly after the value (include any leading space).

    Returns:
        ``"<property_name> of <value><unit>, "``, or ``""`` when the element has no
        usable value (missing key, a falsy value such as None, or NaN).
    """
    try:
        value = property_value[element]
    except KeyError:
        # An element absent from the table is treated like one without data.
        return ""
    # NaN is truthy, so it must be rejected explicitly or "nan" ends up in the text.
    if not value or (isinstance(value, float) and math.isnan(value)):
        return ""
    return f"{property_name} of {value}{unit}, "

from pymatgen.core.composition import Composition
def get_element_info(composition: str) -> str:
    """Describe the tabulated properties of each element in a formula.

    Args:
        composition: Chemical formula string, parsed with ``Composition``.

    Returns:
        One sentence per element of the form ``"<symbol> has <property>, ... . "``,
        concatenated in the order of ``Composition.elements``. Elements for which no
        property is available are omitted; the result is ``""`` if none has any.
    """
    # (phrase, lookup table, unit suffix) in the order the properties are listed.
    properties = (
        ("an electronegativity", electronegativities, ""),
        ("an ionization energy", ionization_energy, " eV"),
        ("a bulk modulus", bulk_modulus, ""),
        ("a Young's modulus", youngs_modulus, ""),
        ("a Poisson's ratio", poissons_ratio, ""),
        ("an atomic radius", atomic_radius, " Å"),
    )
    sentences = []
    for element in Composition(composition).elements:
        symbol = element.symbol
        fragments = "".join(
            append_property_info(symbol, table, phrase, unit)
            for phrase, table, unit in properties
        )
        # Each element's sentence is built on its own so that an element without any
        # data is skipped cleanly instead of leaving a dangling "<symbol> has " behind
        # the sentences of earlier elements.
        if not fragments:
            continue
        if fragments.endswith(", "):
            fragments = fragments[:-2]
        sentences.append(f"{symbol} has {fragments}. ")
    return "".join(sentences)

In [13]:
# Attach a natural-language 'description' to every record whose pressure is 1 and
# whose temperature is non-zero; all other records are left without that key.
for record in temp_data:
    if record['pressure'] != 1 or record['temperature'] == 0:
        continue

    formula = record['formula']
    comp = Composition(formula)

    # Share of each element in the reduced formula, as a percentage with 2 decimals.
    reduced_amounts = comp.to_reduced_dict
    total_amount = sum(reduced_amounts.values())
    ratio_dict = {}
    for symbol, amount in reduced_amounts.items():
        ratio_dict[symbol] = round((amount / total_amount) * 100, 2)

    # Collapse the numbered trigonal/tetragonal variants onto the plain family name.
    cs = record['crystal system']
    for family in ('trigonal', 'tetragonal'):
        if cs in (f'{family} 1', f'{family} 2'):
            cs = family
            break

    intro = f"The material is {comp.reduced_formula} with crystal system {cs} and composition ratio {ratio_dict} (total is 100%). "
    interval = (
        "The information about the material is as follows. "
        if len(ratio_dict) == 1
        else "The information about the elements contained in the material is as follows. "
    )

    # The lead-in sentence is only used when there is element information to follow it.
    elem_info = get_element_info(formula)
    record['description'] = intro + (interval + elem_info if elem_info != "" else elem_info)

In [14]:
from pymatgen.analysis.elasticity.elastic import ElasticTensor
def build_alpaca_dataset(datapoint, label):
    """Turn one described record into an Alpaca-style example for a modulus target.

    Args:
        datapoint: Record providing 'description', 'temperature' and
            'elastic tensor voigt notation'.
        label: Target selector. Only "b" (Voigt bulk modulus) is supported.

    Returns:
        Dict with the keys 'input', 'instruction' and 'output', each holding a string.
        'output' is the Voigt bulk modulus of the record's tensor rounded to 2 decimals.

    Raises:
        ValueError: If ``label`` is not a supported target. Such a call used to
            return a record containing only 'input', which is unusable as a
            training example and went unnoticed.
    """
    if label != "b":
        raise ValueError(f"Unsupported label {label!r}; expected 'b'.")

    tensor = ElasticTensor.from_voigt(datapoint['elastic tensor voigt notation'])
    # Key order is kept as input -> instruction -> output so serialized records
    # look the same as before.
    return {
        'input': f"{datapoint['description']}",
        'instruction': f"Given a material description, predict its Voigt bulk modulus (unit: GPa) at {datapoint['temperature']}K temperature accurately and directly using scientific logic. Provide the answer in the form of individual numerical value without additional comments, descriptions, or explanations. ",
        'output': f"{round(tensor.k_voigt, 2)}",
    }

In [15]:
# Build the bulk-modulus examples for every record that received a 'description'.
# `w` is the collection written to disk; it is the same list object as `b`.
described_records = (datapoint for datapoint in temp_data if 'description' in datapoint)
b = [build_alpaca_dataset(datapoint, 'b') for datapoint in described_records]
w = b

In [16]:
# Report how many examples were built, then save them as a JSON array with one
# serialized record per line.
print(len(w))
with open('temp_formula_data_moduli.json', 'w') as out_file:
    serialized_records = ',\n'.join(map(json.dumps, w))
    out_file.write(f"[{serialized_records}]\n")

1266
