In [20]:
import os
from langchain.llms import AzureOpenAI
import yaml
import pandas as pd
import json
import re
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from prospectordb import ProspectorDB
from langchain.schema.runnable import RunnableLambda, RunnableBranch

In [21]:
# Azure OpenAI connection settings, exported as environment variables.
# NOTE(review): the "xxx" values are placeholders; supply the real key and
# endpoint from outside the notebook rather than committing them here.
os.environ.update(
    {
        "OPENAI_API_KEY": "xxx",
        "OPENAI_API_TYPE": "azure",
        "OPENAI_API_BASE": "xxx",
    }
)

In [22]:
# Pin the Azure OpenAI API version before the client is constructed.
os.environ.update(OPENAI_API_VERSION="2022-12-01")

# Completion client: temperature 0 and at most 500 generated tokens per call.
llm = AzureOpenAI(deployment_name="xxx", max_tokens=500, temperature=0)

# Smoke-test the deployment with a throwaway question.
llm("How is the weather today?")

'\n\nI am an AI and do not have the ability to experience weather. Can I assist you with anything else?'

In [23]:
# Property catalogue; one row per property. The columns used later in this
# notebook are "Is table property" and "Name from prospector database".
all_property_table = "all_properties_v2.csv"
all_pro_info_df = pd.read_csv(
    filepath_or_buffer=all_property_table,
    encoding="utf-8",
)

In [24]:
def label_convert(label):
    """Turn a human-readable property name into an upper-case label key.

    Every single space becomes an underscore and all letters are upper-cased,
    e.g. ``"Notched Izod Impact"`` -> ``"NOTCHED_IZOD_IMPACT"``.
    """
    # Splitting on the literal " " keeps empty pieces, so consecutive spaces
    # still map to consecutive underscores.
    words = label.upper().split(" ")
    return "_".join(words)

In [25]:
# Inspect the "Is table property" flag column (float 0.0 / 1.0 values in the
# recorded output); rows equal to 1 are selected later as parsable properties.
all_pro_info_df["Is table property"]

0      0.0
1      0.0
2      0.0
3      0.0
4      0.0
      ... 
255    1.0
256    1.0
257    1.0
258    1.0
259    1.0
Name: Is table property, Length: 260, dtype: float64

In [26]:
# Sanity check: spaces become underscores and the name is upper-cased.
label_convert("Notched Izod Impact")

'NOTCHED_IZOD_IMPACT'

In [27]:
# Spot-check the unit ProspectorDB reports for this property
# (the recorded run returned 'J/m').
ProspectorDB.getUnitOf("Notched Izod Impact")

'J/m'

In [28]:
# Spot-check a second property (the recorded run returned 'MPa').
ProspectorDB.getUnitOf("Flexural Strength at Yield")

'MPa'

In [29]:
def strip_parser(any_string):
    """Return ``any_string`` without leading or trailing whitespace.

    Used as the output parser at the end of the LLM chains in this notebook.
    """
    trimmed = any_string.strip()
    return trimmed

In [30]:
# Used with .match() to detect text that contains a line break: group 1 is the
# first line including its "\n", group 2 is the text of the line that follows
# ("." does not match "\n", so neither group spans more than one line).
re_middle_lb = re.compile(r"(.*\n)(.*)")

In [31]:
def partAfterLb(a_string):
    """Return the part of ``a_string`` that follows its first line break.

    Some unit strings arrive as a label line followed by the unit itself
    (e.g. ``"Rate\\nppm/°C"``); only the second line is the unit.

    Args:
        a_string: Text to inspect.

    Returns:
        The text of the second line when ``a_string`` contains a line break
        (anything after a further line break is ignored); otherwise
        ``a_string`` unchanged.
    """
    # Match once against the *argument*. The previous version re-ran the match
    # on a hard-coded sample string, so every multi-line input came back as
    # 'ppm/°C' regardless of its content.
    match = re_middle_lb.match(a_string)
    if match is None:
        return a_string
    return match.group(2)

In [32]:
def get_units_output_parser(any_string):
    """Normalise a comma-separated unit list returned by the LLM.

    The text is trimmed and split on ``", "``. For each entry:

    * a ratio whose first two ``/``-separated parts are identical
      (``in/in``, ``mm/mm``) is replaced by ``%``;
    * the result is then passed through ``partAfterLb``.

    The cleaned entries are joined back together with ``", "``.
    """
    cleaned = []
    for raw_unit in strip_parser(any_string).split(", "):
        parts = raw_unit.split("/")
        # More than one part means the entry contains a "/".
        if len(parts) > 1 and parts[0] == parts[1]:
            candidate = "%"
        else:
            candidate = raw_unit
        cleaned.append(partAfterLb(candidate))
    return ", ".join(cleaned)
# Smoke tests for the parser:
# 1) self-ratios (in/in, mm/mm, m/m) collapse to "%";
print(get_units_output_parser('in/in, mm/mm, m/m'))
# 2) ordinary units pass through unchanged;
print(get_units_output_parser('kg/m³, kg/m3'))
# 3) an entry with a leading label line keeps only the part after the line break.
print(get_units_output_parser('Rate\nppm/°C, µm/m°C, in/in°F, cm³/s'))

%, %, %
kg/m³, kg/m3
ppm/°C, µm/m°C, in/in°F, cm³/s


In [33]:
# Few-shot chain that asks the LLM for alternative written forms of a unit.
# Each pair below is (unit asked about, comma-separated forms to answer with).
unit_forms_examples = [
    {"question": f"Generate several forms for the unit of {unit}", "answer": forms}
    for unit, forms in (
        ("MPa", "mpa"),
        ("J/m", "j/m"),
        ("%", "%"),
        ("J/kg/°C", "j/kg/°c"),
        ("g/cm³", "g/cm³, g/cm3"),
        ("kg/m³", "kg/m³, kg/m3"),
    )
]

# How a single worked example is rendered inside the prompt.
unit_form_example_prompt = PromptTemplate(
    template="Question: {question}\n{answer}",
    input_variables=["question", "answer"],
)

# Worked examples followed by the actual question for the requested unit.
unit_form_prompt = FewShotPromptTemplate(
    suffix="Question: Generate several forms for the unit of {input}",
    input_variables=["input"],
    example_prompt=unit_form_example_prompt,
    examples=unit_forms_examples,
)

# LLM-backed path: render the prompt, complete it, trim surrounding whitespace.
_unit_form_chain = unit_form_prompt | llm | RunnableLambda(strip_parser)

# "%" is answered directly with "%"; every other unit goes through the LLM path.
unit_form_chain = RunnableBranch(
    (lambda x: x["input"] == "%", RunnableLambda(lambda _: "%")),
    _unit_form_chain,
)

# Check the short-circuit branch.
unit_form_chain.invoke({"input": "%"})

'%'

In [34]:
# Few-shot chain that asks the LLM which units a property is usually given in.
# Each pair below is (property name, comma-separated units to answer with).
get_units_examples = [
    {"question": f"Give me several units for the property of {prop}", "answer": units}
    for prop, units in (
        ("Frequency", "hertz, kilohertz, megahertz, gigahertz"),
        ("Power", "watt, kilowatt, milliwatt, btu, hp"),
        ("Density", "g/cm³, kg/m³, g/l, lb/gal"),
        ("Flexural Strength", "atm, kPa, MPa, msi"),
        ("Notched Izod Impact", "J/m, J/cm"),
        ("Tensile Elongation at Yield", "%"),
    )
]

# How a single worked example is rendered inside the prompt.
get_units_example_prompt = PromptTemplate(
    template="Question: {question}\n{answer}",
    input_variables=["question", "answer"],
)

# Worked examples followed by the actual question for the requested property.
get_units_prompt = FewShotPromptTemplate(
    suffix="Question: Give me several units for the property of {input}",
    input_variables=["input"],
    example_prompt=get_units_example_prompt,
    examples=get_units_examples,
)

# Render the prompt, complete it, then normalise the returned unit list.
get_units_chain = get_units_prompt | llm | RunnableLambda(get_units_output_parser)

# Try the chain on one property.
get_units_chain.invoke({"input": "CLTE, Transverse"})

'µm/m°C, in/in°F, ppm/°C'

In [35]:
# Spot-check a property with a compound unit (the recorded run returned 'g/m²/24 hr').
ProspectorDB.getUnitOf("Water Vapor Transmission")

'g/m²/24 hr'

In [36]:
# Ask the LLM chain for candidate units of one property; the result is a
# single comma-separated string.
get_units_chain.invoke({"input": "DTUL Unannealed"})

'°C, °F, K'

In [37]:
# Ask for alternative spellings of one unit and split the comma-separated
# answer into a list, the same way the generation loop consumes it.
unit_form_chain.invoke({"input": "°F"}).split(", ")

['°F', 'deg F', 'degrees Fahrenheit']

In [38]:
# Database names of the properties flagged as table properties
# ("Is table property" == 1); these are the ones a config is generated for.
parsable_properties = all_pro_info_df.loc[
    all_pro_info_df["Is table property"] == 1,
    "Name from prospector database",
].values

In [39]:
# test: uncomment one of the lines below to run the generation loop on a small subset
# parsable_properties = ["Tensile Elongation at Yield", "CLTE, Flow", "DTUL Unannealed"]
# parsable_properties = ["DTUL Unannealed"]

In [None]:
# Build one value-parser profile per table property: its implicit (default)
# unit, the accepted value types, the unbounded-range keywords, and every
# known unit together with its lower-cased written forms.
generated_property_config = {}

# Raw LLM answers per unit. The same unit (e.g. "MPa") recurs across many
# properties, so each distinct unit is only asked about once.
unit_forms_cache = {}

for property_name in parsable_properties:
    # Units proposed by the LLM. Blank entries are dropped so that an empty
    # completion ("".split(", ") == [""]) cannot be picked up as a unit.
    additional_units = [
        unit.strip()
        for unit in get_units_chain.invoke({"input": property_name}).split(", ")
        if unit.strip()
    ]

    # Prefer the unit from the database; otherwise fall back to the first LLM
    # suggestion, or None when there is none.
    db_unit = ProspectorDB.getUnitOf(property_name)
    implicit_unit = db_unit or (additional_units[0] if additional_units else None)

    property_dict = {
        "implicit_unit": implicit_unit,
        "value_types": ["single_value", "range"],
        "unbound_range_support": {
            "and_under_regex": "<|less than",
            "and_over_regex": ">|more than",
            "lower_limit": 0,
            "upper_limit": "infinity",
        },
    }

    # De-duplicate while keeping a stable order (implicit unit first). A plain
    # set would give a different order on every run and make the YAML churn.
    units = list(
        dict.fromkeys(unit for unit in [implicit_unit, *additional_units] if unit)
    )

    units_list = []
    for unit in units:
        if unit not in unit_forms_cache:
            unit_forms_cache[unit] = unit_form_chain.invoke({"input": unit}).split(", ")
        # All forms are stored lower-cased. The previous `filter(lambda x:
        # x.lower(), ...)` only dropped empty strings and never lower-cased.
        unit_forms = {unit.lower()}
        unit_forms.update(
            form.strip().lower() for form in unit_forms_cache[unit] if form.strip()
        )
        units_list.append(
            {
                "norm": unit,
                "forms": sorted(unit_forms),
                "allow_fractions": True,
            }
        )

    # Assigned after the loop so the key exists even when no unit is known.
    property_dict["units"] = units_list
    generated_property_config[label_convert(property_name)] = property_dict


# No `encoding=` here: with it safe_dump returns bytes and print() shows a
# b'...' repr. allow_unicode keeps °, ³ and µ readable instead of escaped.
print(yaml.safe_dump(generated_property_config, allow_unicode=True))


In [None]:
# Display the generated profiles as a Python dict for inspection.
generated_property_config

In [None]:
# Read the existing value-parser configuration that the generated profiles
# are merged into below.
with open("dc_vp_config.yaml", encoding="utf-8") as base_config_file:
    final_config = yaml.safe_load(base_config_file)

In [None]:
# Merge the generated profiles into the base configuration and write the
# result to disk. Generated entries overwrite existing entries with the same
# label key.
final_config["label_profiles"].update(generated_property_config)
with open("property_value_parser_config.yml", "wt", encoding="utf-8") as f:
    # allow_unicode=True writes units such as "°C", "cm³" or "µm" literally;
    # without it PyYAML emits escape sequences ("\xB0C") even though the file
    # is opened as UTF-8, which makes the config hard to read and review.
    yaml.safe_dump(final_config, f, default_flow_style=False, allow_unicode=True)