In [1]:
# Background reading on engineering vs. true stress-strain (tensile testing): https://yasincapar.com/engineering-stress-strain-vs-true-stress-strain/
from typing import Optional, Literal

import dspy
from pydantic import BaseModel, Field

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
# Build the language-model client (gpt-4o-mini, 2000-token limit, cache=False)
# and register it as the LM that DSPy modules use.
lm = dspy.LM(
    'openai/gpt-4o-mini',
    max_tokens=2000,
    cache=False,
)
dspy.configure(lm=lm)

In [3]:
# Controlled vocabularies, expressed as typing.Literal aliases.
# Of these, only TYPE is referenced by the schema classes visible in this notebook.

# Ways the alloy can be fabricated.
FABRICATION_METHODS = Literal[
    'induction melting',
    'arc melting',
    'mechanical alloying',
    'powder metallurgy',
    'additive manufacturing',
    'gas atomization',
    'spark plasma sintering',
    'other',
]

# Post-processing treatments.
POST_PROCESSINGS = Literal[
    'annealed',
    'homogenized',
    'aged',
    'quenched',
    'cold-rolled',
    'hot-rolled',
    'high-pressure torsion',
    'cryogenic',
    'other',
]

# Microstructural phase labels.
MICRO_PHASES = Literal[
    'FCC',
    'BCC',
    'B2',
    'L12',
    'HCP',
    'Laves',
    'IM',
    'other',
]

# Mechanical property names.
PROPERTIES = Literal[
    'YS',
    'UTS',
    'Elongation',
]

# Sentinel value for information that is not reported.
NOT_GIVEN = Literal['NOT GIVEN']

# Kind of mechanical test.
TYPE = Literal[
    'tensile',
    'compression',
]

In [38]:
# define the data schema for the data we want to extract
class Fabrication(BaseModel):
    # Schema for a fabrication step: a single free-text method name.
    # (A '#' comment is used instead of a docstring so the generated JSON schema is unchanged.)
    method: str = Field(
        description='The method used to fabricate the material',
    )
    
class PostProcessing(BaseModel):
    # Schema for one post-processing step: the method name plus an optional
    # dict of conditions.
    # (A '#' comment is used instead of a docstring so the generated JSON schema
    # gains no extra 'description' entry.)
    method: str = Field(
        description='The method used to post-process the material',
    )
    # NOTE(review): no default is given, so under Pydantic v2 the key is presumably
    # still required even though its value may be None -- confirm this is intended.
    # The description previously contained a mis-encoded degree sign ("Â°C"); it is
    # repaired here because this text is part of the schema.
    condition: Optional[dict] = Field(
        description='The condition of the post-processing method, e.g. temperature: 900 °C or duration: 4 h, etc.',
    )

class Property(BaseModel):
    # Schema for one reported mechanical property: its name, numeric value and unit.
    # The name is a plain string; the allowed names are only listed in the description.
    property: str = Field(
        description='The mechanical property of the alloy, choose from YS, UTS, Elongation',
    )
    value: float
    unit: str

class TestParameter(BaseModel):
    # Schema for the conditions of a mechanical test; both fields are required strings.
    # Not referenced by the other cells visible in this notebook.
    temperature: str
    strain_rate: str

class AlloyRecord(BaseModel):
    # Flat record describing one alloy sample: composition, three mechanical
    # properties, processing route and test conditions. Used as the element type of
    # ExtractRecord.record, so the field descriptions below are part of the schema
    # and are kept verbatim apart from repairing the mis-encoded degree signs
    # ("Â°C" -> "°C").
    # (A '#' comment is used instead of a docstring so model_json_schema() gains no
    # top-level 'description' entry.)
    # NOTE(review): the Optional[...] fields have no default, so under Pydantic v2
    # the keys are presumably still required while their values may be None -- confirm.
    composition: str = Field(
        description='The nominal composition of the alloy',
    )

    # Mechanical properties.
    property_ys: Optional[float] = Field(
        description='the value of yield strength, convert to MPa if the unit is not MPa, e.g. 1GPa -> 1000 MPa',
    )
    property_uts: Optional[float] = Field(
        description='the value of ultimate tensile strength, convert to MPa if the unit is not MPa, e.g. 1GPa -> 1000 MPa',
    )
    property_elongation: Optional[float] = Field(
        description='the value of elongation, convert to percentage if the unit is not percentage, e.g. 1%',
    )

    # Processing route; both are free-text strings, the vocabulary is only suggested
    # through the description.
    fabrication: str = Field(
        description='The fabrication method of the alloy, choose from induction melting, arc melting, mechanical alloying, powder metallurgy, additive manufacturing, gas atomization, spark plasma sintering, other',
    )
    post_processings: str = Field(
        description='The sequential post-processing steps of the alloy separated by vertical bar "|", be briefly, eg., annealed at 900 °C for 4 h | homogenized at 1200 °C for 2 h',
    )

    # Test conditions; test_type is restricted to the TYPE literal ('tensile' / 'compression').
    test_type: TYPE
    test_temperature: Optional[str] = Field(
        description='The temperature at which the mechanical properties were tested, e.g. 25 °C',
    )
    test_strain_rate: Optional[str] = Field(
        description='The strain rate at which the mechanical properties were tested, e.g. 0.001/s',
    )

In [39]:
# Display the JSON schema generated for AlloyRecord to check the field types and descriptions.
AlloyRecord.model_json_schema()

{'properties': {'composition': {'description': 'The nominal composition of the alloy',
   'title': 'Composition',
   'type': 'string'},
  'property_ys': {'anyOf': [{'type': 'number'}, {'type': 'null'}],
   'description': 'the value of yield strength, convert to MPa if the unit is not MPa, e.g. 1GPa -> 1000 MPa',
   'title': 'Property Ys'},
  'property_uts': {'anyOf': [{'type': 'number'}, {'type': 'null'}],
   'description': 'the value of ultimate tensile strength, convert to MPa if the unit is not MPa, e.g. 1GPa -> 1000 MPa',
   'title': 'Property Uts'},
  'property_elongation': {'anyOf': [{'type': 'number'}, {'type': 'null'}],
   'description': 'the value of elongation, convert to percentage if the unit is not percentage, e.g. 1%',
   'title': 'Property Elongation'},
  'fabrication': {'description': 'The fabrication method of the alloy, choose from induction melting, arc melting, mechanical alloying, powder metallurgy, additive manufacturing, gas atomization, spark plasma sintering, o

In [40]:
# DSPy signature for the extraction task: one text input, a list of AlloyRecord as output.
# NOTE(review): DSPy presumably uses the docstring below as the task instructions, which
# would make it runtime text rather than documentation -- confirm before rewording it.
# The wording mentions only yield strength, while AlloyRecord also has UTS and elongation fields.
class ExtractRecord(dspy.Signature):
    """extract all alloy yield strength from table along with the fabrication and post-processing methods sequentially"""
    text: str = dspy.InputField()  # passage to extract from
    record: list[AlloyRecord] = dspy.OutputField()  # extracted records

# Chain-of-thought module built from the signature; it is not invoked in the cells shown here.
extractor = dspy.ChainOfThought(signature=ExtractRecord)

In [41]:
# Sample passage used as extraction input: composition, synthesis route,
# post-processing steps, test conditions and a tab-separated table of properties.
# Mis-encoded characters in the original literal ("Â°", "â‰ˆ", "Ã—") are repaired
# to the intended "°", "≈" and "×"; the rest of the text is unchanged.
text = """Material Composition
Nominal Composition: V10Cr15Mn5Fe35Co10Ni25 (atomic percent).
Synthesis Methods
Fabrication: Vacuum induction melting
post-processings
Homogenization: 1100 °C for 6 hours under an Ar atmosphere.
Cooling: Water quenching.
Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm).
Annealing:
Fine-Grained (FG): 900 °C for 10 minutes.
Coarse-Grained (CG): 1100 °C for 60 minutes.
High-Pressure Torsion (HPT):
Pressure: 6 GPa.
Rotation rate: 1 RPM.
Number of turns: 1/4, 1, 5.

test_strain_rate: 1 × 10-3 s-1
test_temperature: room temperature
Material Properties
Mechanical Properties (Stress-Strain Data):
Yield Strength (YS) and Ultimate Tensile Strength (UTS) values extracted for FG and CG samples under different HPT conditions:
HPT Turns	Sample	YS (MPa)	UTS (MPa)	Elongation to Failure (%)
0	FG	430	720	48.1
0	CG	230	532	57.6
1/4	FG	1120	1447	15.9
1/4	CG	1270	1502	17.3
1	FG	1630	1813	12.9
1	CG	1660	1854	14.3
5	FG	1940	1986	6.0
5	CG	1950	2015	6.3"""

In [54]:
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

# Few-shot examples for the chat prompt. Each entry must be a dict with "input" and
# "output" keys, matching the placeholders of example_prompt below.
# The list is empty: the previous tutorial examples were assigned and then immediately
# overwritten by [], so that dead assignment has been dropped.
examples = []

# Template used to format each individual example: a human turn followed by the AI reply.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)

# Sanity check: with no examples this prints an empty message list.
print(few_shot_prompt.invoke({}).to_messages())

[]


In [56]:
# Full chat prompt: system instruction, then the few-shot examples, then the user's input.
# NOTE(review): the system message contains typos ("discription", "synthsis"); it is left
# byte-identical here because it is prompt text sent to the model -- fix deliberately if desired.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an expert in HEAs domain, please give a brief discription of the alloy synthsis and post-processing while focusing on the method and condition of each step."),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)
# Render the prompt with a placeholder input to inspect the resulting messages.
# The parrot emoji was mis-encoded in the original literal and is repaired here.
# NOTE(review): the saved output of this cell shows a different system message
# ('You are a wondrous wizard of math.'), so it predates the current prompt; re-run to refresh it.
final_prompt.invoke({'input': '2 🦜 2'})

ChatPromptValue(messages=[SystemMessage(content='You are a wondrous wizard of math.', additional_kwargs={}, response_metadata={}), HumanMessage(content='2 ðŸ¦œ 2', additional_kwargs={}, response_metadata={})])