In [32]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
from functools import partial

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from tqdm import tqdm

from sisyphus.utils.helper_functions import get_plain_articledb

In [33]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
)
article_db = get_plain_articledb('300_heas')

In [34]:
def embed_one(article):
    docs = article_db.get(article)
    with ThreadPoolExecutor(max_workers=20) as worker:
        futures = [worker.submit(vector_store.add_documents, [doc]) for doc in docs]
        for future in futures:
            future.result()

In [35]:
import os

articles = os.listdir('articles_processed')[:10]
articles

['10.1016&sol;j.jallcom.2022.168515.html',
 '10.1016&sol;j.actamat.2024.120498.html',
 '10.1016&sol;j.matlet.2021.130646.html',
 '10.1016&sol;j.jmst.2023.08.054.html',
 '10.1016&sol;j.mtcomm.2024.108835.html',
 '10.1016&sol;j.matchar.2023.113059.html',
 '10.1016&sol;j.msea.2024.147366.html',
 '10.1016&sol;s1003-6326(23)66223-5.html',
 '10.1016&sol;j.msea.2023.144725.html',
 '10.1016&sol;j.msea.2022.143712.html']

In [5]:
with ThreadPoolExecutor(max_workers=10) as worker:
    list(worker.map(embed_one, articles))

In [6]:
import re

K = 3
QUERY_SYN = """Experimental procedures describing the synthesis and processing of materials, including methods such as melting, casting, rolling, annealing, heat treatment, or other fabrication techniques. Details often include specific temperatures (e.g., °C), durations (e.g., hours, minutes), atmospheric conditions (e.g., argon, vacuum), mechanical deformation (e.g., rolling reduction), and microstructural characterization steps. Mentions of material compositions, purity levels, and equipment used are common indicators."""
QUERY_MECHANICAL = """Mechanical properties of high entropy alloys, stress-strain curves, yield strength, ultimate tensile strength, tensile strain, elongation, alloy composition, alloying effects on strength, ductility, engineering stress-strain behavior."""
QUERY_PHASE = """Phase characterization of high entropy alloys, microstructure analysis, crystal structures, phase transitions, XRD patterns, lattice parameters, grain morphology, recrystallization, secondary phases, alloying effects on phases, defect structures, and phase stability."""
def match_subtitles(docs, pattern):
    sub_titles = list(set([doc.metadata["sub_titles"] for doc in docs]))
    target_titles = []
    for title in sub_titles:
        if pattern.search(title):
            target_titles.append(title)
    return target_titles

syn_pattern = re.compile(r'(experiment)|(preparation)|(method)', re.I)
res_pattern = re.compile(r'result', re.I)
test_pattern = re.compile(r'strain\srate', re.I)
def retrieve(vector_store, article, query, sub_titles):
    if sub_titles is None:
        filter_ = {"source": article}
    else:
        filter_ = {"$and":[{
            "sub_titles": {
                "$in": sub_titles
            }},
            {"source": article
        }]}
    return vector_store.similarity_search(
        query,
        k=K,
        filter=filter_
    )

In [7]:
import dspy
lm = dspy.LM('openai/gpt-4o-mini', cache=False)
dspy.configure(lm=lm)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from typing import Literal
class ClassifySyn(dspy.Signature):
    """assign topic to paragraphs of HEAs(high entropy alloys) papers. The topics include synthesis, characterization, and others.
    Note: a qualified synthesis paragraph should include the synthesis and processing of materials, including methods such as melting, casting, rolling, annealing, heat treatment.be very strict about your decision."""
    paragraph: str = dspy.InputField()
    topic: Literal['synthesis', 'characterization', 'others'] = dspy.OutputField()

classfier_syn = dspy.ChainOfThought(signature=ClassifySyn)

In [9]:
def get_syn_para(article):
    docs = article_db.get(article)
    sub_titles = match_subtitles(docs, syn_pattern) or None
    candidates = retrieve(vector_store, article, QUERY_SYN, sub_titles)
    final = []
    for candidate in candidates:
        if classfier_syn(paragraph=candidate.page_content).topic == 'synthesis':
            final.append(candidate)
    return final

In [10]:
def get_target_para(article, query, pattern, classifier, class_):
    docs = article_db.get(article)
    sub_titles = match_subtitles(docs, pattern) or None
    candidates = retrieve(vector_store, article, query, sub_titles)
    final = []
    with ThreadPoolExecutor(5) as worker:
        futures = [worker.submit(classifier, paragraph=candidate.page_content) for candidate in candidates]
        future_doc = {future: candidate for future, candidate in zip(futures, candidates)}
        for future in as_completed(futures):
            if future.result().topic == class_:
                final.append(future_doc[future])
    return final

In [15]:
class ClassifyMech(dspy.Signature):
    """assign topic to paragraphs of HEAs(high entropy alloys) papers. The topics include tensile/compressive with value, tensile/compressive without value, characterization or others.
    Note: a qualified tensile/compressive with value paragraph should explicitly mention at least one value related to yield strength, ultimate yield strength, elongation, or compressive strain, otherwise it should be classified as tensile/compressive without value."""
    paragraph: str = dspy.InputField()
    topic: Literal['tensile/compressive with value', 'tensile/compressive without value', 'characterization', 'others'] = dspy.OutputField()
classifier_mech = dspy.ChainOfThought(signature=ClassifyMech)

In [19]:
res = get_target_para(articles[3], QUERY_MECHANICAL, res_pattern, classifier_mech, 'tensile/compressive with value')

In [20]:
res

[Document(metadata={'doi': '10.1016/j.jmst.2023.08.054', 'source': '10.1016&sol;j.jmst.2023.08.054.html', 'sub_titles': '3 Results/3.2 Mechanical properties', 'title': 'Hierarchical microstructure enables high strength and good ductility in as-cast Fe27Ni35Cr18.25Al13.75Co2Ti2Mo2 high-entropy alloy'}, page_content='Fig. 6 (a) displays a uniaxial engineering tensile stress-strain curve of the as-cast Fe27Ni35Cr18.25Al13.75Co2Ti2Mo2 alloy. The alloy with hierarchical microstructure exhibits a notable strengthening behavior, displaying a YS (σ YS) of ~1056 MPa, an UTS (σ UTS) of ~1526 MPa and a total El of ~15.6%, suggesting that the alloy achieves an outstanding combination of strength and ductility in the as-cast state. The strain hardening rate (SHR) (Θ) during tensile deformation is a key factor contributing to the excellent mechanical properties of the alloy. Fig. 6(b) illustrates the SHR (Θ = ∂σ/∂ε) of this alloy. The Θ during tensile deformation shows a two-stage behavior, lacking 

In [21]:
lm.inspect_history(3)





[34m[2025-01-09T11:43:45.147027][0m

[31mSystem message:[0m

Your input fields are:
1. `paragraph` (str)

Your output fields are:
1. `reasoning` (str)
2. `topic` (Literal[tensile/compressive with value, tensile/compressive without value, characterization, others])

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## paragraph ## ]]
{paragraph}

[[ ## reasoning ## ]]
{reasoning}

[[ ## topic ## ]]
{topic}        # note: the value you produce must be one of: tensile/compressive with value; tensile/compressive without value; characterization; others

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        assign topic to paragraphs of HEAs(high entropy alloys) papers. The topics include tensile/compressive with value, tensile/compressive without value, characterization or others.
        Note: a qualified tensile/compressive with value paragraph should explicitly mention at least one value related to yie

In [22]:
class ClassifyPha(dspy.Signature):
    """assign topic to paragraphs of HEAs(high entropy alloys) papers. The topics include characterization_phase, characterization_others, or others.
    Note: a qualified phase charaterization paragraph should include the descripion of XRD patterns indicating the crystal structures such as FCC, BCC, HCP or other structures."""
    paragraph: str = dspy.InputField()
    topic: Literal['characterization_phase', 'characterization_others', 'others'] = dspy.OutputField()
classifier_pha = dspy.ChainOfThought(signature=ClassifyPha)

In [23]:
res = get_target_para(articles[0], QUERY_PHASE, res_pattern, classifier_pha, 'characterization_phase')

In [24]:
res

[Document(metadata={'doi': '10.1016/j.jallcom.2022.168515', 'source': '10.1016&sol;j.jallcom.2022.168515.html', 'sub_titles': '3 Result and discussion/3.1 Alloy microstructure', 'title': 'Revealing the nano-grained microstructure and mechanical properties of electrochemical boronized AlCoCrFeNi2.1 eutectic high entropy alloy'}, page_content='Fig. 2 shows the microstructure of the as-cast AlCoCrFeNi2.1 alloy. Fig. 2(a) shows the EBSD phase mapping of the typical FCC/B2 lamellar microstructure. Fine B2 lamellae (about 4 µm thick) and FCC lamellae were distributed in the matrix. The volume fraction of FCC and B2 phases was about 73% and 27%. Fig. 2(b) shows the XRD pattern of the as-cast AlCoCrFeNi2.1 alloy, which further confirmed that the typical lamellar dual-phase microstructure of the AlCoCrFeNi2.1 EHEA with ordered body-centered cubic (B2) and face-centered cubic (FCC) phase [7,39,40].')]

In [27]:
lm.inspect_history(3)





[34m[2025-01-09T16:32:31.190992][0m

[31mSystem message:[0m

Your input fields are:
1. `paragraph` (str)

Your output fields are:
1. `reasoning` (str)
2. `topic` (Literal[characterization_phase, characterization_others, others])

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## paragraph ## ]]
{paragraph}

[[ ## reasoning ## ]]
{reasoning}

[[ ## topic ## ]]
{topic}        # note: the value you produce must be one of: characterization_phase; characterization_others; others

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        assign topic to paragraphs of HEAs(high entropy alloys) papers. The topics include characterization_phase, characterization_others, or others.
        Note: a qualified phase charaterization paragraph should include the descripion of XRD patterns indicating the crystal structures such as FCC, BCC, HCP or other structures.


[31mUser message:[0m

[[ ## paragraph ## ]]
Fig.

In [48]:
test_syn_1 = """The HEA with a nominal composition of V10Cr15Mn5Fe35Co10Ni25 (at%) was fabricated using vacuum induction melting furnace using pure elements of V, Cr, Mn, Fe, Co, and Ni (purity >99.9%). The as-cast sample was subjected to homogenization heat treatment at 1100 °C for 6 h under an Ar atmosphere, followed by water quenching. The homogenized sample was cold rolled through multiple passes with a final rolling reduction ratio of ≈79% (from 6.2 to 1.3 mm). The disk-shaped samples (10 mm diameter) were prepared from the cold rolled sheet using electro-discharge machining. The disk samples were annealed at two different conditions (900 °C for 10 min and 1100 °C for 60 min) to obtain microstructure with fine grains and coarse grains, respectively. Finally, the HPT process was carried out on the annealed samples at different turns (N = 1/4, 1, and 5) using a pressure of 6 GPa and a rotation rate of 1 revolution per minute (rpm)."""
test_syn_2 = """The composition of the new alloy is Al16Co16Cr16Fe16Ni30Mo3.8Nb1Hf0.2C1 (in atomic ratio). As-cast 5 kg ingot with 99.9% purity Al, Co, Cr, Fe, Mo, Ni, Nb, Hf, and C elements was prepared by vacuum inductions melting furnace. Chromic acid electrolytic corrosion was used for microstructure observation. The microstructures were observed by Hitachi S-4300 scanning electron microscope and Titan ETEM G2 transmission electron microscope. The box resistance furnace was used for annealing treatments. In this paper, annealing treatment means that the sample was cooled in the air after kept at a certain temperature for a period of time, and the annealing temperature and time were 973 K and 2 ~ 48 h, respectively. The phase analysis instruments were German double single channel scanning inductively coupled plasma atomic emission spectrometer (ICP-AES) and APD-10 X-ray diffractometer (CoKα, 30 kV, 25 mA). The scanning angle was from 20° to 100° and the scanning speed was 4°/min. Tensile tests were conducted with an Instran tensile testing machine. The diameter of the tensile specimen was 5 mm, and the gauge distance was 25 mm."""
test_syn_3 = """Three alloys were prepared with the compositions Al19Co23Fe21Ni37, Al18Co27Fe20Ni35, and Al18Co22Fe20Ni35V5 (atomic percent), hereinafter referred to as Co23, Co27, and Co22V5. Constituent elements (purity better than 99.99 wt%) Al, Co, Fe, Ni, and V were alloyed by vacuum arc melting in a Ti-gettered high-purity argon atmosphere. The melting was repeated at least five times to obtain good chemical homogeneity of the alloys. These ingots of molten alloys were then cast into a water-cooled copper mold. To analyze the phase structure of the alloys, a Bruker D8 X-ray diffraction (XRD) was used with Cu-Kα radiation, scanned in the 2θ range of 20-100° at a speed of 0.06° s-1. Microstructure and chemical composition were analyzed using a field emission scanning electron microscope (SEM, FEI Quanta 650F) equipped with electron backscatter diffraction (EBSD) for grain orientation analysis. The nanoscale microstructure of the cast and deformed specimens was characterized using a spherical-aberration corrected transmission electron microscope (TEM, FEI-Titan-Themis) with an energy dispersive X-ray spectrometry (EDS) detector at 300 kV. The atomic-resolution images were acquired with a high-angle annular dark-field (HAADF) detector. Samples for EBSD and TEM were prepared by grinding with 800, 1200, and 2000 grit SiC papers, followed by ion milling with a GATAN PIPS II 695 instrument. Tensile tests were conducted at room temperature on an INSTRON-5966 material testing machine at a constant strain rate of 1.0 × 10-3 s-1. All tensile specimens were dog-bone shaped with a gauge length of 22 mm, a width of 2.5 mm, and a thickness of 1 mm. Four samples were used in each tensile test to ensure reproducibility."""
test_syn_4 = """The master alloy of AlCoCrFeNi2.1 was prepared from commercially pure elements (Al, Co, Ni: 99.8 wt %; Cr, Fe: 99.5-99.5 wt %). The experimented material was received in vacuum arc remelted condition, the chemical composition of which is listed in Table 1 . The fatigue specimens were prepared according to ASTM standard-E606 [7] in flat sheet geometry and mirror-polished to remove any stress concentration. The material was subjected to a fully reversal (R = -1) strain-controlled push-pull fatigue at strain amplitudes of 0.04, 0.02, 0.015 and 0.01 with a triangular waveform and frequency of 0.1 Hz. The initial microstructure was examined through Electron Backscatter Diffraction (EBSD) by the step size of 500 nm, and the collected data were analyzed through OIM-TSL7 software. X-ray diffraction by Cu Kα radiation was also utilized to characterize the constituent phases. The corresponding fracture surfaces were studied employing field emission scanning electron microscopy (FESEM)."""

In [99]:
class ExtractSteps(dspy.Signature):
    """Extract synthesis steps from a HEAs experimental section. Formated as below:
    Material: [material]
    Synthesis methods
    Fabrication: [fabrication] (methods like induction melting, additive manufacturing etc.)
    Thermo-mechanical processings (if any)
    [processing_1]: [processing_1_parameters] (e.g. 900 °C, 6 h)
    [processing_2]: [processing_2_parameters]
    ...

    Note:
    Only extract the synthesis steps, do not include the characterization steps.
    For those only have melting-casting processings, do not include any thermo-mechanical processings.
    If the author indicates different material results from different processing conditions, include the corresponding material after each condition"""
    paragraph: str = dspy.InputField()
    steps: str = dspy.OutputField()
steps_extractor = dspy.Predict(signature=ExtractSteps)

In [63]:
steps_extractor(paragraph=test_syn_1)

Prediction(
    steps='Material: V10Cr15Mn5Fe35Co10Ni25 (at%)\nSynthesis methods\nFabrication: Vacuum induction melting\nThermo-mechanical processings\nHomogenization: 1100 °C, 6 h\nCold rolling: Final reduction ratio ≈79%\nAnnealing (fine grains): 900 °C, 10 min\nAnnealing (coarse grains): 1100 °C, 60 min\nHPT: Pressure 6 GPa, Rotation rate 1 rpm, Turns (N = 1/4, 1, and 5)'
)

In [100]:
one_shot_example_1 = """Ingots of Co60Ni40 alloy and Co20Cr40Ni40 MEA were fabricated by vacuum arc-melting of pure metals (purity > 99.9 wt.%) under an inert gas (high-purity argon) atmosphere. After melting, they were cooled in a water-cooled copper mold and flipped and re-melted five times to improve compositional homogeneity. Subsequently, the ingots were cold-rolled to a 30% reduction in thickness and homogenized at 1100 °C for 24 h. Then, the homogenized plate of the Co60Ni40 alloy and the Co20Cr40Ni40 MEA were further cold-rolled to a 92% reduction in thickness and annealed at 750 °C for 120 s and 850 °C for 3.6 ks, respectively. These processes yielded fully-recrystallized microstructures of FCC single phase having similar mean grain sizes of about 3 μm (including annealing twins) in the two alloys."""
one_shot_example_2 = """The master alloy of AlCoCrFeNi2.1 was prepared from commercially pure elements (Al, Co, Ni: 99.8 wt %; Cr, Fe: 99.5-99.5 wt %). The experimented material was received in vacuum arc remelted condition, the chemical composition of which is listed in Table 1 ."""

In [101]:
answer = ["""Material: Co60Ni40 alloy and Co20Cr40Ni40 MEA
Synthesis methods
Fabrication: Vacuum arc-melting
Thermo-mechanical processings
Cold-rolling: 30% reduction in thickness
Homogenization: 1100 °C, 24 h
Cold-rolling: 92% reduction in thickness
Annealing:
- 750 °C, 120 s (Co60Ni40 alloy)
- 850 °C, 3.6 ks (Co20Cr40Ni40 MEA)""",
"Material: AlCoCrFeNi2.1\nSynthesis methods\nFabrication: Vacuum arc remelting"]
examples = [dspy.Example(paragraph=para, steps=s).with_inputs('paragraph') for para, s in zip([one_shot_example_1, one_shot_example_2], answer)]

In [102]:
compiler = dspy.LabeledFewShot()
two_shot_steps_extractor = compiler.compile(steps_extractor, trainset=examples)

In [116]:
class ExtractPhases(dspy.Signature):
    """Extract HEAs material microstructure phases from text. Formated as below:
    [material]: [phases] (choose from FCC, BCC, HCP, B2, Laves, L12 or others)
    ...(if multiple materials)
    Note: if there are multiple phases, separate them with commas"""
    paragraph: str = dspy.InputField()
    phases: str = dspy.OutputField()
phases_extractor = dspy.Predict(signature=ExtractPhases)


In [1]:
import langchain
import langchain_openai
import langchain_core

model = langchain_openai.ChatOpenAI(model='gpt-4o-mini-2024-07-18', temperature=0.0)

In [2]:
import langchain_core.prompts


template = langchain_core.prompts.ChatPromptTemplate(
    [
        ('system', """You are a domain expert of HEAs (high entropy alloys). You are provided with following fields:\n1. synthesis steps\n2. phases\n3. text or table description of tensile/compressive properties of HEAs.\n Give your response as user request."""),
        ('user', """Synthesis steps:\n{steps}\nPhases:\n{phases}\n{content}\nPlase extract the tensile or compressive properties along with synthesis steps and phases from the text.""")
    ]
)

In [3]:
from pydantic import BaseModel, Field
from typing import Literal
from typing import Optional
class AlloyRecord(BaseModel):
    composition: str = Field(description='The nominal composition of the alloy')
    phase: Optional[str] = Field(description='The phase of the alloy, such as FCC, BCC, HCP etc. If there are multiple phases, separate them with commas')
    ys: Optional[float] = Field(description='the value of yield strength, convert to MPa if the unit is not MPa, e.g. 1 GPa -> 1000 MPa')
    uts: Optional[float] = Field(description='the value of ultimate tensile strength, convert to MPa if the unit is not MPa, e.g. 1GPa -> 1000 MPa')
    elongation: Optional[float] = Field(description='the value of elongation, convert to percentage if the unit is not percentage, e.g. 1%')
    fabrication: str = Field(description='The fabrication method of the alloy, choose from induction melting, arc melting, mechanical alloying, powder metallurgy, additive manufacturing, gas atomization, spark plasma sintering, other or unknown')
    thermal_mechanical_processings: Optional[str]  = Field(description='The sequential post-processing steps of the alloy separated by vertical bar "|", be briefly, eg., annealed at 900 °C for 4 h | homogenized at 1200 °C for 2 h')

    test_type: Literal['tensile', 'compressive']
    test_temperature: Optional[str] = Field(description='The temperature at which the mechanical properties were tested, e.g. 25 °C')

In [4]:
text = """Material Composition
Nominal Composition: V10Cr15Mn5Fe35Co10Ni25 (atomic percent).
Synthesis Methods
Fabrication: Vacuum induction melting
post-processings
Homogenization: 1100 °C for 6 hours under an Ar atmosphere.
Cooling: Water quenching.
Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm).
Annealing:
Fine-Grained (FG): 900 °C for 10 minutes.
Coarse-Grained (CG): 1100 °C for 60 minutes.
High-Pressure Torsion (HPT):
Pressure: 6 GPa.
Rotation rate: 1 RPM.
Number of turns: 1/4, 1, 5.

test_temperature: room temperature
Material Properties
Mechanical Properties (Stress-Strain Data):
Yield Strength (YS) and Ultimate Tensile Strength (UTS) values extracted for FG and CG samples under different HPT conditions:
HPT Turns	Sample	YS (MPa)	UTS (MPa)	Elongation to Failure (%)
0	FG	430	720	48.1
0	CG	230	532	57.6
1/4	FG	1120	1447	15.9
1/4	CG	1270	1502	17.3
1	FG	1630	1813	12.9
1	CG	1660	1854	14.3
5	FG	1940	1986	6.0
5	CG	1950	2015	6.3"""

In [5]:
steps = """Material Composition
Nominal Composition: V10Cr15Mn5Fe35Co10Ni25 (atomic percent).
Synthesis Methods
Fabrication: Vacuum induction melting
post-processings
Homogenization: 1100 °C for 6 hours under an Ar atmosphere.
Cooling: Water quenching.
Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm).
Annealing:
Fine-Grained (FG): 900 °C for 10 minutes.
Coarse-Grained (CG): 1100 °C for 60 minutes.
High-Pressure Torsion (HPT):
Pressure: 6 GPa.
Rotation rate: 1 RPM.
Number of turns: 1/4, 1, 5."""
phases = """V10Cr15Mn5Fe35Co10Ni25: FCC"""
content = """Mechanical Properties (Stress-Strain Data):
Yield Strength (YS) and Ultimate Tensile Strength (UTS) values extracted for FG and CG samples under different HPT conditions:
HPT Turns	Sample	YS (MPa)	UTS (MPa)	Elongation to Failure (%)
0	FG	430	720	48.1
0	CG	230	532	57.6
1/4	FG	1120	1447	15.9
1/4	CG	1270	1502	17.3
1	FG	1630	1813	12.9
1	CG	1660	1854	14.3
5	FG	1940	1986	6.0
5	CG	1950	2015	6.3"""

In [6]:
from typing import List, Optional
class Records(BaseModel):
    records: Optional[List[AlloyRecord]] = Field(description='The records of the alloy properties')

In [8]:
chain = template | model.with_structured_output(Records, method='json_schema')
chain.invoke({'steps': steps, 'phases': phases, 'content': content})

Records(records=[AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 (atomic percent)', phase='FCC', ys=430.0, uts=720.0, elongation=48.1, fabrication='Vacuum induction melting', thermal_mechanical_processings='Homogenization: 1100 °C for 6 hours under an Ar atmosphere. | Cooling: Water quenching. | Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm). | Annealing: Fine-Grained (FG): 900 °C for 10 minutes.', test_type='tensile', test_temperature='unknown'), AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 (atomic percent)', phase='FCC', ys=230.0, uts=532.0, elongation=57.6, fabrication='Vacuum induction melting', thermal_mechanical_processings='Homogenization: 1100 °C for 6 hours under an Ar atmosphere. | Cooling: Water quenching. | Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm). | Annealing: Coarse-Grained (CG): 1100 °C for 60 minutes.', test_type='tensile', test_temperature='unknown'), AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 

In [9]:
answer = _
print(answer.records)

[AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 (atomic percent)', phase='FCC', ys=430.0, uts=720.0, elongation=48.1, fabrication='Vacuum induction melting', thermal_mechanical_processings='Homogenization: 1100 °C for 6 hours under an Ar atmosphere. | Cooling: Water quenching. | Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm). | Annealing: Fine-Grained (FG): 900 °C for 10 minutes.', test_type='tensile', test_temperature='unknown'), AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 (atomic percent)', phase='FCC', ys=230.0, uts=532.0, elongation=57.6, fabrication='Vacuum induction melting', thermal_mechanical_processings='Homogenization: 1100 °C for 6 hours under an Ar atmosphere. | Cooling: Water quenching. | Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm). | Annealing: Coarse-Grained (CG): 1100 °C for 60 minutes.', test_type='tensile', test_temperature='unknown'), AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 (atomic percent)

In [11]:
answer.records[0]

AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 (atomic percent)', phase='FCC', ys=430.0, uts=720.0, elongation=48.1, fabrication='Vacuum induction melting', thermal_mechanical_processings='Homogenization: 1100 °C for 6 hours under an Ar atmosphere. | Cooling: Water quenching. | Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm). | Annealing: Fine-Grained (FG): 900 °C for 10 minutes.', test_type='tensile', test_temperature='unknown')

In [18]:
from typing import Optional

from pydantic import BaseModel, Field


# Pydantic
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


structured_llm = model.with_structured_output(Joke, method='json_schema')

structured_llm.invoke("Tell me a joke about cats")

BadRequestError: Error code: 400 - {'error': {'message': "Invalid parameter: 'response_format' of type 'json_schema' is not supported with this model. Learn more about supported models at the Structured Outputs guide: https://platform.openai.com/docs/guides/structured-outputs", 'type': 'invalid_request_error', 'param': None, 'code': None}}

In [30]:
from pydantic import BaseModel
from openai import OpenAI

client = OpenAI()
template = langchain_core.prompts.ChatPromptTemplate(
    [
        ('system', """You are a domain expert of HEAs (high entropy alloys). You are provided with following fields:\n1. synthesis steps\n2. phases\n3. text or table description of tensile/compressive properties of HEAs.\n Give your response as user request."""),
        ('user', """Synthesis steps:\n{steps}\nPhases:\n{phases}\n{content}\nPlase extract the tensile or compressive properties along with synthesis steps and phases from the text.""")
    ]
)
completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": """You are a domain expert of HEAs (high entropy alloys). You are provided with following fields:\n1. synthesis steps\n2. phases\n3. text or table description of tensile/compressive properties of HEAs.\n Give your response as user request."""},
        {"role": "user", "content": """Synthesis steps:\n{steps}\nPhases:\n{phases}\n{content}\nPlase extract the tensile or compressive properties along with synthesis steps and phases from the text.""".format(steps=steps, phases=phases, content=content)},
    ],
    response_format=Records,
)

event = completion.choices[0].message.parsed

In [31]:
event.records

[AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 (atomic percent)', phase='FCC', ys=430.0, uts=720.0, elongation=48.1, fabrication='Vacuum induction melting', thermal_mechanical_processings='Homogenization: 1100 °C for 6 hours under an Ar atmosphere | Cooling: Water quenching | Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm) | Annealing: Fine-Grained (FG): 900 °C for 10 minutes | High-Pressure Torsion (HPT): Pressure: 6 GPa | Rotation rate: 1 RPM | Number of turns: 0 | Test Type":"tensile"', test_type='tensile', test_temperature=None),
 AlloyRecord(composition='V10Cr15Mn5Fe35Co10Ni25 (atomic percent)', phase='FCC', ys=230.0, uts=532.0, elongation=57.6, fabrication='Vacuum induction melting', thermal_mechanical_processings='Homogenization: 1100 °C for 6 hours under an Ar atmosphere | Cooling: Water quenching | Cold Rolling: Reduction ratio: ≈79% (thickness reduced from 6.2 mm to 1.3 mm) | Annealing: Coarse-Grained (CG): 1100 °C for 60 minutes | High-Pressur