In [1]:
from dotenv import load_dotenv, find_dotenv
# Load settings from the .env file located by find_dotenv() before any client is created.
# Kept as the cell's last expression so the notebook displays the call's result (True in the saved output).
# NOTE(review): presumably this is where the OpenAI credentials come from — confirm.
load_dotenv(find_dotenv())

True

In [2]:
import json
from ast import literal_eval
from typing import Optional
from functools import partial

import dspy
from pydantic import BaseModel, Field, field_validator
from langchain_openai import ChatOpenAI

from sisyphus.heas.label import label_paras
from sisyphus.chain.paragraph import Paragraph, ParagraphExtend
from sisyphus.chain import Filter, Writer
from sisyphus.strategy.run_strategy import ExtractStrategy
from sisyphus.strategy.pydantic_models_general import Processing, Material, MaterialDescriptionBase
from sisyphus.strategy.utils import get_paras_with_props, get_synthesis_paras
from sisyphus.heas.prompt import *
from sisyphus.utils.helper_functions import get_plain_articledb, get_create_resultdb
from sisyphus.heas.synthesis import get_synthesis_prompt
from sisyphus.strategy.llm_models import categorize_agent


# Two separate model handles are set up here:
#   * `lm` ('openai/gpt-4.1') is installed as dspy's configured LM;
#   * `chat_model` ('gpt-4.1-mini') is a LangChain ChatOpenAI instance that is later passed to `es.build(...)`.
# NOTE(review): the two handles use different model names — confirm this is intentional.
lm = dspy.LM('openai/gpt-4.1')
dspy.configure(lm=lm)
chat_model = ChatOpenAI(model='gpt-4.1-mini')

# Schema for a tensile/compressive test result; registered under the 'strength' key of `models_d`.
# Every field is annotated Optional[str] and declared without a default value, so values are kept
# as free-form strings (number plus unit) rather than parsed numbers.
# NOTE(review): the class docstring and the Field descriptions are presumably forwarded to the LLM
# as schema text — treat them as runtime strings and confirm before rewording them.
class Strength(BaseModel):
    """Tensile/Compressive test results"""
    ys: Optional[str] = Field(description="Yield strength with unit")
    uts: Optional[str] = Field(description="Ultimate tensile/compressive strength with unit")
    strain: Optional[str] = Field(description="Fracture strain with unit %")
    temperature: Optional[str] = Field(description="Test temperature with unit, if not specified, return 'room temperature'")
    strain_rate: Optional[str] = Field(description="Strain rate with unit")
    other_test_conditions: Optional[str] = Field(description="Other test conditions, like in salt environment, hydrogen charging, etc. return None if no extra tensile/compressive test conditions mentioned")

# Schema for phase information; registered under the 'phase' key of `models_d`.
# `phases` is a required list of strings (no default is declared).
# NOTE(review): the docstring and Field description are presumably used as schema text for the
# LLM — confirm before rewording them.
class Phase(BaseModel):
    """Phase information"""
    phases: list[str] = Field(description="List of phases present in the material")

# Schema for a processing route; registered under the 'synthesis' key of `models_d`.
# NOTE(review): this definition shadows the `Processing` imported from
# sisyphus.strategy.pydantic_models_general earlier in the same cell — confirm that is intended.
# The docstring and Field description are kept verbatim because they are presumably sent to the
# LLM as schema text.
class Processing(BaseModel):
    """Processing route for a material
    Return field steps as '[]' if you cannot find any. For fields with unknown value, filled with empty string"""
    steps: str = Field(description="""List of processing steps in chronological order, form them as json object. For example: [{"induction melting": {"temperature": "1500 K"}}, {"annealed": {"temperature": "800 K", "duration": "1h"}}]""")

    @field_validator('steps', mode='after')
    @classmethod
    def load(cls, value: str):
        """Decode the serialized ``steps`` string into Python objects.

        Strict JSON is tried first; if that fails the text is parsed as a
        Python literal (which tolerates single quotes, ``None``, etc.).

        Args:
            value: the already-validated ``steps`` string.

        Returns:
            The decoded object (a list of step dicts when the text follows
            the format requested in the field description).

        Raises:
            ValueError: if the text is neither valid JSON nor a valid Python
                literal. Raising ValueError (rather than letting SyntaxError
                escape) lets pydantic report it as a validation error.
        """
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            # Only a decoding failure triggers the fallback; a bare ``except``
            # would also swallow KeyboardInterrupt/SystemExit.
            pass
        try:
            return literal_eval(value)
        except SyntaxError as exc:
            # literal_eval raises SyntaxError for malformed text (e.g. an empty string).
            raise ValueError(
                f"steps is neither valid JSON nor a Python literal: {value!r}"
            ) from exc

# (system prompt, user prompt) pairs per extraction target, for each of the two modes.
# The two modes share the 'strength' and 'phase' pairs and differ only in the user
# prompt used for 'synthesis'.
prompt_config = {
    mode: {
        'strength': (EXTRACT_PROPERTY_SYS_GENERIC_PROMPT, STRENGTH_PROMPT),
        'phase': (EXTRACT_PROPERTY_SYS_GENERIC_PROMPT, PHASE_PROMPT),
        'synthesis': (EXTRACT_PROCESS_SYS_GENERIC_PROMPT, synthesis_user_prompt),
    }
    for mode, synthesis_user_prompt in (
        ('contextualized', PROCESS_PROMPT),
        ('isolated', PROCESS_ISOLATED_PROMPT),
    )
}

def reconstr_c(paragraphs):
    """Assemble the per-target paragraph bundles for the context-aware mode.

    For every target the synthesis paragraphs come first, followed by the
    paragraphs selected for that target; the combined sequence is handed to
    ``ParagraphExtend.from_paragraphs`` together with the target name.

    Args:
        paragraphs: the paragraph collection accepted by
            ``get_synthesis_paras`` and ``get_paras_with_props``.

    Returns:
        dict with the keys ``"strength"``, ``"phase"`` and ``"synthesis"``,
        each mapped to the corresponding ``ParagraphExtend`` result.
    """
    # Strength: synthesis + 'strength' + 'strain_rate' paragraphs.
    strength_sources = (
        get_synthesis_paras(paragraphs)
        + get_paras_with_props(paragraphs, 'strength')
        + get_paras_with_props(paragraphs, 'strain_rate')
    )
    strength_bundle = ParagraphExtend.from_paragraphs(strength_sources, type='strength')

    # Phase: synthesis + 'phase' paragraphs.
    phase_sources = get_synthesis_paras(paragraphs) + get_paras_with_props(paragraphs, 'phase')
    phase_bundle = ParagraphExtend.from_paragraphs(phase_sources, type='phase')

    # Synthesis: synthesis + 'composition' paragraphs.
    synthesis_sources = get_synthesis_paras(paragraphs) + get_paras_with_props(paragraphs, 'composition')
    synthesis_bundle = ParagraphExtend.from_paragraphs(synthesis_sources, type='synthesis')

    return {
        "strength": strength_bundle,
        "phase": phase_bundle,
        "synthesis": synthesis_bundle,
    }

def reconstr_i(paragraphs):
    """Assemble the per-target paragraph bundles for the isolated mode.

    Unlike ``reconstr_c``, the property bundles do not include the synthesis
    paragraphs, and the synthesis bundle contains only the synthesis
    paragraphs.

    Args:
        paragraphs: the paragraph collection accepted by
            ``get_synthesis_paras`` and ``get_paras_with_props``.

    Returns:
        dict with the keys ``"strength"``, ``"phase"`` and ``"synthesis"``,
        each mapped to the corresponding ``ParagraphExtend`` result.
    """
    # Strength: 'strength' + 'strain_rate' paragraphs only.
    strength_sources = (
        get_paras_with_props(paragraphs, 'strength')
        + get_paras_with_props(paragraphs, 'strain_rate')
    )
    strength_bundle = ParagraphExtend.from_paragraphs(strength_sources, type='strength')

    # Phase: 'phase' paragraphs only.
    phase_sources = get_paras_with_props(paragraphs, 'phase')
    phase_bundle = ParagraphExtend.from_paragraphs(phase_sources, type='phase')

    # Synthesis: synthesis paragraphs only.
    synthesis_sources = get_synthesis_paras(paragraphs)
    synthesis_bundle = ParagraphExtend.from_paragraphs(synthesis_sources, type='synthesis')

    return {
        "strength": strength_bundle,
        "phase": phase_bundle,
        "synthesis": synthesis_bundle,
    }

# Output schema per extraction target; the keys match those used in `prompt_config`
# and in the dicts returned by `reconstr_c` / `reconstr_i`.
models_d = dict(
    strength=Strength,
    phase=Phase,
    synthesis=Processing,
)
 


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Wire the extraction strategy: the two paragraph-reconstruction callbacks, the prompt
# formatter, the categorizer, and the per-target pydantic schemas defined above.
# NOTE(review): `save_to='op.jsonl'` is a relative path — presumably resolved against the
# notebook's working directory; confirm.
es = ExtractStrategy(
    reconstruct_paragraph_context_func=reconstr_c,
    reconstruct_paragraph_isolate_func=reconstr_i,
    formatted_func=get_synthesis_prompt,
    categorize_agent=categorize_agent,
    pydantic_models_dict=models_d,
    save_to='op.jsonl'
)
# Build the strategy with the prompt table and the LangChain chat model.
es.build(prompt_config=prompt_config, chat_model=chat_model)
# Source side of the chain: a Filter over the 'heas_1531' article database.
db = get_plain_articledb('heas_1531')
getter = Filter(db)
# Sink side of the chain: a Writer over the 'context_isolated' result database.
result_db = get_create_resultdb('context_isolated')
writer = Writer(result_db)



In [4]:
# NOTE(review): this import sits in a late cell; the other imports are gathered in the second cell.
from sisyphus.chain.chain_elements import run_chains_with_extarction_history_multi_threads
# Compose the pipeline stages with `+`: getter -> label_paras -> es -> writer.
chain = getter + label_paras + es + writer
# NOTE(review): the meaning of the positional arguments ('heas_test', 5, 'context_isolated', 5)
# is not visible here — confirm against the function's signature.
# NOTE(review): the saved output of this cell ends with
# "TypeError: list.append() takes exactly one argument (2 given)"; the failing call is not
# visible in this notebook, so it must be traced inside the library code.
run_chains_with_extarction_history_multi_threads(chain, 'heas_test', 5, 'context_isolated', 5)

  0%|          | 0/5 [00:17<?, ?it/s]


TypeError: list.append() takes exactly one argument (2 given)