The following is an example of extraction for **one paragraph**.   
Import `extract_synthesis_parameters` from `llm.py` and call a GPT model to extract the synthesis conditions from the paragraph.

In [None]:
from llm import extract_synthesis_parameters

# Sample synthesis paragraph, assembled from adjacent string literals so each
# sentence-sized piece sits on its own line.
paragraph = (
    "2.1. Synthesis of [PbCl 2 (4,4′ -bipy)] (1) "
    "PbCl 2 (27.1 mg, 0.10 mmol), 4,4′ -bipyridine (4,4′ bipy) (15.7 mg, 0.10 mmol) "
    "and 0.80 ml of distilled water were sealed in an evacuated Pyrex tube (7 mm ID, 11 cm long). "
    "The reaction was heated to 130 C at 1C/min, soaked at 130 C for 10 h, and cooled to 30 C at 0.2 C/min. "
    "Colorless crystals formed in the tube."
)

# Run the extraction on that single paragraph and show whatever it returns.
results = extract_synthesis_parameters(
    paragraph=paragraph, example_size=4, model="gpt-3.5-turbo", rag_method="BM25", temperature=0
)
print(results)

The following is an example of synthesis condition extraction for **multiple paragraphs**.   
`FILE_NAME` is the path to a JSON file containing all the paragraphs you want to process. The file must have the following format:
```JSON
[
    {
        "paragraph": "xxxx"
    },
    {
        "paragraph": "xxxx",
    }
]
```
Each entry must have the key `paragraph`, whose value is the text of the synthesis paragraph.

In [None]:
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from llm import extract_synthesis_parameters
import json

# Path of the input JSON file: a list of objects, each read through its
# "paragraph" key by extract_one_paragraph below.
FILE_NAME = "example_input.json"

def extract_one_paragraph(mof):
    """Extract synthesis parameters for one entry and store them on it.

    Args:
        mof: Mapping with a ``"paragraph"`` key holding the synthesis text.
            It is modified in place.

    Returns:
        The same ``mof`` object with a ``"result"`` key set to the value
        returned by ``extract_synthesis_parameters``, or to ``None`` when
        the extraction (or the ``"paragraph"`` lookup) raised an ordinary
        exception.
    """
    import logging  # function-scope import keeps this cell self-contained

    try:
        paragraph = mof["paragraph"]
        mof["result"] = extract_synthesis_parameters(
            paragraph=paragraph,
            example_size=4,
            model="gpt-4-0125-preview",
        )
    except Exception as exc:
        # Best-effort: one failing paragraph must not abort the whole batch.
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate, and log the cause
        # so failures are not completely silent.
        logging.getLogger(__name__).warning("Extraction failed: %r", exc)
        mof["result"] = None
    return mof

# Load the input entries. The encoding is given explicitly so the file is
# read as UTF-8 regardless of the platform's default text encoding.
with open(FILE_NAME, encoding="utf-8") as f:
    mofs = json.load(f)

# Run the extraction across worker threads. extract_one_paragraph writes its
# outcome into each entry in place, so the mapped return values are discarded;
# list() only drains the iterator so tqdm can report progress.
with ThreadPoolExecutor() as executor:
    _ = list(tqdm(executor.map(extract_one_paragraph, mofs), total=len(mofs)))

# ensure_ascii=False emits non-ASCII characters verbatim, so the output file
# must be opened as UTF-8 or the write can fail on non-UTF-8 default locales.
with open("example_output.json", "w", encoding="utf-8") as f:
    json.dump(mofs, f, indent=2, ensure_ascii=False)