**Library Imports**

In [2]:
import json
import pandas as pd
from openai import OpenAI
import numpy as np
import os

**Excel Empty Row Cleaning**

In [None]:

df = pd.read_excel('matching_new.xlsx')
df = df.dropna(subset=['DOI'])
print(df)


                                            Author_name     Year  \
0                                            Abadi, B.   (2023).   
1     Abbasi, T., N. J. H. Fard, F. Madadizadeh, H. ...  (2023).   
2     Abbate, E., M. Mirpourian, C. Brondi, A. Balla...  (2022).   
3      Abdalfattah, I. A., W. S. Mogawer and K. Stuart   (2022).   
4            Accorsi, R., G. Baruffaldi and R. Manzini   (2020).   
...                                                 ...      ...   
1423  Ochigue, P. C. D., R. G. Dingcong, J. C. S. Bo...  (2024).   
1424  Stegmann, P., V. Daioglou, M. Londo, D. P. van...  (2022).   
1425                              Zheng, J. and S. Suh   (2019).   
1426  Bachmann, M., C. Zibunas, J. Hartmann, V. Tulu...  (2023).   
1427  Meys, R., A. Katelhon, M. Bachmann, B. Winter,...  (2021).   

                                                  Title  \
0      Impact of attitudes, factual and causal feedb...   
1      Environmental Impact Assessment of Low-Densit...   
2     

**Extract Information from Paper Storage Folder**

In [None]:
import os
import json
import pandas as pd

folder_path = r"E:\science_mining\article_data"

folder_names1 = df['Publisher'] 
df['DOI'] = df['DOI'].str.replace('/', '_')
folder_names2 = df['DOI']

text_list = []
empty_count = 0  

for folder_name1, folder_name2 in zip(folder_names1, folder_names2):
    full_folder_path = os.path.join(folder_path, str(folder_name1), str(folder_name2))
    json_file_path = os.path.join(full_folder_path, str(folder_name2) + '.json')

    if os.path.exists(json_file_path):
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

            if "full_text" in data:
                full_text_data = data["full_text"]

                if full_text_data:  
                    middle_items = full_text_data[1:-1] 
                    combined_text = ' '.join(
                        f"Title:{item.get('title', '')} Text:{item.get('text', '')} {item.get('subsections', [])}"
                        for item in middle_items
                    )
                    text_list.append(combined_text)
                else:
                    empty_count += 1  
                    print(f"full_text is empty: {folder_name2}")  
            else:
                print(f'full_text is null: {json_file_path}')
    else:
        print(f"File {json_file_path} is unavailable")

# 输出 full_text 为空的计数
print(f"full_text is null: {empty_count}")
print(f"full_text is not empty: {len(text_list)}")


**Request LLM**

In [None]:
def send_request(filename):
  client = OpenAI()

  batch_input_file = client.files.create(
    file=open(filename, "rb"),
    purpose="batch"
  )

  batch_input_file_id = batch_input_file.id

  batch = client.batches.create(
      input_file_id=batch_input_file_id,
      endpoint="/v1/chat/completions",
      completion_window="24h"
  )
  return batch

def check_batch_status(batch)
  client = OpenAI()
  retrieve_batch = client.batches.retrieve(batch.id)
  return retrieve_batch

def download_output(retrieve_batch, filename):
  client = OpenAI()
  content = client.files.content(retrieve_batch.output_file_id)
  with open(filename, "wb") as f:
      f.write(content.content)
      
def get_answer_list(filepath):
    content_list, custom_id_digits = [], []
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            data = json.loads(line)
            try:
                choices = data['response']['body']['choices']
                for choice in choices:
                    message = choice['message']
                    if message['role'] == 'assistant':
                        content = message['content']
                        content_list.append(content)
                        
                        custom_id = int(data['custom_id'])
                        custom_id_digits.append(custom_id)
            except (KeyError, IndexError, ValueError):
                continue
    df = pd.DataFrame({'ID': custom_id_digits,'Answer': content_list}).sort_values(by='ID').reset_index(drop=True)
    return df

**Prompt Engineering**

In [5]:

file_path1 = 'resin_addtives_types'
q1 = '''
Respond in JSON format to each of the following questions:
You are an expert in the field of plastics LCA.
1. Based on the text, identify and output the primary modeling plastic types used to construct the LCA model. 
The following rules will help determine the primary modeling of plastics:

1)If the plastic in the text contains detailed modeling content (e.g., experimental synthesis methods, production pathways, or cost estimation for specific plastics), consider it a primary modeling plastic.
2)If plastic is mentioned in the LCI inventory, consider it a primary modeling plastic.
3)If a material is used solely for contextual reference or as a comparative example, it should not be considered a primary modeling plastic.
4)If this plastic only appears in the scenario analysis section (e.g., technological scenarios, policy scenarios) , it should not be considered a primary modeling plastic.
5)If it is bio-based plastic, its attribute should be noted in parentheses when outputting. Attention, all types of fibers are not bio-based plastic

The possible plastic types are as follows:
{PE (Polyethylene), PP (Polypropylene), PVC (Polyvinyl Chloride), PS (Polystyrene), ABS (Acrylonitrile Butadiene Styrene), PMMA (Polymethyl Methacrylate), PA (Polyamide), PC (Polycarbonate), POM (Polyoxymethylene), PPO (Polyphenylene Oxide), PBT (Polybutylene Terephthalate), PET (Polyethylene Terephthalate), PTFE (Polytetrafluoroethylene), PVDF (Polyvinylidene Fluoride),PI (Polyimide), PSF (Polysulfone), PEEK (Polyether Ether Ketone), LCP (Liquid Crystal Polymer), PAR (Polyarylate), PPS (Polyphenylene Sulfide), PUR (Polyurethane), PF (Phenolic Resin), EP (Epoxy Resin), UP (Unsaturated Polyester), Amino plastics, SI (Organosilicon Plastics), Cyanate Ester Resin, PLA (Polylactic Acid), PBS (Polybutylene Succinate), PHA (Polyhydroxyalkanoates), Bio-based PE, Bio-based PET, Bio-based PA,CFRP,GFRP}

If the text mentions plastic type that are not in the above list, output them as well.
If not mentioned specific plastics name, output 'N'.

2. Based on the text, identify whether the article studied plastic additives. 

The possible plastic additives are as follows:
{"plasticizer", "flame retardant", "stabilizer", "reinforcing material", "filler", "lubricant", "colorant", "antioxidant", "curing agent", "compatibilizer", "foaming agent", "antibacterial agent", "coupling agent", "nucleating agent", "impact modifier", "dispersing agent", "anti-foaming agent", "urea scavenger", "devulcanizing agent", "chain extender", "flexibilizer", "crosslinking agent", "fluorescent tracer", "vulcanizing agent"}
If the text mentions additives that are not in the above list, output them as well.
If the text only mentions the use of additives without specifying any plastic additive names, output 'Only use'.
If the text does not mention additives (or the text mentions that additives are excluded) and does not specify any plastic additive names, output 'N'.

Here is an example of the Json output format:
{ 
"Plastic type1":"polyethylene",
"Plastic type2":"polyimide",
"Plastic type3":"polylactic acid(bio-based)",
"Additives type1":"plasticizers",
"Additives type2":"flame retardant",
"Additives type3":"heat stabilizer",
"Additives type4":"reinforcing material",
"Additives type5":"others"

}
Here is an example of the Json output format:
{ 
"Plastic type1":"polyvinyl chloride",
"Additives type1":"N"
}
Here is an example of the Json output format:
{ 
"Plastic type1":"polyvinyl chloride",
"Additives type1":"Only use"
}



'''


In [6]:
file_path2 = 'product_types'
q2='''
Respond in JSON format to each of the following questions only based on the text:
1. You are an expert in the field of plastics LCA.Based on the text, identify and output all plastic products included in the LCA model, and classify them according to the following criteria. 
The criteria consist of ten major product categories (examples for each category are provided):
{
1)Packaging: Films, Bags, Bottles, PTTs (Pots, Trays, and Tubs), Rigids, Other Packaging
2)Building and Construction: Pipes and Ducts, Thermal Insulation, Flooring, Window Profiles, Roof Lining, Other B&C Products
3)Automotive: Vehicles, Cars, Automobiles, Engines, Air ducts, Bumpers, Car Panels, Other Automotive Parts/Components
4)Electrical and Electronic Equipment: Large HH (Household) Appliances, Cooling, Refrigerating and AC (Air Conditioning) Devices, Small HH Appliances, ICT (Information and Communication Technology) Equipment and CE (Consumer Electronics), Other Electrical and Electronic Equipment
5)Agriculture: Films (Silage, Greenhouse, Mulch, Other), Pipes, Other Products
6)Household Items, Furniture, Leisure and Others: Household Items, Toys, Furniture, Sports Items, Medical and Hygiene Items, Other Products
7)Textile: Apparel, Household Textiles, Technical Textiles (Textile Flooring, Textile Furniture, Mobility Textiles, Agrotextiles, Other Technical Textiles)
8)Fuel
9)Waste: Municipal Solid Waste, Solid Waste, PP and PE waste, Other Waste
10)Filler: Concrete Filler
}

Please output the categories of plastic products along with their product names. The product names are placed in parentheses. If the text does not mention any plastic products, only output 'N'.
Always mention: You should just output plastic products!!!
Here is an example of the Json output format:
{ 
"Product type1":"Packaging(plastic bottles)",
"Product type2":"Building and Construction(pipes)"
}
Here is an example of the Json output format:
{ 
"Product type1":"Textiles(apparel)"
}
Here is an example of the Json output format:
{ 
"Product type1":"N"
}

special case1:
{	
1)	if the article focus on the end-of-life stage(overall management, treatment, and corresponding technologies of waste plastics), And in the LCA model, no specific product names are mentioned; instead, the modeling focuses on general concepts like waste plastics (e.g., municipal solid waste, solid waste, PP and PE waste, other waste) , there are two possibilities:
{
First situation, if the article only mentions the disposal of plastics without involving the reuse of waste plastics to produce new plastic products, or if it mentions the reuse of plastic waste but does not specify the plastic products being produce, only one product type needs to be output, here is an example of the Json output format:
 	{ 
"Product type1":"Waste(municipal solid waste)"
}
Here is an example of the Json output format:
 	{ 
"Product type1":" Waste(PP and PE waste)"
}

Second situation, if the article mentions not only the disposal of plastics but also the reuse of waste plastics to produce new plastic products, only the new plastic products needs to be output.
Output example Json format:
{
"Product type1":"Building and Construction(pipes)",
"Product type2":"Packaging(bottles)",
}
}

2) if the article focuses on the end-of-life stage for any specific plastic product or composite plastic product, there are two possibilities:
{
First situation, if the article only mentions the disposal of products without involving the reuse of waste plastics to produce new plastic products, only one result needs to be output, as shown in the Json example below:
{ 
"Product type1":" Electrical and Electronic Equipment(waste LCD)"
}

Second situation, if the article mentions not only the disposal of products but also the reuse of waste plastics to produce new plastic products, both the categories of disposed products and new products need to be output.
Output example Json format:
{
"Product type1":"Packaging(waste plastic bottles)",
"Product type2":"Building and Construction(pipes)"
}
}
}

special case2: if the LCA study object in the article is concrete and mentions the addition of plastics or fibers to the concrete, only one result needs to be output, as shown in the Json example below:
{ 
"Product type1":"filler(concrete filler)"
}

Always mention: You should just output plastic products!!!

'''

In [7]:
file_path3 = 'spatiotemporal_status_software_method'
q3 = '''
Respond in JSON format to each of the following questions:
You are an expert in the field of plastics LCA.
1. Based on the text, identify the status of the target object for life cycle assessment. If the product is at the laboratory stage (e.g., a new material developed through experimentation), respond with '0'; if the product has reached a certain level of production or at the factory stage, respond with '1'. If both are mentioned, output '1'.
2.	Based on the text, identify the temporal and spatial scope and boundaries of the target object for life cycle assessment. If specific information on the product's life cycle time is mentioned, please output the year; if not found, output 'N'. Similarly, if specific spatial information on the product's life cycle is mentioned, please output the geographic location; if not found, output 'N'. (Please pay special attention to the geographic locations which are usually reflected in materials, experimental data, or localized adaptations. If different locations are mentioned at various stages, prioritize the most representative location. If LCA data is clearly sourced from a specific country or region, prioritize that country even if all production stages within that country are not explicitly specified.)
3. Based on the text, identify whether life-cycle impact assessment (LCIA) used relevant software. If software was used, provide the name (e.g., Gabi, SimaPro, openLCA, eFootprint, etc.); if no software was used or mentioned, output 'N'.
4. Based on the text, identify which environmental impact assessment method was used, and provide the name of the method used. Possible methods include: CML, TRACI, EDIP, IMPACT2002+, Eco-indicator95, Eco-indicator99, EPS, LIME, ReCiPe, ILCD, LCSEA, IPCC, etc. If no methods are mentioned, answer "N".
Here is an example of the output Json format:
{ 
"Product status":"1",
"Time":"2020",
"Location":"Global",
"LCI software":" Gabi",
"LCIA method":"CML 2001"
}


'''

In [8]:
file_path4 = 'system_boundries'
q4 = '''
Respond in JSON format to each of the following questions:
You are an expert in the field of plastics LCA.
1. Based on the text, identify whether the system boundary for life cycle assessment (LCA) is cradle-to-gate, cradle-to-grave, cradle-to-cradle, disposal stage (end-of-life phase) only, or others. Here are some rules to help you with classification:
1)If the text explicitly mentions cradle-to-gate or cradle-to-grave, the following rules do not need to be considered.
2)If the text only mentions resource extraction, raw material processing, and manufacturing, without covering the use or disposal stages, it is classified as cradle-to-gate.
3)If resource extraction, manufacturing, and disposal are explicitly mentioned, the system boundary is cradle-to-grave.
4)If the main research focus of the article is waste management and disposal or recycling technologies, and the starting point of the life cycle assessment is waste (with stages before it becomes waste not mentioned), it is classified as disposal stage only.
5)Only if the text explicitly mentions that the system boundary is cradle-to-cradle, it will be classified as cradle-to-cradle.
6)If the system boundary does not fit any of the definitions above, classify it as others.
Case 1: If the system boundary is cradle-to-grave or cradle-to-cradle or disposal stage (end-of-life phase) only:
{
For the end-of-life phase in the LCA model, identify which disposal methods are used in the LCA models. If incineration(energy recovery) is in the model,output"1";else,output "0".If landfill is in the model,output"1"; else, output "0".If recycling is considered in the model, first output "1", then output the name of the recycling process (if no specific process name is mentioned, output "N"); if recycling is not considered in the model, output "0".

Possible recycling processes are as follows:
{
Catalytic Cracking, Catalytic Hydrogenolysis, Olefin Recombination, Gasification, Microwave Pyrolysis, Thermal Cracking, Blend Cracking, Supercritical Fluid Technology, Pyrolysis, Hydrolysis, Methanolysis, Hydrogenolysis, Aminolysis, Solvolysis, Depolymerization, Composting, Mechanical Recycling
}
Attention: Recycling processes in the list are not exhaustive. If processes not included in the list are mentioned in the text, please output them as well.
Case1 output Json format example1:
{ 
"System boundary": "cradle-to-grave",
"Incineration":"1",
"landfill":"0",
"Recycling":"1",
"Recycling method1":"mechanical recycling",
"Recycling method2":"pyrolysis",
"Recycling method3":"gasification"
}
Case1 output Json format example2:
{ 
"System boundary": "cradle-to-grave",
"Incineration":"0",
"landfill":"1",
"Recycling":"0"
}
Case1 output Json format example3:
{ 
"System boundary": "cradle-to-grave",
"Incineration":"1",
"landfill":"1",
"Recycling":"1",
"Recycling method1":"N"
}
Case1 output Json format example4:
{ 
"System boundary": "cradle-to-cradle",
"Incineration":"1",
"landfill":"1",
"Recycling":"1",
"Recycling method1": "N"
}
Case1 output Json format example5:
{ 
"System boundary": "disposal stage only",
"Incineration":"0",
"landfill":"1",
"Recycling":"1",
"Recycling method1":"mechanical recycling",
"Recycling method2":"pyrolysis",
"Recycling method3":"gasification"
}
Case1 output Json format example6:
{ 
"System boundary": "disposal stage only",
"Incineration":"1",
"landfill":"0",
“Recycling”:”1”,
“Recycling method1":"N"
}
}
Case 2: If the system boundary is cradle-to-gate or others, there are no further questions.
{
Case2 example output Json format1:
{ 
"System boundary": "others"
}
Case2 example output Json format2:
{ 
"System boundary": "cradle-to-gate"
}
}


'''

In [9]:
file_path5 = 'use_stage'
q5 = '''
Respond in JSON format to each of the following questions:
1.You are an expert in the field of plastic LCA. Based on the text, answer the question below:
1)if the text states that the use phase is ignored or not considered in the model, answer "1";else,answer "0".
2)if the text  provides inventory values for the use phase, such as resource and energy consumption, emissions, answer"1";else,answer"0".
Here is an example of the json output format:
{
"answer1":"0",
"answer2":"1"
}

}

'''

In [10]:
file_path6 = 'LCI_source'
q6 = '''
Respond in JSON format to each of the following questions:
You are an expert in the field of plastics LCA.
1.	Based on the text, identify the data sources for the LCI inventory. Possible seven sources include: 
{
1)	database (e.g., ecoinvent, Gabi, CLCD-China-ECER 0.8)
2)	public datasets and reports (e.g., statistical reports, Environmental Impact Assessment Report)
3)	laboratory experiments and simulations (e.g. laboratory experiment data, aspen simulation)
4)	literature (e.g. library, literature, yearbook)
5)	researchers (e.g. researchers, research institutions)
6)	industry chain entities (e.g. factories, manufacturers or suppliers, landfill, companies or enterprises)
7)	others
}
Attention:
(1)	If the text mentions the source of LCI, it must belong to one of the seven categories in the above list.
(2)	If the text mentions LCI is provided in the Supporting Information or annex, there’s no need to make additional judgments on this sentence (i.e., there's no need to classify supporting information into any category such as reports).
(3)	If no source of the LCI inventory is mentioned, output 'N'.
 
Here is an example of the Json output format:
{
"LCI source1":"database(ecoinvent, gabi)",
"LCI source2":"literature",
"LCI source3":"public datasets and reports(statistical reports, Environmental Impact Report)",
"LCI source4":"industry chain entities"
}
Here is an example of the Json output format:
{
"LCI source1":"database(ecoinvent)",
}

'''

In [11]:
file_path7 = 'LCI_available'
q7 = '''
Respond in JSON format to each of the following questions:
You are an expert in the field of plastics LCA. 
Based on the Life Cycle Inventory analysis or the Life Cycle Inventory section in the text, answer the following questions:
1.Identify whether the specific input and output inventory values for the production phase of at least five different chemicals are mentioned in the tables of the text (these values typically include decimals). If specific values are mentioned, output "1"; if not, output "0".
Note: 
1)	The results indicators of LCA (e.g., global warming potential) should not be identified as part of the LCI inventory (even though these values also contain decimals);
2) The physical and chemical properties of chemicals and products should not be identified as part of the LCI inventory (even though these values also contain decimals).
3) If the subheading in the text clearly mentions "Life Cycle Inventory" or"Life Cycle Inventory analysis" similar expressions, only examine the content under that specific subheading and ignore other sections of the text.

2.Identify whether the text mentions that the Life Cycle Inventory is provided in an appendix or annex. If mentioned, output "1"; otherwise, output "0".
Note:
1)	If only the results of the LCA are mentioned to be included in the appendix, output "0".

3.If the text states that the LCI is not disclosed or public(e.g., due to commercial or confidentiality reasons), output "1"; otherwise, output "0".
Here is an example of the json output format:
{
"question1":"1",
"question2":"0",
"question3":"0"
}



'''

In [12]:
file_path8 = 'midpoint_name'
q8 = '''
Respond in JSON format to each of the following questions:
1. You are an expert in the field of plastics LCA. Based on the text, identify which midpoint indicators are mentioned in the environmental impact results? Please identify and output them.
Attention:
1)	If the text states that the midpoint indicators are listed in an appendix, output only "In appendix" without including any indicator names or numbers.
2)	If the text only mentions the number of midpoint indicators without specifying any indicator names, output just the number.
3)	If the text does not specify any names or numbers for midpoint indicators/environmental impact categories and does not indicate that the indicators are listed in an appendix, output "N".

Here is an example of the Json output format:
{
"Midpoint_indicators1": "global warming potential",
"Midpoint_indicators2": "freshwater ecotoxicity",
}
Here is an example of the Json output format:
{
"Midpoint_indicators1": "carbon footprint",
}
Here is an example of the Json output format:
{
"Midpoint_indicators1": " In appendix "
}
Here is an example of the Json output format:
{
"Midpoint_indicators1": "15"
}
Here is an example of the Json output format:
{
"Midpoint_indicators1": "N"
}

'''

In [13]:
file_path9 = 'endpoint_name'
q9 = '''
Respond in JSON format to each of the following questions:
1. You are an expert in the field of plastics LCA. Based on the text, identify which endpoint indicators and damage categories are mentioned in the environmental impact results? Please identify and output them.
Attention:
1)	If the text states that the endpoint indicators or damage categories are listed in an appendix, output only "In appendix" without including any indicator names or numbers.
2)	If the text only mentions the number of endpoint indicators without specifying any indicator names, output just the number.
3)	If the text does not specify any names or numbers for endpoint indicators/ damage categories and does not indicate that the indicators are listed in an appendix, output "N".
on output format:
{
"Endpoint_indicators1":"human health",
"Endpoint_indicators2":"ecosystem quality",
"Endpoint_indicators3":"resource scarcity"
}
Here is an example of the Json output format:
{
"Endpoint_indicators1":"human health",
}
Here is an example of the Json output format:
{
"Endpoint_indicators1": " In appendix "
}
Here is an example of the Json output format:
{
"Endpoint_indicators1": "4"
}

Here is an example of the Json output format:
{
"Endpoint_indicators1":"N"
}


'''

In [14]:
file_path10 = 'Analysis_Modelling'
q10 = '''
Respond in JSON format to each of the following questions:
You are an expert in the field of plastics LCA.
1. Based on the provided text, does it explicitly refer to or use uncertainty analysis (e.g., uses the phrase 'uncertainty analysis' or synonyms like 'uncertainty assessment')? If it is used, output '1'; if not, output '0'.

2.Based on the text, identify whether contribution analysis is used. If it is used, output '1'; if not, output '0'.
3.Based on the text, identify whether sensitivity analysis(SA) is used. If it is used, output '1'; if not, output '0'.
4.Based on the text, identify whether scenario analysis (by setting different hypothetical scenarios or improvement options(e.g.technological pathways, energy structures, policies, and regulations) to assess the environmental impacts of the system under varying conditions) is used. If it is used, output '1'; if not, output '0'.
5.Based on the provided text, does it explicitly refer to or use techno-economic analysis (e.g., uses the phrase ' techno-economic analysis' or ' Techno-Economic Assessment' or ' Technical-Economic Analysis' or ' Technology-Economy Analysis' or ' TEA')? If it is used, output '1'; if not, output '0'.
6.Based on the text, identify whether prospective LCA is used. If it is used, output '1'; if not, output '0'.
7.Based on the text, identify whether Material Flow Analysis is mentioned. If it is mentioned, output '1'; if not, output '0'.
8.Based on the text, identify whether Integrated Assessment Model is mentioned. If it is mentioned, output '1'; if not, output '0'.
9.Based on the text, identify whether Input-Output Analysis is mentioned. If it is mentioned, output '1'; if not, output '0'.
10.Based on the text, identify whether Computable General Equilibrium is mentioned. If it is mentioned, output '1'; if not, output '0'.
11.Based on the text, identify whether Geographic Information System is mentioned. If it is mentioned, output '1'; if not, output '0'.


Here is an example of the Json output format:
{
"uncertainty analysis":"1",
"contribution analysis":"0",
"sensitivity analysis":"1",
"scenario analysis":"0",
"techno-economic analysis":"1",
"prospective LCA":"0",
"MFA":"1",
"IAM":"0",
"IO":"1",
"CGE":"0",
"GIS":"0"
}

'''

**API Invocation**

In [None]:
request_list1 = []
request_list2 = []
request_list3 = []
request_list4 = []
request_list5 = []
request_list6 = []
request_list7 = []
request_list8 = []
request_list9 = []
request_list10 = []
for i, text in enumerate(text_list): 
    request1 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body1 = {"model": "gpt-4o","temperature": 0,"response_format": {"type": "json_object"}}
    body1['messages'] = [
        {"role": "system", "content": q1},
        {"role": "user", "content": text}]
    request1['body'] = body1
    request_list1.append(request1)

    request2 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body2 = {"model": "gpt-4o","temperature": 0,  "response_format": {"type": "json_object"}}
    body2['messages'] = [
        {"role": "system", "content": q2},
        {"role": "user", "content": text}]
    request2['body'] = body2
    request_list2.append(request2)

    request3 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body3 = {"model": "gpt-4o","temperature": 0,"response_format": {"type": "json_object"}}
    body3['messages'] = [
        {"role": "system", "content": q3},
        {"role": "user", "content": text}]
    request3['body'] = body3
    request_list3.append(request3)

    request4 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body4 = {"model": "gpt-4o","temperature": 0, "response_format": {"type": "json_object"}}
    body4['messages'] = [
        {"role": "system", "content": q4},
        {"role": "user", "content": text}]
    request4['body'] = body4
    request_list4.append(request4)

    request5 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body5 = {"model": "gpt-4o","temperature": 0, "response_format": {"type": "json_object"}}
    body5['messages'] = [
        {"role": "system", "content": q5},
        {"role": "user", "content": text}]
    request5['body'] = body5
    request_list5.append(request5)

    request6 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body6 = {"model": "gpt-4o","temperature": 0, "response_format": {"type": "json_object"}}
    body6['messages'] = [
        {"role": "system", "content": q6},
        {"role": "user", "content": text}]
    request6['body'] = body6
    request_list6.append(request6)

    request7 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body7 = {"model": "gpt-4o","temperature": 0, "response_format": {"type": "json_object"}}
    body7['messages'] = [
        {"role": "system", "content": q7},
        {"role": "user", "content": text}]
    request7['body'] = body7
    request_list7.append(request7)

    request8 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body8 = {"model": "gpt-4o","temperature": 0, "response_format": {"type": "json_object"}}
    body8['messages'] = [
        {"role": "system", "content": q8},
        {"role": "user", "content": text}]
    request8['body'] = body8
    request_list8.append(request8)

    request9 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body9 = {"model": "gpt-4o","temperature": 0, "response_format": {"type": "json_object"}}
    body9['messages'] = [
        {"role": "system", "content": q9},
        {"role": "user", "content": text}]
    request9['body'] = body9
    request_list9.append(request9)

    request10 = {"custom_id": str(i), "method": "POST", "url": "/v1/chat/completions"}
    body10 = {"model": "gpt-4o","temperature": 0, "response_format": {"type": "json_object"}}
    body10['messages'] = [
        {"role": "system", "content": q10},
        {"role": "user", "content": text}]
    request10['body'] = body10
    request_list10.append(request10)
    
with open(file_path1 + '.jsonl', 'w') as file:
    for entry in request_list1:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')
        
batch1 = send_request(file_path1 + '.jsonl')
print(batch1)

with open(file_path2 + '.jsonl', 'w') as file:
    for entry in request_list2:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch2 = send_request(file_path2 + '.jsonl')
print(batch2)

with open(file_path3 + '.jsonl', 'w') as file:
    for entry in request_list3:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch3 = send_request(file_path3 + '.jsonl')
print(batch3)

with open(file_path4 + '.jsonl', 'w') as file: 
    for entry in request_list4:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch4 = send_request(file_path4 + '.jsonl')
print(batch4)

with open(file_path5 + '.jsonl', 'w') as file: 
    for entry in request_list5:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch5 = send_request(file_path5 + '.jsonl')
print(batch5)

with open(file_path6 + '.jsonl', 'w') as file: 
    for entry in request_list6:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch6 = send_request(file_path6 + '.jsonl')
print(batch6)

with open(file_path7 + '.jsonl', 'w') as file:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch7 = send_request(file_path7 + '.jsonl')
print(batch7)

with open(file_path8 + '.jsonl', 'w') as file: 
    for entry in request_list8:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch8 = send_request(file_path8 + '.jsonl')
print(batch8)

with open(file_path9 + '.jsonl', 'w') as file:  
    for entry in request_list9:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch9 = send_request(file_path9 + '.jsonl')
print(batch9)

with open(file_path10 + '.jsonl', 'w') as file:  
    for entry in request_list10:
        json_line = json.dumps(entry)
        file.write(json_line + '\n')

batch10 = send_request(file_path10 + '.jsonl')
print(batch10)



**Check Batch Task Status**

In [28]:
retrieve_batch1 = check_batch_status(batch1)
retrieve_batch1.request_counts

BatchRequestCounts(completed=20, failed=0, total=20)

**Return Task Results and Save as JSON**

In [None]:
download_output(retrieve_batch1, file_path1 + "_output.jsonl")
answer_df1 = get_answer_list(file_path1 + "_output.jsonl")

**Result Processing and Storage**

In [None]:


all_results = {
    "Plastic_Type": [],
    "Additive_Type": [],
    "Product_Type": [],
    "Product_Status": [],
    "Time": [],
    "Location": [],
    "LCI_Software": [],
    "LCIA_Method": [],
    "Systems_Boundaries": [],
    "Use_Stage": [],
    "LCI_Source": [],
    "LCI_Available": [],
    "Midpoint": [],
    "Endpoint": [],
    "Analysis_Models": [],

}

# 对 answer_df1 进行处理
for (i, row1) in answer_df1.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])  # 转换为字典
    plastic_type = []
    addtive_type = []
    for a_name in answer1:
        if a_name.startswith('Plastic'):
            plastic_type.append(answer1[a_name])
        else:
            addtive_type.append(answer1[a_name])
    all_results["Plastic_Type"].append(plastic_type)
    all_results["Additive_Type"].append(addtive_type)


for (i, row1) in answer_df2.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    product_type = []
    for a_name in answer1:
        product_type.append(answer1[a_name])
    all_results["Product_Type"].append(product_type)

for (i, row1) in answer_df3.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    product_status = []
    time = []
    location = []
    lci_software = []
    lcia_method = []
    for a_name in answer1:
        if a_name.startswith('Product'):
            product_status.append(answer1[a_name])
        elif a_name.startswith('Time'):
            time.append(answer1[a_name])
        elif a_name.startswith('Location'):
            location.append(answer1[a_name])
        elif a_name.startswith('LCIA'):
            lcia_method.append(answer1[a_name])
        else:
            lci_software.append(answer1[a_name])
    all_results["Product_Status"].append(product_status)
    all_results["Time"].append(time)
    all_results["Location"].append(location)
    all_results["LCI_Software"].append(lci_software)
    all_results["LCIA_Method"].append(lcia_method)

for (i, row1) in answer_df4.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    systems_boundries = []
    for a_name in answer1:
        systems_boundries.append(answer1[a_name])
    all_results["Systems_Boundaries"].append(systems_boundries)

for (i, row1) in answer_df5.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    lci = []
    for a_name in answer1:
        lci.append(answer1[a_name])
    all_results["Use_Stage"].append(lci)

for (i, row1) in answer_df6.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    result = []
    for a_name in answer1:
        result.append(answer1[a_name])
    all_results["LCI_Source"].append(result)

for (i, row1) in answer_df7.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    analysis = []
    for a_name in answer1:
        analysis.append(answer1[a_name])
    all_results["LCI_Available"].append(analysis)

for (i, row1) in answer_df8.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    advice = []
    for a_name in answer1:
        advice.append(answer1[a_name])
    all_results["Midpoint"].append(advice)
    
for (i, row1) in answer_df9.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    advice = []
    for a_name in answer1:
        advice.append(answer1[a_name])
    all_results["Endpoint"].append(advice)

for (i, row1) in answer_df10.iterrows():
    id = int(row1['ID'])
    answer1 = json.loads(row1['Answer'])
    advice = []
    for a_name in answer1:
        advice.append(answer1[a_name])
    all_results["Analysis_Models"].append(advice)
#print(test_research['answer2'])

for key, value in all_results.items():
    print(f"{key}: {len(value)}")

final_df = pd.DataFrame(all_results)

final_df.to_excel('result_check_turbo.xlsx', index=False)



Plastic_Type: 20
Additive_Type: 20
Product_Type: 20
Product_Status: 20
Time: 20
Location: 20
LCI_Software: 20
LCIA_Method: 20
Systems_Boundaries: 20
Use_Stage: 20
LCI_Source: 20
LCI_Available: 20
Midpoint: 20
Endpoint: 20
Analysis_Models: 20
