### Libraries

In [1]:
from openai import OpenAI, RateLimitError, APIError, APITimeoutError, AuthenticationError, BadRequestError, NotFoundError
from google.api_core.exceptions import ResourceExhausted, RetryError, DeadlineExceeded
import google.generativeai as genai
import tiktoken

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

import time
import os
import ast
from tqdm import tqdm

### Layout

In [2]:
# Lift every pandas display limit (rows, columns, line width, cell width,
# sequence items) so frames are rendered in full.
pd.set_option(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
    'display.max_seq_items', None,
)

### Data

In [3]:
# Both .npy files hold a pickled Python object wrapped in a 0-d array;
# .item() unwraps it. allow_pickle=True executes pickle code on load, so
# only use files from a trusted source.
literature, example = (
    np.load(npy_path, allow_pickle=True).item()
    for npy_path in ('literature.npy', 'example.npy')
)

### General commands for getting response

In [4]:
# Reference answer listing all 36 indicator keys used across the five prompt
# parts. It is never sent to a model; it is only tokenized below to size the
# output-token budget used by get_completion.
# NOTE(review): this is a plain (non-f) string, so the doubled braces '{{' and
# '}}' stay doubled and are tokenized as written - confirm this is intended.
# NOTE(review): 'Soil fertilty' below is spelled differently from the
# 'Soil fertility' key used in the part-2 prompts.
text = "{{'Qualitative method': '1', 'Quantitative method': '1', 'Socio-demo-economic data': '1', 'Environmental data': '1', \
'Individuals': '0', 'Households': '1', 'Subnational groups': '1', 'National groups': '0', 'International groups': '0', \
'Urban': '0', 'Rural': '1', 'Time frame considered': '1', 'Foresight': '0', \
'Rainfall pattern / Variability': '1', 'Temperature change': '0', 'Food scarcity / Famine / Food security': '1', 'Drought / Aridity / Desertification': '1', \
'Floods': '1', 'Erosion / Soil fertilty / Land degradation / Deforestation / Salinisation': '1', 'Self assessment / Perceived environment': '1', \
'Labour migration': '1', 'Marriage migration': '0', 'Refugees': '0', 'International migration': '0', 'Cross-border migration': '0', 'Internal migration': '1', \
'Rural to urban': '1', 'Rural to rural': '1', 'Circular / Seasonal': '1', 'Long distance': '1', 'Short distance': '0', 'Temporal': '1', 'Permanent': '0', \
'Age': '1', 'Gender': '0', 'Ethnicity / Religion': '0'}}"
# The count uses the tokenizer tiktoken associates with "gpt-4.1-mini";
# get_completion applies a per-provider margin (+3, x1.2, x1.3) on top of it.
enc = tiktoken.encoding_for_model("gpt-4.1-mini")
MAX_TOKEN = len(enc.encode(text))

In [5]:
def api_def(provider):
    """Create the API client (or configure the SDK) for one LLM provider.

    API keys are read from ``api.txt`` in the working directory, one key per
    line, in the order: OpenAI, DeepSeek, Gemini.

    Parameters
    ----------
    provider : str
        One of ``'chatgpt'``, ``'deepseek'`` or ``'gemini'``.

    Returns
    -------
    OpenAI or None
        An ``OpenAI`` client for ``'chatgpt'`` and ``'deepseek'``. ``None``
        for ``'gemini'``, where ``genai.configure`` is called instead of
        building a client object.

    Raises
    ------
    ValueError
        If ``provider`` is not one of the supported names.
    IndexError
        If ``api.txt`` has no line for the requested provider.
    """
    key_line_by_provider = {'chatgpt': 0, 'deepseek': 1, 'gemini': 2}
    if provider not in key_line_by_provider:
        # Previously an unknown name fell through and returned None silently.
        raise ValueError(
            f"Unknown provider {provider!r}; expected one of {sorted(key_line_by_provider)}"
        )

    # API keys are hidden in another file; read it and close it before any
    # client is built.
    with open('api.txt', 'r', encoding='utf-8') as file:
        lines = [line.strip() for line in file]

    key_index = key_line_by_provider[provider]
    if key_index >= len(lines):
        raise IndexError(
            f"api.txt has {len(lines)} line(s); the key for {provider!r} is expected on line {key_index + 1}"
        )
    api_key = str(lines[key_index])

    # Different LLMs need different kinds of command
    if provider == 'chatgpt':
        return OpenAI(api_key=api_key, base_url="https://api.openai.com/v1")

    if provider == 'deepseek':
        return OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

    genai.configure(api_key=api_key)
    return None

In [6]:
def get_completion(prompt, provider, model, temperature):
    """Send one prompt to the selected LLM provider and return the reply text.

    The same system instruction is used for every provider; the output length
    is capped relative to ``MAX_TOKEN`` with a provider-specific margin.

    Parameters
    ----------
    prompt : str
        The user message.
    provider : str
        ``'gpt'`` (OpenAI Responses API), ``'ds'`` (DeepSeek chat completions)
        or ``'gemini'`` (google.generativeai).
    model : str
        Model name passed to the provider.
    temperature : float
        Sampling temperature passed to the provider.

    Returns
    -------
    str
        The text of the model's reply.

    Raises
    ------
    ValueError
        If ``provider`` is not one of the supported names.
    """
    if provider not in ('gpt', 'ds', 'gemini'):
        # Previously an unknown provider returned None, which only surfaced
        # later as an AttributeError in the caller.
        raise ValueError(f"Unknown provider {provider!r}; expected 'gpt', 'ds' or 'gemini'")

    system_prompt = "You are an expert in climate mobility area and are conducting a systematic literature review. \
    Your task is to read the provided text and classify it according to the given properties with binary codes. \
    Do not include explanations, personal opinions or provide unrelated meta-comments in the answer. Provide only the classification results."
    messages = [{"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}]

    # Different LLMs need different kinds of command
    if provider == 'gpt':
        client = api_def('chatgpt')
        response = client.responses.create(
            model=model,
            input=messages,
            temperature=temperature,
            max_output_tokens=round(MAX_TOKEN + 3)
        )
        return response.output_text

    if provider == 'ds':
        client = api_def('deepseek')
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=round(MAX_TOKEN * 1.2)
        )
        return response.choices[0].message.content

    # provider == 'gemini'
    api_def('gemini')
    # Pass the system instruction explicitly: the original call sent only the
    # user prompt, so Gemini ran without the role text the other providers get.
    model_gemini = genai.GenerativeModel(model, system_instruction=system_prompt)
    response = model_gemini.generate_content(
        prompt,
        generation_config={"temperature": temperature, "max_output_tokens": round(MAX_TOKEN * 1.3)}
    )
    return response.text

### Prompt and response processing

In [7]:
# Shared preamble interpolated into every ZERO_SHOT_PROMPT_*: states the
# binary-coding task, which sections of a paper to rely on, and the rule to
# assign '0' when information is missing or unclear.
# The f-prefix has no effect here because the text contains no placeholders.
CONTEXT = f"""
We are conducting a systematic review of climate mobility literature. For each paper, your task is to assign some binary codes, namely '0' or '1', \
according to the specific indicators that I will provide, to classify the paper across multiple dimensions for downstream analysis. \n

While reviewing a paper, evaluate *only the content of this paper*. Do not use information from any other paper. Do not infer missing information from \
past tasks or earlier papers. Focus primarily on the abstract, data/methods, results, and conclusions sections. \
Do not rely on introduction, state-of-the-art, related works, or motivation sections, especially when they describe other researchers’ work. \
Only use information from those sections if they explicitly describe what *this paper* does. \n

For each index, output one and only one character: '0' (for 'No') or '1' (Yes), according to the concrete explanation below. Do not provide explanations. \
Each indicator is independent, do not infer one indicator based on another. Please follow strictly the format I will specify later. \
When information is missing or unclear, just assign '0'.
"""

In [13]:
# Zero-shot instructions for part 1: CONTEXT followed by 13 indicators
# (analysis methods, data types, units of analysis, research location types,
# temporal aspects) and the required output dictionary.
# Doubled braces render as single literal braces in this f-string.
ZERO_SHOT_PROMPT_1 = f"""

<Group Leader>: 
{CONTEXT} \n

The following 13 indicators will be used to categorize each paper, regarding its content: \n
a. Methods used for data analysis (refers only to analytical methods, not methods for data collection) (2 indicators): 1. 'Qualitative method' and \
2. 'Quantitative method'. Assign '0' for 'Not explicitly used' or '1' for 'Explicitly used'. A paper may use both (some researchers call it \
'mixed-method research'), one, or even neither of them. \n
b. Types of data used for analysis (only commenting on such points doesn't count) (2 indicators): 3. 'Socio-demo-economic data' ('demo' means \
'demographic') and 4. 'Environmental data'. Assign '0' for 'Not explicitly used' or '1' for 'Explicitly used'. Please do not care about how data is \
collected. A paper may include both, one, or even neither of them. \n
c. Focal demographic units of analysis (refers to the units of analysis, not to scale of research areas) (5 indicators): 5. 'Individuals', \
6. 'Households', 7. 'Subnational groups' (such as community and province), 8. 'National groups', 9. 'International groups'. Assign '0' for \
'Not explicitly concentrated' or '1' for 'Explicitly concentrated'. A paper may consider several, one or even none of them. \n
d. Research location types (refers to the places where investigation took place, not the places regarding migration) (2 indicators): 10. 'Urban', \
11. 'Rural'. Assign '0' for 'Not explicitly focused' or '1' for 'Explicitly focused'. A paper may focus on both, one, or even neither of them. \n
e. Temporal aspects (2 indicators): 12. 'Time frame considered' (such as using temporal analysis), 13. 'Foresight' (i.e., forecast, prediction, and \
future perspectives was/were addressed). Assign '0' for 'Not explicitly focused' or '1' for 'Explicitly focused'. \
A paper may consider both, one, or neither of them. \n

Always output the result in the following Python Dictionary structure with identical order, with each blank replaced by 0 or 1 only, and no extra text: \n
{{'Qualitative method': '_', 'Quantitative method': '_', 'Socio-demo-economic data': '_', 'Environmental data': '_', \
'Individuals': '_', 'Households': '_', 'Subnational groups': '_', 'National groups': '_', 'International groups': '_', \
'Urban': '_', 'Rural': '_', 'Time frame considered': '_', 'Foresight': '_'}}
"""

In [14]:
# Zero-shot instructions for part 2: CONTEXT followed by 7 environmental
# stressor indicators and the required output dictionary.
# Indicator 7 is quoted without a trailing space so that its name matches the
# dictionary key the model is asked to return.
# Doubled braces render as single literal braces in this f-string.
ZERO_SHOT_PROMPT_2 = f"""

<Group Leader>: 
{CONTEXT} \n

The following 7 indicators will be used to categorize each paper, regarding environmental stressors being considered as variables in the study \
(only assign '1' when a stressor is explicitly used for analysis, while only mentioning without evidence doesn't count): \n
1. 'Rainfall pattern / Variability', 2. 'Temperature change', 3. 'Food scarcity / Famine / Food security', \
4. 'Drought / Aridity / Desertification', 5. 'Floods', 6. 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation', \
7. 'Self assessment / Perceived environment'. \n
Note that indicator 7 indicates that the above stressors measured via human perception were included in the paper, \
no matter if data from observation was also included. Assign '0' for 'not explicitly considered' or '1' for 'explicitly considered'. \
For indicators 1 to 6, a paper may include several, one, or none of them. \n

Always output the result in the following Python Dictionary structure with identical order, with each blank replaced by 0 or 1 only, and no extra text: \n
{{'Rainfall pattern / Variability': '_', 'Temperature change': '_', 'Food scarcity / Famine / Food security': '_', 'Drought / Aridity / Desertification': '_', \
'Floods': '_', 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation': '_', 'Self assessment / Perceived environment': '_'}}
"""

In [15]:
# Zero-shot instructions for part 3: CONTEXT followed by 8 migration-type
# indicators and the required output dictionary.
# Doubled braces render as single literal braces in this f-string.
ZERO_SHOT_PROMPT_3 = f"""

<Group Leader>: 
{CONTEXT} \n

The following 8 indicators will be used to categorize each paper, regarding migration types discussed (only assign '1' when a type of migration is \
explicitly mentioned with evidence, while only discussing without any evidence doesn't count): \n
1. 'Labour migration' (migration related to work), 2. 'Marriage migration', 3. 'Refugees', \
4. 'International migration', 5. 'Cross-border migration' (compared to 'International migration' which indicates those migration with longer \
distance, 'Cross-border migration' denotes those migration that people move only from one nation to an adjacent one), 6. 'Internal migration' (those \
migration within one single nation), 7. 'Rural to urban', 8. 'Rural to rural'. A paper might contain several, one, or none of them. \n

Always output the result in the following Python Dictionary structure with identical order, with each blank replaced by 0 or 1 only, and no extra text: \n
{{'Labour migration': '_', 'Marriage migration': '_', 'Refugees': '_', 'International migration': '_', 'Cross-border migration': '_', 'Internal migration': '_', \
'Rural to urban': '_', 'Rural to rural': '_'}}
"""

In [16]:
# Zero-shot instructions for part 4: CONTEXT followed by 5 migration-pattern
# indicators and the required output dictionary.
# Doubled braces render as single literal braces in this f-string.
ZERO_SHOT_PROMPT_4 = f"""

<Group Leader>: 
{CONTEXT} \n

The following 5 indicators will be used to categorize each paper, regarding migration patterns discussed (only assign '1' when a migration pattern is \
explicitly mentioned with evidence): \n
1. 'Circular / Seasonal', 2. 'Long distance', 3. 'Short distance', 4. 'Temporal' (migrants may come back), and 5. 'Permanent' \
(migrants will not come back). A paper can discuss several, one, or none of them. \n

Always output the result in the following Python Dictionary structure with identical order, with each blank replaced by 0 or 1 only, and no extra text: \n
{{'Circular / Seasonal': '_', 'Long distance': '_', 'Short distance': '_', 'Temporal': '_', 'Permanent': '_'}}
"""

In [17]:
# Zero-shot instructions for part 5: CONTEXT followed by 3 demographic
# characteristic indicators and the required output dictionary.
# Doubled braces render as single literal braces in this f-string.
ZERO_SHOT_PROMPT_5 = f"""

<Group Leader>: 
{CONTEXT} \n

The following 3 indicators will be used to categorize each paper, indicating if specific demographic characteristics are used as variables \
(only assign '1' when a characteristic is explicitly used for analysis, while only mentioning without evidence doesn't count): 1. 'Age', 2. 'Gender', \
3. 'Ethnicity / Religion'. A paper may use several, one, or none of them. \n

Always output the result in the following Python Dictionary structure with identical order, with each blank replaced by 0 or 1 only, and no extra text: \n
{{'Age': '_', 'Gender': '_', 'Ethnicity / Religion': '_'}}
"""

In [19]:
# One-shot prompt for part 1: the zero-shot instructions, then example[0]
# wrapped in triple quotes, then a fixed answer written as the
# <Climate Mobility Expert>'s reply.
ONE_SHOT_PROMPT_1 = f"""

{ZERO_SHOT_PROMPT_1} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: Here is my result: \n
{{'Qualitative method': '1', 'Quantitative method': '1', 'Socio-demo-economic data': '1', 'Environmental data': '1', \
'Individuals': '0', 'Households': '1', 'Subnational groups': '1', 'National groups': '0', 'International groups': '0', \
'Urban': '0', 'Rural': '1', 'Time frame considered': '1', 'Foresight': '0'}}
"""

In [20]:
# One-shot prompt for part 2: the zero-shot instructions, then example[0]
# wrapped in triple quotes, then a fixed answer written as the
# <Climate Mobility Expert>'s reply.
ONE_SHOT_PROMPT_2 = f"""

{ZERO_SHOT_PROMPT_2} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: Here is my result: \n
{{'Rainfall pattern / Variability': '1', 'Temperature change': '0', 'Food scarcity / Famine / Food security': '1', 'Drought / Aridity / Desertification': '1', \
'Floods': '1', 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation': '1', 'Self assessment / Perceived environment': '1'}}
"""

In [21]:
# One-shot prompt for part 3: the zero-shot instructions, then example[0]
# wrapped in triple quotes, then a fixed answer written as the
# <Climate Mobility Expert>'s reply.
ONE_SHOT_PROMPT_3 = f"""

{ZERO_SHOT_PROMPT_3} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: Here is my result: \n
{{'Labour migration': '1', 'Marriage migration': '0', 'Refugees': '0', 'International migration': '0', 'Cross-border migration': '0', 'Internal migration': '1', \
'Rural to urban': '1', 'Rural to rural': '1'}}
"""

In [22]:
# One-shot prompt for part 4: the zero-shot instructions, then example[0]
# wrapped in triple quotes, then a fixed answer written as the
# <Climate Mobility Expert>'s reply.
ONE_SHOT_PROMPT_4 = f"""

{ZERO_SHOT_PROMPT_4} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: Here is my result: \n
{{'Circular / Seasonal': '1', 'Long distance': '1', 'Short distance': '0', 'Temporal': '1', 'Permanent': '0'}}
"""

In [23]:
# One-shot prompt for part 5: the zero-shot instructions, then example[0]
# wrapped in triple quotes, then a fixed answer written as the
# <Climate Mobility Expert>'s reply.
ONE_SHOT_PROMPT_5 = f"""

{ZERO_SHOT_PROMPT_5} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: Here is my result: \n
{{'Age': '1', 'Gender': '0', 'Ethnicity / Religion': '0'}}
"""

In [24]:
# Three-shot prompt for part 1 (batched form): the zero-shot instructions,
# then all examples in one block, then three answer dictionaries in a single
# expert reply.
# NOTE(review): `{example}` interpolates the string form of the whole
# `example` object rather than the three papers one by one - confirm this is
# the intended presentation.
THREE_SHOT_PROMPT_1 = f"""

{ZERO_SHOT_PROMPT_1} \n

Now please try to code the following 3 papers: \n
\"\"\"{example}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Qualitative method': '1', 'Quantitative method': '1', 'Socio-demo-economic data': '1', 'Environmental data': '1', \
'Individuals': '0', 'Households': '1', 'Subnational groups': '1', 'National groups': '0', 'International groups': '0', \
'Urban': '0', 'Rural': '1', 'Time frame considered': '1', 'Foresight': '0'}}, \
{{'Qualitative method': '0', 'Quantitative method': '1', 'Socio-demo-economic data': '1', 'Environmental data': '1', \
'Individuals': '0', 'Households': '1', 'Subnational groups': '0', 'National groups': '0', 'International groups': '0', \
'Urban': '1', 'Rural': '1', 'Time frame considered': '1', 'Foresight': '1'}}, \
{{'Qualitative method': '0', 'Quantitative method': '1', 'Socio-demo-economic data': '1', 'Environmental data': '1', \
'Individuals': '1', 'Households': '1', 'Subnational groups': '0', 'National groups': '0', 'International groups': '0', \
'Urban': '0', 'Rural': '1', 'Time frame considered': '1', 'Foresight': '0'}}
"""

In [25]:
# Three-shot prompt for part 2 (batched form): the zero-shot instructions,
# then all examples in one block, then three answer dictionaries in a single
# expert reply.
# NOTE(review): `{example}` interpolates the string form of the whole
# `example` object rather than the three papers one by one - confirm this is
# the intended presentation.
THREE_SHOT_PROMPT_2 = f"""

{ZERO_SHOT_PROMPT_2} \n

Now please try to code the following 3 papers: \n
\"\"\"{example}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Rainfall pattern / Variability': '1', 'Temperature change': '0', 'Food scarcity / Famine / Food security': '1', 'Drought / Aridity / Desertification': '1', \
'Floods': '1', 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation': '1', 'Self assessment / Perceived environment': '1'}}, \
{{'Rainfall pattern / Variability': '1', 'Temperature change': '1', 'Food scarcity / Famine / Food security': '0', 'Drought / Aridity / Desertification': '0', \
'Floods': '0', 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation': '0', 'Self assessment / Perceived environment': '0'}}, \
{{'Rainfall pattern / Variability': '0', 'Temperature change': '0', 'Food scarcity / Famine / Food security': '0', 'Drought / Aridity / Desertification': '0', \
'Floods': '0', 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation': '1', 'Self assessment / Perceived environment': '0'}}
"""

In [26]:
# Three-shot prompt for part 3 (batched form): the zero-shot instructions,
# then all examples in one block, then three answer dictionaries in a single
# expert reply.
# NOTE(review): `{example}` interpolates the string form of the whole
# `example` object rather than the three papers one by one - confirm this is
# the intended presentation.
THREE_SHOT_PROMPT_3 = f"""

{ZERO_SHOT_PROMPT_3} \n

Now please try to code the following 3 papers: \n
\"\"\"{example}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Labour migration': '1', 'Marriage migration': '0', 'Refugees': '0', 'International migration': '0', 'Cross-border migration': '0', 'Internal migration': '1', \
'Rural to urban': '1', 'Rural to rural': '1'}}, \
{{'Labour migration': '1', 'Marriage migration': '0', 'Refugees': '0', 'International migration': '1', 'Cross-border migration': '0', 'Internal migration': '1', \
'Rural to urban': '1', 'Rural to rural': '0'}}, \
{{'Labour migration': '1', 'Marriage migration': '1', 'Refugees': '0', 'International migration': '0', 'Cross-border migration': '0', 'Internal migration': '1', \
'Rural to urban': '1', 'Rural to rural': '0'}}
"""

In [27]:
# Three-shot prompt for part 4 (batched form): the zero-shot instructions,
# then all examples in one block, then three answer dictionaries in a single
# expert reply.
# NOTE(review): `{example}` interpolates the string form of the whole
# `example` object rather than the three papers one by one - confirm this is
# the intended presentation.
THREE_SHOT_PROMPT_4 = f"""

{ZERO_SHOT_PROMPT_4} \n

Now please try to code the following 3 papers: \n
\"\"\"{example}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Circular / Seasonal': '1', 'Long distance': '1', 'Short distance': '0', 'Temporal': '1', 'Permanent': '0'}}, \
{{'Circular / Seasonal': '1', 'Long distance': '1', 'Short distance': '1', 'Temporal': '1', 'Permanent': '1'}}, \
{{'Circular / Seasonal': '1', 'Long distance': '0', 'Short distance': '1', 'Temporal': '1', 'Permanent': '1'}}
"""

In [28]:
# Three-shot prompt for part 5 (batched form): the zero-shot instructions,
# then all examples in one block, then three answer dictionaries in a single
# expert reply.
# NOTE(review): `{example}` interpolates the string form of the whole
# `example` object rather than the three papers one by one - confirm this is
# the intended presentation.
THREE_SHOT_PROMPT_5 = f"""

{ZERO_SHOT_PROMPT_5} \n

Now please try to code the following 3 papers: \n
\"\"\"{example}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Age': '1', 'Gender': '0', 'Ethnicity / Religion': '0'}}, \
{{'Age': '0', 'Gender': '0', 'Ethnicity / Religion': '0'}}, \
{{'Age': '1', 'Gender': '1', 'Ethnicity / Religion': '0'}}
"""

In [30]:
# Three-shot prompt for part 1 (turn-by-turn form): the zero-shot
# instructions, then three leader/expert exchanges, one each for example[0],
# example[1] and example[2], each followed by its own answer dictionary.
THREE_SHOT_SEPARATE_PROMPT_1 = f"""

{ZERO_SHOT_PROMPT_1} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Qualitative method': '1', 'Quantitative method': '1', 'Socio-demo-economic data': '1', 'Environmental data': '1', \
'Individuals': '0', 'Households': '1', 'Subnational groups': '1', 'National groups': '0', 'International groups': '0', \
'Urban': '0', 'Rural': '1', 'Time frame considered': '1', 'Foresight': '0'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[1]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Qualitative method': '0', 'Quantitative method': '1', 'Socio-demo-economic data': '1', 'Environmental data': '1', \
'Individuals': '0', 'Households': '1', 'Subnational groups': '0', 'National groups': '0', 'International groups': '0', \
'Urban': '1', 'Rural': '1', 'Time frame considered': '1', 'Foresight': '1'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[2]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Qualitative method': '0', 'Quantitative method': '1', 'Socio-demo-economic data': '1', 'Environmental data': '1', \
'Individuals': '1', 'Households': '1', 'Subnational groups': '0', 'National groups': '0', 'International groups': '0', \
'Urban': '0', 'Rural': '1', 'Time frame considered': '1', 'Foresight': '0'}}
"""

In [65]:
# Three-shot prompt for part 2 (turn-by-turn form): the zero-shot
# instructions, then three leader/expert exchanges, one each for example[0],
# example[1] and example[2], each followed by its own answer dictionary.
THREE_SHOT_SEPARATE_PROMPT_2 = f"""

{ZERO_SHOT_PROMPT_2} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Rainfall pattern / Variability': '1', 'Temperature change': '0', 'Food scarcity / Famine / Food security': '1', 'Drought / Aridity / Desertification': '1', \
'Floods': '1', 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation': '1', 'Self assessment / Perceived environment': '1'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[1]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Rainfall pattern / Variability': '1', 'Temperature change': '1', 'Food scarcity / Famine / Food security': '0', 'Drought / Aridity / Desertification': '0', \
'Floods': '0', 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation': '0', 'Self assessment / Perceived environment': '0'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[2]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Rainfall pattern / Variability': '0', 'Temperature change': '0', 'Food scarcity / Famine / Food security': '0', 'Drought / Aridity / Desertification': '0', \
'Floods': '0', 'Erosion / Soil fertility / Land degradation / Deforestation / Salinisation': '1', 'Self assessment / Perceived environment': '0'}}
"""

In [32]:
# Three-shot prompt for part 3 (turn-by-turn form): the zero-shot
# instructions, then three leader/expert exchanges, one each for example[0],
# example[1] and example[2], each followed by its own answer dictionary.
THREE_SHOT_SEPARATE_PROMPT_3 = f"""

{ZERO_SHOT_PROMPT_3} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Labour migration': '1', 'Marriage migration': '0', 'Refugees': '0', 'International migration': '0', 'Cross-border migration': '0', 'Internal migration': '1', \
'Rural to urban': '1', 'Rural to rural': '1'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[1]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Labour migration': '1', 'Marriage migration': '0', 'Refugees': '0', 'International migration': '1', 'Cross-border migration': '0', 'Internal migration': '1', \
'Rural to urban': '1', 'Rural to rural': '0'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[2]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Labour migration': '1', 'Marriage migration': '1', 'Refugees': '0', 'International migration': '0', 'Cross-border migration': '0', 'Internal migration': '1', \
'Rural to urban': '1', 'Rural to rural': '0'}}
"""

In [33]:
# Three-shot prompt for part 4 (turn-by-turn form): the zero-shot
# instructions, then three leader/expert exchanges, one each for example[0],
# example[1] and example[2], each followed by its own answer dictionary.
THREE_SHOT_SEPARATE_PROMPT_4 = f"""

{ZERO_SHOT_PROMPT_4} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Circular / Seasonal': '1', 'Long distance': '1', 'Short distance': '0', 'Temporal': '1', 'Permanent': '0'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[1]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Circular / Seasonal': '1', 'Long distance': '1', 'Short distance': '1', 'Temporal': '1', 'Permanent': '1'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[2]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Circular / Seasonal': '1', 'Long distance': '0', 'Short distance': '1', 'Temporal': '1', 'Permanent': '1'}}
"""

In [34]:
# Three-shot prompt for part 5 (turn-by-turn form): the zero-shot
# instructions, then three leader/expert exchanges, one each for example[0],
# example[1] and example[2], each followed by its own answer dictionary.
THREE_SHOT_SEPARATE_PROMPT_5 = f"""

{ZERO_SHOT_PROMPT_5} \n

Now please try to code the following paper: \n
\"\"\"{example[0]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Age': '1', 'Gender': '0', 'Ethnicity / Religion': '0'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[1]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Age': '0', 'Gender': '0', 'Ethnicity / Religion': '0'}}, \n
<Group Leader>: Please try to code another paper below: \n
\"\"\"{example[2]}\"\"\"
\n
<Climate Mobility Expert>: OK, here is my result: \n
{{'Age': '1', 'Gender': '1', 'Ethnicity / Religion': '0'}}
"""

In [35]:
def extract_features(paper: dict, example, provider, model, temperature, part):

    fail_count = 0
    ZERO_SHOT_PROMPT_CURRENT = globals()[f"ZERO_SHOT_PROMPT_{part}"]
    ONE_SHOT_PROMPT_CURRENT = globals()[f"ONE_SHOT_PROMPT_{part}"]
    THREE_SHOT_PROMPT_CURRENT = globals()[f"THREE_SHOT_PROMPT_{part}"]
    THREE_SHOT_SEPARATE_PROMPT_CURRENT = globals()[f"THREE_SHOT_SEPARATE_PROMPT_{part}"]
    while True:
        if example == 3:
            prompt = f"""
            Please answer in a consistent style, performing the following actions step by step: \n
            1 - Read the instructions and the examples, understand the leader's requirements and the export's work on coding papers. \n
            2 - You will be provided with a paper. Please read and give your answer according to the leader's requirement \
            just as what the expert did. \n
            You just need to return the final result of step 2, and make it in Python dictionary format as performed below. Do not explain reasoning. \n

            Here are the instructions and examples: \n"
            f"{THREE_SHOT_PROMPT_CURRENT}\n"
            Here is the new paper, please code it like the examples: \n"
            f"{paper}\n"
            "Keep in mind that output format is Python dictionary, with no extra text."
            """

        elif example == '3s':
            prompt = f"""
            Please answer in a consistent style, performing the following actions step by step: \n
            1 - Read the instructions and the examples, understand the leader's requirements and the expert's work on coding papers. \n
            2 - You will be provided with a paper. Please read and give your answer according to the leader's requirement \
            just as what the expert did. \n
            You just need to return the final result of step 2, and make it in Python dictionary format as performed below. Do not explain reasoning. \n

            Here are the instructions and examples: \n"
            f"{THREE_SHOT_SEPARATE_PROMPT_CURRENT}\n"
            Here is the new paper, please code it like the examples: \n"
            f"{paper}\n"
            "Keep in mind that output format is Python dictionary, with no extra text."
            """

        elif example == 1:
            prompt = f"""
            Please answer in a consistent style, performing the following actions step by step: \n
            1 - Read the instructions and the example, understand the leader's requirements and the expert's work on coding paper. \n
            2 - You will be provided with a paper. Please read and give your answer according to the leader's requirement \
            just as what the expert did. \n
            You just need to return the final result of step 2, and make it in Python dictionary format as performed below. Do not explain reasoning. \n

            Here are the instructions and example: \n"
            f"{ONE_SHOT_PROMPT_CURRENT}\n"
            Here is the new paper, please code it like the example: \n"
            f"{paper}\n"
            "Keep in mind that output format is Python dictionary, with no extra text."
            """
        
        elif example == 0:
            prompt = f"""
            Please answer in a consistent style, performing the following actions step by step: \n
            1 - Read the instructions, understand the leader's requirements regarding coding papers. \n
            2 - You will be provided with a paper. Please read and give your answer according to the leader's requirement. \n
            You just need to return the final result of step 2, and make it in Python dictionary format as performed below. Do not explain reasoning. \n

            Here are the instructions: \n"
            f"{ZERO_SHOT_PROMPT_CURRENT}\n"
            Here is the paper, please code it according to the requirements: \n"
            f"{paper}\n"
            "Keep in mind that output format is Python dictionary, with no extra text."
            """

        response = get_completion(prompt, provider, model, temperature)

        # Parse response when needed
        if response.strip().startswith("```"):
            lines = response.splitlines()
            if lines and lines[0].startswith("```"):
                lines = lines[1:]
            if lines and lines[-1].startswith("```"):
                lines = lines[:-1]
            response = "\n".join(lines)
        response = response.strip()
    
        try:
            data = ast.literal_eval(response)
            return data
        except Exception as e:
            fail_count += 1
            time.sleep(1)
            if fail_count >= 10:
                raise ValueError(f"Can't parse as Python dict: {response}, retried too many times") from e

In [36]:
def extract_to_df(literature: dict, example, provider, model, part):
    """Run one prompt part repeatedly over a temperature grid and tabulate the answers.

    For each temperature in 0.0, 0.4, ..., 1.6 the paper is coded 10 times.
    Each answer dictionary is extended with the run metadata ("provider",
    "model", "few shot", "temperature", "run") and becomes one row.

    Parameters
    ----------
    literature
        Forwarded as the ``paper`` argument of ``extract_features``.
    example, provider, model, part
        Forwarded unchanged to ``extract_features``.

    Returns
    -------
    pandas.DataFrame
        One row per (temperature, run) pair.

    Raises
    ------
    AuthenticationError, BadRequestError, NotFoundError
        Re-raised immediately. They are subclasses of ``APIError`` and were
        previously retried forever, although waiting cannot fix them.
    ValueError
        Propagated from ``extract_features``.
    """
    non_retryable = (AuthenticationError, BadRequestError, NotFoundError)
    retryable = (RateLimitError, APIError, APITimeoutError, ResourceExhausted)

    results = []
    temperatures = np.round(np.arange(0.0, 2.0, 0.4), 1)

    for t in tqdm(temperatures, leave=True):
        for run_id in range(10):
            while True:
                try:
                    features = extract_features(literature, example, provider, model, t, part)
                    break
                except non_retryable:
                    # Must come first: these would otherwise match APIError below.
                    raise
                except retryable:
                    # Transient failure (rate limit, timeout, server error): wait and retry.
                    time.sleep(2)

            features["provider"] = provider
            features["model"] = model
            features["few shot"] = example
            features["temperature"] = t
            features["run"] = run_id
            results.append(features)

    return pd.DataFrame(results)

In [37]:
def concat(literature: dict, example, provider, model):
    """Run the extraction for parts 1-5 and join the five tables side by side.

    Each part is produced by ``extract_to_df``; the tables are inner-joined on
    the run metadata columns, so a row of the result holds the output of all
    five parts for one (provider, model, few shot, temperature, run) combination.
    """
    join_keys = ['provider', 'model', 'few shot', 'temperature', 'run']

    dfs = {}
    for part in tqdm(range(1,6)):
        dfs[f"df_{part}"] = extract_to_df(literature, example, provider, model, part)

    # Fold from the right: merge(df_1, merge(df_2, merge(df_3, merge(df_4, df_5)))).
    merged_df = dfs["df_5"]
    for part in (4, 3, 2, 1):
        merged_df = pd.merge(dfs[f"df_{part}"], merged_df, on=join_keys, how='inner')
    return merged_df

### Results

In [38]:
# gpt-4o-mini, few-shot setting 0
gpt4o_0 = concat(
    literature=literature[0],
    example=0,
    provider='gpt',
    model='gpt-4o-mini',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:40<02:40, 40.22s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:11<01:44, 34.77s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [01:50<01:13, 36.91s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [02:19<00:33, 33.88s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [02:50<00:00, 34.19s/it]
 20%|████████████████▌                                                                  | 1/5 [02:51<11:24, 171.00s/it]
[A%|                                   

In [40]:
# gpt-4o-mini, few-shot setting 1
gpt4o_1 = concat(
    literature=literature[0],
    example=1,
    provider='gpt',
    model='gpt-4o-mini',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:39<02:37, 39.26s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:27<02:13, 44.53s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [02:09<01:26, 43.30s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [02:44<00:40, 40.08s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [03:21<00:00, 40.20s/it]
 20%|████████████████▌                                                                  | 1/5 [03:21<13:24, 201.05s/it]
[A%|                                   

In [39]:
# gpt-4o-mini, few-shot setting 3
gpt4o_3 = concat(
    literature=literature[0],
    example=3,
    provider='gpt',
    model='gpt-4o-mini',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:40<02:41, 40.49s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:19<01:58, 39.37s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [01:56<01:16, 38.35s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [02:30<00:36, 36.94s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [03:18<00:00, 39.78s/it]
 20%|████████████████▌                                                                  | 1/5 [03:18<13:15, 198.92s/it]
[A%|                                   

In [41]:
# gpt-4o-mini, few-shot setting '3s'
gpt4o_3s = concat(
    literature=literature[0],
    example='3s',
    provider='gpt',
    model='gpt-4o-mini',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:39<02:39, 39.96s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:19<01:58, 39.48s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [02:03<01:23, 41.93s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [02:36<00:38, 38.40s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [03:11<00:00, 38.31s/it]
 20%|████████████████▌                                                                  | 1/5 [03:11<12:46, 191.56s/it]
[A%|                                   

In [42]:
# gpt-4.1-mini, few-shot setting 0
gpt41_0 = concat(
    literature=literature[0],
    example=0,
    provider='gpt',
    model='gpt-4.1-mini',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:28<01:54, 28.60s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:53<01:19, 26.52s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [01:20<00:53, 26.80s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [01:45<00:25, 25.98s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [02:10<00:00, 26.12s/it]
 20%|████████████████▌                                                                  | 1/5 [02:10<08:42, 130.63s/it]
[A%|                                   

In [43]:
# gpt-4.1-mini, few-shot setting 1
gpt41_1 = concat(
    literature=literature[0],
    example=1,
    provider='gpt',
    model='gpt-4.1-mini',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:32<02:09, 32.39s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:58<01:26, 28.93s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [01:26<00:56, 28.32s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [02:00<00:30, 30.51s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [02:30<00:00, 30.08s/it]
 20%|████████████████▌                                                                  | 1/5 [02:30<10:01, 150.44s/it]
[A%|                                   

In [44]:
# gpt-4.1-mini, few-shot setting 3
gpt41_3 = concat(
    literature=literature[0],
    example=3,
    provider='gpt',
    model='gpt-4.1-mini',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:37<02:28, 37.24s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:16<01:56, 38.72s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [01:48<01:10, 35.38s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [02:21<00:34, 34.46s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [02:57<00:00, 35.51s/it]
 20%|████████████████▌                                                                  | 1/5 [02:57<11:50, 177.58s/it]
[A%|                                   

In [45]:
# gpt-4.1-mini, few-shot setting '3s'
gpt41_3s = concat(
    literature=literature[0],
    example='3s',
    provider='gpt',
    model='gpt-4.1-mini',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:36<02:25, 36.44s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:13<01:50, 36.80s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [01:47<01:10, 35.39s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [02:19<00:34, 34.32s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [02:53<00:00, 34.68s/it]
 20%|████████████████▌                                                                  | 1/5 [02:53<11:33, 173.43s/it]
[A%|                                   

In [47]:
# deepseek-chat, few-shot setting 0
dsv3_0 = concat(
    literature=literature[0],
    example=0,
    provider='ds',
    model='deepseek-chat',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:47<03:08, 47.04s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:33<02:20, 46.75s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [02:22<01:35, 47.79s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [03:07<00:46, 46.65s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [03:52<00:00, 46.56s/it]
 20%|████████████████▌                                                                  | 1/5 [03:52<15:31, 232.82s/it]
[A%|                                   

In [48]:
# deepseek-chat, few-shot setting 1
dsv3_1 = concat(
    literature=literature[0],
    example=1,
    provider='ds',
    model='deepseek-chat',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:46<03:07, 46.89s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:32<02:18, 46.02s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [02:19<01:32, 46.46s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [03:07<00:47, 47.05s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [03:55<00:00, 47.19s/it]
 20%|████████████████▌                                                                  | 1/5 [03:55<15:43, 235.96s/it]
[A%|                                   

In [49]:
# deepseek-chat, few-shot setting 3
dsv3_3 = concat(
    literature=literature[0],
    example=3,
    provider='ds',
    model='deepseek-chat',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:53<03:32, 53.15s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:44<02:36, 52.01s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [02:35<01:43, 51.65s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [03:27<00:51, 51.70s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [04:17<00:00, 51.53s/it]
 20%|████████████████▌                                                                  | 1/5 [04:17<17:10, 257.67s/it]
[A%|                                   

In [50]:
# deepseek-chat, few-shot setting '3s'
dsv3_3s = concat(
    literature=literature[0],
    example='3s',
    provider='ds',
    model='deepseek-chat',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:55<03:43, 55.91s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [01:44<02:34, 51.55s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [02:34<01:42, 51.05s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [03:24<00:50, 50.52s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [04:11<00:00, 50.39s/it]
 20%|████████████████▌                                                                  | 1/5 [04:11<16:47, 251.97s/it]
[A%|                                   

In [51]:
# gemini-2.0-flash, few-shot setting 0
gemini20_0 = concat(
    literature=literature[0],
    example=0,
    provider='gemini',
    model='gemini-2.0-flash',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:13<00:53, 13.36s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:25<00:37, 12.51s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [00:37<00:24, 12.37s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:49<00:12, 12.08s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:01<00:00, 12.26s/it]
 20%|████████████████▊                                                                   | 1/5 [01:01<04:05, 61.30s/it]
[A%|                                   

In [52]:
# gemini-2.0-flash, few-shot setting 1
gemini20_1 = concat(
    literature=literature[0],
    example=1,
    provider='gemini',
    model='gemini-2.0-flash',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:13<00:52, 13.15s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:26<00:39, 13.25s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [00:39<00:26, 13.08s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:52<00:13, 13.19s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:06<00:00, 13.22s/it]
 20%|████████████████▊                                                                   | 1/5 [01:06<04:24, 66.14s/it]
[A%|                                   

In [53]:
# gemini-2.0-flash, few-shot setting 3
gemini20_3 = concat(
    literature=literature[0],
    example=3,
    provider='gemini',
    model='gemini-2.0-flash',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:51<03:25, 51.48s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [02:01<03:07, 62.37s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [02:22<01:27, 43.66s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [03:18<00:48, 48.55s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [04:09<00:00, 49.85s/it]
 20%|████████████████▌                                                                  | 1/5 [04:09<16:37, 249.26s/it]
[A%|                                   

In [54]:
# gemini-2.0-flash, few-shot setting '3s'
gemini20_3s = concat(
    literature=literature[0],
    example='3s',
    provider='gemini',
    model='gemini-2.0-flash',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:16<01:07, 16.84s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:32<00:47, 15.90s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [00:48<00:32, 16.14s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [01:05<00:16, 16.63s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:26<00:00, 17.36s/it]
 20%|████████████████▊                                                                   | 1/5 [01:26<05:47, 86.80s/it]
[A%|                                   

In [55]:
# gemini-2.5-flash-lite, few-shot setting 0
gemini25_0 = concat(
    literature=literature[0],
    example=0,
    provider='gemini',
    model='gemini-2.5-flash-lite',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:12<00:51, 12.80s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:23<00:35, 11.84s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [00:34<00:22, 11.41s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:45<00:11, 11.26s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:56<00:00, 11.37s/it]
 20%|████████████████▊                                                                   | 1/5 [00:56<03:47, 56.88s/it]
[A%|                                   

In [56]:
# gemini-2.5-flash-lite, few-shot setting 1
gemini25_1 = concat(
    literature=literature[0],
    example=1,
    provider='gemini',
    model='gemini-2.5-flash-lite',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:11<00:46, 11.74s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:22<00:33, 11.18s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [00:33<00:21, 10.94s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:43<00:10, 10.81s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:55<00:00, 11.01s/it]
 20%|████████████████▊                                                                   | 1/5 [00:55<03:40, 55.07s/it]
[A%|                                   

In [57]:
# gemini-2.5-flash-lite, few-shot setting 3
gemini25_3 = concat(
    literature=literature[0],
    example=3,
    provider='gemini',
    model='gemini-2.5-flash-lite',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:14<00:58, 14.54s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:29<00:44, 14.95s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [00:43<00:29, 14.51s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [00:57<00:14, 14.32s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:11<00:00, 14.23s/it]
 20%|████████████████▊                                                                   | 1/5 [01:11<04:44, 71.18s/it]
[A%|                                   

In [58]:
# gemini-2.5-flash-lite, few-shot setting '3s'
gemini25_3s = concat(
    literature=literature[0],
    example='3s',
    provider='gemini',
    model='gemini-2.5-flash-lite',
)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|                                                                                            | 0/5 [00:00<?, ?it/s]
[A%|████████████████▊                                                                   | 1/5 [00:15<01:03, 15.77s/it]
[A%|█████████████████████████████████▌                                                  | 2/5 [00:28<00:42, 14.19s/it]
[A%|██████████████████████████████████████████████████▍                                 | 3/5 [00:43<00:29, 14.55s/it]
[A%|███████████████████████████████████████████████████████████████████▏                | 4/5 [01:07<00:18, 18.12s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:22<00:00, 16.43s/it]
 20%|████████████████▊                                                                   | 1/5 [01:22<05:28, 82.15s/it]
[A%|                                   

In [59]:
# Stack every model / few-shot run into one table, cast all cells to text and save.
all_runs = [
    gpt4o_0, gpt4o_1, gpt4o_3, gpt4o_3s,
    gpt41_0, gpt41_1, gpt41_3, gpt41_3s,
    dsv3_0, dsv3_1, dsv3_3, dsv3_3s,
    gemini25_0, gemini25_1, gemini25_3, gemini25_3s,
    gemini20_0, gemini20_1, gemini20_3, gemini20_3s,
]
df_all = pd.concat(all_runs, ignore_index=True).astype(str)
df_all.to_csv('bi_separate_text.csv', index=False)

### Evaluation

In [66]:
# Load the manually coded reference sheet and the saved LLM codings.
manual_result = pd.read_excel(io='manual.xlsx')
LLM_result = pd.read_csv(filepath_or_buffer='bi_separate_text.csv')

In [67]:
# Keep only the row at position 3 of the manual sheet and strip the
# bibliographic columns, leaving a single-row frame of codes.
# Note: this overwrites `manual_result`, so the cell cannot be re-run without
# re-running the load cell first (a one-row frame has no position 3).
bibliographic_cols = ['ID', 'AUTHOR', 'TITLE']
manual_result = (
    manual_result
    .iloc[[3]]
    .drop(columns=bibliographic_cols)
    .reset_index(drop=True)
)
manual_result

Unnamed: 0,Qualitative method,Quantitative method,Socio-demo-economic data,Environmental data,Individuals,Households,Subnational groups,National groups,International groups,Urban,Rural,Time frame considered,Foresight,Rainfall pattern / Variability,Temperature change,Food scarcity / Famine / Food security,Drought / Aridity / Desertification,Floods,Erosion / Soil fertility / Land degradation / Deforestation / Salinisation,Self assessment / Perceived environment,Labour migration,Marriage migration,Refugees,International migration,Cross-border migration,Internal migration,Rural to urban,Rural to rural,Circular / Seasonal,Long distance,Short distance,Temporal,Permanent,Age,Gender,Ethnicity / Religion
0,0,1,1,1,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0


In [68]:
# Columns that carry the codes to be scored; defined by the manual coding.
value_cols = manual_result.columns # For comparisons

# Run-metadata columns are picked by name rather than by position: a positional
# slice breaks as soon as the metadata columns are not the trailing ones, and it
# would pick up 'accuracy' (added below) when this cell is run a second time.
is_model_col = ~LLM_result.columns.isin(list(value_cols) + ['accuracy'])
model_cols_all = LLM_result.columns[is_model_col] # About models
model_cols = [c for c in model_cols_all if c != 'run'] # Except for 'run'

# Compare every LLM row with the single manual row (aligned on column names);
# accuracy is the share of matching codes in each row.
bool_df = (LLM_result[value_cols] == manual_result.iloc[0])
LLM_result['accuracy'] = bool_df.mean(axis=1)

df_model_acc = LLM_result[model_cols + ['accuracy']].reset_index(drop=True)

In [69]:
def plot(df, provider, model, few_shot, pos):
    """Save a box plot of accuracy per temperature for one model configuration.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'provider', 'model', 'few shot', 'temperature' and
        'accuracy' columns.
    provider, model, few_shot
        Values compared with ``==`` against the corresponding columns to
        select the rows to plot. ``few_shot == '3s'`` is labelled
        "3-shot learning (separate)"; anything else "<few_shot>-shot learning (together)".
    pos : str
        Output directory; it is created if it does not exist yet.

    Returns
    -------
    None
        The figure is written to ``<pos>/<model>_<few_shot>_accuracy_boxplot.png``
        and then closed.
    """
    subset = df[(df['provider'] == provider) & (df['model'] == model) & (df['few shot'] == few_shot)]

    if few_shot != '3s':
        shot_label = f"{few_shot}-shot learning (together)"
    else:
        shot_label = '3-shot learning (separate)'

    # savefig does not create missing directories; an empty `pos` means the
    # current directory and needs nothing created.
    if pos:
        os.makedirs(pos, exist_ok=True)
    filename = f"{model}_{few_shot}_accuracy_boxplot.png"
    filepath = os.path.join(pos, filename)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.set_title(f"Accuracy of {model} under {shot_label} setting at different temperatures")

        sns.boxplot(data=subset, x='temperature', y='accuracy', width=0.5, showfliers=True,
                    boxprops=dict(facecolor="#ff8936", edgecolor='black', linewidth=1),
                    medianprops=dict(color='black', linewidth=1.5),
                    whiskerprops=dict(color="black", linewidth=1),
                    capprops=dict(color="black", linewidth=1),
                    flierprops=dict(marker='o', markerfacecolor="black", markersize=4, alpha=0.5),
                    ax=ax)

        # Set after the seaborn call so these labels replace the column names.
        ax.set_xlabel('Temperature')
        ax.set_ylabel('Accuracy')
        ax.grid(True)
        ax.set_ylim(0.2, 1)
        fig.savefig(filepath, dpi=300, bbox_inches='tight')
    finally:
        # Close even when saving fails so figures do not pile up across the loop.
        plt.close(fig)

In [70]:
# One box plot per (provider, model, few-shot setting) combination.
output_dir = 'plots/text_input_separate_dict'
shot_settings = df_model_acc['few shot'].unique()

for provider in df_model_acc['provider'].unique():
    provider_mask = df_model_acc['provider'] == provider
    for model in df_model_acc.loc[provider_mask, 'model'].unique():
        for few_shot in shot_settings:
            plot(df_model_acc, provider, model, few_shot, output_dir)

In [None]:
# NOTE(review): no visible cell in this section defines `df_model_acc_avg`, so it
# is derived here as the mean accuracy per provider / model / few-shot setting /
# temperature — confirm this is the intended aggregation.
df_model_acc_avg = (
    df_model_acc
    .groupby(['provider', 'model', 'few shot', 'temperature'], as_index=False)['accuracy']
    .mean()
)

plt.figure(figsize=(8, 7))

# Line style encodes the few-shot setting, colour encodes the model.
linestyles = {
    "0": "solid",
    "1": "dashed",
    "3": "dashdot",
    "3s": "dotted"
}

model_list = df_model_acc_avg['model'].unique()
color_map = {model: plt.cm.tab20(i) for i, model in enumerate(model_list)}

for model in model_list:
    for few_shot in df_model_acc_avg['few shot'].unique():
        subset = df_model_acc_avg[(df_model_acc_avg['model'] == model) & (df_model_acc_avg['few shot'] == few_shot)]

        # Look up by text so the style is found whether the column was read
        # back as strings or as integers.
        shot_key = str(few_shot)
        linestyle = linestyles.get(shot_key, "solid")
        color = color_map[model]

        if shot_key != '3s':
            shot_label = f"{few_shot}shot"
        else:
            shot_label = '3shot-separate'
        label = f"{model}-{shot_label}"

        plt.plot(subset['temperature'], subset['accuracy'], marker='o', linestyle=linestyle, color=color, label=label)

plt.xlabel('Temperature')
plt.ylabel('Accuracy')
plt.title('Accuracy with Different Temperatures, Models and Few Shot Settings')
plt.legend(bbox_to_anchor=(1.5, 0.95), loc='upper right')
plt.grid(True)
plt.show()