In [None]:
!pip -q install google-genai

Zero-shot limitation generation

In [None]:
# Paper sections stitched into the prompt text, as (label, source column)
# pairs in the order they appear in the output string.
_PROMPT_SECTIONS = [
    ("Abstract", "df_Abstract"),
    ("Introduction", "df_Introduction"),
    ("Related Work", "df_Related_Work"),
    ("Methodology", "df_Methodology"),
    ("Dataset", "df_Dataset"),
    ("Conclusion", "df_Conclusion"),
    ("Experiment and Results", "df_Experiment_and_Results"),
    ("Other1", "Extra"),
    ("Other2", "Extra_Top"),
]

# Blank out missing sections *before* formatting: interpolating a NaN into an
# f-string yields the literal text "nan", and a fillna() on the finished string
# column can no longer remove it.
_section_text = df[[column for _, column in _PROMPT_SECTIONS]].fillna("")

# One "Label: text" line per section, each terminated by a newline.
df['response_string_all'] = _section_text.apply(
    lambda row: "".join(f"{label}: {row[column]}\n" for label, column in _PROMPT_SECTIONS),
    axis=1,
)

# To run this code you need:
#   pip install google-genai pandas

import os
import time
import pandas as pd

from google import genai
from google.genai import types
# Output column for the zero-shot run; overwritten with the results after the loop.
df["generated_limitations_gemini"] = ''

# Replace missing values so that str() below never turns a NaN into the text "nan".
df["response_string_all"] = df["response_string_all"].fillna("")
df["retrieved_text"]      = df["retrieved_text"].fillna("")

# ——— Global list to hold each generated limitation result (one entry per row) ———
generated_limitations: list[str] = []

# ——— Initialize the Gemini client ———
# Validate the key *before* building the client: the constructor never returns
# None, so checking the client object afterwards cannot detect a missing key.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set or invalid.")
client = genai.Client(api_key=api_key)

model = "gemini-1.5-flash"  # or "gemini-1.5-pro"
gen_config = types.GenerateContentConfig(response_mime_type="text/plain")

instruction = (
    "You are a helpful, respectful, and honest assistant for generating limitations or shortcomings of a research paper.\n Generate limitations or shortcomings from the scientific paper.':\n\n"
)

total = len(df)
print(f"→ Starting generation for {total} samples using model '{model}'")

# `position` is the 0-based loop counter; it is used for progress output and
# throttling so the loop does not depend on df having a 0..n-1 integer index.
for position, (_, row) in enumerate(df.iterrows()):
    # Build the LLM input by concatenating the paper text and the retrieved text
    combined_input = (
        str(row["response_string_all"])
        + "\n\nThis is the retrieved text from cited papers:\n"
        + str(row["retrieved_text"])
    )

    # Prepend our instruction
    full_prompt = instruction + combined_input

    # Build the request payload
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=full_prompt)]
        )
    ]

    # Send to Gemini (streaming) and collect all text
    try:
        pieces: list[str] = []
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=gen_config,
        ):
            # A streamed chunk may carry no text (None); skip it instead of
            # failing the whole row on `str + None`.
            text = chunk.text
            if text:
                pieces.append(text)

        # An empty response is kept as "" so the list stays aligned with df
        result = "".join(pieces).strip()
        print(f"  ✓ Row {position + 1} done.")

    except Exception as e:
        # Best effort: record the failure in place of the result and keep going
        print(f"  ✗ Error on row {position + 1}: {e}")
        result = f"ERROR: {e}"

    # Exactly one entry per row, whether the call succeeded or failed
    generated_limitations.append(result)

    # Throttle to respect ~15 RPM (at least ~4 seconds/request; 5 s leaves headroom).
    # No sleep is needed after the last row.
    if position < total - 1:
        time.sleep(5.0)

# After the loop, put everything into the output column (positional, one per row)
df["generated_limitations_gemini"] = generated_limitations

print("\n→ All done. Check df['generated_limitations_gemini'] for results.")


Extractor

In [None]:
# To run this code you need:
#   pip install google-genai pandas

import os
import time
import pandas as pd

from google import genai
from google.genai import types
# Output column for the Extractor agent; overwritten with the results after the loop.
df["generated_limitations_extractor"] = ''

# Replace missing values so that str() below never turns a NaN into the text "nan".
df["response_string_all"] = df["response_string_all"].fillna("")
df["retrieved_text"]      = df["retrieved_text"].fillna("")

# ——— Global list to hold each generated limitation result (one entry per row) ———
generated_limitations_extractor: list[str] = []

# ——— Initialize the Gemini client ———
# Validate the key *before* building the client: the constructor never returns
# None, so checking the client object afterwards cannot detect a missing key.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set or invalid.")
client = genai.Client(api_key=api_key)

model = "gemini-1.5-flash"  # or "gemini-1.5-pro"
gen_config = types.GenerateContentConfig(response_mime_type="text/plain")

# NOTE(review): `Extractor` is not defined in this cell; it must already exist
# in the kernel (presumably the Extractor agent's prompt text — confirm).
instruction = (
    "Given the following text, identify and list any limitations or drawbacks mentioned or implied:\n\n" + Extractor
)

total = len(df)
print(f"→ Starting generation for {total} samples using model '{model}'")

# `position` is the 0-based loop counter; it is used for progress output and
# throttling so the loop does not depend on df having a 0..n-1 integer index.
for position, (_, row) in enumerate(df.iterrows()):
    # The Extractor only sees the paper text
    full_prompt = instruction + str(row["response_string_all"])

    # Build the request payload
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=full_prompt)]
        )
    ]

    # Send to Gemini (streaming) and collect all text
    try:
        pieces: list[str] = []
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=gen_config,
        ):
            # A streamed chunk may carry no text (None); skip it instead of
            # failing the whole row on `str + None`.
            text = chunk.text
            if text:
                pieces.append(text)

        # An empty response is kept as "" so the list stays aligned with df
        result = "".join(pieces).strip()
        print(f"  ✓ Row {position + 1} done.")

    except Exception as e:
        # Best effort: record the failure in place of the result and keep going
        print(f"  ✗ Error on row {position + 1}: {e}")
        result = f"ERROR: {e}"

    # Store in this agent's own list (not the zero-shot `generated_limitations`)
    generated_limitations_extractor.append(result)

    # Throttle to respect ~15 RPM (i.e. ~4 seconds/request); no sleep after the last row
    if position < total - 1:
        time.sleep(4.0)

# After the loop, assign the whole column at once. The list has one entry per
# row, so no hard-coded slice or chained indexing is needed.
df["generated_limitations_extractor"] = generated_limitations_extractor

print("\n→ All done. Check df['generated_limitations_extractor'] for results.")


Analyzer Agent

In [None]:
# To run this code you need:
#   pip install google-genai pandas

import os
import time
import pandas as pd

from google import genai
from google.genai import types
# Output column for the Analyzer agent; overwritten with the results after the loop.
df["generated_limitations_analyzer"] = ''

# Replace missing values so that str() below never turns a NaN into the text "nan".
df["response_string_all"] = df["response_string_all"].fillna("")

# ——— Global list to hold each generated limitation result (one entry per row) ———
generated_limitations_analyzer: list[str] = []

# ——— Initialize the Gemini client ———
# Validate the key *before* building the client: the constructor never returns
# None, so checking the client object afterwards cannot detect a missing key.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set or invalid.")
client = genai.Client(api_key=api_key)

model = "gemini-1.5-flash"  # or "gemini-1.5-pro"
gen_config = types.GenerateContentConfig(response_mime_type="text/plain")

# NOTE(review): `Analyzer` is not defined in this cell; it must already exist
# in the kernel (presumably the Analyzer agent's prompt text — confirm).
instruction = (
    "Given the following text, identify and list any limitations or drawbacks mentioned or implied:\n\n" + Analyzer
)

total = len(df)
print(f"→ Starting generation for {total} samples using model '{model}'")

# `position` is the 0-based loop counter; it is used for progress output and
# throttling so the loop does not depend on df having a 0..n-1 integer index.
for position, (_, row) in enumerate(df.iterrows()):
    # The Analyzer only sees the paper text
    full_prompt = instruction + str(row["response_string_all"])

    # Build the request payload
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=full_prompt)]
        )
    ]

    # Send to Gemini (streaming) and collect all text
    try:
        pieces: list[str] = []
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=gen_config,
        ):
            # A streamed chunk may carry no text (None); skip it instead of
            # failing the whole row on `str + None`.
            text = chunk.text
            if text:
                pieces.append(text)

        # An empty response is kept as "" so the list stays aligned with df
        result = "".join(pieces).strip()
        print(f"  ✓ Row {position + 1} done.")

    except Exception as e:
        # Best effort: record the failure in place of the result and keep going
        print(f"  ✗ Error on row {position + 1}: {e}")
        result = f"ERROR: {e}"

    # Store in this agent's own list (not the zero-shot `generated_limitations`)
    generated_limitations_analyzer.append(result)

    # Throttle to respect ~15 RPM (i.e. ~4 seconds/request); no sleep after the last row
    if position < total - 1:
        time.sleep(4.0)

# After the loop, assign the whole column at once. The list has one entry per
# row, so no hard-coded slice or chained indexing is needed.
df["generated_limitations_analyzer"] = generated_limitations_analyzer

print("\n→ All done. Check df['generated_limitations_analyzer'] for results.")


Reviewer

In [None]:

import os
import time
import pandas as pd

from google import genai
from google.genai import types
# Output column for the Reviewer agent; overwritten with the results after the loop.
df["generated_limitations_reviewer"] = ''

# Replace missing values so that str() below never turns a NaN into the text "nan".
df["response_string_all"] = df["response_string_all"].fillna("")

# ——— Global list to hold each generated limitation result (one entry per row) ———
generated_limitations_reviewer: list[str] = []

# ——— Initialize the Gemini client ———
# Validate the key *before* building the client: the constructor never returns
# None, so checking the client object afterwards cannot detect a missing key.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set or invalid.")
client = genai.Client(api_key=api_key)

model = "gemini-1.5-flash"  # or "gemini-1.5-pro"
gen_config = types.GenerateContentConfig(response_mime_type="text/plain")

# NOTE(review): `Reviewer` is not defined in this cell; it must already exist
# in the kernel (presumably the Reviewer agent's prompt text — confirm).
instruction = (
    "Given the following text, identify and list any limitations or drawbacks mentioned or implied:\n\n" + Reviewer
)

total = len(df)
print(f"→ Starting generation for {total} samples using model '{model}'")

# `position` is the 0-based loop counter; it is used for progress output and
# throttling so the loop does not depend on df having a 0..n-1 integer index.
for position, (_, row) in enumerate(df.iterrows()):
    # The Reviewer only sees the paper text
    full_prompt = instruction + str(row["response_string_all"])

    # Build the request payload
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=full_prompt)]
        )
    ]

    # Send to Gemini (streaming) and collect all text
    try:
        pieces: list[str] = []
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=gen_config,
        ):
            # A streamed chunk may carry no text (None); skip it instead of
            # failing the whole row on `str + None`.
            text = chunk.text
            if text:
                pieces.append(text)

        # An empty response is kept as "" so the list stays aligned with df
        result = "".join(pieces).strip()
        print(f"  ✓ Row {position + 1} done.")

    except Exception as e:
        # Best effort: record the failure in place of the result and keep going
        print(f"  ✗ Error on row {position + 1}: {e}")
        result = f"ERROR: {e}"

    # Store in this agent's own list (not the zero-shot `generated_limitations`)
    generated_limitations_reviewer.append(result)

    # Throttle to respect ~15 RPM (i.e. ~4 seconds/request); no sleep after the last row
    if position < total - 1:
        time.sleep(4.0)

# After the loop, assign the whole column at once. The list has one entry per
# row, so no hard-coded slice or chained indexing is needed.
df["generated_limitations_reviewer"] = generated_limitations_reviewer

print("\n→ All done. Check df['generated_limitations_reviewer'] for results.")


Citation

In [None]:
import os
import time
import pandas as pd

from google import genai
from google.genai import types
# Output column for the Citation agent; overwritten with the results after the loop.
df["generated_limitations_citation"] = ''

# Replace missing values so that str() below never turns a NaN into the text "nan".
df["retrieved_text"]      = df["retrieved_text"].fillna("")

# ——— Global list to hold each generated limitation result (one entry per row) ———
generated_limitations_citation: list[str] = []

# ——— Initialize the Gemini client ———
# Validate the key *before* building the client: the constructor never returns
# None, so checking the client object afterwards cannot detect a missing key.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set or invalid.")
client = genai.Client(api_key=api_key)

model = "gemini-1.5-flash"  # or "gemini-1.5-pro"
gen_config = types.GenerateContentConfig(response_mime_type="text/plain")

# NOTE(review): `Citation` is not defined in this cell; it must already exist
# in the kernel (presumably the Citation agent's prompt text — confirm).
instruction = (
    "Given the following text, identify and list any limitations or drawbacks mentioned or implied:\n\n" + Citation
)

total = len(df)
print(f"→ Starting generation for {total} samples using model '{model}'")

# `position` is the 0-based loop counter; it is used for progress output and
# throttling so the loop does not depend on df having a 0..n-1 integer index.
for position, (_, row) in enumerate(df.iterrows()):
    # The Citation agent only sees the text retrieved from cited papers
    combined_input = (
        "\n\nThis is the retrieved text from cited papers:\n"
        + str(row["retrieved_text"])
    )

    # Prepend our instruction
    full_prompt = instruction + combined_input

    # Build the request payload
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=full_prompt)]
        )
    ]

    # Send to Gemini (streaming) and collect all text
    try:
        pieces: list[str] = []
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=gen_config,
        ):
            # A streamed chunk may carry no text (None); skip it instead of
            # failing the whole row on `str + None`.
            text = chunk.text
            if text:
                pieces.append(text)

        # An empty response is kept as "" so the list stays aligned with df
        result = "".join(pieces).strip()
        print(f"  ✓ Row {position + 1} done.")

    except Exception as e:
        # Best effort: record the failure in place of the result and keep going
        print(f"  ✗ Error on row {position + 1}: {e}")
        result = f"ERROR: {e}"

    # Store in this agent's own list (not the zero-shot `generated_limitations`)
    generated_limitations_citation.append(result)

    # Throttle to respect ~15 RPM (i.e. ~4 seconds/request); no sleep after the last row
    if position < total - 1:
        time.sleep(4.0)

# After the loop, assign the whole column at once. The list has one entry per
# row, so no hard-coded slice or chained indexing is needed.
df["generated_limitations_citation"] = generated_limitations_citation

print("\n→ All done. Check df['generated_limitations_citation'] for results.")
