In [1]:
#Importing libraries
import pandas as pd   #for creating and managing structured tabular data
from pathlib import Path  #provides interface for working with file paths.
import re  #for cleaning extracted pdf text
from pypdf import PdfReader  #for reading and extracting text from PDF documents.


In [2]:

#Showing full content of each column 
#pd.set_option('display.max_colwidth', None)

# Showing full rows and columns 
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


Step 1: Text Preparation for Congressional Record PDFs

Input : CREC-2025-03-31-pt1-PgH1369.pdf

Output: paragraphs with columns:

doc_id, paragraph_id, paragraph_text


In [3]:
#Importing the pdf file

PDF_PATH = Path("CREC-2025-03-31-pt1-PgH1369.pdf")
DOC_ID   = "CREC_2025_03_31_HOUSE"

In [4]:
# Patterns to remove (headers, footers, metadata, page codes)
HEADER_PATTERNS = [
    r"^CONGRESSIONAL RECORD",          # page header
    r"^H\d{3,4}\b",                    # like H1369, H1370.
    r"^VerDate\b",                     # VerDate Sep 11 2014...
    r"^Jkt \d+",                       # Jkt 059060
    r"^PO 00000\b",
    r"^Frm \d+",
    r"^Fmt \d+",
    r"^Sfmt \d+",
    r"^[A-Z]:\\CR\\FM\\",              # E:\CR\FM\...
    r"^b \d{3,4}\b$",                  # b 2110, b 2120 (column/time markers)
]

header_regexes = [re.compile(p) for p in HEADER_PATTERNS]

In [5]:
#function that checks whether a given line of text is a header or footer

def is_header_or_footer(line: str) -> bool:
    s = line.strip()
    if not s:
        return False
    return any(r.match(s) for r in header_regexes)

In [6]:
# fuction that checks Speaker markers
SPEAKER_RE = re.compile(
    r"^(Mr\.|Ms\.|Mrs\.|Miss|Madam Speaker|Mr\. Speaker|The SPEAKER|The CHAIR)"
)

def is_speaker_line(line: str) -> bool:
    return bool(SPEAKER_RE.match(line.strip()))

In [7]:
#Functoion for Joining lines with breaking hyphens,into making them one word

def normalize_hyphenation(prev: str, curr: str) -> str:

    prev = prev.rstrip()
    if prev.endswith("-"):
        return prev[:-1] + curr.lstrip()
    else:
        return prev + " " + curr.lstrip()

In [8]:
# converting the PDF into raw page text

reader = PdfReader(str(PDF_PATH))
pages = [page.extract_text() or "" for page in reader.pages]

df_pages = pd.DataFrame(
    {"page_num": range(1, len(pages) + 1),
     "raw_text": pages}
)

df_pages.head()

#NOTE---THE \n mean line break

TimeoutError: [Errno 60] Operation timed out

In [None]:
# Splitting pages into lines (before cleaning)

records = []
for i, page_text in enumerate(pages, start=1):
    for j, line in enumerate(page_text.splitlines(), start=1):
        records.append(
            {"page_num": i, "line_num": j, "raw_line": line}
        )

df_lines = pd.DataFrame(records)

df_lines.head(20)


Unnamed: 0,page_num,line_num,raw_line
0,1,1,"CONGRESSIONAL RECORD — HOUSE H1369 March 31, 2025"
1,1,2,to the bond markets because the bond
2,1,3,markets are basically about to run this
3,1,4,country. If you have to sell $6 billion a
4,1,5,"day, $60,000, $70,000 every second,"
5,1,6,maybe you need to pay attention to
6,1,7,your bankers who you are having to
7,1,8,"sell your debt to, to communicate to"
8,1,9,those debt markets we are serious and
9,1,10,"looking at ways to use technology, bet-"


In [None]:
# Marking and dropping header/footer lines

df_lines["is_header_footer"] = df_lines["raw_line"].apply(is_header_or_footer)
df_lines["is_speaker_line"] = df_lines["raw_line"].apply(is_speaker_line)

In [None]:
#checking those lines which are headers and footers
df_lines[df_lines["is_header_footer"]].head(10)

Unnamed: 0,page_num,line_num,raw_line,is_header_footer,is_speaker_line
0,1,1,"CONGRESSIONAL RECORD — HOUSE H1369 March 31, 2025",True,False
58,1,59,b 2110,True,False
237,1,238,"VerDate Sep 11 2014 10:16 Apr 01, 2025 Jkt 059060 PO 00000 Frm 00029 Fmt 7634 Sfmt 0634 E:\CR\FM\K31MR7.063 H31MRPT1dmwilson on DSK7X7S144PROD with HOUSE",True,False
238,2,1,"CONGRESSIONAL RECORD — HOUSEH1370 March 31, 2025",True,False
251,2,14,b 2120,True,False
469,2,232,b 2130,True,False
474,2,237,"VerDate Sep 11 2014 10:16 Apr 01, 2025 Jkt 059060 PO 00000 Frm 00030 Fmt 7634 Sfmt 0634 E:\CR\FM\K31MR7.064 H31MRPT1dmwilson on DSK7X7S144PROD with HOUSE",True,False
475,3,1,"CONGRESSIONAL RECORD — HOUSE H1371 March 31, 2025",True,False
709,3,235,"VerDate Sep 11 2014 10:16 Apr 01, 2025 Jkt 059060 PO 00000 Frm 00031 Fmt 7634 Sfmt 0634 E:\CR\FM\K31MR7.067 H31MRPT1dmwilson on DSK7X7S144PROD with HOUSE",True,False


In [None]:
#checking those lines which are speaker lines
df_lines[df_lines["is_speaker_line"]].head(10)

Unnamed: 0,page_num,line_num,raw_line,is_header_footer,is_speaker_line
45,1,46,"Mr. Speaker, we are now starting to",False,True
59,1,60,"Mr. Speaker, we are now running",False,True
83,1,84,"Mr. Speaker, I yield back the balance",False,True
93,1,94,"Mr. ESPAILLAT. Mr. Speaker, I ask",False,True
99,1,100,The SPEAKER pro tempore. Is there,False,True
103,1,104,"Mr. ESPAILLAT. Mr. Speaker, to-",False,True
149,1,150,"Mr. Speaker, the Congressional His-",False,True
182,1,183,"Mr. Speaker, I yield to the gentleman",False,True
185,1,186,"Mr. CASTRO of Texas. Mr. Speaker,",False,True
225,1,226,"Mr. Speaker, Rau´ l will be missed. My",False,True


In [None]:
# Keepping only non-header/footer lines for paragraph building
df_lines_keep = df_lines[~df_lines["is_header_footer"]].copy()

df_lines_keep.head(10)

Unnamed: 0,page_num,line_num,raw_line,is_header_footer,is_speaker_line
1,1,2,to the bond markets because the bond,False,False
2,1,3,markets are basically about to run this,False,False
3,1,4,country. If you have to sell $6 billion a,False,False
4,1,5,"day, $60,000, $70,000 every second,",False,False
5,1,6,maybe you need to pay attention to,False,False
6,1,7,your bankers who you are having to,False,False
7,1,8,"sell your debt to, to communicate to",False,False
8,1,9,those debt markets we are serious and,False,False
9,1,10,"looking at ways to use technology, bet-",False,False
10,1,11,ter models when obesity is the single,False,False


In [None]:
#Building paragraphs from kept lines

paragraphs = []
current_para = ""

for _, row in df_lines_keep.iterrows():
    line = row["raw_line"]

    # Blank line → paragraph boundary
    if not line.strip():
        if current_para.strip():
            paragraphs.append(current_para.strip())
            current_para = ""
        continue

    # Speaker line → paragraph boundary, but skip speaker text
    if row["is_speaker_line"]:
        if current_para.strip():
            paragraphs.append(current_para.strip())
            current_para = ""
        continue

    # Accumulate into current paragraph
    if not current_para:
        current_para = line.strip()
    else:
        current_para = normalize_hyphenation(current_para, line)

# Flush last paragraph
if current_para.strip():
    paragraphs.append(current_para.strip())

df_paragraphs = pd.DataFrame(
    {"doc_id": DOC_ID,
     "paragraph_id": range(1, len(paragraphs) + 1),
     "paragraph_text": paragraphs}
)


df_paragraphs.head(10)

#Lines with MR SPeaker are removed


Unnamed: 0,doc_id,paragraph_id,paragraph_text
0,CREC_2025_03_31_HOUSE,1,"to the bond markets because the bond markets are basically about to run this country. If you have to sell $6 billion a day, $60,000, $70,000 every second, maybe you need to pay attention to your bankers who you are having to sell your debt to, to communicate to those debt markets we are serious and looking at ways to use technology, better models when obesity is the single biggest expense in our society. Yes, we are not supposed to say that. Mr. Speaker, please don’t tell anyone. Last year, the Joint Economic economists calculated $9.1 trillion additional of healthcare spending. Is it moral with what we do in food policy, nutritional support, how we deliver healthcare? Maybe the concept of helping our brothers and sisters live healthier when 31 percent of Medicare spending is diabetes—33 percent of all healthcare overall is just diabetes. Is that Republican or Democrat? It is just trying to get your policy alignment to the fact we are buried in debt and getting older as a society. I think in a decade, 23 percent of our population is 65 and up. We now know that we are having this remarkable shortage of young people. We are already potentially on the cusp of having more deaths than births in our country. In a couple of weeks, we are trying to roll out a STEM-based, talent-based immigration bill because for the economy to grow and stabilize, we don’t have a choice. People say, David, you are not allowed to talk about immigration. People won’t understand it. Well, they understand the economic survival of you still getting your benefits when you are a senior. We can make this work."
1,CREC_2025_03_31_HOUSE,2,"run into articles saying that we are putting the extraordinary privilege. What are the two extraordinary privileges America has? Our currency—the world borrows in our currency, meaning the fact of the matter is when we sell debt, there is a demand to hold U.S. dollars denominated. Then, the second thing is people want to live here. They want to invest here. They want to be educated here. They want to be entrepreneurs here."
2,CREC_2025_03_31_HOUSE,3,"into multiple articles saying some of the things we are doing, particularly our debt stack which is putting our extraordinary privilege of the country at risk. It doesn’t have to be this way. A couple of smart economists say we have 3 or 4 more years. At that point the debt gets so hard to manage. The Federal Reserve last week took us from a 2.1 GDP down to 1.7. Just that movement is almost $200 billion a year in tax collection. Just that GDP reduction the Federal Reserve calculated for the next 3 years, taking that out to 10, that is more money than everything in our budget reconciliation. The lack of understanding of the inner dynamics of our debt, the interest, and these dollars terrifies me because there is a path. There is a path for this to work, Mr. Speaker, but we are living on a razor’s edge because we are not doing the hard work."
3,CREC_2025_03_31_HOUSE,4,"of my time. f HONORING THE LIFE OF REPRESENTATIVE RAU´ L GRIJALVA (Under the Speaker’s announced policy of January 3, 2025, Mr. ESPAILLAT of New York was recognized for 30 minutes.) GENERAL LEAVE"
4,CREC_2025_03_31_HOUSE,5,unanimous consent that all Members may have 5 legislative days in which to revise and extend their remarks and include extraneous material into the record.
5,CREC_2025_03_31_HOUSE,6,objection to the request of the gentleman from New York? There was no objection.
6,CREC_2025_03_31_HOUSE,7,"night I rise to honor the life and legacy of Congressman Rau´ l Grijalva. Rau´ l Grijalva passed away peacefully on March 13 of this year, surrounded by his wife and three daughters. Born in Tucson, Arizona, on February 19, 1948, he was raised in a close-knit community with his sisters, Lydia and Norma. A proud Sunnyside High graduate, he met and married the love of his life, Ramona Garduno, in 1971. Together, they raised three daughters: Adelita, Raquel, and Marisa. They built a life rooted in service and activism. His parents instilled in him the value of hard work, perseverance, and giving back to the community. He dedicated over 50 years to public service, beginning with the Tucson Unified School Board in 1974, and later serving on the Pima County Board of Supervisors. As a U.S. Congressman for 22 years, he championed education, environmental protection, social justice, and Native-American sovereignty. His leadership helped secure the 2023 designation of the Baaj Nwaavjo I’tah Kukveni National Monument, protecting nearly 1 million acres of Tribal land. Known as the most liberal Member of Congress, he never wavered in his fight for justice and equality. He never shied away from that dignified title. A devoted husband, father, and Tata, he cherished his family and treasured time with his grandchildren. He had a sharp wit about him, a deep love for music and books, and a knack for doodling during meetings. Many of us saw him do his artwork in some our committee meetings. His staff and volunteers, the Grijalvistas, were like family, standing by him in his lifelong pursuit of progress."
7,CREC_2025_03_31_HOUSE,8,"panic Caucus is standing firm with his family during this time of sorrow and need. I know that they are watching this proceeding. I want to salute them and tell them we love them and we stand with them. Rau´ l Grijalva’s unwavering commitment to justice and his community will continue to inspire generations to come. Since coming to Congress and joining the Natural Resources Committee, Rau´ l Grijalva was one of the most outspoken champions for environmental justice, public lands, indigenous rights, climate change, and corporate accountability. He was a visionary leader, both on the committee and as co-chair of the Progressive Caucus, amplifying the voices of historically silenced communities. From Louisiana’s Cancer Alley to Alaskan Native communities in the Arctic, he fought for those most impacted by environmental harm. Under his chairmanship in the 117th Congress, the Natural Resources Committee signed 149 natural resource bills into law. In the minority, he proved that aggressive and effective oversight was possible, holding the Trump administration accountable and exposing corruption."
8,CREC_2025_03_31_HOUSE,9,from Texas (Mr. C ASTRO).
9,CREC_2025_03_31_HOUSE,10,"it is an honor to be here tonight with members of the Congressional Hispanic Caucus, and with our Democratic leader as well, to celebrate and remember a man who made a difference: Rau ´ l Grijalva. He never forgot where he came from. Just as importantly, he never forgot why he came. He was somebody who lent a voice to the vulnerable and to the folks who had been discarded and forgotten. He was a man of his community. I served 10 years in Austin in the State legislature and am now in year 13 here. As I meet all the people that represent and serve the districts that they do, I always wonder how closely each person represents the average person walking down the street in their district. Rau ´ l Grijalva was that every person. To me, that is one of the biggest compliments that can be paid not only to the people who sent him here but also to him. He left the same way he came in, a man of principle who stood up for what he believed, who fought for the people that he represented, and always spoke truth to power. He was a legend of Mexican-American activism, known across the country for protecting our national parks, the environment, fighting for early childhood education and higher education, and uplifting immigrants and marginalized communities. As all of us know, he was also a kind and brilliant man. He had a kind heart but a fierce spirit. He was a fearless voice for Arizonans and all Americans."


In [None]:
# PROMPT BUILDER
#This function builds the full natural‑language prompt that gets sent to the LLM
def build_esa_prompt(paragraph_text: str) -> str:
    return f"""
Am analyzing U.S congressional text.

Task: Label ONLY as ESA_RELEVANT or NOT_RELEVANT.

ESA-relevant: Mentions Endangered Species Act, endangered/threatened species policy, 
critical habitat, Section 7/9, listings/delistings, ESA implementation/critique.

NOT-relevant: Procedural, ceremonial, generic environment, non-ESA wildlife.

Examples:
"Endangered Species Act blocks harvests" → ESA_RELEVANT
"Mr. Speaker, I yield back" → NOT_RELEVANT

Paragraph: "{paragraph_text}"

Respond EXACTLY: ESA_RELEVANT or NOT_RELEVANT
"""

In [None]:
import openai
from my_sk import my_skk  # getting my_skk from the python file my_sk

openai.api_key = my_skk  # Legacy style (works with your openai.chat.completions.create)
print("✅ API configured")  # Test


✅ API configured


In [None]:
# #Am testing the chatbot so that i get get a response by passing the model i want to use and then passing the messages

# #Messages is going to be a list of dicti


# response = openai.chat.completions.create(
#     model="gpt-4o-mini",  # Which AI model to use (cheap/fast version)
#     messages=[{"role": "user", "content": "Say hello"}]
# )
# print(response.choices[0].message.content)


role: "user": Tells the model "this is what a human said"

content: The actual text/prompt: "Say hello"

In [None]:
# calling the LLM
def call_llm(prompt: str) -> str:
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=10,
        temperature=0.0,  # Zero for exact labels
    )
    return response.choices[0].message.content.strip()


In [None]:
# CLASSIFIER
def classify_esa_relevance(paragraph_text: str) -> str:
    prompt = build_esa_prompt(paragraph_text)
    raw = call_llm(prompt)
    label = raw.strip().upper()
    if "ESA_RELEVANT" in label:
        return "ESA_RELEVANT"
    return "NOT_RELEVANT"

In [None]:
# RUN ON FULL DATAFRAME (add esa_relevance_llm column)
print(f"Labeling {len(df_paragraphs)} paragraphs...")
df_paragraphs["esa_relevance_llm"] = df_paragraphs["paragraph_text"].apply(classify_esa_relevance)

Labeling 24 paragraphs...


In [None]:
# RESULTS
print("\n✅ CLASSIFICATION COMPLETE")
print(df_paragraphs["esa_relevance_llm"].value_counts())
print(f"ESA-relevant: {sum(df_paragraphs['esa_relevance_llm']=='ESA_RELEVANT')} / {len(df_paragraphs)}")


✅ CLASSIFICATION COMPLETE
esa_relevance_llm
NOT_RELEVANT    23
ESA_RELEVANT     1
Name: count, dtype: int64
ESA-relevant: 1 / 24


In [None]:
# VIEW SAMPLES
print("\nSAMPLE RESULTS:")
display(df_paragraphs[["paragraph_id", "paragraph_text", "esa_relevance_llm"]].head(10))


SAMPLE RESULTS:


Unnamed: 0,paragraph_id,paragraph_text,esa_relevance_llm
0,1,"to the bond markets because the bond markets are basically about to run this country. If you have to sell $6 billion a day, $60,000, $70,000 every second, maybe you need to pay attention to your bankers who you are having to sell your debt to, to communicate to those debt markets we are serious and looking at ways to use technology, better models when obesity is the single biggest expense in our society. Yes, we are not supposed to say that. Mr. Speaker, please don’t tell anyone. Last year, the Joint Economic economists calculated $9.1 trillion additional of healthcare spending. Is it moral with what we do in food policy, nutritional support, how we deliver healthcare? Maybe the concept of helping our brothers and sisters live healthier when 31 percent of Medicare spending is diabetes—33 percent of all healthcare overall is just diabetes. Is that Republican or Democrat? It is just trying to get your policy alignment to the fact we are buried in debt and getting older as a society. I think in a decade, 23 percent of our population is 65 and up. We now know that we are having this remarkable shortage of young people. We are already potentially on the cusp of having more deaths than births in our country. In a couple of weeks, we are trying to roll out a STEM-based, talent-based immigration bill because for the economy to grow and stabilize, we don’t have a choice. People say, David, you are not allowed to talk about immigration. People won’t understand it. Well, they understand the economic survival of you still getting your benefits when you are a senior. We can make this work.",NOT_RELEVANT
1,2,"run into articles saying that we are putting the extraordinary privilege. What are the two extraordinary privileges America has? Our currency—the world borrows in our currency, meaning the fact of the matter is when we sell debt, there is a demand to hold U.S. dollars denominated. Then, the second thing is people want to live here. They want to invest here. They want to be educated here. They want to be entrepreneurs here.",NOT_RELEVANT
2,3,"into multiple articles saying some of the things we are doing, particularly our debt stack which is putting our extraordinary privilege of the country at risk. It doesn’t have to be this way. A couple of smart economists say we have 3 or 4 more years. At that point the debt gets so hard to manage. The Federal Reserve last week took us from a 2.1 GDP down to 1.7. Just that movement is almost $200 billion a year in tax collection. Just that GDP reduction the Federal Reserve calculated for the next 3 years, taking that out to 10, that is more money than everything in our budget reconciliation. The lack of understanding of the inner dynamics of our debt, the interest, and these dollars terrifies me because there is a path. There is a path for this to work, Mr. Speaker, but we are living on a razor’s edge because we are not doing the hard work.",NOT_RELEVANT
3,4,"of my time. f HONORING THE LIFE OF REPRESENTATIVE RAU´ L GRIJALVA (Under the Speaker’s announced policy of January 3, 2025, Mr. ESPAILLAT of New York was recognized for 30 minutes.) GENERAL LEAVE",NOT_RELEVANT
4,5,unanimous consent that all Members may have 5 legislative days in which to revise and extend their remarks and include extraneous material into the record.,NOT_RELEVANT
5,6,objection to the request of the gentleman from New York? There was no objection.,NOT_RELEVANT
6,7,"night I rise to honor the life and legacy of Congressman Rau´ l Grijalva. Rau´ l Grijalva passed away peacefully on March 13 of this year, surrounded by his wife and three daughters. Born in Tucson, Arizona, on February 19, 1948, he was raised in a close-knit community with his sisters, Lydia and Norma. A proud Sunnyside High graduate, he met and married the love of his life, Ramona Garduno, in 1971. Together, they raised three daughters: Adelita, Raquel, and Marisa. They built a life rooted in service and activism. His parents instilled in him the value of hard work, perseverance, and giving back to the community. He dedicated over 50 years to public service, beginning with the Tucson Unified School Board in 1974, and later serving on the Pima County Board of Supervisors. As a U.S. Congressman for 22 years, he championed education, environmental protection, social justice, and Native-American sovereignty. His leadership helped secure the 2023 designation of the Baaj Nwaavjo I’tah Kukveni National Monument, protecting nearly 1 million acres of Tribal land. Known as the most liberal Member of Congress, he never wavered in his fight for justice and equality. He never shied away from that dignified title. A devoted husband, father, and Tata, he cherished his family and treasured time with his grandchildren. He had a sharp wit about him, a deep love for music and books, and a knack for doodling during meetings. Many of us saw him do his artwork in some our committee meetings. His staff and volunteers, the Grijalvistas, were like family, standing by him in his lifelong pursuit of progress.",NOT_RELEVANT
7,8,"panic Caucus is standing firm with his family during this time of sorrow and need. I know that they are watching this proceeding. I want to salute them and tell them we love them and we stand with them. Rau´ l Grijalva’s unwavering commitment to justice and his community will continue to inspire generations to come. Since coming to Congress and joining the Natural Resources Committee, Rau´ l Grijalva was one of the most outspoken champions for environmental justice, public lands, indigenous rights, climate change, and corporate accountability. He was a visionary leader, both on the committee and as co-chair of the Progressive Caucus, amplifying the voices of historically silenced communities. From Louisiana’s Cancer Alley to Alaskan Native communities in the Arctic, he fought for those most impacted by environmental harm. Under his chairmanship in the 117th Congress, the Natural Resources Committee signed 149 natural resource bills into law. In the minority, he proved that aggressive and effective oversight was possible, holding the Trump administration accountable and exposing corruption.",NOT_RELEVANT
8,9,from Texas (Mr. C ASTRO).,NOT_RELEVANT
9,10,"it is an honor to be here tonight with members of the Congressional Hispanic Caucus, and with our Democratic leader as well, to celebrate and remember a man who made a difference: Rau ´ l Grijalva. He never forgot where he came from. Just as importantly, he never forgot why he came. He was somebody who lent a voice to the vulnerable and to the folks who had been discarded and forgotten. He was a man of his community. I served 10 years in Austin in the State legislature and am now in year 13 here. As I meet all the people that represent and serve the districts that they do, I always wonder how closely each person represents the average person walking down the street in their district. Rau ´ l Grijalva was that every person. To me, that is one of the biggest compliments that can be paid not only to the people who sent him here but also to him. He left the same way he came in, a man of principle who stood up for what he believed, who fought for the people that he represented, and always spoke truth to power. He was a legend of Mexican-American activism, known across the country for protecting our national parks, the environment, fighting for early childhood education and higher education, and uplifting immigrants and marginalized communities. As all of us know, he was also a kind and brilliant man. He had a kind heart but a fierce spirit. He was a fearless voice for Arizonans and all Americans.",NOT_RELEVANT


In [None]:
# EXTRACT & SAVE ESA CORPUS
df_esa = df_paragraphs[df_paragraphs["esa_relevance_llm"] == "ESA_RELEVANT"].copy()

df_esa.head()

Unnamed: 0,doc_id,paragraph_id,paragraph_text,esa_relevance_llm
20,CREC_2025_03_31_HOUSE,21,"lege and honor to recognize and celebrate the remarkable contributions of Congressman Rau´l Grijalva—an advocate, a leader, and a true champion for our environment, especially the protection of the most endangered species. From the very beginning of his tenure in public service, Congressman Grijalva has been a tireless advocate for the preservation of our planet’s most vulnerable creatures. Whether it’s a rare desert butterfly, sea turtle, or a small fish, Congressman Grijalva has dedicated his career to ensuring that these species—and the ecosystems they inhabit— are protected for generations to come. In a time when climate change, habitat destruction, and human encroachment threaten biodiversity at unprecedented levels, Congressman Grijalva has consistently fought for policies that safeguard endangered species. As Chairman and Ranking Member, he has been a leading voice on the House Natural Resources Committee, pushing for stronger protections under the Endangered Species Act and advocating for increased funding to support conservation efforts. One of his most significant accomplishments has been his leadership in securing critical protections for species in the Southwestern United States, where his home state of Arizona sits at the intersection of diverse and fragile ecosystems. Congressman Grijalva has worked tirelessly to preserve the habitats of species like the Mexican gray wolf and the jaguar, whose survival has been threatened by deforestation, poaching, and illegal hunting. His efforts are a testament to his deep commitment to preserving not just the species themselves but also the delicate balance of nature. In addition to his work on endangered species, Congressman Grijalva has shown an unwavering commitment to environmental justice. He understands that the fight to protect our wildlife goes hand in hand with the fight to protect our communities—particularly those that are most vulnerable to environmental degradation. He has worked to ensure that conservation efforts don’t just benefit the wildlife we cherish but also the people who rely on healthy ecosystems for their livelihoods and well-being. Congressman Grijalva’s advocacy has also extended to the legislative arena, where he has played a crucial role in shaping and passing key pieces of legislation aimed at combating the extinction crisis. His work in advancing the Recovering America’s Wildlife Act, a bill that seeks to fund state-led conservation efforts for atrisk species, has the potential to change the landscape of wildlife conservation in this country. His vision was clear: we must act now to preserve biodiversity, before more species are lost. Through his leadership, passion, and vision, Congressman Grijalva has not only safeguarded the lives of endangered species but has also inspired a new generation of environmental advocates to continue the fight for the planet’s most precious resources. As we honor Congressman Grijalva, let us reflect on the profound impact his work has had on the world around us. Let us also renew our own commitment to protecting endangered species, preserving their habitats, and ensuring that future generations will have the opportunity to marvel at the beauty and wonder of our planet’s incredible biodiversity. I thank Congressman Grijalva, for his tireless dedication to our environment, for our lands, waters, and wildlife, and for making our country a cleaner, safer, better place for all Americans. We worked closely together for many years on Natural Resources on many important issues, and I will always be thankful for his leadership, partnership, and friendship. He was a fighter until the end, and his work will always be remembered by the countless people it has impacted. Above all, he was my friend. He always had my back and could always find a way to make me smile when I was down. He inspired hope in all of us. He fought the good fight. He will be truly missed. We are deeply grateful for his service and inspired by his example. His legacy will continue to inspire and guide us in the years to come.",ESA_RELEVANT


In [None]:
# PROMPT BUILDER
#This function builds the full natural‑language prompt that gets sent to the LLM
def build_esa_prompt(paragraph_text: str) -> str:
    return f"""
Am analyzing U.S congressional text.

Task: Label ONLY as ESA_RELEVANT or NOT_RELEVANT.

ESA-relevant: Mentions Endangered Species Act, endangered/threatened species policy, 
critical habitat, Section 7/9, listings/delistings, ESA implementation/critique.

NOT-relevant: Procedural, ceremonial, generic environment, non-ESA wildlife.

Examples:
"Endangered Species Act blocks harvests" → ESA_RELEVANT
"Mr. Speaker, I yield back" → NOT_RELEVANT

Paragraph: "{paragraph_text}"

Respond EXACTLY: ESA_RELEVANT or NOT_RELEVANT
"""