In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape one listing page of the tag archive and export one spreadsheet row
# per matched article element.
url = 'https://birdcount.in/tag/ebird-monthly-challenge/page/3/?el_dbe_page'
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were
# a listing (which would silently export an empty sheet).
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Container for posts
articles = soup.select('article.et_pb_post_extra')

rows = []
for art in articles:
    # Title & link
    h3 = art.select_one('h3.entry-title a')
    title = h3.text.strip() if h3 else ''
    # .get() instead of [] so an anchor without href yields '' rather than KeyError
    link = h3.get('href', '') if h3 else ''

    # Content/summary
    summary_tag = art.select_one('.post-data p')
    content = summary_tag.text.strip() if summary_tag else ''

    # Meta info: values are taken by position.
    # NOTE(review): assumes the first three <span>s are date, comment count and
    # reading time, in that order -- confirm against the page markup.
    meta = art.select_one('p.post-meta')
    date = comments = reading = ''
    if meta:
        spans = meta.find_all('span')
        if len(spans) >= 3:
            date = spans[0].text.strip()
            comments = spans[1].text.strip()
            reading = spans[2].text.strip()

    # Image URL (.get() so an <img> without a src attribute does not raise)
    img = art.select_one('.post-media img')
    img_url = img.get('src', '') if img else ''

    rows.append([img_url, title, link, content, date, comments, reading])

# Create DataFrame and export
df = pd.DataFrame(rows, columns=[
    'Image URL', 'Title', 'Article URL', 'Content', 'Date', 'Comments', 'Read Time'
])
df.to_excel('ebird_challenges_page3.xlsx', index=False)
print(f"✅ Extracted {len(rows)} rows")


✅ Extracted 12 rows


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Walk the paginated tag archive page by page, collecting one row per matched
# article element, then export everything to a single Excel file.
base_url = 'https://birdcount.in/tag/ebird-monthly-challenge'
page = 1
rows = []

while True:
    url = f'{base_url}/page/{page}/?el_dbe_page'
    print(f'🔄 Fetching page {page}: {url}')
    try:
        # Without a timeout a single stalled connection would hang the loop forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        # Stop paging but fall through to the export below, so rows that were
        # already collected are not lost to a transient network error.
        print(f'⚠️ Request failed for page {page}: {e}')
        break
    if response.status_code != 200:
        print(f'❌ Failed to load page {page}')
        break

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.select('article.et_pb_post_extra')
    if not articles:
        print('✅ No more articles found. Ending scrape.')
        break

    for art in articles:
        h3 = art.select_one('h3.entry-title a')
        title = h3.text.strip() if h3 else ''
        # .get() so a missing href yields '' instead of raising KeyError
        link = h3.get('href', '') if h3 else ''

        summary_tag = art.select_one('.post-data p')
        content = summary_tag.text.strip() if summary_tag else ''

        # Meta values are taken by position.
        # NOTE(review): assumes span order is date, comments, read time -- confirm.
        meta = art.select_one('p.post-meta')
        date = comments = reading = ''
        if meta:
            spans = meta.find_all('span')
            if len(spans) >= 3:
                date = spans[0].text.strip()
                comments = spans[1].text.strip()
                reading = spans[2].text.strip()

        img = art.select_one('.post-media img')
        # .get() so an <img> without a src attribute does not raise
        img_url = img.get('src', '') if img else ''

        rows.append([img_url, title, link, content, date, comments, reading])

    # Check if there's a "Load More" button
    load_more = soup.select_one('a.el-load-more')
    if not load_more:
        print(f'🛑 No "Load More" button found on page {page}. Done.')
        break

    page += 1
    time.sleep(1)  # Be polite to the server

# Save to Excel
df = pd.DataFrame(rows, columns=[
    'Image URL', 'Title', 'Article URL', 'Content', 'Date', 'Comments', 'Read Time'
])
df.to_excel('ebird_challenges_all.xlsx', index=False)
print(f'✅ Scraped {len(df)} articles and saved to ebird_challenges_all.xlsx')


🔄 Fetching page 1: https://birdcount.in/tag/ebird-monthly-challenge/page/1/?el_dbe_page
🔄 Fetching page 2: https://birdcount.in/tag/ebird-monthly-challenge/page/2/?el_dbe_page
🔄 Fetching page 3: https://birdcount.in/tag/ebird-monthly-challenge/page/3/?el_dbe_page
🔄 Fetching page 4: https://birdcount.in/tag/ebird-monthly-challenge/page/4/?el_dbe_page
🔄 Fetching page 5: https://birdcount.in/tag/ebird-monthly-challenge/page/5/?el_dbe_page
🔄 Fetching page 6: https://birdcount.in/tag/ebird-monthly-challenge/page/6/?el_dbe_page
🔄 Fetching page 7: https://birdcount.in/tag/ebird-monthly-challenge/page/7/?el_dbe_page
🔄 Fetching page 8: https://birdcount.in/tag/ebird-monthly-challenge/page/8/?el_dbe_page
🔄 Fetching page 9: https://birdcount.in/tag/ebird-monthly-challenge/page/9/?el_dbe_page
🔄 Fetching page 10: https://birdcount.in/tag/ebird-monthly-challenge/page/10/?el_dbe_page
🔄 Fetching page 11: https://birdcount.in/tag/ebird-monthly-challenge/page/11/?el_dbe_page
🔄 Fetching page 12: https://

# From other chat

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from requests.exceptions import RequestException

# Page through the tag archive with a browser-like User-Agent, gather one
# record per matched article element, and write the result to an Excel file
# whose name carries today's date.
base_url = 'https://birdcount.in/tag/ebird-monthly-challenge'
page = 1
rows = []

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36'
}

while True:
    url = f'{base_url}/page/{page}/?el_dbe_page'
    print(f'🔄 Fetching page {page}: {url}')

    # Only the network call itself sits inside the try; the status check follows.
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except RequestException as e:
        print(f'⚠️ Request failed for page {page}: {e}')
        break
    if response.status_code != 200:
        print(f'❌ Failed to load page {page} (status code {response.status_code})')
        break

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.select('article.et_pb_post_extra')
    if not articles:
        print('✅ No more articles found. Ending scrape.')
        break

    for article in articles:
        # Headline anchor supplies both the title text and the article URL.
        anchor = article.select_one('h3.entry-title a')
        if anchor:
            title = anchor.text.strip()
            link = anchor['href']
        else:
            title = link = ''

        excerpt = article.select_one('.post-data p')
        content = excerpt.text.strip() if excerpt else ''

        # The three meta fields stay blank unless at least three spans exist.
        date = comments = reading = ''
        meta = article.select_one('p.post-meta')
        spans = meta.find_all('span') if meta else []
        if len(spans) >= 3:
            date, comments, reading = (span.text.strip() for span in spans[:3])

        thumbnail = article.select_one('.post-media img')
        img_url = thumbnail['src'] if thumbnail else ''

        rows.append([img_url, title, link, content, date, comments, reading])

    # The absence of the "Load More" anchor is treated as the last page.
    if soup.select_one('a.el-load-more') is None:
        print(f'🛑 No "Load More" button found on page {page}. Done.')
        break

    page += 1
    time.sleep(1)  # Be polite to the server

# Build the table and save it under a date-stamped filename.
column_names = [
    'Image URL', 'Title', 'Article URL', 'Content', 'Date', 'Comments', 'Read Time'
]
df = pd.DataFrame(rows, columns=column_names)
output_filename = f'ebird_challenges_{time.strftime("%Y%m%d")}.xlsx'
df.to_excel(output_filename, index=False)

print(f'✅ Scraped {len(df)} articles and saved to {output_filename}')


🔄 Fetching page 1: https://birdcount.in/tag/ebird-monthly-challenge/page/1/?el_dbe_page
🔄 Fetching page 2: https://birdcount.in/tag/ebird-monthly-challenge/page/2/?el_dbe_page
🔄 Fetching page 3: https://birdcount.in/tag/ebird-monthly-challenge/page/3/?el_dbe_page
🔄 Fetching page 4: https://birdcount.in/tag/ebird-monthly-challenge/page/4/?el_dbe_page
🔄 Fetching page 5: https://birdcount.in/tag/ebird-monthly-challenge/page/5/?el_dbe_page
🔄 Fetching page 6: https://birdcount.in/tag/ebird-monthly-challenge/page/6/?el_dbe_page
🔄 Fetching page 7: https://birdcount.in/tag/ebird-monthly-challenge/page/7/?el_dbe_page
🔄 Fetching page 8: https://birdcount.in/tag/ebird-monthly-challenge/page/8/?el_dbe_page
🔄 Fetching page 9: https://birdcount.in/tag/ebird-monthly-challenge/page/9/?el_dbe_page
🔄 Fetching page 10: https://birdcount.in/tag/ebird-monthly-challenge/page/10/?el_dbe_page
🔄 Fetching page 11: https://birdcount.in/tag/ebird-monthly-challenge/page/11/?el_dbe_page
🔄 Fetching page 12: https://

In [None]:
!pip install requests readability-lxml beautifulsoup4 lxml


Collecting readability-lxml
  Downloading readability_lxml-0.8.4.1-py3-none-any.whl.metadata (4.0 kB)
Collecting cssselect (from readability-lxml)
  Downloading cssselect-1.3.0-py3-none-any.whl.metadata (2.6 kB)
Collecting lxml_html_clean (from lxml[html_clean]->readability-lxml)
  Downloading lxml_html_clean-0.4.2-py3-none-any.whl.metadata (2.4 kB)
Downloading readability_lxml-0.8.4.1-py3-none-any.whl (19 kB)
Downloading cssselect-1.3.0-py3-none-any.whl (18 kB)
Downloading lxml_html_clean-0.4.2-py3-none-any.whl (14 kB)
Installing collected packages: lxml_html_clean, cssselect, readability-lxml
Successfully installed cssselect-1.3.0 lxml_html_clean-0.4.2 readability-lxml-0.8.4.1


In [None]:
import requests
from readability import Document
from bs4 import BeautifulSoup
from requests.exceptions import RequestException

def extract_main_content(url):
    """Download ``url`` and return its readable article text as one string.

    The page is requested with a minimal User-Agent and a 10-second timeout,
    run through readability's ``Document`` to isolate the main article HTML,
    and flattened to newline-separated text with BeautifulSoup.

    Returns:
        On success, a string made of a "📄 Title: ..." line, a blank line and
        the extracted text. If the request raises a ``RequestException``
        (``raise_for_status`` is called, so HTTP error statuses count), a
        string beginning with "❌ Failed to fetch page: " is returned instead
        of raising.
    """
    try:
        page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
        page.raise_for_status()
    except RequestException as err:
        return f"❌ Failed to fetch page: {err}"

    # Readability picks out the article body and the page title.
    article = Document(page.text)
    body_html = article.summary()
    heading = article.title()

    # Drop the remaining markup, one text fragment per line.
    plain_text = BeautifulSoup(body_html, 'html.parser').get_text(separator='\n', strip=True)

    return f"📄 Title: {heading}\n\n{plain_text}"

# 🔽 Input from user
# Strip surrounding whitespace so a pasted URL with a trailing space or
# newline is still requested correctly.
url = input("Enter the URL: ").strip()
main_content = extract_main_content(url)
print("\n📝 Extracted Main Content:\n")
print(main_content)


Enter the URL: https://birdcount.in/apr2025-ebirders/

📝 Extracted Main Content:

📄 Title: April 2025 eBirders of the Month - Bird Count India

April 2025 eBirders of the Month
Before moving on to the results for the monthly challenges, here is a brief glimpse of birding in April by the numbers (with the previous month in brackets).
No. of birders
: 6,075 (7,324)
Number of observations
: 8.93 lakhs (12.52 lakhs)
Number of lists (all types)
: 54,400 (72,942)
Number of species
: 1,109 (1,119)
Number of unique lists with media
: 5,633 (7,135)
The challenge for April was to upload a minimum of 20 checklists, with audio (rated) of at least 5 species. A total of
44 eBirders
successfully met this target!
(Updated on 1 June  2025)
.
Congratulations to all these birders!
Ains Priestman, Ajay Sarvagnam, Amrit Raha, Anand Birdlife, Anand Singh, Aravind Am, Asim Giri, Bhaskar Mandal & Lakshmi Chatterjee, Biplab Banerjee, Chandu A, Chirag Munje, Dipankar Dev, Dr Bipasha David, Dr Mohammed Umer Shar

In [None]:
import requests

def send_to_gemini(api_key: str, page_content: str, timeout: float = 60):
    """Ask Gemini to extract structured birding-challenge facts from a page.

    Args:
        api_key: Gemini API key. It is sent in the ``x-goog-api-key`` header
            rather than in the URL, so it does not leak through logged URLs
            or exception messages that include the request URL.
        page_content: Plain text of the article; embedded verbatim in the prompt.
        timeout: Seconds to wait for the HTTP call before ``requests`` gives up.

    Returns:
        The decoded JSON body of the response (only for HTTP 200).

    Raises:
        Exception: if the API answers with any status other than 200; the
            message carries the status code and response body.
        requests exceptions (timeouts, connection errors) propagate unchanged.
    """
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": api_key,
    }

    # Build the prompt with instructions + your scraped content
    prompt_text = f"""
Extract structured information from the text below about a birding challenge. For each item, provide the value or list if available, or 'Not found' if missing.

1. Number of birders
2. Number of observations
3. Number of lists
4.Number of species
5.Number of unique lists with media
6.Names of birders (as a list)
7.Winner's name
8.How was the winner chosen
9.Location of the challenge (if any)
10. Upload requirements or conditions for completing the challenge
11. Any tips or important points (list)
12. List of bird species mentioned (if any)

Text:
\"\"\"{page_content}\"\"\"
"""

    data = {
        "contents": [
            {
                "parts": [
                    {
                        "text": prompt_text.strip()
                    }
                ]
            }
        ]
    }

    # A timeout prevents a stalled connection from hanging the notebook cell.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    if response.status_code == 200:
        return response.json()
    raise Exception(f"Error {response.status_code}: {response.text}")


import json

def pretty_print_gemini_response(response_json):
    """Print the text of the first part of the first candidate in a reply.

    Args:
        response_json: Decoded response dict, read via ``.get`` for
            ``candidates`` -> ``content`` -> ``parts`` -> ``text``.

    Prints a short notice when there are no candidates or no parts. Any
    exception raised while reading the structure is caught and reported on
    stdout instead of propagating. Returns ``None``.
    """
    try:
        candidate_list = response_json.get("candidates", [])
        if candidate_list:
            part_list = candidate_list[0].get("content", {}).get("parts", [])
            if part_list:
                answer = part_list[0].get("text", "")
                # Banner first, then the model text without outer whitespace.
                print("\n=== Extracted Structured Information ===\n")
                print(answer.strip())
            else:
                print("No parts found in content.")
        else:
            print("No candidates found in response.")
    except Exception as err:
        print(f"Error while parsing response: {err}")


if __name__ == "__main__":
    import getpass
    import os

    # Never embed a real key in the notebook: read it from the environment and
    # fall back to a hidden interactive prompt.
    # NOTE(review): a literal key was previously committed in this cell; it
    # should be treated as exposed and revoked.
    API_KEY = os.environ.get("GEMINI_API_KEY") or getpass.getpass("Gemini API key: ")

    # Paste the scraped page content here as a multiline string
    page_content = """
Enter the URL: https://birdcount.in/apr2025-ebirders/

April 2025 eBirders of the Month - Bird Count India

April 2025 eBirders of the Month
Before moving on to the results for the monthly challenges, here is a brief glimpse of birding in April by the numbers (with the previous month in brackets).
No. of birders
: 6,075 (7,324)
Number of observations
: 8.93 lakhs (12.52 lakhs)
Number of lists (all types)
: 54,400 (72,942)
Number of species
: 1,109 (1,119)
Number of unique lists with media
: 5,633 (7,135)
The challenge for April was to upload a minimum of 20 checklists, with audio (rated) of at least 5 species. A total of
44 eBirders
successfully met this target!
(Updated on 1 June  2025)
.
Congratulations to all these birders!
Ains Priestman, Ajay Sarvagnam, Amrit Raha, Anand Birdlife, Anand Singh, Aravind Am, Asim Giri, Bhaskar Mandal & Lakshmi Chatterjee, Biplab Banerjee, Chandu A, Chirag Munje, Dipankar Dev, Dr Bipasha David, Dr Mohammed Umer Sharieff, Gaja Mohanraj, Hariharan T V, Jayadev Menon,
Kedar Champhekar
, Kilson Kiragori, Kit Britten, Krishnamoorthy Muthirulan, Lakshmikant Neve, Madhavi Babtiwale, Maggie Geer, Mahmadanesh Khira, Munish Gowda, Neeraja V, Padma Gyalpo, Parthasarathi Chakrabarti, Pranad Patil, Rahul Wakare, Rajesh Radhakrishnan, Ramesh Shenai, Ranjeet Singh, Sandip Das, Sanjiv Khanna, Sharad Apte, Shilpa Gadgil, Shubham Giri, Soubhagya Mohanty, Sreekumar Chirukandoth, Uma Vaijnath, Vijaya Lakshmi, Vivek Sudhakaran.
The above list does not include group accounts and those with no identifiable names.
From these 44 names, one was drawn using a computer-generated random number. This person is
Neeraja V
who receives a copy of
Women in the Wild: Stories of India’s Most Brilliant
Women Wildlife Biologists
by Anita Mani as a small gift in appreciation.
Are you up to date with the eBird India challenge for
May
?
Also, see here for the fresh set of
yearlong challenges for 2025
!
Header Image
: Rufous-winged Fulvetta
Schoeniparus castaneceps
© Manjula Desai / Macaulay Library
"""

    try:
        # Use the key obtained above instead of a literal.
        response = send_to_gemini(API_KEY, page_content)
        print("Gemini API Response:\n")
        print(response)
        pretty_print_gemini_response(response)
    except Exception as e:
        print("Failed to get response from Gemini API:", e)


Gemini API Response:

{'candidates': [{'content': {'parts': [{'text': "Here's the structured information extracted from the text:\n\n1.  **Number of birders:** 6,075 (for April, with 7,324 in the previous month)\n2.  **Number of observations:** 8.93 lakhs (for April, with 12.52 lakhs in the previous month)\n3.  **Number of lists:** 54,400 (for April, with 72,942 in the previous month)\n4.  **Number of species:** 1,109 (for April, with 1,119 in the previous month)\n5.  **Number of unique lists with media:** 5,633 (for April, with 7,135 in the previous month)\n6.  **Names of birders (as a list):** ['Ains Priestman', 'Ajay Sarvagnam', 'Amrit Raha', 'Anand Birdlife', 'Anand Singh', 'Aravind Am', 'Asim Giri', 'Bhaskar Mandal & Lakshmi Chatterjee', 'Biplab Banerjee', 'Chandu A', 'Chirag Munje', 'Dipankar Dev', 'Dr Bipasha David', 'Dr Mohammed Umer Sharieff', 'Gaja Mohanraj', 'Hariharan T V', 'Jayadev Menon', 'Kedar Champhekar', 'Kilson Kiragori', 'Kit Britten', 'Krishnamoorthy Muthirulan', '

In [None]:
import requests

import requests
from readability import Document
from bs4 import BeautifulSoup
from requests.exceptions import RequestException

def extract_main_content(url):
    """Fetch ``url`` and return the main article text as a single string.

    The request uses a minimal User-Agent and a 10-second timeout. The HTML
    is reduced to its main article with readability's ``Document`` and then
    converted to newline-separated text with BeautifulSoup.

    Returns:
        On success, a "📄 Title: ..." line, a blank line and the extracted
        text. When the request raises a ``RequestException`` (HTTP error
        statuses included, via ``raise_for_status``), a string beginning with
        "❌ Failed to fetch page: " is returned rather than raising.
    """
    try:
        page = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=10)
        page.raise_for_status()
    except RequestException as err:
        return f"❌ Failed to fetch page: {err}"

    # Readability isolates the article body and exposes the page title.
    article = Document(page.text)
    body_html = article.summary()
    heading = article.title()

    # Strip the remaining tags, one text fragment per line.
    plain_text = BeautifulSoup(body_html, 'html.parser').get_text(separator='\n', strip=True)

    return f"📄 Title: {heading}\n\n{plain_text}"

def send_to_gemini(api_key: str, page_content: str, timeout: float = 60):
    """Ask Gemini to extract structured birding-challenge facts from a page.

    Args:
        api_key: Gemini API key. It is sent in the ``x-goog-api-key`` header
            rather than in the URL, so it does not leak through logged URLs
            or exception messages that include the request URL.
        page_content: Plain text of the article; embedded verbatim in the prompt.
        timeout: Seconds to wait for the HTTP call before ``requests`` gives up.

    Returns:
        The decoded JSON body of the response (only for HTTP 200).

    Raises:
        Exception: if the API answers with any status other than 200; the
            message carries the status code and response body.
        requests exceptions (timeouts, connection errors) propagate unchanged.
    """
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": api_key,
    }

    # Build the prompt with instructions + your scraped content
    prompt_text = f"""
Extract structured information from the text below about a birding challenge. For each item, provide the value or list if available, or 'Not found' if missing.

1. Number of birders
2. Number of observations
3. Number of lists
4.Number of species
5.Number of unique lists with media
6.Names of birders (as a list)
7.Winner's name
8.How was the winner chosen
9.Location of the challenge (if any)
10. Upload requirements or conditions for completing the challenge
11. Any tips or important points (list)
12. List of bird species mentioned (if any)

Text:
\"\"\"{page_content}\"\"\"
"""

    data = {
        "contents": [
            {
                "parts": [
                    {
                        "text": prompt_text.strip()
                    }
                ]
            }
        ]
    }

    # A timeout prevents a stalled connection from hanging the notebook cell.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    if response.status_code == 200:
        return response.json()
    raise Exception(f"Error {response.status_code}: {response.text}")


import json

def pretty_print_gemini_response(response_json):
    """Show the first candidate's first text part from a decoded reply.

    Args:
        response_json: Response dict, navigated with ``.get`` through
            ``candidates`` -> ``content`` -> ``parts`` -> ``text``.

    A short notice is printed when candidates or parts are missing. Any
    exception raised during the lookup is caught and printed rather than
    propagated. Returns ``None``.
    """
    try:
        candidate_list = response_json.get("candidates", [])
        if candidate_list:
            part_list = candidate_list[0].get("content", {}).get("parts", [])
            if part_list:
                answer = part_list[0].get("text", "")
                # Banner first, then the model text without outer whitespace.
                print("\n=== Extracted Structured Information ===\n")
                print(answer.strip())
            else:
                print("No parts found in content.")
        else:
            print("No candidates found in response.")
    except Exception as err:
        print(f"Error while parsing response: {err}")


if __name__ == "__main__":
    import getpass
    import os

    url = input("Enter the URL: ")
    main_content = extract_main_content(url)
    # Never embed a real key in the notebook: read it from the environment and
    # fall back to a hidden interactive prompt.
    # NOTE(review): a literal key was previously committed in this cell; it
    # should be treated as exposed and revoked.
    API_KEY = os.environ.get("GEMINI_API_KEY") or getpass.getpass("Gemini API key: ")
    page_content = main_content

    if page_content.startswith("❌ Failed to fetch page"):
        # extract_main_content reports fetch errors as a string; show it
        # instead of sending an error message to the model as page content.
        print(page_content)
    else:
        try:
            response = send_to_gemini(API_KEY, page_content)
            print("Gemini API Response:\n")
            print(response)
            pretty_print_gemini_response(response)
        except Exception as e:
            print("Failed to get response from Gemini API:", e)


Enter the URL: https://birdcount.in/jun14-ebirders-2/

📝 Extracted Main Content:

📄 Title: April 2014 eBirders of the Month - Bird Count India

April 2014 eBirders of the Month
April has come and gone, and it’s time to crunch the numbers and see who has been able to meet the
eBirding Challenge for the month
, which was to upload at least 20 complete lists
from India during the month.
During April,
194
eBird users uploaded
1,237
lists of all types, which together accounted for
26,481
records.
In all,
17
eBirders met or exceeded the April target. They are:
Aidan & Savio Fonseca
Anish Aravind
Arya Vinod
Bela Arora
Dhananjai Mohan
Ganeshwar S V
Ishan Sadwelkar
Manish Kumar
Manju Sinha
Michael Emenaker
Panchapakesan Jeganathan
Premchand Reghuvaran
Pronoy Baidya
Raja Simma Pandiyan
Raman Kumar
Shivaprakash Adavanne
Suhel Quader
Many congratulations to each of these
eBirders of the Month
for April 2014!
And, as promised, one of these 17 has been chosen using a computer-generated random number

In [7]:
!pip install requests readability-lxml beautifulsoup4 lxml

Collecting readability-lxml
  Downloading readability_lxml-0.8.4.1-py3-none-any.whl.metadata (4.0 kB)
Collecting cssselect (from readability-lxml)
  Downloading cssselect-1.3.0-py3-none-any.whl.metadata (2.6 kB)
Collecting lxml_html_clean (from lxml[html_clean]->readability-lxml)
  Downloading lxml_html_clean-0.4.2-py3-none-any.whl.metadata (2.4 kB)
Downloading readability_lxml-0.8.4.1-py3-none-any.whl (19 kB)
Downloading cssselect-1.3.0-py3-none-any.whl (18 kB)
Downloading lxml_html_clean-0.4.2-py3-none-any.whl (14 kB)
Installing collected packages: lxml_html_clean, cssselect, readability-lxml
Successfully installed cssselect-1.3.0 lxml_html_clean-0.4.2 readability-lxml-0.8.4.1


In [None]:
import pandas as pd
import json
import requests
from readability import Document
from bs4 import BeautifulSoup
from requests.exceptions import RequestException

def get_api_key():
    """Return the Gemini API key without embedding it in the notebook.

    The key is read from the ``GEMINI_API_KEY`` environment variable. If that
    is unset or empty, the user is asked for it through a hidden
    ``getpass`` prompt.

    NOTE(review): a literal key used to be returned here; it should be
    treated as exposed and revoked.
    """
    import getpass
    import os

    key = os.environ.get("GEMINI_API_KEY")
    if key:
        return key
    return getpass.getpass("Gemini API key: ")

def get_prompt_text(page_content=None):
    """Build the extraction prompt around ``page_content``.

    Args:
        page_content: Article text to embed at the end of the prompt. When
            omitted, the function falls back to a module-level variable named
            ``page_content`` (empty string if there is none), so zero-argument
            calls keep working.

    Returns:
        The full prompt string (with leading/trailing newlines; callers strip it).
    """
    if page_content is None:
        # Legacy path: earlier code relied on a notebook-level variable.
        page_content = globals().get("page_content", "")
    return (f"""
Extract structured information from the text below about a birding challenge. For each item, provide the value or list if available, or 'Not found' if missing.

1. Number of birders
2. Number of observations
3. Number of lists
4. Number of species
5. Number of unique lists with media
6. Names of birders (as a list)
7. Winner's name
8. How was the winner chosen
9. Location of the challenge (if any)
10. Upload requirements or conditions for completing the challenge
11. Any tips or important points (list)
12. List of bird species mentioned (if any)

Text:
\"\"\"{page_content}\"\"\"
""")


def extract_main_content(url):
    """Fetch ``url`` and return its main article text as one string.

    Returns a "📄 Title: ..." line, a blank line and the extracted text on
    success. If the request raises a ``RequestException`` (HTTP error statuses
    included, via ``raise_for_status``), a string beginning with
    "❌ Failed to fetch page: " is returned instead of raising.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except RequestException as e:
        return f"❌ Failed to fetch page: {e}"

    # Use Readability to extract the main article content
    doc = Document(response.text)
    html_content = doc.summary()
    title = doc.title()

    # Flatten the article HTML to newline-separated text
    soup = BeautifulSoup(html_content, 'html.parser')
    text = soup.get_text(separator='\n', strip=True)

    return f"📄 Title: {title}\n\n{text}"


def send_to_gemini(api_key: str, page_content: str, timeout: float = 60):
    """Send the extraction prompt for ``page_content`` to Gemini.

    Args:
        api_key: Gemini API key, sent in the ``x-goog-api-key`` header so it
            never appears in the request URL.
        page_content: Article text; passed explicitly into the prompt so the
            request always reflects this argument rather than whatever a
            global of the same name currently holds.
        timeout: Seconds to wait for the HTTP call before ``requests`` gives up.

    Returns:
        The decoded JSON body of the response (only for HTTP 200).

    Raises:
        Exception: for any non-200 status, with the code and response body.
        requests exceptions (timeouts, connection errors) propagate unchanged.
    """
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": api_key,
    }

    # Build the prompt with instructions + the scraped content
    prompt_text = get_prompt_text(page_content)

    data = {
        "contents": [
            {
                "parts": [
                    {
                        "text": prompt_text.strip()
                    }
                ]
            }
        ]
    }

    # A timeout prevents a stalled connection from hanging the notebook cell.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)

    if response.status_code == 200:
        return response.json()
    raise Exception(f"Error {response.status_code}: {response.text}")

def pretty_print_gemini_response(response_json):
    """Print the text of the first candidate in a decoded Gemini reply.

    Args:
        response_json: Response dict, navigated with ``.get`` through
            ``candidates`` -> ``content`` -> ``parts`` -> ``text``.

    The text of every part of the first candidate is concatenated, so a reply
    split across several parts is shown in full rather than cut off after the
    first part. A short notice is printed when candidates or parts are
    missing. Any exception raised during the lookup is caught and printed
    rather than propagated. Returns ``None``.
    """
    try:
        candidates = response_json.get("candidates", [])
        if not candidates:
            print("No candidates found in response.")
            return

        content = candidates[0].get("content", {})
        parts = content.get("parts", [])
        if not parts:
            print("No parts found in content.")
            return

        # Join all parts; parts without text contribute nothing.
        text = "".join(part.get("text") or "" for part in parts)

        # Print clean output
        print("\n=== Extracted Structured Information ===\n")
        print(text.strip())

    except Exception as e:
        print(f"Error while parsing response: {e}")

if __name__ == "__main__":
    # Spreadsheet of article links; the "Article URL" column is the input list.
    df = pd.read_excel("/content/ebird_challenges_20250608.xlsx")
    urls = df["Article URL"].dropna().tolist()
    # Cap on how many articles are processed in one run; raise it to process more.
    MAX_URLS = 1
    urls = urls[:MAX_URLS]
    API_KEY = get_api_key()

    for idx, url in enumerate(urls, start=1):
        print(f"\n🔗 Processing URL {idx}: {url}")
        try:
            page_content = extract_main_content(url)
            if page_content.startswith("❌ Failed to fetch page"):
                # extract_main_content reports fetch errors as a string; show
                # it and move on instead of sending the error text to the model.
                print(page_content)
                continue
            response = send_to_gemini(API_KEY, page_content)

            print("📥 Gemini Response:")
            pretty_print_gemini_response(response)
        except Exception as e:
            print(f"❌ Failed to process URL {idx}: {e}")


🔗 Processing URL 1: https://birdcount.in/jun25-challenge/
📥 Gemini Response:

=== Extracted Structured Information ===

Here's the structured information extracted from the text:

1.  **Number of birders:** Not found
2.  **Number of observations:** Not found
3.  **Number of lists:** 25 eligible checklists
4.  **Number of species:** Not found
5.  **Number of unique lists with media:** Not found
6.  **Names of birders (as a list):** Not found
7.  **Winner's name:** Not found
8.  **How was the winner chosen:** Not found
9.  **Location of the challenge (if any):** India
10. **Upload requirements or conditions for completing the challenge:**
    *   25 eligible checklists
    *   At least five checklists must contain a brood-parasitic cuckoo
    *   Eligible checklists must report ALL species seen/heard (and are marked 'complete')
    *   Include counts for all species numbers (no 'X' entries)
    *   Checklists must be at least 15 minutes or longer in duration
    *   Upload all lists by 