# In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Configuration
keywords = ["pemanasan global", "global warming", "climate change", "climate crisis", "krisis iklim"]
base_url = "https://www.jawapos.com/search?q="
headers = {
    "User-Agent": "ResearchScraper (EMAIL): This bot is conducting research on climate change in Indonesia for non-profit academic purposes."
}
max_pages_per_keyword = 50  # Set the maximum number of pages to scrape per keyword
request_timeout = 30  # Seconds to wait on any single request; without it a stalled connection hangs the run
article_columns = ["title", "link", "date", "content"]  # Fixed CSV schema, also used when nothing was found

# Initialize the dictionary to store DataFrames for each keyword
dataframes = {}


def _fetch_soup(url):
    """Download *url* and return its HTML parsed with BeautifulSoup.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    response = requests.get(url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def _parse_search_item(item):
    """Extract title, link and date from one search-result element.

    Returns:
        A dict with keys ``title``, ``link`` and ``date``, or ``None`` when
        any of the expected elements (or the link's ``href``) is missing, so
        that one malformed result cannot abort the whole run.
    """
    title_tag = item.find("h2", class_="latest__title")
    link_tag = item.find("a", class_="latest__link")
    date_tag = item.find("div", class_="latest__date")
    if title_tag is None or link_tag is None or date_tag is None:
        return None

    link = link_tag.get("href")
    if not link:
        return None

    return {"title": title_tag.text.strip(), "link": link, "date": date_tag.text.strip()}


def _fetch_article_text(url):
    """Return the space-joined paragraph text of the article at *url*.

    Returns:
        The joined text of every ``<p>`` inside the article body, or ``None``
        when the page has no ``<article class="read__content">`` element.

    Raises:
        requests.RequestException: Propagated from the page download.
    """
    body = _fetch_soup(url).find("article", class_="read__content")
    if body is None:
        return None
    return " ".join(p.text for p in body.find_all("p"))


# Loop over each keyword to perform search and fetch articles
for keyword in keywords:
    articles = []  # Article records collected for this keyword
    last_scraped_page = 0  # Highest page number that actually returned results
    encoded_keyword = requests.utils.quote(keyword)  # Loop-invariant, so quote once
    print(f"Starting search for keyword: '{keyword}'")

    for page in range(1, max_pages_per_keyword + 1):
        print(f"Fetching page {page} for keyword: '{keyword}'")
        query_url = f"{base_url}{encoded_keyword}&page={page}"
        try:
            soup = _fetch_soup(query_url)
        except requests.RequestException as e:
            # Stop paging for this keyword but keep what was collected so far.
            print(f"Error fetching page {page} for keyword '{keyword}': {e}")
            break

        items = soup.find_all("div", class_="latest__item")

        # An empty page marks the end of the results; stop immediately.
        if not items:
            print(f"No more articles found for '{keyword}' beyond page {last_scraped_page}.")
            break

        # Extract details for each article found
        for item in items:
            record = _parse_search_item(item)
            if record is None:
                print(f"Skipping a malformed search result on page {page} for keyword '{keyword}'.")
                continue
            articles.append(record)

        last_scraped_page = page

    # Fetch the article content for each link collected
    for article in articles:
        content = ""  # Best effort: a failed article keeps its row with empty content
        try:
            article_text = _fetch_article_text(article["link"])
        except requests.RequestException as e:
            print(f"Error fetching article content for '{article['title']}': {e}")
        else:
            if article_text is None:
                print(f"Error fetching article content for '{article['title']}': article body not found")
            else:
                content = article_text
        article["content"] = content

    # Convert the list of article details into a DataFrame; explicit columns
    # keep the CSV header consistent even when no articles were found.
    df_keyword = pd.DataFrame(articles, columns=article_columns)

    # Save the DataFrame to a CSV file, named by keyword and the last page that yielded results
    keyword_filename = f"{keyword.replace(' ', '_').lower()}_articles_{last_scraped_page}.csv"
    df_keyword.to_csv(keyword_filename, index=False)
    print(f"Articles for '{keyword}' saved to '{keyword_filename}'.")

    # Add the DataFrame to the `dataframes` dictionary
    dataframes[keyword] = df_keyword
