In [7]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import re

def parse_relative_date(date_str):
    """Convert a news date label into a naive local ``datetime``.

    Recognised inputs (matched case-insensitively):

    * relative labels in English or French that contain a number and a
      unit word, e.g. ``'3 hours ago'`` or ``'il y a 2 jours'``;
    * the number-less labels ``'Today'`` / ``"Aujourd'hui"`` and
      ``'Yesterday'`` / ``'Hier'``;
    * absolute dates such as ``'March 10, 2024'`` or ``'Mar 10, 2024'``.

    Args:
        date_str: The raw label text.

    Returns:
        The computed ``datetime``; ``datetime.min`` when the label cannot be
        interpreted (including non-string input such as ``None``), so that
        unparseable rows sort last in a most-recent-first ordering.
    """
    if not isinstance(date_str, str):
        return datetime.min

    text = date_str.strip()
    lowered = text.lower()
    now = datetime.now()

    # Number-less labels must be handled before the unit scan below:
    # "yesterday" contains "day" and "aujourd'hui" contains "jour", so they
    # would otherwise match a unit without providing any digits.
    if lowered == "today" or lowered.startswith("aujourd"):
        return now
    if lowered in ("yesterday", "hier"):
        return now - timedelta(days=1)

    # (keywords, timedelta argument) pairs, checked in the same order as the
    # original if/elif chain.
    units = (
        (("hour", "heure"), "hours"),
        (("minute",), "minutes"),
        (("day", "jour"), "days"),
        (("week", "semaine"), "weeks"),
    )
    amount = re.search(r'\d+', lowered)
    if amount is not None:
        for keywords, unit in units:
            if any(word in lowered for word in keywords):
                try:
                    return now - timedelta(**{unit: int(amount.group())})
                except OverflowError:
                    # Absurdly large offsets fall outside datetime's range.
                    return datetime.min

    # Absolute dates: full month name first, then the abbreviated form.
    for fmt in ("%B %d, %Y", "%b %d, %Y"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue

    return datetime.min  # Default to a very old date if parsing fails

def fetch_articles(query, language, country):
    """Fetch news articles from Google News based on the query and language settings.

    Args:
        query: Free-text search terms. They are passed as a request
            parameter, so spaces, apostrophes and accents are URL-encoded
            by ``requests`` rather than pasted raw into the URL.
        language: Value for the ``hl`` parameter (e.g. ``'en'``); also
            stored in the ``Language`` column of every row.
        country: Value for the ``gl`` parameter (e.g. ``'US'``); combined
            with ``language`` to form ``ceid`` (``'US:en'``).

    Returns:
        A ``pandas.DataFrame`` with the columns ``Title``, ``Source``,
        ``Raw Date``, ``Parsed Date``, ``Link`` and ``Language``, one row per
        ``<article>`` element found. Text fields that are missing from the
        markup are ``"N/A"``. The frame is empty (columns only) when the
        server answers with a status other than 200.

    Raises:
        requests.RequestException: If the request itself fails, including
            when no response arrives within the timeout.
    """
    # Local import so the block does not rely on a new file-level import.
    from urllib.parse import urljoin

    columns = ['Title', 'Source', 'Raw Date', 'Parsed Date', 'Link', 'Language']
    base_url = 'https://news.google.com/'

    # Send HTTP request; a timeout keeps a stalled connection from hanging
    # the caller forever.
    response = requests.get(
        base_url + 'search',
        params={
            'q': query,
            'hl': language,
            'gl': country,
            'ceid': f'{country}:{language}',
        },
        timeout=10,
    )

    # Check if request was successful
    if response.status_code != 200:
        print(f"Error fetching articles for {query} ({language}): HTTP {response.status_code}")
        return pd.DataFrame(columns=columns)

    # Parse HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    rows = []
    for article in soup.find_all('article'):
        # NOTE(review): these CSS class names are hard-coded against the
        # page markup — adjust them if the selectors stop matching.
        title_tag = article.find('a', class_='JtKRv')
        date_tag = article.find('time')
        source_tag = article.find('div', class_='vr1PYe')

        title = title_tag.get_text() if title_tag else "N/A"
        date_text = date_tag.get_text() if date_tag else "N/A"
        source = source_tag.get_text() if source_tag else "N/A"

        # Convert date to datetime object
        parsed_date = parse_relative_date(date_text)

        # Resolve the href against the site root: './read/x' and '/read/x'
        # both become 'https://news.google.com/read/x', and an already
        # absolute URL is left untouched. A missing anchor or href yields
        # "N/A" instead of a link to the bare home page.
        href = title_tag.get('href') if title_tag else None
        link = urljoin(base_url, href) if href else "N/A"

        rows.append([title, source, date_text, parsed_date, link, language])

    # Create a Pandas DataFrame
    return pd.DataFrame(rows, columns=columns)

# Combine English and French results
def search_ai_environmental_impact_articles():
    """Return English and French AI-environment articles, newest first.

    Runs one ``fetch_articles`` search per language (English/US first, then
    French/FR), stacks the two result frames with a fresh integer index,
    and orders the rows by the ``'Parsed Date'`` column in descending order.
    """
    searches = (
        ("AI environmental impact", "en", "US"),
        ("impact environnemental de l'IA", "fr", "FR"),
    )
    frames = [fetch_articles(*search) for search in searches]

    merged = pd.concat(frames, ignore_index=True)
    newest_first = merged.sort_values(by='Parsed Date', ascending=False)
    return newest_first.reset_index(drop=True)

# Example usage
if __name__ == "__main__":
    # Columns shown in the preview ('Parsed Date' is left out of the display).
    display_columns = ['Title', 'Source', 'Raw Date', 'Link', 'Language']

    df_articles = search_ai_environmental_impact_articles()

    # Print up to 30 rows from the top of the sorted results
    # (fewer if not enough articles were found).
    preview = df_articles[display_columns].head(30)
    print(preview)


                                                Title  \
0   Julien Casiro : pourquoi l’IA consomme-t-elle ...   
1   AI infrastructure’s environmental costs clash ...   
2   AI’s energy crisis is here: Why artificial int...   
3                     The environmental impacts of AI   
4   La recherche s'attaque au casse-tête de la mes...   
5          How AI is playing a role in climate change   
6   What If Generative AI Could Help Save The Envi...   
7   Hope: The AI Act’s Approach to Address the Env...   
8   L’Observatoire mondial sur l’impact environnem...   
9   Investir dans l’IA, un pari autant coûteux que...   
10  IA : comment les chercheurs et chercheuses s’a...   
11  Environnement: rendre l'intelligence artificie...   
12  DATA CENTERS, INTELLIGENCE ARTIFICIELLE: UN CO...   
13  Les déchets électroniques, l'autre impact envi...   
14  Un référentiel pour mesurer et réduire l’impac...   
15      Un Observatoire pour rendre l'IA plus durable   
16    Explained: Generative AI’