### **Disaster News Monitoring Script**

This Python script monitors disaster-related news for the ICPAC countries using the GNews API (article search) and the OpenCage API (geocoding). For each article it extracts a location and the reported numbers of deaths, people affected (or injured), and people displaced from the article content, then geocodes the location to obtain latitude and longitude coordinates. The script runs in a loop: at a fixed interval it queries for new articles matching the specified keywords, prints how many new articles were found, and saves the gathered data to a CSV file. The main section sets the API keys, the search query, the country list, the check interval, and the output file name.

Import Libraries

In [3]:
import requests
import csv
import time
import re
from datetime import datetime, timedelta


Extract Additional Information Function

In [4]:
# Regex patterns for extracting information (compiled once, at import time).
#
# Location heuristic: a preposition that starts on a word boundary, followed
# by a run of capitalised words on the same line ("in South Sudan"). The word
# boundary stops "rain fell" from being read as "in fell", and restricting
# the capture to capitalised words stops it from swallowing the rest of the
# sentence.
_LOCATION_PATTERN = re.compile(
    r'\b(?:[Ii]n|[Aa]t|[Nn]ear|[Aa]round)[ \t]+'
    r'([A-Z][A-Za-z]*(?:[ \t]+[A-Z][A-Za-z]*)*)'
)
# A count is either a comma-grouped number ("1,200") or a plain digit run.
_COUNT = r'(\d{1,3}(?:,\d{3})+|\d+)'
_DEATHS_PATTERN = re.compile(_COUNT + r'\s+(?:deaths?|dead)\b')
_AFFECTED_PATTERN = re.compile(_COUNT + r'\s+(?:affected|injured)\b')
_DISPLACED_PATTERN = re.compile(_COUNT + r'\s+displaced\b')


def _first_count(pattern, text):
    """Return the first number matched by *pattern* in *text*, or 0."""
    match = pattern.search(text)
    if match is None:
        return 0
    return int(match.group(1).replace(',', ''))


# Function to extract additional information from article content
def extract_additional_info(content):
    """Extract a location and casualty figures from article text.

    Args:
        content: The article body. ``None`` or an empty string is accepted
            and yields the default result.

    Returns:
        A dict with the keys ``'location'`` (str or ``None``), ``'deaths'``,
        ``'affected'`` and ``'displaced'`` (ints, 0 when nothing is found).
        Only the first occurrence of each pattern is used.
    """
    info = {
        'location': None,
        'deaths': 0,
        'affected': 0,
        'displaced': 0
    }

    # Some articles carry no body at all; there is nothing to search then.
    if not content:
        return info

    location_match = _LOCATION_PATTERN.search(content)
    if location_match:
        info['location'] = location_match.group(1)

    info['deaths'] = _first_count(_DEATHS_PATTERN, content)
    info['affected'] = _first_count(_AFFECTED_PATTERN, content)
    info['displaced'] = _first_count(_DISPLACED_PATTERN, content)

    return info


Geocode Location Function

In [5]:
# Function to geocode a location
def geocode_location(api_key, location, timeout=10):
    """Look up the coordinates of *location* with the OpenCage geocoder.

    Args:
        api_key: OpenCage API key.
        location: Free-text place name. Empty or ``None`` values are not
            sent to the API.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(lat, lng)`` tuple taken from the first result, or
        ``(None, None)`` if the location is empty, the request fails, the
        response is not HTTP 200 / not valid JSON, or there are no results.
    """
    # Do not spend an API call on an empty query.
    if not location or not str(location).strip():
        return None, None

    geocode_url = 'https://api.opencagedata.com/geocode/v1/json'
    params = {
        'q': location,
        'key': api_key,
        'limit': 1
    }

    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        response = requests.get(geocode_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f'Geocoding request failed for {location!r}: {exc}')
        return None, None

    if response.status_code != 200:
        return None, None

    try:
        results = response.json().get('results') or []
    except ValueError:
        # Body was not valid JSON.
        return None, None

    if not results:
        return None, None

    geometry = results[0].get('geometry') or {}
    return geometry.get('lat'), geometry.get('lng')


Get GNews Articles Function

In [6]:
# Function to get news articles from GNews
def get_gnews_articles(api_key, query, countries, from_date, to_date,
                       geocoding_api_key=None, timeout=10):
    """Search GNews once per country and enrich each article found.

    Every article dict returned by the API is updated in place with the keys
    produced by ``extract_additional_info`` and, when a location was found
    and a geocoding key is available, with ``'latitude'`` and
    ``'longitude'``.

    Args:
        api_key: GNews API token.
        query: Search expression; the country name is appended to it.
        countries: Iterable of country names, one request per country.
        from_date: Lower bound of the publication window (sent as ``from``).
        to_date: Upper bound of the publication window (sent as ``to``).
        geocoding_api_key: OpenCage key used for geocoding. When omitted,
            the module-level ``opencage_api_key`` is used if it exists;
            if neither is available, geocoding is skipped.
        timeout: Seconds to wait for each GNews request.

    Returns:
        A list of article dicts. An article reported for several countries
        (same ``url``) is included only once.
    """
    if geocoding_api_key is None:
        # Backward compatibility: earlier versions read this global, which
        # only exists when the file is run as a script.
        geocoding_api_key = globals().get('opencage_api_key')

    articles = []
    seen_urls = set()
    geocode_cache = {}  # location -> (lat, lng); avoids repeated lookups
    base_url = 'https://gnews.io/api/v4/search'

    for country in countries:
        params = {
            'q': f"{query} {country}",
            'from': from_date,
            'to': to_date,
            'lang': 'en',
            'token': api_key,
            'max': 100
        }

        try:
            response = requests.get(base_url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # One failing country must not abort the remaining queries.
            print(f'Request for {country} failed: {exc}')
            continue

        if response.status_code != 200:
            print(f'Error {response.status_code}: {response.text}')
            continue

        try:
            found = response.json().get('articles', [])
        except ValueError:
            print(f'Error: response for {country} was not valid JSON')
            continue

        for article in found:
            url = article.get('url')
            if url is not None:
                if url in seen_urls:
                    continue
                seen_urls.add(url)

            # Extract additional info; the content field may be absent/None.
            additional_info = extract_additional_info(article.get('content') or '')
            article.update(additional_info)  # Add additional info to the article

            # Geocode the location if found
            location = additional_info['location']
            if location and geocoding_api_key:
                if location not in geocode_cache:
                    geocode_cache[location] = geocode_location(geocoding_api_key, location)
                lat, lng = geocode_cache[location]
                article['latitude'] = lat
                article['longitude'] = lng

            articles.append(article)

    return articles


Save Articles to CSV Function

In [7]:
# Function to save articles to CSV
def save_articles_to_csv(articles, filename):
    """Write *articles* to *filename* as CSV, replacing any existing file.

    Args:
        articles: Iterable of article dicts. The keys read are ``title``,
            ``publishedAt``, ``source`` (a dict with ``name``), ``url``,
            ``location``, ``longitude``, ``latitude``, ``deaths``,
            ``affected`` and ``displaced``.
        filename: Path of the CSV file to create.

    Text and coordinate fields that are missing or ``None`` are written as
    ``'N/A'``; missing or ``None`` counts are written as ``0``.
    """
    def _field(record, key, default):
        # A key can be present with value None (e.g. no location extracted,
        # or geocoding failed); treat that like a missing key. An explicit
        # None test keeps legitimate falsy values such as latitude 0.0.
        value = record.get(key)
        return default if value is None else value

    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Title', 'PublishedAt', 'Source', 'URL', 'Location', 'Longitude', 'Latitude', 'Deaths', 'Affected', 'Displaced'])  # Header row
        for article in articles:
            source = article.get('source') or {}
            writer.writerow([
                _field(article, 'title', 'N/A'),
                _field(article, 'publishedAt', 'N/A'),
                _field(source, 'name', 'N/A'),
                _field(article, 'url', 'N/A'),
                _field(article, 'location', 'N/A'),
                _field(article, 'longitude', 'N/A'),
                _field(article, 'latitude', 'N/A'),
                _field(article, 'deaths', 0),
                _field(article, 'affected', 0),
                _field(article, 'displaced', 0)
            ])


Monitor Updates Function

In [8]:
# Function to monitor updates
def monitor_updates(api_key, query, countries, check_interval, output_file, max_checks=None):
    """Poll GNews at a fixed interval and keep the CSV file up to date.

    Each check queries the window from the end of the previous check up to
    the current time. Articles found are accumulated across checks
    (de-duplicated by URL) and the whole collection is rewritten to
    *output_file*, so earlier results are not lost when a later check finds
    new articles. The collection is held in memory for the life of the call.

    Args:
        api_key: GNews API token.
        query: Search expression passed to ``get_gnews_articles``.
        countries: Country names passed to ``get_gnews_articles``.
        check_interval: Seconds to sleep between checks.
        output_file: Path of the CSV file to write.
        max_checks: Optional number of checks to perform before returning.
            ``None`` (the default) keeps monitoring until interrupted.
    """
    # Local import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    def _stamp(moment):
        # UTC, second resolution, 'Z' suffix -- the ISO 8601 form shown in
        # the GNews search documentation for the from/to parameters.
        return moment.strftime('%Y-%m-%dT%H:%M:%SZ')

    last_checked = datetime.now(timezone.utc)
    collected = []
    seen_urls = set()
    checks_done = 0

    while max_checks is None or checks_done < max_checks:
        # Fix the end of the window *before* fetching, and reuse it as the
        # start of the next window, so articles published while the requests
        # are running are not skipped.
        window_end = datetime.now(timezone.utc)
        print(f"Checking for updates at {window_end.astimezone().strftime('%Y-%m-%d %H:%M:%S')}...")

        # Get articles from the last check time to now
        articles = get_gnews_articles(api_key, query, countries, _stamp(last_checked), _stamp(window_end))

        fresh = []
        for article in articles:
            url = article.get('url')
            if url is not None:
                if url in seen_urls:
                    continue
                seen_urls.add(url)
            fresh.append(article)

        if fresh:
            collected.extend(fresh)
            print(f"Found {len(fresh)} new articles.")
            save_articles_to_csv(collected, output_file)
            print(f'Successfully saved {len(collected)} articles to {output_file}.')
        else:
            print("No new articles found.")

        last_checked = window_end  # Update the last checked time
        checks_done += 1
        if max_checks is not None and checks_done >= max_checks:
            break
        time.sleep(check_interval)  # Wait for the specified interval before checking again


Main Function

In [None]:
# Main function
if __name__ == "__main__":
    import os

    # API keys are read from the environment so that secrets are not stored
    # in the source file. Keys that were previously committed here should be
    # treated as exposed and rotated.
    gnews_api_key = os.environ.get('GNEWS_API_KEY')
    # Kept as a module-level name: get_gnews_articles looks it up for geocoding.
    opencage_api_key = os.environ.get('OPENCAGE_API_KEY')
    if not gnews_api_key or not opencage_api_key:
        raise SystemExit(
            'Set the GNEWS_API_KEY and OPENCAGE_API_KEY environment variables before running.'
        )

    query = 'disaster OR hurricane OR calamity OR health OR earthquake OR flood'
    icpac_countries = ['Djibouti', 'Ethiopia', 'Tanzania', 'Eritrea', 'Kenya', 'Burundi', 'Somalia', 'South Sudan', 'Sudan', 'Rwanda', 'Uganda']

    # Set the monitoring interval (e.g., check every 60 seconds)
    # NOTE(review): every check issues one GNews request per country; confirm
    # this interval fits within the request quota of the API plan in use.
    check_interval = 60  # in seconds
    csv_filename = 'disaster_news_icpac.csv'

    # Start monitoring updates
    monitor_updates(gnews_api_key, query, icpac_countries, check_interval, csv_filename)


Checking for updates at 2024-08-02 05:46:36...
Found 101 new articles.
Successfully saved 101 articles to disaster_news_icpac.csv.
Checking for updates at 2024-08-02 05:47:33...
