In [None]:
import requests
from bs4 import BeautifulSoup
import time
import json
from datetime import datetime
import os

In [None]:
def get_articles(base_url):
    """Scrape a publications listing page and download every linked article.

    The listing at ``base_url`` is parsed for ``div.views-row`` entries. For
    each entry that has an ``<h2><a href=...>`` link, the linked article page
    is fetched through ``get_article_text`` and a record is collected.

    Args:
        base_url: URL of the listing page. Article links found on the page
            are resolved relative to this URL.

    Returns:
        A list of dicts with the keys ``'title'``, ``'url'``, ``'date'``,
        ``'summary'`` and ``'text'``. An empty list is returned when the
        listing page cannot be fetched (network failure or non-200 status).
    """
    # Function-scope import: urljoin is not among the file's top-level imports.
    from urllib.parse import urljoin

    headers = {'User-Agent': 'Mozilla/5.0'}

    try:
        # Without a timeout, requests waits forever on an unresponsive server.
        response = requests.get(base_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching page: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching page: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    articles_list = soup.find_all('div', class_='views-row')
    print(f"Found {len(articles_list)} articles.")

    all_articles = []
    for article in articles_list:
        title_tag = article.find('h2')
        link_tag = title_tag.find('a') if title_tag else None
        # An anchor without an href cannot be followed; skip it instead of
        # raising KeyError.
        href = link_tag.get('href') if link_tag else None
        if not href:
            continue

        date_tag = article.find('span', class_='datespan')
        summary_tag = article.find('p')

        title = link_tag.text.strip()
        # urljoin handles root-relative, page-relative and absolute hrefs;
        # plain string concatenation only worked for root-relative ones.
        article_url = urljoin(base_url, href)
        date = date_tag.text.strip() if date_tag else "Unknown date"
        summary = summary_tag.text.strip() if summary_tag else "No summary available"

        print(f"Fetching article: {title} ({article_url})")
        text = get_article_text(article_url)
        # Keep the downloaded body in the record; it was previously fetched
        # and then thrown away.
        all_articles.append({
            'title': title,
            'url': article_url,
            'date': date,
            'summary': summary,
            'text': text,
        })
        # Pause between article requests to avoid hammering the site.
        time.sleep(2)

    return all_articles

def get_article_text(article_url, timeout=30):
    """Download one article page and return its main text.

    Args:
        article_url: URL of the article page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely.

    Returns:
        The stripped text of the first ``div`` with class ``field-item``.
        If the page has no such element, ``"No content found"`` is returned.
        If the request fails or the status is not 200,
        ``"Error fetching article"`` is returned.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}

    try:
        response = requests.get(article_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report connection problems the same way as a bad status code so a
        # single failing article does not abort the whole scrape.
        print(f"Error fetching article: {article_url} ({exc})")
        return "Error fetching article"

    if response.status_code != 200:
        print(f"Error fetching article: {article_url}")
        return "Error fetching article"

    soup = BeautifulSoup(response.text, 'html.parser')
    content = soup.find('div', class_='field-item')

    return content.text.strip() if content else "No content found"

def save_articles(articles):
    """Write *articles* as JSON to ``articles_<YYYY-MM-DD>.json``.

    The file is created in the current working directory, named after the
    local date at call time, and overwritten if it already exists. Output is
    UTF-8 with non-ASCII characters kept as-is and a four-space indent. A
    confirmation line is printed once the file has been written.
    """
    date_stamp = datetime.now().strftime("%Y-%m-%d")
    filename = "articles_" + date_stamp + ".json"

    with open(filename, mode="w", encoding="utf-8") as out_file:
        json.dump(articles, fp=out_file, indent=4, ensure_ascii=False)

    print(f"Saved {len(articles)} articles to {filename}")

if __name__ == "__main__":
    # Listing URL, split per query parameter for readability. Adjacent string
    # literals are concatenated into a single URL string.
    base_url = (
        "https://www.understandingwar.org/publications"
        "?type%5B0%5D=backgrounder"
        "&type%5B1%5D=map"
        "&type%5B2%5D=other_work"
        "&type%5B3%5D=report"
        "&tid%5B0%5D=300"
        "&field_lastname_value="
        "&sort_by=created"
        "&sort_order=DESC"
    )
    articles = get_articles(base_url)

    # Only write a file when the scrape produced at least one record.
    if not articles:
        print("No new articles found.")
    else:
        save_articles(articles)


Found 10 articles.
Fetching article: Russian Offensive Campaign Assessment, March 26, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-march-26-2025)
Fetching article: Russian Offensive Campaign Assessment, March 25, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-march-25-2025)
Fetching article: Russian Offensive Campaign Assessment, March 24, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-march-24-2025)
Fetching article: Putin is Still Stealing Ukrainian Children (https://www.understandingwar.org/backgrounder/putin-still-stealing-ukrainian-children)
Fetching article: Russian Offensive Campaign Assessment, March 23, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-march-23-2025)
Fetching article: Russian Offensive Campaign Assessment, March 22, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaig