# Run Script


In [9]:
import json
import datetime as dt
from pathlib import Path
import requests


def fetch_json(url, timeout=30):
    """Return the decoded JSON body served at url.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without one a stalled
            connection would block the script indefinitely.

    Returns:
        The response body as parsed by ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within timeout.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def convert_entities(text):
    """Replace each non-ASCII character with a numeric character reference."""
    # ASCII characters (including <, > and &) pass through untouched.
    ascii_bytes = text.encode('ascii', errors='xmlcharrefreplace')
    return str(ascii_bytes, 'ascii')


def convert_data(data):
    """Recursively apply convert_entities to every string value in data.

    Dict values and list elements are walked; dict keys and any value that
    is not a dict, list or str are returned as they are.
    """
    if isinstance(data, str):
        return convert_entities(data)
    if isinstance(data, list):
        return list(map(convert_data, data))
    if isinstance(data, dict):
        converted = {}
        for key, value in data.items():
            converted[key] = convert_data(value)
        return converted
    return data


def write_json(data, path):
    """Serialize data as indented JSON and save it at path as UTF-8."""
    # ensure_ascii=False keeps any non-ASCII characters as-is in the file.
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    path.write_text(serialized, encoding='utf-8')


def build_list(data):
    """Return the HTML list items for data, one ``<li>`` per line.

    Args:
        data: Iterable of mappings, each providing 'link', 'title',
            'source' and 'pubdate'.

    Returns:
        The ``<li>`` elements joined with newlines; an empty string when
        data is empty.

    Raises:
        KeyError: If an item lacks one of the four keys.
    """
    items = []
    for item in data:
        # The href attribute is delimited by single quotes, so a raw
        # apostrophe in the URL would end the attribute early. Encode it as
        # a character reference, which browsers decode back to "'".
        # Only the apostrophe is touched so entities already present in the
        # value are not double-escaped.
        link = str(item['link']).replace("'", '&#39;')
        row = (
            f"<li><a href='{link}'>{item['title']}</a>"
            f"<div class='byline small text-muted'>"
            f"{item['source']}, "
            f"<span class=\"datetime\">{item['pubdate']}</span></div>"
            f"</li>"
        )
        items.append(row)
    return '\n'.join(items)


def fill_template(template, timestamp, list_html):
    """Return template with its {TIME} and {LIST} placeholders filled in."""
    # Applied in this order: {TIME} first, then {LIST}.
    substitutions = (('{TIME}', timestamp), ('{LIST}', list_html))
    page = template
    for placeholder, value in substitutions:
        page = page.replace(placeholder, value)
    return page


def write_files(page, archive_dir):
    """Write the index page and an archive copy of it.

    Args:
        page: Text to save.
        archive_dir: Directory (a ``Path``) that receives the archive copy;
            created, together with any missing parents, if absent.

    Returns:
        Path of the archive file, named after the current UTC hour
        (``YYYY-MM-DD-HH-00-00.md``). A second run within the same hour
        overwrites that file.
    """
    Path('index.md').write_text(page, encoding='utf-8')
    # parents=True so a nested archive location does not raise.
    archive_dir.mkdir(parents=True, exist_ok=True)
    # Timezone-aware replacement for datetime.utcnow(), which is deprecated
    # since Python 3.12; the formatted string is the same.
    ts = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d-%H-00-00')
    archive = archive_dir / f'{ts}.md'
    archive.write_text(page, encoding='utf-8')
    return archive


def update_archive_index(archive_dir):
    """Rewrite archive_dir/index.md as a list of links to the archived pages.

    Every ``*.md`` file in archive_dir except index.md itself gets one
    Markdown bullet, in sorted path order, below a front-matter header.
    """
    header = '---\nlayout: default\ntitle: News Archive\n---\n\n'
    pages = [
        entry for entry in archive_dir.glob('*.md')
        if entry.name != 'index.md'
    ]
    pages.sort()
    bullets = ''.join(f'- [{entry.stem}]({entry.name})\n' for entry in pages)
    index_path = archive_dir / 'index.md'
    index_path.write_text(header + bullets, encoding='utf-8')


def main():
    """Fetch the news data, save it as JSON and rebuild the pages.

    Steps, in order:
      1. Download top.json, convert its text to HTML entities and save it
         as top.json.
      2. Download the 1h/24h/7d headline-analysis files and save them
         without entity conversion.
      3. Render template.md with the current UTC time and the top list,
         write index.md plus an archive copy, and regenerate the archive
         index.
    """
    top_url = 'https://analysis.castromedia.org/analysis/news-topics/top.json'
    data = fetch_json(top_url)
    html_data = convert_data(data)
    write_json(html_data, Path('top.json'))
    analysis_urls = {
        '1h.json': 'https://analysis.castromedia.org/analysis/headline_analysis/1h.json',
        '24h.json': 'https://analysis.castromedia.org/analysis/headline_analysis/24h.json',
        '7d.json': 'https://analysis.castromedia.org/analysis/headline_analysis/7d.json'
    }
    for filename, url in analysis_urls.items():
        analysis_data = fetch_json(url)
        write_json(analysis_data, Path(filename))
    # Timezone-aware replacement for datetime.utcnow(), which is deprecated
    # since Python 3.12; the formatted string is the same.
    updated = dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    template = Path('template.md').read_text(encoding='utf-8')
    list_html = build_list(html_data)
    page = fill_template(template, updated, list_html)
    archive_dir = Path('archive')
    archive_path = write_files(page, archive_dir)
    print(f'Archive written to {archive_path}')
    update_archive_index(archive_dir)
    print('Archive index updated')


# Run the pipeline only when this code is executed directly, not on import.
if __name__ == '__main__':
    main()


Archive written to archive\2025-07-23-22-00-00.md
Archive index updated


In [10]:
def load_sorted(path):
    """Load the JSON items stored at path, newest 'pubdate' first.

    The sorted items are passed through convert_data before being returned.
    """
    def published(entry):
        # Parse the item's 'pubdate' string into a datetime for ordering.
        return dt.datetime.strptime(entry['pubdate'], '%Y-%m-%d-%H-%M-%S %z')

    raw = Path(path).read_text(encoding='utf-8')
    entries = sorted(json.loads(raw), key=published, reverse=True)
    return convert_data(entries)

# Render list HTML for each headline-analysis window (1 hour, 24 hours,
# 7 days).
# NOTE(review): the recorded output of this cell is KeyError: 'title'.
# build_list indexes item['title'], so at least one item in these files
# lacks that key -- confirm the headline_analysis schema before reusing
# build_list here.
all1h_html = build_list(load_sorted('1h.json'))
all24h_html = build_list(load_sorted('24h.json'))
all7d_html = build_list(load_sorted('7d.json'))


KeyError: 'title'