Skip to content

Commit 20c8328

Browse files
mlkgrnt and claude authored
feat(route): add EFE Noticias route (#22165)
* feat(route): add EFE Noticias route Add route for EFE (Agencia EFE) Spanish news agency with 10 categories: mundo, espana, economia, cultura, ciencia-y-tecnologia, deportes, salud, medio-ambiente, educacion, euro-efe. Features: - Article images via enclosure and inline in description - Ad filtering (auto-banner, srr-main, promotional logos) - Clean img attributes for RSS reader compatibility - Configurable item limit via query parameter * fix(route/efe): address review comments - Remove `limit` from `parameters` (query param, not path param) - Scope article link selector to `.elementor-loop-container` to avoid anti-pattern #1 - Remove unnecessary `.first()` on `.elementor-widget-theme-post-content` - Remove `.srr-main` from cleanup (not inside content area), keep `decoding`/`loading` on images Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix: address review comments for EFE route - Fix parameters to structured object with only path params - Scope selector to .elementor-post article links only - Remove unnecessary .first() on title - Stop stripping image attributes (decoding, loading, width, etc.) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix: revert overly specific selector that broke all routes .elementor-post class doesn't exist on EFE site, use attribute selector to pre-filter article URLs instead. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix: fetch articles sequentially to avoid 429 rate limiting Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix: suppress no-await-in-loop lint for sequential rate-limited fetches Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix: use specific selector for article title links only Select only .elementor-widget-theme-post-title a inside .e-loop-item to avoid matching image links, category links, or duplicate entries. 
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix: use single quotes for static string (oxlint stylistic/quotes) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix: use Promise.all and remove Set (selector matches once per article) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * refactor: use Array.map instead of push for link collection Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com> * fix(efe): use pMap to limit concurrency and avoid 429 rate limiting --------- Co-authored-by: mlkgrnt <mlkgrnt@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent a5573cd commit 20c8328

2 files changed

Lines changed: 104 additions & 0 deletions

File tree

lib/routes/efe/index.ts

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import { load } from 'cheerio';
2+
import pMap from 'p-map';
3+
4+
import type { Route } from '@/types';
5+
import cache from '@/utils/cache';
6+
import ofetch from '@/utils/ofetch';
7+
import { parseDate } from '@/utils/parse-date';
8+
9+
// Site origin; the handler uses it to build the category page URL and to match article links.
const rootUrl = 'https://efe.com';
10+
11+
/**
 * Display labels for the known EFE category slugs.
 *
 * Each entry is `[slug, label]`: the slug is looked up by the handler and the
 * label is used in the feed title.
 */
const categories: Record<string, string> = Object.fromEntries([
    ['mundo', 'Mundo'],
    ['espana', 'España'],
    ['economia', 'Economía'],
    ['cultura', 'Cultura'],
    ['ciencia-y-tecnologia', 'Ciencia y Tecnología'],
    ['deportes', 'Deportes'],
    ['salud', 'Salud'],
    ['medio-ambiente', 'Medio Ambiente'],
    ['educacion', 'Educación'],
    ['euro-efe', 'EuroEFE'],
]);
23+
24+
/**
 * Route definition for EFE category feeds.
 *
 * The optional `category` path parameter selects the section; `handler`
 * builds the feed. The radar rule maps `efe.com/:category` pages to this route.
 */
export const route: Route = {
    path: '/:category?',
    name: 'Category',
    maintainers: ['mlkgrnt'],
    example: '/efe/mundo',
    parameters: {
        category: {
            // The accepted slugs are listed inline because this route defines no separate description table to point at.
            description:
                'Category slug, one of `mundo`, `espana`, `economia`, `cultura`, `ciencia-y-tecnologia`, `deportes`, `salud`, `medio-ambiente`, `educacion`, `euro-efe`. Defaults to `mundo`.',
            default: 'mundo',
        },
    },
    handler,
    categories: ['new-media'],
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [
        {
            source: ['efe.com/:category'],
            target: '/:category',
        },
    ],
};
52+
53+
/**
 * Build the feed for one EFE category page.
 *
 * Reads the optional `category` path parameter (default `mundo`) and the
 * optional `limit` query parameter (default 20), collects article links from
 * the category listing, then loads each article to extract its title,
 * publication date and content.
 *
 * @param ctx - Request context; only `ctx.req.param` and `ctx.req.query` are used.
 * @returns Feed data with `title`, `link` and `item`.
 */
async function handler(ctx) {
    const category = ctx.req.param('category') || 'mundo';

    // A non-numeric `limit` parses to NaN and `slice(0, NaN)` yields no items; zero
    // also empties the feed and a negative value would be counted from the end of
    // the list. Anything that is not a positive integer falls back to the default.
    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : 20;

    const pageUrl = `${rootUrl}/${category}/`;

    const response = await ofetch(pageUrl);
    const $ = load(response);

    // Keep only title links under this category's path that contain a /YYYY-MM-DD/ segment.
    const datedSegment = /\/\d{4}-\d{2}-\d{2}\//;
    const links = $('.e-loop-item .elementor-widget-theme-post-title a[href]')
        .toArray()
        .map((el) => $(el).attr('href'))
        .filter((href): href is string => !!href && href.startsWith(pageUrl) && datedSegment.test(href));

    const items = await pMap(
        links.slice(0, limit),
        (link) =>
            cache.tryGet(link, async () => {
                const detail = await ofetch(link);
                const $detail = load(detail);

                const title = $detail('title').text();

                // The publication date is read from the first "datePublished" value in the raw page source.
                const dateMatch = detail.match(/"datePublished":\s*"([^"]+)"/);
                const pubDate = dateMatch ? parseDate(dateMatch[1]) : undefined;

                const image = $detail('meta[property="og:image"]').attr('content');
                const content = $detail('.elementor-widget-theme-post-content');
                // Strip ad banners embedded in the article body.
                content.find('.auto-banner').remove();
                // Lead with the og:image (when present), followed by the article body HTML.
                const description = (image ? `<figure><img src="${image}"></figure>` : '') + (content.html() || '');

                return {
                    title,
                    link,
                    pubDate,
                    description,
                };
            }),
        // Low concurrency keeps article fetches from triggering HTTP 429 responses.
        { concurrency: 2 }
    );

    return {
        title: `EFE Noticias - ${categories[category] || category}`,
        link: pageUrl,
        item: items,
    };
}

lib/routes/efe/namespace.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import type { Namespace } from '@/types';
2+
3+
/**
 * Namespace metadata for the EFE routes: display name, site host and
 * content language code.
 */
export const namespace: Namespace = {
    name: 'EFE Noticias',
    url: 'efe.com',
    lang: 'es',
};

0 commit comments

Comments
 (0)