Skip to content

Commit f29e5cc

Browse files
authored
feat(route/kleinanzeigen): add Kleinanzeigen (kleinanzeigen.de) (#21959)
* add Kleinanzeigen.de search feed * simplify route name * Update lib/routes/kleinanzeigen/search.ts Co-authored-by: Tony <TonyRL@users.noreply.github.com> * remove title cloning * include all images * improve description parsing ---------
1 parent 0788983 commit f29e5cc

7 files changed

Lines changed: 458 additions & 0 deletions

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import type { Namespace } from '@/types';
2+
3+
/**
 * Namespace metadata for the Kleinanzeigen routes: display name, site host,
 * content language, the categories it is listed under and a short description.
 */
export const namespace: Namespace = {
    name: 'Kleinanzeigen',
    url: 'www.kleinanzeigen.de',
    lang: 'de',
    categories: ['shopping'],
    description: 'Kleinanzeigen is a german marketplace for selling and buying locally, similar to Facebook Marketplace.',
};

lib/routes/kleinanzeigen/search.ts

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import type { Context } from 'hono';
2+
3+
import type { Data, Route } from '@/types';
4+
5+
import { search } from './utils/search';
6+
7+
/**
 * Route definition for the Kleinanzeigen search feed.
 *
 * The single path segment `:routeParams` carries the search options encoded
 * as a query string (see `example`); the accepted keys and their defaults are
 * listed in the markdown table inside `description`.
 */
export const route: Route = {
    path: '/search/:routeParams',
    categories: ['shopping'],
    example: '/kleinanzeigen/search/category=PCs&location=Berlin&radius=20',
    parameters: {
        routeParams: 'Extra parameters, see the table below',
    },
    // Markdown rendered as-is; keep the table columns aligned when editing.
    description: `::: tip
Parameter

| Name            | Description                                                               | Default       |
| --------------- | ------------------------------------------------------------------------- | ------------- |
| query           | Search Query                                                              | undefined     |
| category        | Category (as named on Kleinanzeigen)                                      | undefined     |
| categoryId      | Category ID (advanced)                                                    | undefined     |
| location        | Location (as named on Kleinanzeigen)                                      | undefined     |
| locationId      | Location ID (advanced)                                                    | undefined     |
| radius          | Radius in KM around the Location                                          | 0             |
| sortingField    | Order of the Products (SORTING\\_DATE, PRICE\\_AMOUNT, PRICE\\_AMOUNT\\_DESC) | SORTING\\_DATE |
| minPrice        | minimal Price                                                             | undefined     |
| maxPrice        | maximal Price                                                             | undefined     |
| shippingCarrier | Shipping Carrier (e.g. DHL, HERMES)                                       | undefined     |

:::`,
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [],
    name: 'Search',
    maintainers: ['LunyaaDev'],
    handler,
};
44+
45+
/**
 * Route handler for the search feed.
 *
 * Interprets the `routeParams` path segment as a URL query string and passes
 * the recognised search options on to `search`.
 *
 * @param ctx Hono request context; only the `routeParams` path parameter is read.
 * @returns The promise returned by `search`.
 */
function handler(ctx: Context): Promise<Data> {
    const parsed = new URLSearchParams(ctx.req.param().routeParams);

    // Absent keys (null) and empty values ('') are both forwarded as undefined.
    const read = (key: string): string | undefined => parsed.get(key) || undefined;

    return search({
        query: read('query'),
        category: read('category'),
        categoryId: read('categoryId'),
        location: read('location'),
        locationId: read('locationId'),
        radius: read('radius'),
        sortingField: read('sortingField'),
        minPrice: read('minPrice'),
        maxPrice: read('maxPrice'),
        shippingCarrier: read('shippingCarrier'),
    });
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import { load } from 'cheerio';
2+
import sanitize from 'sanitize-html';
3+
4+
import type { DataItem } from '@/types';
5+
import cache from '@/utils/cache';
6+
import ofetch from '@/utils/ofetch';
7+
8+
/**
 * Escape a scraped value so it can be embedded in a double-quoted HTML attribute.
 */
const escapeHtmlAttribute = (value: string): string => value.replace(/&/g, '&amp;').replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

/**
 * Fetch a product page and turn it into a feed item.
 *
 * The result is stored through `cache.tryGet` under the page URL.
 *
 * @param url URL of the product page; also used as the item link and as the
 *            base for resolving the seller profile link.
 * @returns Feed item with title, link, an HTML description (price, address,
 *          sanitized description text, images), the seller as author and the
 *          category path as a single ' > ' separated entry.
 */
export const getProductPage = (url: string): Promise<DataItem> =>
    cache.tryGet(url, async () => {
        const response = await ofetch(url);
        const $ = load(response);

        const product = $('#viewad-product');
        const sellerLink = $('#viewad-profile-box').find('.userprofile-vip a');

        // Strip the `.is-hidden` helper nodes out of the heading before reading its text.
        const title = product.find('#viewad-title').find('.is-hidden').remove().end().text().trim();

        // Price followed by the shipping note; an absent part must not leave a stray space.
        const price = [product.find('.boxedarticle--price').text().trim(), product.find('.boxedarticle--details--shipping').text().trim()].filter(Boolean).join(' ');

        const address = product.find('[itemprop="address"]').text().trim();

        // Only line breaks survive sanitizing; every other tag is dropped.
        const descriptionHtml = product.find('[itemprop="description"]').html() || '';
        const description = sanitize(descriptionHtml, {
            allowedTags: ['br'],
        });

        // One <img> per picture that has a source. Attribute values come from the
        // scraped page, so they are escaped, and a missing alt becomes an empty
        // string instead of the literal text "undefined".
        const imagesHtml = product
            .find('#viewad-image')
            .toArray()
            .flatMap((img) => {
                const src = $(img).attr('src');
                if (!src) {
                    return [];
                }
                const alt = $(img).attr('alt') ?? '';
                return [`<img src="${escapeHtmlAttribute(src)}" alt="${escapeHtmlAttribute(alt)}" />`];
            })
            .join('<br>');

        // Breadcrumb trail without its first entry, followed by the value of the
        // detail row whose text contains 'Art'.
        const breadcrumbs = $('.breadcrump .breadcrump-link')
            .toArray()
            .slice(1)
            .map((x) => $(x).text().trim());
        const articleType = product
            .find('.addetailslist--detail')
            .filter((_, el) => $(el).text().includes('Art'))
            .find('.addetailslist--detail--value')
            .text()
            .trim();
        // Empty segments are skipped so the path never starts or ends with a dangling ' > '.
        const category = [...breadcrumbs, articleType].filter(Boolean).join(' > ');

        // Resolve the profile link against the page URL so a relative href becomes absolute.
        const sellerHref = sellerLink.attr('href');
        const sellerUrl = sellerHref ? new URL(sellerHref, url).href : undefined;

        return {
            title,
            link: url,
            description: `${price}<br>${address}<br><br>${description}<br>${imagesHtml}<br>`,
            author: [
                {
                    name: sellerLink.text().trim(),
                    url: sellerUrl,
                },
            ],
            category: category ? [category] : [],
        };
    });
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import type { CheerioAPI } from 'cheerio';
2+
3+
import type { DataItem } from '@/types';
4+
5+
import { getProductPage } from './get-product-page';
6+
7+
/**
 * Collect the product pages linked from a listing page.
 *
 * Every `li.ad-listitem.fully-clickable-card` that is not marked
 * `.badge-topad` contributes the `data-href` of its first `<article>`.
 * Entries without a `data-href` are skipped instead of being turned into a
 * bogus `...deundefined` URL.
 *
 * @param $ Cheerio handle of the loaded listing page
 * @returns Feed items of all linked product pages, in listing order; rejects
 *          if any product page fails to load.
 */
export const parseListingPage = ($: CheerioAPI): Promise<DataItem[]> =>
    Promise.all(
        $('li.ad-listitem.fully-clickable-card')
            .not('.badge-topad')
            .toArray()
            .map((item) => $(item).find('article').first().attr('data-href'))
            .filter((href): href is string => Boolean(href))
            // Resolving against the site origin also copes with absolute hrefs.
            .map((href) => getProductPage(new URL(href, 'https://www.kleinanzeigen.de').href))
    );
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
/*
2+
3+
Script to extract categories from the Kleinanzeigen page:
4+
5+
const extractCategories = (data) => {
6+
return data.map(x => [
7+
[x[1].categoryName[1], x[1].id[1]],
8+
...extractCategories(x[1].children[1]).flat()
9+
])
10+
}
11+
12+
document.querySelectorAll('astro-island').forEach(x => {
13+
const propsString = x.getAttribute('props')
14+
const props = JSON.parse(propsString)
15+
if(props.categories) {
16+
console.log(props.categories[1])
17+
const data = extractCategories(props.categories[1])
18+
console.log(JSON.stringify(Object.fromEntries(data.flat()), null, 2))
19+
}
20+
})
21+
*/
22+
/**
 * Category name (as displayed on Kleinanzeigen) mapped to its numeric
 * category id. Generated with the extraction script in the comment above.
 */
const category2Id: Readonly<Record<string, number>> = {
    'Alle Kategorien': 0,
    'Auto, Rad & Boot': 289,
    Autos: 216,
    'Autoteile & Reifen': 223,
    'Boote & Bootszubehör': 211,
    'Fahrräder & Zubehör': 217,
    'Motorräder & Motorroller': 305,
    'Motorradteile & Zubehör': 306,
    'Nutzfahrzeuge & Anhänger': 276,
    'Reparaturen & Dienstleistungen': 280,
    'Wohnwagen & -mobile': 220,
    'Weiteres Auto, Rad & Boot': 241,
    Dienstleistungen: 297,
    Altenpflege: 236,
    'Babysitter/-in & Kinderbetreuung': 237,
    Elektronik: 161,
    'Haus & Garten': 80,
    'Künstler/-in & Musiker/-in': 191,
    'Reise & Event': 294,
    'Tierbetreuung & Training': 133,
    'Umzug & Transport': 238,
    'Weitere Dienstleistungen': 298,
    'Eintrittskarten & Tickets': 231,
    'Bahn & ÖPNV': 286,
    'Comedy & Kabarett': 254,
    Gutscheine: 287,
    Kinder: 252,
    Konzerte: 255,
    Sport: 257,
    'Theater & Musical': 251,
    'Weitere Eintrittskarten & Tickets': 256,
    'Audio & Hifi': 172,
    'Dienstleistungen Elektronik': 226,
    Foto: 245,
    'Handy & Telefon': 173,
    Haushaltsgeräte: 176,
    Konsolen: 279,
    Notebooks: 278,
    PCs: 228,
    'PC-Zubehör & Software': 225,
    'Tablets & Reader': 285,
    'TV & Video': 175,
    Videospiele: 227,
    Wearables: 405,
    'Wearables Zubehör': 406,
    'Weitere Elektronik': 168,
    'Familie, Kind & Baby': 17,
    'Baby- & Kinderkleidung': 22,
    'Baby- & Kinderschuhe': 19,
    'Baby-Ausstattung': 258,
    'Babyschalen & Kindersitze': 21,
    'Kinderwagen & Buggys': 25,
    Kinderzimmermöbel: 20,
    Spielzeug: 23,
    'Weiteres Familie, Kind & Baby': 18,
    'Freizeit, Hobby & Nachbarschaft': 185,
    'Esoterik & Spirituelles': 265,
    'Essen & Trinken': 248,
    Freizeitaktivitäten: 187,
    'Handarbeit, Basteln & Kunsthandwerk': 282,
    'Kunst & Antiquitäten': 240,
    Modellbau: 249,
    'Reise & Eventservices': 233,
    Sammeln: 234,
    'Sport & Camping': 230,
    Trödel: 250,
    'Verloren & Gefunden': 189,
    'Weiteres Freizeit, Hobby & Nachbarschaft': 242,
    Badezimmer: 91,
    Büro: 93,
    Dekoration: 246,
    'Dienstleistungen Haus & Garten': 239,
    'Gartenzubehör & Pflanzen': 89,
    Heimtextilien: 90,
    Heimwerken: 84,
    'Küche & Esszimmer': 86,
    'Lampen & Licht': 82,
    Schlafzimmer: 81,
    Wohnzimmer: 88,
    'Weiteres Haus & Garten': 87,
    Haustiere: 130,
    Fische: 138,
    Hunde: 134,
    Katzen: 136,
    Kleintiere: 132,
    Nutztiere: 135,
    Pferde: 139,
    'Vermisste Tiere': 283,
    Vögel: 243,
    Zubehör: 313,
    Immobilien: 195,
    'Auf Zeit & WG': 199,
    Container: 402,
    Eigentumswohnungen: 196,
    'Ferien- & Auslandsimmobilien': 275,
    'Garagen & Stellplätze': 197,
    Gewerbeimmobilien: 277,
    'Grundstücke & Gärten': 207,
    'Häuser zum Kauf': 208,
    'Häuser zur Miete': 205,
    Mietwohnungen: 203,
    Neubauprojekte: 403,
    'Weitere Immobilien': 198,
    Jobs: 102,
    Ausbildung: 118,
    'Bau, Handwerk & Produktion': 111,
    'Büroarbeit & Verwaltung': 114,
    'Gastronomie & Tourismus': 110,
    'Kundenservice & Call Center': 105,
    'Mini- & Nebenjobs': 107,
    Praktika: 125,
    'Sozialer Sektor & Pflege': 123,
    'Transport, Logistik & Verkehr': 247,
    'Vertrieb, Einkauf & Verkauf': 117,
    'Weitere Jobs': 109,
    'Mode & Beauty': 153,
    'Beauty & Gesundheit': 269,
    Damenbekleidung: 154,
    Damenschuhe: 159,
    Herrenbekleidung: 160,
    Herrenschuhe: 158,
    'Taschen & Accessoires': 156,
    'Uhren & Schmuck': 157,
    'Weiteres Mode & Beauty': 155,
    'Musik, Filme & Bücher': 73,
    'Bücher & Zeitschriften': 76,
    'Büro & Schreibwaren': 281,
    Comics: 284,
    'Fachbücher, Schule & Studium': 77,
    'Film & DVD': 79,
    'Musik & CDs': 78,
    Musikinstrumente: 74,
    'Weitere Musik, Filme & Bücher': 75,
    Nachbarschaftshilfe: 401,
    'Unterricht & Kurse': 235,
    Computerkurse: 260,
    'Kochen & Backen': 263,
    'Kunst & Gestaltung': 264,
    'Musik & Gesang': 262,
    Nachhilfe: 268,
    Sportkurse: 261,
    Sprachkurse: 271,
    Tanzkurse: 267,
    Weiterbildung: 266,
    // The extracted name carried a trailing space; it is stored trimmed and
    // lookups trim their input, so both spellings resolve.
    'Weitere Unterricht & Kurse': 270,
    'Verschenken & Tauschen': 272,
    Tauschen: 273,
    Verleihen: 274,
    Verschenken: 192,
};

/**
 * Resolve a category name to its Kleinanzeigen category id.
 *
 * Surrounding whitespace is ignored. Only the table's own entries are
 * considered, so names that happen to match inherited object members
 * (e.g. 'constructor') are treated as unknown.
 *
 * @param category category name as entered into the Kleinanzeigen search
 * @returns the category id, or 0 ('Alle Kategorien') when the name is unknown
 */
export const resolveCategory = (category: string): number => {
    const key = category.trim();
    return Object.prototype.hasOwnProperty.call(category2Id, key) ? (category2Id[key] ?? 0) : 0;
};
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import cache from '@/utils/cache';
2+
import ofetch from '@/utils/ofetch';
3+
4+
/**
 * Resolve a location name to its Kleinanzeigen location id.
 *
 * Queries the location recommendation endpoint (the response is stored via
 * `cache.tryGet` under the request URL) and looks for an entry whose name
 * equals `location`. If there is no exact match, a trimmed, case-insensitive
 * comparison is tried. When several entries match, the last one wins.
 *
 * @param location location string as entered into the Kleinanzeigen search
 * @returns the numeric location id, or null when no entry matches or the
 *          matching key does not contain a number
 */
export const resolveLocation = async (location: string): Promise<number | null> => {
    const url = new URL('https://www.kleinanzeigen.de/s-ort-empfehlungen.json');
    url.searchParams.append('query', location);

    // The full request URL doubles as the cache key.
    const urlString = url.toString();

    // Response maps `_<id>` keys to location names.
    const res = await cache.tryGet(urlString, () => ofetch<Record<`_${number}`, string>>(urlString));
    if (!res || typeof res !== 'object') {
        return null;
    }

    const entries = Object.entries(res);
    const wanted = location.trim().toLowerCase();
    const locationEntry = entries.findLast(([, name]) => name === location) ?? entries.findLast(([, name]) => typeof name === 'string' && name.trim().toLowerCase() === wanted);
    if (!locationEntry) {
        return null;
    }

    // Drop the leading underscore of the key; an unparsable key counts as "not found".
    const id = Number.parseInt(locationEntry[0].slice(1), 10);
    return Number.isNaN(id) ? null : id;
};

0 commit comments

Comments
 (0)