In [1]:
import json
import os

import tqdm

from parser import online_parser

In [2]:
# Root directory (relative to the working directory) under which this notebook
# writes the scraped header and comment JSON files.
DATA_DIR = 'data/'

# ✅ Parse Kupus.ru

In [3]:
# Read the Kupus scraping configuration from its JSON file.
with open('parser/config/kupus.json', 'r') as config_file:
    kupus_config = json.load(config_file)

# The parser is constructed with the loaded configuration as its only argument.
kupus_parser = online_parser.OnlineParser(config=kupus_config)

# Links listed under the config's 'header_links' key; each one is later
# handed to `kupus_parser.parse_headers`.
kupus_parse_topics = kupus_config['header_links']

In [4]:
# Maps each topic link to whatever `parse_headers` returns for it.
# NOTE(review): `deep=10` presumably bounds how far the parser paginates —
# confirm against OnlineParser.parse_headers.
parsed_topics = {}

for topic_link in tqdm.tqdm(kupus_parse_topics):
    topic_headers = kupus_parser.parse_headers(topic_link, deep=10)
    parsed_topics[topic_link] = topic_headers

100%|█████████████████████████████████████████████| 7/7 [00:40<00:00,  5.82s/it]


In [5]:
# Save the parsed Kupus headers mapping as a JSON file under DATA_DIR.
kupus_headers_path = os.path.join(DATA_DIR, 'kupus_headers.json')
with open(kupus_headers_path, 'w') as headers_file:
    json.dump(parsed_topics, headers_file)

In [6]:
# Index of the first topic to process. Raise it to resume an interrupted run
# without re-scraping topics whose files were already written.
# (Named `start_index` rather than `breakpoint` so the builtin `breakpoint()`
# is not shadowed for the rest of the kernel session.)
start_index = 0

# One JSON file per topic is written here; create the directory up front so a
# missing folder does not make every topic fail at write time.
comments_dir = os.path.join(DATA_DIR, 'kupus_comments')
os.makedirs(comments_dir, exist_ok=True)

pending_topics = list(parsed_topics)[start_index:]
for topic_index, topic in enumerate(
    tqdm.tqdm(pending_topics, desc='Processing topics'), start=start_index
):
    try:
        comments = []
        for flud in tqdm.tqdm(parsed_topics[topic], leave=False, desc=f'Processing fluds in {topic}'):
            # The first element of each header entry is the thread link.
            flud_link = parsed_topics[topic][flud][0]
            comments.append((flud_link, kupus_parser.parse_comments(flud_link, deep=10)))

        # Serialize before opening the file so a serialization error cannot
        # leave a truncated, empty output file behind.
        payload = json.dumps({topic: comments})
        with open(os.path.join(comments_dir, f'kupus_comments_{topic_index}.json'), 'w') as file:
            file.write(payload)
    except Exception as error:
        # Best-effort scrape: a failing topic is skipped so the remaining
        # topics are still processed, but the failure is reported with enough
        # detail (index, topic, exception) to diagnose and re-run it.
        print(f'Topic {topic_index} ({topic}) failed: {error!r}')

Processing topics:   0%|                                  | 0/6 [00:00<?, ?it/s]
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:   0%| | 0/50 [00[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:   2%| | 1/50 [00[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:   4%| | 2/50 [00[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:   6%| | 3/50 [00[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:   8%| | 4/50 [00[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:  10%| | 5/50 [00[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:  12%| | 6/50 [00[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:  14%|▏| 7/50 [00[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:  16%|▏| 8/50 [01[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:  18%|▏| 9/50 [01[A
Processing fluds in https://kupus.ru/forum/18-kreditnye-karty/:  20%|▏| 10/50 [

# ✅ Parse Hranidengi.com

In [3]:
# Read the Hranidengi scraping configuration from its JSON file.
with open('parser/config/hranidengi.json', 'r') as config_file:
    hranidengi_config = json.load(config_file)

# The parser is constructed with the loaded configuration as its only argument.
hranidengi_parser = online_parser.OnlineParser(config=hranidengi_config)

# Links listed under the config's 'header_links' key; each one is later
# handed to `hranidengi_parser.parse_headers`.
hranidengi_parse_topics = hranidengi_config['header_links']

In [4]:
# Maps each topic link to whatever `parse_headers` returns for it.
# NOTE(review): `deep=10` presumably bounds how far the parser paginates —
# confirm against OnlineParser.parse_headers.
parsed_topics = {}

for topic_link in tqdm.tqdm(hranidengi_parse_topics):
    topic_headers = hranidengi_parser.parse_headers(topic_link, deep=10)
    parsed_topics[topic_link] = topic_headers

100%|█████████████████████████████████████████████| 8/8 [00:28<00:00,  3.51s/it]


In [6]:
# Save the parsed Hranidengi headers mapping as a JSON file under DATA_DIR.
hranidengi_headers_path = os.path.join(DATA_DIR, 'hranidengi_headers.json')
with open(hranidengi_headers_path, 'w') as headers_file:
    json.dump(parsed_topics, headers_file)

In [4]:
# Ad-hoc check of the comment parser against a single thread.
sample_thread_url = 'https://hranidengi.com/threads/blokirovki-v-gazprombanke.1180/'
hranidengi_parser.parse_comments(sample_thread_url, deep=50)

<time class="u-dt" data-date-string="03.02.25" data-time="1738610640" data-time-string="22:24" datetime="2025-02-03T22:24:00+0300" dir="auto" itemprop="datePublished" title="03.02.25 в 22:24">03.02.25</time>
----------------------------------------------------------------------------------------------------------
ammo · 03.02.25 в 22:24


In [None]:
# Index of the first topic to process. Raise it to resume an interrupted run
# without re-scraping topics whose files were already written.
# (Named `start_index` rather than `breakpoint` so the builtin `breakpoint()`
# is not shadowed for the rest of the kernel session.)
start_index = 0

# One JSON file per topic is written here; create the directory up front so a
# missing folder does not make every topic fail at write time.
comments_dir = os.path.join(DATA_DIR, 'hranidengi_comments')
os.makedirs(comments_dir, exist_ok=True)

pending_topics = list(parsed_topics)[start_index:]
for topic_index, topic in enumerate(
    tqdm.tqdm(pending_topics, desc='Processing topics'), start=start_index
):
    try:
        comments = []
        for flud in tqdm.tqdm(parsed_topics[topic], leave=False, desc=f'Processing fluds in {topic}'):
            # The first element of each header entry is the thread link.
            flud_link = parsed_topics[topic][flud][0]
            # Store the thread link next to its comments; without it the
            # saved comments cannot be traced back to their thread.
            comments.append((flud_link, hranidengi_parser.parse_comments(flud_link, deep=10)))

        # Key the file by the topic, not by `flud_link`: that variable only
        # holds the last thread visited and is undefined for an empty topic.
        # Serializing before opening the file also avoids leaving a truncated
        # output file behind if serialization fails.
        payload = json.dumps({topic: comments})
        with open(os.path.join(comments_dir, f'hranidengi_comments_{topic_index}.json'), 'w') as file:
            file.write(payload)
    except Exception as error:
        # Best-effort scrape: a failing topic is skipped so the remaining
        # topics are still processed, but the failure is reported with enough
        # detail (index, topic, exception) to diagnose and re-run it.
        print(f'Topic {topic_index} ({topic}) failed: {error!r}')

Processing topics:   0%|                                  | 0/8 [00:00<?, ?it/s]
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:   0%| |[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:   5%| |[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:   9%| |[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:  14%|▏|[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:  18%|▏|[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:  23%|▏|[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:  27%|▎|[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:  32%|▎|[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:  36%|▎|[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:  41%|▍|[A
Processing fluds in https://hranidengi.com/forums/konfliktnye-situacii/:  45%|▍

0



Processing fluds in https://hranidengi.com/forums/kreditnye-karty/:   0%| | 0/25[A
Processing fluds in https://hranidengi.com/forums/kreditnye-karty/:   4%| | 1/25[A
Processing fluds in https://hranidengi.com/forums/kreditnye-karty/:   8%| | 2/25[A
Processing topics:  25%|██████▌                   | 2/8 [00:45<02:03, 20.53s/it][A

1



Processing fluds in https://hranidengi.com/forums/debetovye-karty/:   0%| | 0/26[A
Processing topics:  38%|█████████▊                | 3/8 [00:51<01:08, 13.68s/it][A

2



Processing fluds in https://hranidengi.com/forums/kredity/:   0%| | 0/21 [00:00<[A
Processing fluds in https://hranidengi.com/forums/kredity/:   5%| | 1/21 [00:02<[A
Processing topics:  50%|█████████████             | 4/8 [00:58<00:45, 11.35s/it][A

3



Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/:   0%| | [A
Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/:  12%|▏| [A
Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/:  25%|▎| [A
Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/:  38%|▍| [A
Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/:  50%|▌| [A
Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/:  62%|▋| [A
Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/:  75%|▊| [A
Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/:  88%|▉| [A
Processing fluds in https://hranidengi.com/forums/kreditnaja-istorija/: 100%|█| [A
Processing topics:  62%|████████████████▎         | 5/8 [01:27<00:52, 17.59s/it][A
Processing fluds in https://hranidengi.com/forums/juridicheskaja-konsultacija/: [A
Processing fluds in https://hranidengi.com/forums/juridicheskaja-konsultaci

5



Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing fluds in https://hranidengi.com/forums/moshennicheskie-tranzakcii/:  [A
Processing topics:  88%|██████████████████████▊   | 7/8 [02:39<00:27, 27.96s/it][A

6



Processing fluds in https://hranidengi.com/forums/sekrety-i-lajfxaki.34/:   0%| [A
Processing fluds in https://hranidengi.com/forums/sekrety-i-lajfxaki.34/:   5%| [A
Processing fluds in https://hranidengi.com/forums/sekrety-i-lajfxaki.34/:  10%| [A
Processing fluds in https://hranidengi.com/forums/sekrety-i-lajfxaki.34/:  15%|▏[A
Processing fluds in https://hranidengi.com/forums/sekrety-i-lajfxaki.34/:  20%|▏[A