In [None]:
# Блок 1: Импортируем библиотеки
import aiohttp
import asyncio
import csv
import os
from aiohttp import ClientTimeout

In [None]:
# Блок 2: Функция для получения тредов
async def fetch_threads(session, board, max_threads_per_board):
    """Fetch a board's thread catalog and return its first entries.

    Args:
        session: Object with an aiohttp-style ``get(url, ssl=...)`` method
            whose result is usable as an async context manager.
        board: Board identifier interpolated into the catalog URL.
        max_threads_per_board: Upper bound on the number of catalog entries
            returned (applied as a slice limit).

    Returns:
        The first ``max_threads_per_board`` items of the ``"threads"`` array
        from the JSON payload. An empty list is returned when the response
        status is not 200, when ``"threads"`` is missing or null, or when any
        exception is raised while requesting or decoding the payload.
    """
    url = f"https://2ch.hk/{board}/catalog.json"
    try:
        # NOTE(review): ssl=False disables TLS certificate verification for
        # this request; kept as in the original code, confirm it is intended.
        async with session.get(url, ssl=False) as response:
            if response.status != 200:
                # Report the failure instead of returning an empty result
                # without any diagnostic.
                print(f"Ошибка при запросе к {url}: HTTP-статус {response.status}")
                return []
            data = await response.json()
            # ``or []`` also covers an explicit null in the payload.
            threads = data.get("threads") or []
            return threads[:max_threads_per_board]
    except Exception as e:
        # Deliberately best-effort: one failing board must not abort the run.
        print(f"Ошибка при запросе к {url}: {e}")
    return []

In [None]:
# Блок 3: Функция для получения постов
async def fetch_posts(session, board, thread_num):
    """Fetch the posts of a single thread.

    Args:
        session: Object with an aiohttp-style ``get(url, ssl=...)`` method
            whose result is usable as an async context manager.
        board: Board identifier interpolated into the thread URL.
        thread_num: Thread number interpolated into the thread URL.

    Returns:
        The ``"posts"`` list of the first element of the payload's
        ``"threads"`` array. An empty list is returned when the response
        status is not 200, when ``"threads"`` is missing, null or empty, when
        ``"posts"`` is missing or null, or when any exception is raised while
        requesting or decoding the payload.
    """
    url = f"https://2ch.hk/{board}/res/{thread_num}.json"
    try:
        # NOTE(review): ssl=False disables TLS certificate verification for
        # this request; kept as in the original code, confirm it is intended.
        async with session.get(url, ssl=False) as response:
            if response.status != 200:
                # Report the failure instead of returning an empty result
                # without any diagnostic.
                print(f"Ошибка при запросе к {url}: HTTP-статус {response.status}")
                return []
            data = await response.json()
            threads = data.get("threads") or []
            if not threads:
                # A payload without threads is not a request error; avoid
                # indexing into an empty list.
                return []
            return threads[0].get("posts") or []
    except Exception as e:
        # Deliberately best-effort: one failing thread must not abort the run.
        print(f"Ошибка при запросе к {url}: {e}")
    return []

In [None]:
# Блок 4: Функция для сохранения данных
def save_to_csv(board, posts, file_prefix):
    """Append posts to ``data/{file_prefix}_{board}.csv``.

    The ``data`` directory is created if needed. A header row is written
    first whenever the target file does not exist yet or exists but is empty,
    so a zero-length file never ends up as a headerless CSV.

    Args:
        board: Board identifier used in the output file name.
        posts: Iterable of dict-like posts; the keys ``thread_num``, ``num``,
            ``name``, ``comment``, ``timestamp`` and ``views`` are read, and a
            missing key is written as an empty string.
        file_prefix: Prefix used in the output file name.
    """
    os.makedirs("data", exist_ok=True)

    file_name = f"data/{file_prefix}_{board}.csv"
    needs_header = not os.path.isfile(file_name) or os.path.getsize(file_name) == 0

    # Post keys in column order; note that the header labels the "num" key
    # as "post_num".
    keys = ("thread_num", "num", "name", "comment", "timestamp", "views")

    with open(file_name, mode="a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(["thread_num", "post_num", "name", "comment", "timestamp", "views"])

        writer.writerows([post.get(key, "") for key in keys] for post in posts)

In [None]:
# Блок 5: Основная функция для сбора данных
async def collect_data(boards, file_prefix, max_threads_per_board=10000):
    """Download posts for every board and append them to per-board CSV files.

    Boards are processed one after another over a single shared HTTP
    session. For each board the thread catalog is fetched, then each thread's
    posts are fetched in turn; every post dict gets a ``"thread_num"`` key
    set in place before it is buffered. The buffer is handed to
    ``save_to_csv`` once it holds at least 100 posts after a thread has been
    processed, and once more at the end of the board for any remainder.

    Args:
        boards: Iterable of board identifiers.
        file_prefix: Prefix forwarded to ``save_to_csv``.
        max_threads_per_board: Limit forwarded to ``fetch_threads``.
    """
    request_limits = ClientTimeout(total=30, connect=10, sock_read=10)
    async with aiohttp.ClientSession(timeout=request_limits) as session:
        for board in boards:
            print(f"Сбор данных с раздела /{board}/...")
            catalog = await fetch_threads(session, board, max_threads_per_board)
            if not catalog:
                continue

            pending = []
            for entry in catalog:
                # Catalog entries may carry the thread number under either key.
                thread_num = entry.get("no") or entry.get("num")
                if thread_num:
                    fetched = await fetch_posts(session, board, thread_num)
                    for post in fetched or ():
                        post["thread_num"] = thread_num
                        pending.append(post)

                    # Flush in batches so the buffer does not grow unbounded.
                    if len(pending) >= 100:
                        save_to_csv(board, pending, file_prefix)
                        pending.clear()

            # Write whatever is left below the batch threshold.
            if pending:
                save_to_csv(board, pending, file_prefix)

In [None]:
# Блок 6: Запуск программы
def run_data_collection():
    """Run ``collect_data`` to completion from synchronous code.

    Uses ``asyncio.run`` with the fixed board list ``b``, ``po``, ``soc``,
    ``pr``, the ``2ch_data`` file prefix and a limit of 10000 threads per
    board.
    """
    asyncio.run(
        collect_data(
            ["b", "po", "soc", "pr"],
            "2ch_data",
            max_threads_per_board=10000,
        )
    )

In [None]:
# Блок 7: Запуск всей программы 
# Entry cell: run the collection for a fixed set of boards.
# NOTE(review): the bare top-level ``await`` below is only accepted by
# environments that support it (presumably IPython/Jupyter, given the
# "In [...]" cell markers in this file). In a plain ``python file.py`` run it
# is a SyntaxError; run_data_collection() is the synchronous equivalent there.
if __name__ == "__main__":
    boards = ["b", "po", "soc", "pr"]  # board identifiers passed to collect_data
    file_prefix = "2ch_data"  # prefix passed to collect_data for output naming
    await collect_data(boards, file_prefix, max_threads_per_board=10000)