In [14]:
import time
import concurrent.futures
import threading
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import random


In [15]:
# Shared accumulator that every worker thread appends scraped problems to.
all_problems = []

# Lock that serializes access to the shared accumulator across threads.
lock = threading.Lock()

# Helper used to detect duplicates by link.
def is_duplicate_problem(problems_list, link):
    """Return True if any dict in ``problems_list`` has ``link`` as its 'link' value."""
    for existing in problems_list:
        if existing['link'] == link:
            return True
    return False

def extract_problem_data(html):
    """Parse a listing page and return one dict per problem row.

    Every ``<tr>`` in ``html`` is inspected. Rows that contain neither a title
    element nor a link are skipped, so rows that carry no problem at all
    (such as a table header row) do not produce placeholder entries.

    Args:
        html: HTML source of the page, as a string.

    Returns:
        A list of dicts with the keys "title", "tags", "solved_count",
        "average_attempts", "link" and "difficulty". A field that cannot be
        found in a row falls back to "No Title", "No Data", "No Link" or
        "No Difficulty" respectively; "tags" falls back to an empty list.
    """
    soup = BeautifulSoup(html, 'html.parser')

    problems = []
    for row in soup.select("tr"):
        try:
            title_elem = row.select_one("span.__Latex__")
            link_elem = row.find('a', href=True)

            # A row with no title and no link holds no problem; emitting it
            # would add a bogus "No Title" / "No Link" record.
            if title_elem is None and link_elem is None:
                continue

            difficulty_img = row.select_one("img")

            tags = []
            for tag_element in row.select("a.css-18la3yb"):
                korean_tag_elem = tag_element.select_one("span.css-1rqtlpb")
                english_tag_elem = tag_element.select_one("span.css-1pecrf1")

                korean_tag = korean_tag_elem.text.strip() if korean_tag_elem else None
                english_tag = english_tag_elem.text.strip() if english_tag_elem else None

                if korean_tag and english_tag:
                    tags.append(f"#{korean_tag} ({english_tag})")
                elif korean_tag:
                    tags.append(f"#{korean_tag}")
                elif english_tag:
                    tags.append(f"#{english_tag}")

            # Both statistics use the same selector: the first match is the
            # solved count, the second is the average attempts.
            stat_cells = row.select("div.css-1ujcjo0")

            title = title_elem.text.strip() if title_elem is not None else "No Title"
            link = link_elem['href'] if link_elem is not None else "No Link"
            solved_count = stat_cells[0].text.strip() if stat_cells else "No Data"
            avg_attempt = stat_cells[1].text.strip() if len(stat_cells) > 1 else "No Data"

            # .get() keeps the row when the image has no alt attribute instead
            # of raising KeyError and discarding the whole row.
            if difficulty_img is not None:
                difficulty = difficulty_img.get('alt', "No Difficulty")
            else:
                difficulty = "No Difficulty"

            problems.append({
                "title": title,
                "tags": tags,
                "solved_count": solved_count,
                "average_attempts": avg_attempt,
                "link": link,
                "difficulty": difficulty
            })
        except Exception as e:
            # Best-effort parsing: report the failure and move on to the next row.
            print(f"문제 정보 추출 중 오류 발생: {e}")
            continue

    return problems

def load_problem_data(url):
    """Scrape one listing page in its own headless Chrome and merge the results.

    Opens ``url``, waits for a ``tr`` element to be present, scrolls to the
    bottom of the page, clicks every element with class ``css-gv0s7n``, then
    parses the resulting page source with ``extract_problem_data``. Problems
    whose link is not yet present in the module-level ``all_problems`` list are
    appended to it while holding ``lock``.

    Args:
        url: Address of the page to load.

    Returns:
        None. Results are accumulated in ``all_problems``.

    Raises:
        Exceptions from loading the page or from the initial wait propagate
        to the caller. The driver is always quit, whether or not an error
        occurred. Failures on individual buttons are reported and skipped.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    local_driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        local_driver.get(url)

        # Wait until at least one table row exists in the DOM.
        WebDriverWait(local_driver, 30).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'tr'))
        )

        # Scroll to the bottom of the page, then pause to let content load.
        local_driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        # Click every matching button so the page updates before it is parsed.
        # NOTE(review): presumably these buttons reveal content that is hidden
        # by default; confirm against the live page.
        buttons = local_driver.find_elements(By.CLASS_NAME, 'css-gv0s7n')
        for button in buttons:
            try:
                local_driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", button)
                WebDriverWait(local_driver, 10).until(
                    EC.element_to_be_clickable(button)
                )
                button.click()
                time.sleep(2)  # give the page time to update after the click
            except Exception as e:
                # Still best-effort, but report the failure instead of
                # discarding it so missing data can be traced back to a click.
                print(f"버튼 클릭 실패: {e}")
                continue

        html = local_driver.page_source
        problems = extract_problem_data(html)

        # all_problems is shared between worker threads; check and append
        # must happen under the lock.
        with lock:
            for problem in problems:
                if not is_duplicate_problem(all_problems, problem['link']):
                    all_problems.append(problem)

    finally:
        local_driver.quit()

def load_problems_parallel(level_pages=None, max_workers=5):
    """Scrape every configured listing page using a pool of worker threads.

    Args:
        level_pages: Optional mapping of level number to the last page number
            to fetch for that level (pages start at 1). Defaults to
            ``{7: 18, 8: 19, 9: 19}``.
        max_workers: Size of the thread pool. Defaults to 5.

    Returns:
        None. Each worker runs ``load_problem_data``, which stores its results
        in the module-level ``all_problems`` list.

    A page whose worker raises is reported with a printed message and does
    not stop the remaining pages.
    """
    if level_pages is None:
        level_pages = {7: 18, 8: 19, 9: 19}

    urls = [
        f"https://solved.ac/problems/level/{level}?page={page}"
        for level, last_page in level_pages.items()
        for page in range(1, last_page + 1)
    ]

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_url = {executor.submit(load_problem_data, url): url for url in urls}
        # Retrieve every result: an exception raised inside a worker is only
        # surfaced when the future's result is requested.
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                future.result()
            except Exception as e:
                print(f"페이지 로드 실패 ({future_to_url[future]}): {e}")


In [10]:
# Run the crawl; the workers append their results to the module-level all_problems list.
load_problems_parallel()

버튼 클릭 실패: Message: 

버튼 클릭 실패: Message: 

버튼 클릭 실패: Message: 



In [11]:
# Report how many problems were collected in total.
print("총 문제 수:", len(all_problems))

총 문제 수: 151


In [12]:
# Show up to two randomly chosen problems, or a notice when nothing was collected.
if not all_problems:
    print("문제를 찾을 수 없습니다.")
else:
    random_problems = random.sample(all_problems, min(2, len(all_problems)))
    for idx, problem in enumerate(random_problems):
        report_lines = [
            f"문제 {idx + 1}",
            f"제목: {problem['title']}",
            f"태그: {', '.join(problem['tags'])}",
            f"푼 사람 수: {problem['solved_count']}",
            f"평균 시도: {problem['average_attempts']}",
            f"난이도: {problem['difficulty']}",  # difficulty line
            f"링크: {problem['link']}\n",
        ]
        for report_line in report_lines:
            print(report_line)

문제 1
제목: 올바른 배열
태그: 
푼 사람 수: 1,970
평균 시도: 2.11
난이도: Silver IV
링크: https://www.acmicpc.net/problem/1337

문제 2
제목: 스위치 켜고 끄기
태그: 
푼 사람 수: 13,201
평균 시도: 4.90
난이도: Silver IV
링크: https://www.acmicpc.net/problem/1244

