In [None]:
import ast
import getpass
import json
import time
import traceback

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def safe_get(url, max_retry=2):
    """Load ``url`` in the module-level ``driver``, retrying on failure.

    ``driver.get(url)`` is attempted up to ``max_retry`` times. A successful
    load is followed by a 2-second pause before returning. A failed attempt
    is logged, and a 5-second back-off is applied only when another attempt
    will follow.

    Args:
        url: Address to open.
        max_retry: Maximum number of attempts, including the first one.

    Returns:
        True as soon as one attempt succeeds; False when every attempt
        raised (or when ``max_retry`` is less than 1).
    """
    for attempt in range(max_retry):
        try:
            driver.get(url)
        except Exception as e:
            # Deliberately broad: this is a best-effort retry boundary and any
            # driver/network error should simply trigger the next attempt.
            print(f"[GET 실패] {url} | {attempt+1}회 재시도: {e}")
            # Sleeping after the final failure would only delay the caller.
            if attempt + 1 < max_retry:
                time.sleep(5)
        else:
            time.sleep(2)
            return True
    return False

# Launch the browser and log in to the portal.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 10)

safe_get("https://portal.dankook.ac.kr/p/S01/")

USER_ID = input("학번: ")
# getpass keeps the password from being echoed to the terminal / cell output.
USER_PW = getpass.getpass("비밀번호: ")

# Wait for the login form instead of assuming it is already in the DOM.
id_field = wait.until(EC.presence_of_element_located((By.ID, "user_id")))
id_field.send_keys(USER_ID)
pw_field = driver.find_element(By.ID, "user_password")
pw_field.send_keys(USER_PW)
pw_field.send_keys(Keys.RETURN)
time.sleep(2)

# Enter the VOC Q&A menu.
safe_get("https://voc.dankook.ac.kr/tiad/menu?redirectUrl=vocm/findVocList.do")

results = []

# Inclusive range of list pages to crawl.
FIRST_PAGE = 1
LAST_PAGE = 2785

_LIST_URL = "https://voc.dankook.ac.kr/tiad/vocm/findVocList.do?pagination.currentPageNo={page}"
_DETAIL_URL = "https://voc.dankook.ac.kr/tiad/vocm/findVocDetail.do?recvSeq={recv_seq}"
_ERROR_BANNER = "상세 크롤링 실패: 에러 정보 ↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓"


def _collect_targets():
    """Return the posts on the currently loaded list page that should be visited.

    A row is kept when it has at least five cells, its campus cell (index 1)
    is "죽전", its status cell (index 4) is "완료", and the link in its title
    cell (index 3) carries a ``data-params`` attribute with a ``recvSeq``.
    Everything needed later is copied out of the DOM here, so no element
    reference has to survive navigating to a detail page.

    Returns:
        A list of dicts with the keys ``campus``, ``category``, ``title``
        and ``recv_seq``. A row that raises while being read is logged with
        a traceback and skipped.
    """
    targets = []
    for row in driver.find_elements(By.CSS_SELECTOR, "table tbody tr"):
        try:
            tds = row.find_elements(By.TAG_NAME, "td")
            if len(tds) < 5:
                continue

            campus = tds[1].text.strip()
            status = tds[4].text.strip()
            if campus != "죽전" or status != "완료":
                continue

            a_tag = tds[3].find_element(By.TAG_NAME, "a")
            data_params_raw = a_tag.get_attribute("data-params")
            if not data_params_raw:
                continue

            # literal_eval only parses literals; page content is never executed.
            recv_seq = ast.literal_eval(data_params_raw).get("recvSeq")
            if not recv_seq:
                continue

            targets.append({
                "campus": campus,
                "category": tds[2].text.strip(),
                "title": a_tag.text.strip(),
                "recv_seq": recv_seq,
            })
        except Exception:
            print(_ERROR_BANNER)
            traceback.print_exc()
    return targets


def _scrape_detail(title_text):
    """Extract the question and answer from the currently loaded detail page.

    Args:
        title_text: Post title taken from the list page; it is prepended to
            the question body.

    Returns:
        ``(question, answer)`` on success, or None when the question body or
        a non-empty answer cannot be found (a message is printed in both
        cases). The answer is the last reply block with non-empty text.

    Raises:
        Whatever ``wait.until`` raises when no reply ``<pre>`` appears within
        the wait timeout; the caller logs it.
    """
    # Question body.
    try:
        question_pre = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.vocCont > pre"))
        )
        question = f"{title_text} - {question_pre.text.strip()}"
    except Exception:
        print("질문 본문 없음 → 건너뜀")
        return None

    # Answer body: scan the reply blocks from last to first.
    wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "ul.reply_wrap li div.cont pre"))
    )
    for block in reversed(driver.find_elements(By.CSS_SELECTOR, "ul.reply_wrap > li")):
        try:
            answer_text = block.find_element(By.CSS_SELECTOR, "div.cont > pre").text.strip()
        except Exception:
            # This reply block has no readable <pre>; try the previous one.
            continue
        if answer_text:
            return question, answer_text

    print("답변 없음 → 건너뜀")
    return None


# Crawl every list page.
for page in range(FIRST_PAGE, LAST_PAGE + 1):
    print(f"\n📄 {page}페이지 처리 중...")
    if safe_get(_LIST_URL.format(page=page)):
        # Read the whole list first, then open each detail URL directly; the
        # list page never has to be restored with driver.back().
        for target in _collect_targets():
            campus = target["campus"]
            category = target["category"]
            detail_url = _DETAIL_URL.format(recv_seq=target["recv_seq"])
            print(f"\n접속 시도: {detail_url}")
            print(f"캠퍼스: {campus} | 📂 분류: {category}")

            if not safe_get(detail_url):
                print("상세 페이지 접속 실패 → 건너뜀")
                continue

            try:
                scraped = _scrape_detail(target["title"])
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the crawl.
                print(_ERROR_BANNER)
                traceback.print_exc()
                continue

            if scraped is None:
                continue

            question, target_answer = scraped
            results.append({
                "campus": campus,
                "category": category,
                "question": question,
                "answer": target_answer
            })
            print(f"저장됨: {question[:30]}...")
    else:
        print(f"페이지 {page} 접근 실패 → 건너뜀")

    # Checkpoint every 100 pages, including when this page failed to load.
    if page % 100 == 0:
        with open(f"voc_qna_page{page}_죽전.json", "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
        print(f"{page}페이지까지 저장 완료")

# Final save of everything collected, then shut the browser down.
try:
    with open("voc_qna_죽전.json", "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\n전체 크롤링 완료! 총 {len(results)}개 저장됨 → voc_qna_죽전.json")
finally:
    # Close the browser even if writing the file fails, so no browser
    # process is left running.
    driver.quit()

파일이 날아가서 코드만 다시 가져왔고, 다시 실행하기에는 시간이 너무 오래 걸려서 출력 결과는 없어요 ㅠㅠ