In [1]:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoAlertPresentException
import pandas as pd
from datetime import datetime
import math
import re
import os
import csv

In [2]:
def convert_date_format(date_str):
    """Insert dashes into every run of eight digits found in ``date_str``.

    Each ``YYYYMMDD`` digit group becomes ``YYYY-MM-DD``; text that contains
    no eight-digit run is returned unchanged.

    Args:
        date_str: Text to convert.

    Returns:
        str: ``date_str`` with each eight-digit group split as 4-2-2 by dashes.
    """
    def _dashed(match):
        # Groups are (year, month, day) in that order.
        return "-".join(match.groups())

    return re.sub(r"(\d{4})(\d{2})(\d{2})", _dashed, date_str)

def load_and_preprocess_data(filepath):
    """Read the case workbook and normalise the columns used by the automation.

    Steps, in order: print the number of rows, recode the third column
    ('남' -> 'M', '여' -> 'F'), cast ``NAME`` to text, parse ``BIRTHDAY`` into
    datetimes, and insert a '검사일' column at position 5 holding today's date.

    Args:
        filepath: Path of the Excel file passed to ``pd.read_excel``.

    Returns:
        pandas.DataFrame: the preprocessed frame.
    """
    cases = pd.read_excel(filepath)
    print("케이스의 수 : ", len(cases))

    # Sex is located by position (third column), not by header name.
    sex_column = cases.columns[2]
    for korean_label, code in (('남', 'M'), ('여', 'F')):
        cases.loc[cases.iloc[:, 2] == korean_label, sex_column] = code

    # Names are always handled as text.
    cases['NAME'] = cases['NAME'].astype(str)

    # Birthdays: text -> dashed text -> datetime.
    birthday_text = cases['BIRTHDAY'].astype(str).apply(convert_date_format)
    cases['BIRTHDAY'] = pd.to_datetime(birthday_text, format='%Y-%m-%d')

    # Test date: today's date, placed as the sixth column.
    today = pd.to_datetime(datetime.today().strftime('%Y-%m-%d'))
    cases.insert(5, '검사일', today)

    return cases

In [3]:
def create_usertestingNo_list(driver, data):
    """Enter every row of ``data`` into the online test and collect the userTestingNo values.

    For each row the function fills in the personal-information form, starts
    the test, clicks the answer for every question, reads the value of the
    ``userTestingNo`` element, appends it to ``usertestingNo_list.csv`` in the
    current working directory, and submits the test.

    Args:
        driver: Selenium WebDriver; its second window handle is switched to
            before the rows are processed.
        data: DataFrame read by position: column 1 is the name, 2 the sex
            ('M' selects the first label, anything else the second), 3 the
            region, 4 the birthday (datetime-like) and columns 6 onward the
            answers, one per question.

    Returns:
        list: the userTestingNo values in row order.

    Raises:
        Exception: any error raised while processing a row is printed with its
            1-based row number and re-raised, so the list built so far is not
            returned; the numbers already written to the CSV stay on disk.
    """
    # Switch to the test-taking window (the second window handle).
    window_handles = driver.window_handles
    print(window_handles)
    window_handle = window_handles[1]
    driver.switch_to.window(window_handle)
    print(window_handle)

    usertestingNo_list = []

    # Write the CSV header only when the file does not exist yet, so rows from
    # an earlier run are kept and new rows are appended after them.
    file_path = os.path.join(os.getcwd(), 'usertestingNo_list.csv')
    if not os.path.exists(file_path):
        with open(file_path, mode='w', newline='') as file:
            csv.writer(file).writerow(["Row Number", "User Testing No"])

    for row in range(len(data)):
        # NaN cells become "" so they are typed/formatted as empty text.
        response = [
            "" if (isinstance(value, float) and math.isnan(value)) else value
            for value in data.iloc[row, :].to_list()
        ]
        time.sleep(1)

        try:
            # Personal information
            print("실행 행 : ", row + 1)
            name, sex, region, birthday = response[1:5]
            answers = response[6:]  # sliced once instead of once per question
            print(name)

            # Name
            driver.find_element(By.ID, 'name').send_keys(name)

            # Sex: click the label (first label for 'M', second otherwise)
            label_index = 1 if sex == 'M' else 2
            radio_label = driver.find_element(
                By.XPATH,
                '//*[@id="firstTable"]/tbody/tr[5]/td/label[{}]'.format(label_index),
            )
            radio_label.click()

            # Birthday: open the date picker
            driver.find_element(By.ID, 'birthday').click()

            # Year drop-down
            year_select = Select(driver.find_element(By.CLASS_NAME, 'ui-datepicker-year'))
            year_select.select_by_visible_text("{}".format(birthday.strftime('%Y')))

            # Month drop-down. The month number is read from the attribute
            # instead of strftime('%#m월'): the '#' flag only exists on Windows
            # and a non-ASCII format string is locale-sensitive.
            month_select = Select(driver.find_element(By.CLASS_NAME, 'ui-datepicker-month'))
            month_select.select_by_visible_text("{}월".format(birthday.month))

            # Day cell (unpadded day number, same portability reason as above)
            day_element = driver.find_element(
                By.XPATH,
                "//a[@class='ui-state-default' and text()='{}']".format(birthday.day),
            )
            day_element.click()

            # Region
            region_select = Select(driver.find_element(By.ID, 'atRegionCd'))
            region_select.select_by_visible_text("{}".format(region))

            # Next
            driver.find_element(By.ID, 'submitBtn').click()
            time.sleep(1)

            # Start the test
            driver.find_element(By.ID, 'formSubmitBtn').click()
            time.sleep(1)

            # Answer the questions: question k (0-based) is list item 2k+1 and
            # the answer value is the index of the option to click.
            for q_index, answer in enumerate(answers):
                choice_xpath = '//*[@id="test-area"]/div/ul/li[{0}]/div[2]/ul/li[{1}]'.format(
                    2 * q_index + 1, answer
                )
                choice = driver.find_element(By.XPATH, choice_xpath)
                ActionChains(driver).move_to_element(choice).perform()
                choice.click()

            # Collect the userTestingNo of this run
            element = driver.find_element(By.XPATH, "//*[@id='userTestingNo']")
            value = element.get_attribute("value")
            usertestingNo_list.append(value)

            # Record it in the CSV right away so progress survives a later failure
            with open(file_path, mode='a', newline='') as file:
                csv.writer(file).writerow([row + 1, value])

            # Submit
            driver.find_element(By.XPATH, '//*[@id="test-control"]/div/div[3]/ul').click()
            driver.find_element(By.XPATH, '//*[@id="modal3"]/div/div[2]/a[2]').click()
            time.sleep(1)
            driver.find_element(By.XPATH, '/html/body/div/div/div/div/div[3]/a[1]').click()
            time.sleep(1)
            driver.switch_to.alert.accept()
            time.sleep(1)

        except Exception as e:
            # Report the same 1-based row number as the progress print and the CSV.
            print(f"{row + 1}행에서 오류 발생 : {e}. 코드 실행 종료.")
            raise

    return usertestingNo_list

In [4]:
def save_report(driver, usertestingNo_list):
    """Open the report of every userTestingNo and export it as an HTML file.

    For each value the function switches to the first window, clicks the
    element whose id equals the value, switches to the last window handle,
    waits three seconds, and drives the export dialog with the
    "Web Page(*.html)" file type.

    Args:
        driver: Selenium WebDriver used for all interactions.
        usertestingNo_list: Iterable of element ids to click, one per report.
    """
    for testing_no in usertestingNo_list:
        # Back to the result-list window (first handle).
        driver.switch_to.window(driver.window_handles[0])

        # Open the report pop-up for this entry.
        report_link = driver.find_element(By.XPATH, f'//*[@id="{testing_no}"]')
        ActionChains(driver).move_to_element(report_link).perform()
        report_link.click()

        # The report is reached through the last window handle.
        driver.switch_to.window(driver.window_handles[-1])

        time.sleep(3)
        # Save button of the viewer
        driver.find_element(By.XPATH, '//*[@id="OZViewer"]/div[1]/input[1]').click()

        # Choose HTML as the export file type
        file_type = Select(driver.find_element(By.XPATH, '//*[@id="oztab_exportdlg_filetype"]'))
        file_type.select_by_visible_text("Web Page(*.html)")

        # Confirm the export
        driver.find_element(By.XPATH, '/html/body/div[2]/div[3]/div/button[2]/span').click()

<span style="font-size:20px;">**첫번째, driver = start_browser() 실행**</span>

로그인 -> 자동응답하고자 하는 검사 선택 -> 검사 실시 초기화면(ex. 버전 선택 등)

<img src="attachment:3efd778d-fbef-46d7-9771-cb9154c5f0e6.png" alt="description" width="500" height="300">

In [5]:
pip install webdriver-manager

Note: you may need to restart the kernel to use updated packages.


In [6]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service

def start_browser():
    """Start Chrome with a ChromeDriver binary obtained through webdriver-manager.

    Returns:
        The ``webdriver.Chrome`` instance that was launched with default options.
    """
    chrome_options = webdriver.ChromeOptions()
    # ChromeDriverManager().install() returns the path of the driver binary.
    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

# Example: open the browser and load the site; `driver` is the object passed to the later cells.
driver = start_browser()
driver.get('https://inpsyt.co.kr/psy/onlineCodeSum/list')

<span style="font-size:20px;">**두번째, data=load_and_preprocess_data 실행**</span>

케이스가 담겨 있는 엑셀 파일 불러오기(경로 확인 필수), 전처리

In [7]:
# Load and preprocess the case workbook (check that the path is correct).
data=load_and_preprocess_data("C:/Users/USER/Desktop/Inspection/PASHE.xlsx")
pd.set_option('display.max_rows', 20)   # frames longer than 20 rows are shown truncated (pandas then shows display.min_rows rows, 10 by default)
pd.set_option('display.max_columns', 20)
data

케이스의 수 :  228


Unnamed: 0.1,Unnamed: 0,NAME,SEX,REGION,BIRTHDAY,검사일,Q1,Q2,Q3,Q4,...,Q241,Q242,Q243,Q244,Q245,Q246,Q247,Q248,Q249,Q250
0,V1.0,C1,M,경기도,2005-03-12,2025-06-17,1,1,1,3,...,3,1,3,3,3,3,3,3,2,3
1,V1.0,C4,M,인천광역시,2001-03-06,2025-06-17,2,2,2,3,...,2,3,3,1,3,3,1,1,1,2
2,V1.0,C6,M,부산광역시,2004-10-21,2025-06-17,2,1,2,2,...,2,1,1,3,1,3,3,2,2,2
3,V1.0,C12,M,서울특별시,2001-11-11,2025-06-17,2,2,2,3,...,2,3,3,1,3,3,1,1,1,2
4,V1.0,C13,M,경상남도,2007-05-26,2025-06-17,1,2,3,2,...,2,1,3,1,2,1,2,2,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,V1.0,C39,M,인천광역시,2002-10-06,2025-06-17,3,1,1,2,...,3,2,3,3,3,1,3,1,1,1
224,V1.0,C151,M,경기도,1995-06-18,2025-06-17,1,3,3,2,...,2,1,3,2,1,2,3,1,3,3
225,V1.0,C152,M,광주광역시,1998-11-03,2025-06-17,3,1,3,1,...,1,1,1,3,3,2,3,2,2,2
226,V1.0,C153,M,대구광역시,1997-08-15,2025-06-17,1,1,3,3,...,2,2,1,1,3,1,2,3,3,3


<span style="font-size:20px;">**세번째, usertestingNo_list=create_usertestingNo_list(driver, data) 실행**</span>

케이스 엑셀 파일로 자동응답 실시하면서 해당 usertesting no 리스트화

In [8]:
# Run the automated responses for every row of `data` and keep the collected userTestingNo values.
usertestingNo_list=create_usertestingNo_list(driver, data)

['D4A62A89F982909EF626555CA153DCB5', 'EBAC837D014F124E86915F46A6D6AFED']
EBAC837D014F124E86915F46A6D6AFED
실행 행 :  1
C1
실행 행 :  2
C4
실행 행 :  3
C6
실행 행 :  4
C12
실행 행 :  5
C13
실행 행 :  6
C16
실행 행 :  7
C17
실행 행 :  8
C19
실행 행 :  9
C21
실행 행 :  10
C22
실행 행 :  11
C23
실행 행 :  12
C24
실행 행 :  13
C25
실행 행 :  14
C26
실행 행 :  15
C34
실행 행 :  16
C35
실행 행 :  17
C45
실행 행 :  18
C48
실행 행 :  19
C58
실행 행 :  20
C59
실행 행 :  21
C63
실행 행 :  22
C64
실행 행 :  23
C66
실행 행 :  24
C68
실행 행 :  25
C70
실행 행 :  26
C74
실행 행 :  27
C75
실행 행 :  28
C77
실행 행 :  29
C78
실행 행 :  30
C80
실행 행 :  31
C81
실행 행 :  32
C83
실행 행 :  33
C84
실행 행 :  34
C86
실행 행 :  35
C88
실행 행 :  36
C89
실행 행 :  37
C90
실행 행 :  38
C92
실행 행 :  39
C93
실행 행 :  40
C97
실행 행 :  41
C98
실행 행 :  42
C99
실행 행 :  43
C100
실행 행 :  44
C104
실행 행 :  45
C105
실행 행 :  46
C106
실행 행 :  47
C107
실행 행 :  48
C110
실행 행 :  49
C111
실행 행 :  50
C115
실행 행 :  51
C119
실행 행 :  52
C122
실행 행 :  53
C123
실행 행 :  54
C124
실행 행 :  55
C125
실행 행 :  56
C126
실행 행 :  57
C128
실행 행 :  58
C129
실행 행 :  59
C132
실행 행

In [17]:
# Display the collected userTestingNo values.
usertestingNo_list

['723f660af264402e9820',
 'a1ebbd9e4b1c45d9a950',
 '240503b9bb534fa2aaf3',
 '5c26789e8a324a5bb83a',
 '8b50058e07324d359f1c',
 '4625995eee004c9da1f8',
 'c3653c7c34fa45f28175',
 'd21d0eb05ed743378d13',
 '116e0818692342ecbcda',
 '310cfdf45ff84f38915b',
 '967da11de7ea4d8086ea',
 '3398a69cddc547eda971',
 '0af5efc4fa62485d912f',
 'feecee0659614217943e',
 '6a50315f3bfd44939721',
 '02c03090c554496e83e4',
 '4ee880d9654b4f90ab4b',
 '431ef6d4bcfa4e739f62',
 '5d05decac07548848cd0',
 'b69caea36cf2401d9c18',
 'b51155529d5a430ebb59',
 '33b800e7cf2b4f9f90f2',
 '2a9e3066745b460cb0b1',
 'cdf1409fba414b32b4e7',
 '39a37a83ad584bf3b88c',
 '2c56111f64d74d5daed8',
 '0698e205120c402599dc',
 '99d3a7564e3f4286ac06',
 'c7e250dc6b604e0a98e8',
 '44804156b83c4baea364',
 '0e19d2077ec84765a101',
 '76b67575441a46c3839c',
 '5a4c141750ee4dedace0',
 'c29d9dabfb624e789882',
 '39f9609f0cd74da38c85',
 '63e142c7dea5447588ff',
 'cd07260adae44e97a15a',
 '9d3cfe3f92e5470b9b21',
 'd2aad96281334abd9458',
 'aeb59014bf9e4104b4a5',


In [18]:
# For when the run was interrupted
import os

In [19]:
# Working directory: create_usertestingNo_list writes usertestingNo_list.csv here.
os.getcwd()

'C:\\Users\\user\\Desktop\\Inspection'

<span style="font-size:20px;">**네번째, save_report(driver, usertestingNo_list) 실행**</span>

<span style="color:red;">저장하고자 하는 검사 결과 목록 화면에서 목록 개수를 최대화하고 대기</span>

자동응답 실시한 usertesting no 별로 결과보고서 html 형식으로 저장
    
<img src="attachment:e44aaa4f-0cf8-4579-b73f-c67e07eb2f48.png" alt="description" width="500" height="300">

In [20]:
# Export the report of each collected userTestingNo as an HTML file.
save_report(driver, usertestingNo_list)

<span style="font-size:20px;">**다섯번째, 하단 셀 실행**</span>

<span style="color:red;">다운받아진 결과보고서(html)를 모두 result_inspection 폴더로 이동</span>

저장한 결과보고서를 크롤링하여 엑셀화
    
결과보고서가 어디에 저장되어 있는지 <span style="color:red;">directory = 'C:/Users/USER/Desktop/result_inspection'</span>  
크롤링한 엑셀을 어디에 저장할지 <span style="color:red;">excel_path = 'C:/Users/USER/Desktop/result_inspection/PASHE_OZ_DATA.xlsx'</span>  
어떤 검사를 크롤링할지 <span style="color:red;">html_pattern = '*PASHE*.html'</span>  
건드리지 마세요 <span style="color:red;">xpath = '//*[starts-with(@id, "f_") and contains(@id, "s0")]//nobr'</span>  
    
잘 설정되어 있는지 확인

In [21]:
import selenium
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
import os
import glob
import pandas as pd
from selenium.webdriver.common.by import By

def find_html_files(directory, pattern):
    """Return the files in ``directory`` whose names match the glob ``pattern``.

    Only ``pattern`` is interpreted as a glob; ``directory`` is escaped so that
    characters such as ``[``, ``*`` or ``?`` in a folder name are matched
    literally. The result is sorted so the processing order does not depend on
    the file system (plain string order: "(10)" sorts before "(2)").

    Args:
        directory: Folder to search (taken literally).
        pattern: Glob pattern for the file names, e.g. ``'*PASHE*.html'``.

    Returns:
        list[str]: matching paths with backslashes replaced by forward
        slashes, in sorted order; empty when nothing matches.
    """
    search_pattern = os.path.join(glob.escape(directory), pattern)
    return sorted(path.replace("\\", "/") for path in glob.glob(search_pattern))

def extract_data_from_html(html_file_path, driver, xpath):
    """Load a page and return the text of every element matching ``xpath``.

    Args:
        html_file_path: Address passed to ``driver.get``.
        driver: Selenium WebDriver used to load and query the page.
        xpath: XPath expression selecting the elements to read.

    Returns:
        list: the ``.text`` of each matching element, or an empty list when
        nothing matches.
    """
    driver.get(html_file_path)
    matches = driver.find_elements(By.XPATH, xpath)
    if not matches:
        return []
    return [node.text for node in matches]

def save_to_excel(dataframe, excel_path):
    """Write ``dataframe`` to ``excel_path`` without the index column.

    Args:
        dataframe: Frame to export.
        excel_path: Destination passed to ``DataFrame.to_excel``.
    """
    dataframe.to_excel(excel_writer=excel_path, index=False)

def main(directory, excel_path, html_pattern, xpath):
    """Scrape every matching saved report and write the values to one Excel file.

    A headless Chrome instance opens each HTML file found by
    ``find_html_files``, the texts selected by ``xpath`` become one row per
    file, and the rows are saved with ``save_to_excel``. Files with no
    matching element are reported and skipped.

    Args:
        directory: Folder that holds the saved report HTML files.
        excel_path: Path of the Excel file to write.
        html_pattern: Glob pattern selecting the report files.
        xpath: XPath expression selecting the elements to read in each file.
    """
    from pathlib import Path  # local import: only used to build file:// URLs

    service = Service(ChromeDriverManager().install())
    # Browser options
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # run the browser in the background
    driver = webdriver.Chrome(service=service, options=options)

    # One list of values per report; the frame is built once after the loop
    # instead of being re-created by pd.concat for every file.
    rows = []
    try:
        for html_file_path in find_html_files(directory, html_pattern):
            print(f"Processing file: {html_file_path}")
            # as_uri() percent-encodes the path, so characters such as '#' or
            # spaces cannot be misread as URL syntax.
            file_url = Path(os.path.abspath(html_file_path)).as_uri()
            values = extract_data_from_html(file_url, driver, xpath)
            if values:
                rows.append(values)
            else:
                print(f"No elements found in {html_file_path} with the given XPath.")
    finally:
        # Close the browser even when a file fails to load or parse.
        driver.quit()

    # Save the collected rows to the Excel file.
    result_df = pd.DataFrame(rows)
    save_to_excel(result_df, excel_path)
    print(f"Data saved to {excel_path}")

if __name__ == "__main__":
    # Arguments for main(): where the saved reports are, where to write the
    # Excel file, which report files to read, and which elements to extract.
    directory = 'C:/Users/USER/Desktop/result_inspection'
    excel_path = 'C:/Users/USER/Desktop/result_inspection/PASHE_OZ_DATA.xlsx'
    html_pattern = '*PASHE*.html'
    xpath = '//*[starts-with(@id, "f_") and contains(@id, "s0")]//nobr'
    
    main(directory, excel_path, html_pattern, xpath)

Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (1).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (10).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (100).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (11).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (12).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (13).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (14).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (15).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (16).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (17).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (18).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (19).html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE (2).html
Processing file: C:/Users/USER/Desktop/result_inspec

Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160703.595.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160708.184.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160712.836.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160717.417.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160722.036.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160726.709.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160731.323.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160735.898.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160740.557.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T160745.217.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T1607

Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161356.885.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161401.486.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161406.103.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161410.675.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161415.303.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161419.949.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161424.699.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161429.256.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161433.896.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T161438.583.html
Processing file: C:/Users/USER/Desktop/result_inspection/PASHE - 2025-06-17T1614