In [1]:
import os
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.styles import Alignment

In [2]:
# Index page that lists every step; the table on this page is parsed further down.
url = "https://www.acmicpc.net/step"
# Browser-like User-Agent sent with every request in this notebook.
# NOTE(review): presumably the site rejects the default client UA - confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# Without a timeout a stalled connection would block the kernel indefinitely.
response = requests.get(url, headers=headers, timeout=10)
# Fail here on an HTTP error instead of parsing an error page as if it were the step table.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [3]:
class Step:
  """One step (problem set) scraped from the step index table.

  Holds the step number, its title and description, the advertised problem
  count, the list of problems attached to it and the URL of the step page.
  """
  def __init__(self, num=0, title="", description="", total=0, problems=None, link=""):
    self.num = num                  # step number
    self.title = title              # step title text
    self.description = description  # step description text
    self.total = total              # number of problems reported for the step
    # A fresh list per instance when the caller passes nothing (or None).
    self.problems = [] if problems is None else problems
    self.link = link                # URL of the step page

  def __str__(self):
    """Render a heading line, the description, one line per problem, and the link."""
    heading = f"[{self.num}] {self.title} ({self.total}문제)"
    listing = "\n".join(str(problem) for problem in self.problems)
    return f"{heading}\n{self.description}\n{listing}\n{self.link}"
  
class Problem:
  """A single problem: its number, its title and the URL of its page."""
  def __init__(self, num=0, title="", link=""):
    self.num, self.title, self.link = num, title, link

  def __str__(self):
    """Render as ``[num] title - link``."""
    return "[{}] {} - {}".format(self.num, self.title, self.link)

In [4]:
# Every <tr> inside the first <tbody> of the index page is a candidate step row.
trs = soup.find("tbody").find_all("tr")

steps = []
for tr in trs:
  tds = tr.find_all("td")
  # Rows with fewer than five cells are not turned into steps.
  if len(tds) < 5:
    continue
  title_cell = tds[1]
  steps.append(
    Step(
      num=int(tds[0].text),          # cell 0: step number
      title=title_cell.text,         # cell 1: title text (also carries the link)
      description=tds[2].text,       # cell 2: description
      total=int(tds[4].text),        # cell 4: problem count
      problems=None,                 # filled in later, once the step page is fetched
      link="https://www.acmicpc.net" + title_cell.find("a")["href"],
    )
  )

In [5]:
# Download each step page and attach the problems listed in its table to the step.
for step in steps:
  # Timeout keeps one stalled request from hanging the whole loop.
  step_response = requests.get(step.link, headers=headers, timeout=10)
  # Stop on an HTTP error instead of parsing an error page.
  step_response.raise_for_status()
  step_soup = BeautifulSoup(step_response.text, "html.parser")
  step_trs = step_soup.find("tbody").find_all("tr")

  # Build the list from scratch so re-running this cell does not append
  # every problem a second time.
  problems = []
  for step_tr in step_trs:
    step_tds = step_tr.find_all("td")
    # Rows with fewer than five cells are skipped.
    if len(step_tds) >= 5:
      num = int(step_tds[1].text)    # cell 1: problem number
      title = step_tds[2].text       # cell 2: problem title (also carries the link)
      link = "https://www.acmicpc.net" + step_tds[2].find("a")["href"]
      problem_obj = Problem(num, title, link)
      problems.append(problem_obj)
  step.problems = problems

In [6]:
# Lookup tables indexed by step number (index 0 and any missing step numbers stay empty):
#   step_numbers[n]  -> problem count reported for step n
#   step_problems[n] -> problem numbers scraped for step n
max_step = max(step.num for step in steps)
step_numbers = [0] * (max_step + 1)
# One distinct list per slot; `[[]] * n` would make every slot the same list object.
step_problems = [[] for _ in range(max_step + 1)]
for step in steps:
  step_numbers[step.num] = step.total
  step_problems[step.num] = [problem.num for problem in step.problems]
print(step_numbers)
print(step_problems)

[0, 13, 7, 12, 10, 11, 8, 4, 7, 6, 8, 7, 6, 11, 8, 9, 11, 5, 5, 7, 8, 16, 6, 5, 9, 7, 6, 6, 5, 16, 3, 7, 5, 9, 7, 4, 6, 10, 4, 10, 8, 8, 5, 8, 8, 6, 12, 6, 6, 8, 4, 11, 5, 5, 11, 7, 8]
[[], [2557, 1000, 1001, 10998, 1008, 10869, 10926, 18108, 10430, 2588, 11382, 10171, 10172], [1330, 9498, 2753, 14681, 2884, 2525, 2480], [2739, 10950, 8393, 25304, 25314, 15552, 11021, 11022, 2438, 2439, 10952, 10951], [10807, 10871, 10818, 2562, 10810, 10813, 5597, 3052, 10811, 1546], [27866, 2743, 9086, 11654, 11720, 10809, 2675, 1152, 2908, 5622, 11718], [25083, 3003, 2444, 10988, 1157, 2941, 1316, 25206], [2738, 2566, 10798, 2563], [2745, 11005, 2720, 2903, 2292, 1193, 2869], [5086, 2501, 9506, 1978, 2581, 11653], [27323, 1085, 3009, 15894, 9063, 10101, 5073, 14215], [24262, 24263, 24264, 24265, 24266, 24267, 24313], [2798, 2231, 19532, 1018, 1436, 2839], [2750, 2587, 25305, 2751, 10989, 1427, 11650, 11651, 1181, 10814, 18870], [10815, 14425, 7785, 1620, 10816, 1764, 1269, 11478], [1934, 13241, 1735

In [24]:
def extract(str1):
    """Return the first run of digits found in ``str1`` as an int, or -1 when there is none."""
    found = re.search(r'\d+', str1)
    return int(found.group()) if found else -1

def find(str1):
    """Return the index of the first entry of ``step_problems`` containing the number in ``str1``.

    The number is obtained with ``extract``; -1 is returned when no entry contains it.
    """
    return next(
        (idx for idx, problems in enumerate(step_problems) if extract(str1) in problems),
        -1,
    )

def file_count(item, step, lang, wrong, num = 0):
    """Count the ``.<lang>`` files directly inside ``item/step``.

    With ``num == 0`` every matching file is counted. Otherwise a file is
    counted only when the number returned by ``extract`` for its name is in
    ``step_problems[num]``; the remaining matching files are appended to
    ``wrong[item][step]`` (created on demand) and are not counted.

    Returns the number of counted files.
    """
    folder = os.path.join(item, step)
    suffix = f".{lang}"
    matched = 0
    for filename in sorted(os.listdir(folder)):
        # Only regular files with the expected extension are considered.
        if not os.path.isfile(os.path.join(folder, filename)):
            continue
        if not filename.endswith(suffix):
            continue
        if num == 0 or extract(filename) in step_problems[num]:
            matched += 1
        else:
            wrong.setdefault(item, {}).setdefault(step, []).append(filename)
    return matched

In [38]:
# Build one summary dict per language folder: solved / expected counts per step,
# the count of the "기타" folder, and a grand total. Files found in the wrong
# step folder are collected in `wrong` by file_count.
data = []
wrong = {}

# File extension used by each language name; unknown names are used unchanged.
ext_by_lang = {"C": "c", "C++": "cpp", "Java": "java", "Python": "py"}

for item in sorted(os.listdir('.')):
    if not (os.path.isdir(item) and item.startswith("BAEKJOON")):
        continue

    total = 0
    # Folder names look like "BAEKJOON(C)": drop the 9-character prefix and the last character.
    lang = item[9:-1]
    langData = {"lang": lang+" 언어"}
    # Resolved once per folder instead of on every loop iteration.
    ext = ext_by_lang.get(lang, lang)

    for step in sorted(os.listdir(item)):
        # Skip plain files first: file_count lists the entry as a directory,
        # so a file named "기타" would otherwise crash os.listdir.
        if not os.path.isdir(os.path.join(item, step)):
            continue

        if step == "기타":
            # No step number here, so every source file is counted (num defaults to 0).
            cnt = file_count(item, step, ext, wrong)
            langData[step] = f"{cnt:3d} 개"
            total += cnt
            continue
        if not step.startswith("step"):
            continue

        # Step folders are named like "step 01"; the second token is the step number.
        num = int(step.split()[1])
        cnt = file_count(item, step, ext, wrong, num)
        remain = step_numbers[num] - cnt

        # Show the number still missing in parentheses only when the step is incomplete.
        if remain == 0:
            langData[step] = f"{cnt:02d} / {step_numbers[num]:02d}"
        else:
            langData[step] = f"{cnt:02d} / {step_numbers[num]:02d} ({remain:02d})"
        total += cnt

    langData["total"] = f"{total:3d} 개"
    data.append(langData)

In [39]:
# Dump the per-language rows, then the misplaced-file report, one entry per line.
print("\n".join(["data"] + [str(row) for row in data]))
print("\n".join(["wrong"] + [str(entry) for entry in wrong.items()]))

data
{'lang': 'C 언어', 'step 01': '13 / 13', 'step 02': '07 / 07', 'step 03': '12 / 12', 'step 04': '10 / 10', 'step 05': '11 / 11', 'step 06': '08 / 08', 'step 07': '04 / 04', 'step 08': '07 / 07', 'step 09': '06 / 06', 'step 10': '08 / 08', 'step 11': '07 / 07', 'step 12': '06 / 06', 'step 13': '11 / 11', 'step 14': '08 / 08', 'step 15': '09 / 09', 'step 16': '11 / 11', 'step 17': '05 / 05', 'step 18': '05 / 05', 'step 19': '07 / 07', 'step 20': '08 / 08', 'step 21': '08 / 16 (08)', 'total': '171 개'}
{'lang': 'C++ 언어', 'step 01': '13 / 13', 'step 02': '07 / 07', 'step 03': '12 / 12', 'step 04': '10 / 10', 'step 05': '11 / 11', 'step 06': '08 / 08', 'step 07': '04 / 04', 'step 08': '07 / 07', 'step 09': '06 / 06', 'step 10': '08 / 08', 'step 11': '07 / 07', 'step 12': '06 / 06', 'step 13': '11 / 11', 'step 14': '08 / 08', 'step 15': '09 / 09', 'step 16': '11 / 11', 'step 17': '05 / 05', 'step 18': '05 / 05', 'step 19': '07 / 07', 'step 20': '08 / 08', 'step 21': '08 / 16 (08)', 'total'

In [40]:
import shutil

# Move every misplaced file into the step folder that actually lists its problem.
# `wrong` maps language folder -> step folder -> list of misplaced file names.
for lang in wrong:
    print(f"{'='*10} {lang} {'='*10}")
    for step in wrong[lang]:
        print(f"<<<<< {step} >>>>>")
        # Iterate over a snapshot: the list is shrunk inside the loop, and
        # removing from the list being iterated would skip the next entry.
        for str1 in list(wrong[lang][step]):
            sub = find(str1)
            print(f"{str1} -> step {sub:02d}")
            # -1 means no step lists this problem; leave the file where it is.
            if (sub == -1): continue
            src = f"{lang}/{step}/{str1}"
            dst = f"{lang}/step {sub:02d}/{str1}"
            # The target step folder may not exist yet for this language.
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            shutil.move(src, dst)
            wrong[lang][step].remove(str1)

In [51]:
# Recount the solved files per language and step, then export the summary
# table to count.xlsx.
data = []
wrong = {}
MAX = 0  # highest step number seen in any language folder

# File extension used by each language name; unknown names are used unchanged.
ext_by_lang = {"C": "c", "C++": "cpp", "Java": "java", "Python": "py"}

for item in sorted(os.listdir('.')):
    if not (os.path.isdir(item) and item.startswith("BAEKJOON")):
        continue

    total = 0
    # Folder names look like "BAEKJOON(C)": drop the 9-character prefix and the last character.
    lang = item[9:-1]
    langData = {"lang": lang+" 언어"}
    # Resolved once per folder instead of on every loop iteration.
    ext = ext_by_lang.get(lang, lang)

    for step in sorted(os.listdir(item)):
        # Skip plain files first: file_count lists the entry as a directory,
        # so a file named "기타" would otherwise crash os.listdir.
        if not os.path.isdir(os.path.join(item, step)):
            continue

        if step == "기타":
            # No step number here, so every source file is counted (num defaults to 0).
            cnt = file_count(item, step, ext, wrong)
            langData[step] = f"{cnt:3d} 개"
            total += cnt
            continue
        if not step.startswith("step"):
            continue

        # Step folders are named like "step 01"; the second token is the step number.
        num = int(step.split()[1])
        cnt = file_count(item, step, ext, wrong, num)
        remain = step_numbers[num] - cnt

        # Show the number still missing in parentheses only when the step is incomplete.
        if remain == 0:
            langData[step] = f"{cnt:02d} / {step_numbers[num]:02d}"
        else:
            langData[step] = f"{cnt:02d} / {step_numbers[num]:02d} ({remain:02d})"
        total += cnt

        MAX = max(MAX, num)

    langData["total"] = f"{total:3d} 개"
    data.append(langData)

# Arrange by column: each sheet row is a label followed by one value per language.
col_data = [["언어"] + [langData["lang"] for langData in data]]
for num in range(1, MAX+1):
    step = f"step {num:02d}"
    # setdefault fills in "" for languages that have no folder for this step.
    col_data.append([step] + [langData.setdefault(step, "") for langData in data])
col_data.append([])  # blank spacer row before the summary rows
col_data.append(["기타"] + [langData.setdefault("기타", "") for langData in data])
col_data.append(["총합"] + [langData["total"] for langData in data])
df = pd.DataFrame(col_data)

# 1. Save to an Excel file (pandas)
xlsx_path = "count.xlsx"
df.to_excel(xlsx_path, index=False, header=False)

# 2. Reopen the saved workbook with openpyxl to adjust the layout
wb = load_workbook(xlsx_path)
ws = wb.active

# Clear the sheet, then write everything again starting at B2
ws.delete_rows(1, ws.max_row)
ws.delete_cols(1, ws.max_column)

align_center = Alignment(horizontal='center', vertical='center')

for r, row in enumerate(col_data, start=2):       # start at B2
    for c, val in enumerate(row, start=2):
        cell = ws.cell(row=r, column=c, value=val)
        cell.alignment = align_center             # apply center alignment

# Set the width of every column except the first (column A is left as a margin)
for idx, col in enumerate(ws.columns):
    if (idx == 0): continue
    col_letter = col[0].column_letter
    ws.column_dimensions[col_letter].width = 15

wb.save(xlsx_path)

In [46]:
# Collect every source file whose name contains a space, as "folder/step/filename".
wrong = []

for item in sorted(os.listdir('.')):
    if not os.path.isdir(item) or not item.startswith("BAEKJOON"):
        continue

    # Folder names look like "BAEKJOON(C)": drop the 9-character prefix and the last character.
    lang = item[9:-1]

    for step in sorted(os.listdir(item)):
        # Translate the language name into its file extension (other values pass through).
        lang = {"C": "c", "C++": "cpp", "Java": "java", "Python": "py"}.get(lang, lang)
        step_dir = os.path.join(item, step)
        if not os.path.isdir(step_dir):
            continue
        for filename in sorted(os.listdir(step_dir)):
            is_source = (
                os.path.isfile(os.path.join(step_dir, filename))
                and filename.endswith(f".{lang}")
            )
            if is_source and ' ' in filename:
                wrong.append(f"{item}/{step}/{filename}")

print(*wrong, sep="\n")
print(len(wrong))

BAEKJOON(C)/step 02/1330 B5.c
BAEKJOON(C)/step 02/14681 B5.c
BAEKJOON(C)/step 02/2480 B4.c
BAEKJOON(C)/step 02/2525 B3.c
BAEKJOON(C)/step 02/2753 B5.c
BAEKJOON(C)/step 02/2884 B3.c
BAEKJOON(C)/step 02/9498 B5.c
BAEKJOON(C)/step 03/10950 B5.c
BAEKJOON(C)/step 03/10951 B5.c
BAEKJOON(C)/step 03/10952 B5.c
BAEKJOON(C)/step 03/11021 B5.c
BAEKJOON(C)/step 03/11022 B5.c
BAEKJOON(C)/step 03/15552 B4.c
BAEKJOON(C)/step 03/2438 B5.c
BAEKJOON(C)/step 03/2439 B4.c
BAEKJOON(C)/step 03/25304 B4.c
BAEKJOON(C)/step 03/25314 B5.c
BAEKJOON(C)/step 03/2739 B5.c
BAEKJOON(C)/step 03/8393 B5.c
BAEKJOON(C)/step 04/10807 B5.c
BAEKJOON(C)/step 04/10810 B3.c
BAEKJOON(C)/step 04/10811 B2.c
BAEKJOON(C)/step 04/10813 B2.c
BAEKJOON(C)/step 04/10818 B3.c
BAEKJOON(C)/step 04/10871 B5.c
BAEKJOON(C)/step 04/1546 B1.c
BAEKJOON(C)/step 04/2562 B3.c
BAEKJOON(C)/step 04/3052 B2.c
BAEKJOON(C)/step 04/5597 B5.c
BAEKJOON(C)/step 05/10809 B2.c
BAEKJOON(C)/step 05/1152 B2.c
BAEKJOON(C)/step 05/11654 B5.c
BAEKJOON(C)/step 05/117

In [48]:
import shutil

# Rename each collected "folder/step/filename" entry, replacing spaces in the
# file name with underscores, and print the new path.
for file in wrong:
    parts = file.split("/")
    folder, step, filename = parts[0], parts[1], parts[2]
    directory = os.path.join(folder, step)
    path = os.path.join(directory, filename)
    new_path = os.path.join(directory, filename.replace(' ', '_'))
    os.rename(path, new_path)
    print(new_path)

BAEKJOON(C)/step 02/1330_B5.c
BAEKJOON(C)/step 02/14681_B5.c
BAEKJOON(C)/step 02/2480_B4.c
BAEKJOON(C)/step 02/2525_B3.c
BAEKJOON(C)/step 02/2753_B5.c
BAEKJOON(C)/step 02/2884_B3.c
BAEKJOON(C)/step 02/9498_B5.c
BAEKJOON(C)/step 03/10950_B5.c
BAEKJOON(C)/step 03/10951_B5.c
BAEKJOON(C)/step 03/10952_B5.c
BAEKJOON(C)/step 03/11021_B5.c
BAEKJOON(C)/step 03/11022_B5.c
BAEKJOON(C)/step 03/15552_B4.c
BAEKJOON(C)/step 03/2438_B5.c
BAEKJOON(C)/step 03/2439_B4.c
BAEKJOON(C)/step 03/25304_B4.c
BAEKJOON(C)/step 03/25314_B5.c
BAEKJOON(C)/step 03/2739_B5.c
BAEKJOON(C)/step 03/8393_B5.c
BAEKJOON(C)/step 04/10807_B5.c
BAEKJOON(C)/step 04/10810_B3.c
BAEKJOON(C)/step 04/10811_B2.c
BAEKJOON(C)/step 04/10813_B2.c
BAEKJOON(C)/step 04/10818_B3.c
BAEKJOON(C)/step 04/10871_B5.c
BAEKJOON(C)/step 04/1546_B1.c
BAEKJOON(C)/step 04/2562_B3.c
BAEKJOON(C)/step 04/3052_B2.c
BAEKJOON(C)/step 04/5597_B5.c
BAEKJOON(C)/step 05/10809_B2.c
BAEKJOON(C)/step 05/1152_B2.c
BAEKJOON(C)/step 05/11654_B5.c
BAEKJOON(C)/step 05/117