In [1]:
import os
import re
import shutil
import requests
import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.styles import Alignment

In [2]:
# Step index page on acmicpc.net; the parsing cell below reads its first <tbody>.
url = "https://www.acmicpc.net/step"
# Sent with a browser-like User-Agent instead of the requests default.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# The timeout keeps this cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here rather than letting it show up
# later as an unrelated "NoneType has no attribute ..." failure while parsing.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [3]:
class Step:
    """One step of the step list: number, title, description, problem count,
    the problems collected for it, and the URL of its page."""

    def __init__(self, num=0, title="", description="", total=0, problems=None, link=""):
        """Store the given fields on the instance.

        ``problems`` defaults to ``None`` and is then replaced by a new empty
        list, so instances created without it never share one list object.
        A list that is passed in is stored as-is (not copied).
        """
        self.num, self.title, self.description = num, title, description
        self.total = total
        self.problems = [] if problems is None else problems
        self.link = link

    def __str__(self):
        """Return a multi-line summary: header, description, one line per
        problem, and finally the link."""
        header = f"[{self.num}] {self.title} ({self.total}문제)"
        problem_lines = "\n".join(str(problem) for problem in self.problems)
        return f"{header}\n{self.description}\n{problem_lines}\n{self.link}"
  
class Problem:
    """A single problem: its number, its title and the URL of its page."""

    def __init__(self, num=0, title="", link=""):
        """Store the given number, title and link on the instance."""
        self.num, self.title, self.link = num, title, link

    def __str__(self):
        """Return the problem as ``[num] title - link``."""
        return "[{}] {} - {}".format(self.num, self.title, self.link)

In [7]:
# Rows of the first <tbody> on the step index page.
trs = soup.find("tbody").find_all("tr")

steps = []
for tr in trs:
    tds = tr.find_all("td")
    # Rows with fewer than five cells are skipped.
    if len(tds) < 5:
        continue
    # Cells used: [0] step number, [1] title (and its link), [2] description,
    # [4] total. The problem list stays empty here and is filled by a later cell.
    step_obj = Step(
        num=int(tds[0].text),
        title=tds[1].text,
        description=tds[2].text,
        total=int(tds[4].text),
        problems=None,
        link="https://www.acmicpc.net" + tds[1].find("a")["href"],
    )
    steps.append(step_obj)

In [8]:
# Download every step's own page and collect the problems listed on it.
for step in steps:
    # Bounded wait plus an explicit HTTP status check, so a stalled or failed
    # request stops the cell with a clear error instead of hanging or failing
    # later inside the HTML parsing.
    step_response = requests.get(step.link, headers=headers, timeout=30)
    step_response.raise_for_status()
    step_soup = BeautifulSoup(step_response.text, "html.parser")
    step_trs = step_soup.find("tbody").find_all("tr")

    # Collect into a fresh list and assign it once at the end. Appending to
    # step.problems directly made this cell non-idempotent: running it a
    # second time stored every problem twice.
    found_problems = []
    for step_tr in step_trs:
        step_tds = step_tr.find_all("td")
        # Rows with fewer than five cells are skipped.
        if len(step_tds) < 5:
            continue
        # Cells used: [1] problem number, [2] title (and its link).
        num = int(step_tds[1].text)
        title = step_tds[2].text
        link = "https://www.acmicpc.net" + step_tds[2].find("a")["href"]
        found_problems.append(Problem(num, title, link))
    step.problems = found_problems

In [9]:
# Lookup tables indexed by step number:
#   step_numbers[n]  -> the step's `total`
#   step_problems[n] -> the problem numbers collected for the step
# Slots that no step maps to keep their defaults (0 and []).
# default=0 keeps this cell runnable when `steps` is empty.
max_step = max((step.num for step in steps), default=0)
step_numbers = [0] * (max_step + 1)
# Build one independent list per slot. `[[]] * n` repeats a single list
# object n times, so an in-place change to one slot would show up in all.
step_problems = [[] for _ in range(max_step + 1)]
for step in steps:
    step_numbers[step.num] = step.total
    step_problems[step.num] = [problem.num for problem in step.problems]
print(step_numbers)
print(step_problems)

[0, 13, 7, 12, 10, 11, 8, 4, 7, 6, 8, 7, 6, 11, 8, 9, 11, 5, 5, 7, 8, 16, 6, 5, 9, 7, 6, 6, 5, 16, 3, 7, 5, 9, 7, 4, 6, 10, 4, 10, 8, 8, 5, 8, 8, 6, 12, 6, 6, 8, 4, 11, 5, 5, 11, 7, 8]
[[], [2557, 1000, 1001, 10998, 1008, 10869, 10926, 18108, 10430, 2588, 11382, 10171, 10172], [1330, 9498, 2753, 14681, 2884, 2525, 2480], [2739, 10950, 8393, 25304, 25314, 15552, 11021, 11022, 2438, 2439, 10952, 10951], [10807, 10871, 10818, 2562, 10810, 10813, 5597, 3052, 10811, 1546], [27866, 2743, 9086, 11654, 11720, 10809, 2675, 1152, 2908, 5622, 11718], [25083, 3003, 2444, 10988, 1157, 2941, 1316, 25206], [2738, 2566, 10798, 2563], [2745, 11005, 2720, 2903, 2292, 1193, 2869], [5086, 2501, 9506, 1978, 2581, 11653], [27323, 1085, 3009, 15894, 9063, 10101, 5073, 14215], [24262, 24263, 24264, 24265, 24266, 24267, 24313], [2798, 2231, 19532, 1018, 1436, 2839], [2750, 2587, 25305, 2751, 10989, 1427, 11650, 11651, 1181, 10814, 18870], [10815, 14425, 7785, 1620, 10816, 1764, 1269, 11478], [1934, 13241, 1735

In [43]:
# Language name -> file extension (without the dot) of that language's solution files.
# The keys are looked up with the language part sliced out of a "BAEKJOON..." folder name.
lang_extension_map = {"C": "c", "C++": "cpp", "Java": "java", "Python": "py"}

def extract(str1):
    """Return the first run of digits found in ``str1`` as an int, or -1 if
    ``str1`` contains no digit."""
    digits = re.search(r'\d+', str1)
    return int(digits.group()) if digits else -1

def find(str1):
    """Return the index of the first entry of ``step_problems`` that contains
    the number extracted from ``str1``, or -1 when no entry contains it."""
    return next(
        (idx for idx, problems in enumerate(step_problems) if extract(str1) in problems),
        -1,
    )

def file_count(item, step, extension, wrong, num=0):
    """Count the files in ``item/step`` whose name ends with ``.extension``.

    Directory entries are visited in sorted name order; entries that are not
    regular files are ignored.

    With ``num == 0`` every matching file is counted. Otherwise a file is
    counted only if the number extracted from its name is in
    ``step_problems[num]``; any other matching file is not counted and its
    name is appended to ``wrong[item][step]`` (created on demand).

    Returns the number of counted files.
    """
    folder = os.path.join(item, step)
    suffix = f".{extension}"
    matched = 0
    for filename in sorted(os.listdir(folder)):
        if not os.path.isfile(os.path.join(folder, filename)):
            continue
        if not filename.endswith(suffix):
            continue
        # num == 0 short-circuits: no membership check is made in that case.
        if num != 0 and extract(filename) not in step_problems[num]:
            wrong.setdefault(item, {}).setdefault(step, []).append(filename)
            continue
        matched += 1
    return matched

def makeMD(col):
    """Print ``col`` (a list of rows, each a list of strings) as a Markdown table.

    ``col[0]`` is the header row. It is followed by a separator row holding
    one ``:---:`` marker per header cell, then by the remaining rows. Rows are
    emitted as given: they are neither padded nor truncated to the header
    width, so an empty row prints as ``|  |``.

    Nothing is returned; the table is only printed.
    """
    def as_row(cells):
        # One Markdown table line: cells joined by " | " between outer pipes.
        return "| " + " | ".join(cells) + " |"

    header_cells = col[0]
    # Header line, then the separator line.
    md_lines = [as_row(header_cells), as_row([':---:'] * len(header_cells))]
    # Remaining data lines.
    md_lines.extend(as_row(row) for row in col[1:])

    # Print the finished table.
    print("\n".join(md_lines))

In [91]:
# Pass 1: scan every "BAEKJOON..." folder and let file_count() record, in
# `wrong`, the solution files that sit in a step folder they do not belong to.
wrong = {}

for item in sorted(os.listdir('.')):
    if not (os.path.isdir(item) and item.startswith("BAEKJOON")):
        continue

    for step in sorted(os.listdir(item)):
        if not os.path.isdir(os.path.join(item, step)) or step == ".vscode":
            continue
        # item[9:-1] drops the first nine characters and the last one of the
        # folder name; presumably folders look like "BAEKJOON(C++)" - confirm.
        extension = lang_extension_map[item[9:-1]]
        if step == "기타":
            file_count(item, step, extension, wrong)
        else:
            # The step number is the second whitespace-separated token of the
            # folder name.
            file_count(item, step, extension, wrong, int(step.split()[1]))

print("wrong", *wrong.items(), sep='\n')
if not wrong: print("틀린게 없음")

# Pass 2: move each misplaced file into the folder of the step that lists it.
for lang in wrong:
    print(f"{'='*10} {lang} {'='*10}")
    for step in wrong[lang]:
        print(f"<<<<< {step} >>>>>")
        # Iterate over a snapshot of the list. The loop removes entries from
        # wrong[lang][step]; removing from the list being iterated made the
        # iterator skip the file right after every moved one.
        for str1 in list(wrong[lang][step]):
            sub = find(str1)
            print(f"{str1} -> step {sub:02d}")
            # -1 means no step lists this problem: leave the file where it is
            # and keep it recorded in `wrong`.
            if sub == -1:
                continue
            src = f"{lang}/{step}/{str1}"
            dst = f"{lang}/step {sub:02d}/{str1}"
            # The target step folder may not exist yet for this language.
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            shutil.move(src, dst)
            wrong[lang][step].remove(str1)

wrong
틀린게 없음


In [101]:
# Per-language progress: for every "BAEKJOON..." folder, count the solution
# files in each step folder and compare the count with the step's total.
data = []
wrong = {}
MAX = 0

for item in sorted(os.listdir('.')):
    is_solution_dir = os.path.isdir(item) and item.startswith("BAEKJOON")
    if not is_solution_dir:
        continue

    total = 0
    # Language name: the folder name without its first nine characters and its last one.
    lang = item[9:-1]
    langData = {"Language": lang}

    for step in sorted(os.listdir(item)):
        if step == ".vscode" or not os.path.isdir(os.path.join(item, step)):
            continue
        extension = lang_extension_map[lang]

        if step != "기타":
            # The step number is the second whitespace-separated token of the folder name.
            num = int(step.split()[1])
            MAX = max(MAX, num)
            cnt = file_count(item, step, extension, wrong, num)
            remain = step_numbers[num] - cnt

            # "count / total", followed by the remaining amount in parentheses unless it is zero.
            progress = f"{cnt:02d} / {step_numbers[num]:02d}"
            langData[step] = progress if remain == 0 else f"{progress} ({remain:02d})"
        else:
            # The "기타" folder is counted without any step membership check.
            cnt = file_count(item, step, extension, wrong)
            langData["etc"] = f"{cnt:3d}"
        total += cnt

    langData["total"] = f"{total:3d}"
    data.append(langData)

# Re-arrange into table rows: one row per step, one cell per language.
# setdefault() also stores "" in a language's dict for any key it lacks.
col_data = [["Language", *(langData["Language"] for langData in data)]]
for num in range(1, MAX + 1):
    step = f"step {num:02d}"
    col_data.append([step, *(langData.setdefault(step, "") for langData in data)])
col_data.append(["etc", *(langData.setdefault("etc", "") for langData in data)])
col_data.append(["total", *(langData["total"] for langData in data)])

In [102]:
# Per-language counts by tier: the tier is read from the end of each solution
# file name (the text after the last "_" and before the first ".").
level = []
level_map = {"B": 0, "S": 1, "G": 2, "P": 3, "D": 4, "R": 5}
levels = ["B", "S", "G", "P", "D", "R"]
MAX = 0

for item in sorted(os.listdir('.')):
    is_solution_dir = os.path.isdir(item) and item.startswith("BAEKJOON")
    if not is_solution_dir:
        continue

    total = 0
    # Language name: the folder name without its first nine characters and its last one.
    lang = item[9:-1]
    langData = {"Language": lang}

    for step in sorted(os.listdir(item)):
        folder = os.path.join(item, step)
        if not os.path.isdir(folder):
            continue
        extension = lang_extension_map[lang]

        for filename in sorted(os.listdir(folder)):
            if not os.path.isfile(os.path.join(folder, filename)):
                continue
            if not filename.endswith(f".{extension}"):
                continue
            # score[0] is the tier letter, score[1] the digit inside that tier.
            score = filename.split('.')[0].split('_')[-1]
            counts = langData.setdefault(score[0], [0]*6)
            counts[int(score[1])] += 1
            total += 1
            # Remember the highest tier index seen across all languages.
            MAX = max(MAX, level_map[score[0]])

    langData["total"] = f"{total:3d}"
    level.append(langData)

# Re-arrange into table rows: for each tier up to the highest one seen, one
# row per digit from 5 down to 1, then an "All <tier>" row with the tier's sum.
# Empty rows are inserted around each "All" row.
col_level = [["Language", *(langData["Language"] for langData in level)]]
for T in levels[:MAX + 1]:
    for S in reversed(range(1, 6)):
        col_level.append([T+str(S), *(str(langData.setdefault(T, [0]*6)[S]) for langData in level)])
    col_level.append([])
    col_level.append(["All " + T, *(str(sum(langData.setdefault(T, [0]*6))) for langData in level)])
    col_level.append([])
col_level.append(["total", *(langData["total"] for langData in level)])

In [106]:
# Write the per-step table to count.csv: one line per row, every cell
# right-aligned in a 15-character field, cells separated by commas.
csv_path = "count.csv"
with open(csv_path, "w") as f:
    f.writelines(",".join(f"{s:>15}" for s in row) + "\n" for row in col_data)

# Print both tables as a single Markdown table, with two empty rows between them.
total_col = [*col_data, [], [], *col_level]
makeMD(total_col)

| Language | C | C++ | Java | Python |
| :---: | :---: | :---: | :---: | :---: |
| step 01 | 13 / 13 | 13 / 13 | 13 / 13 | 13 / 13 |
| step 02 | 07 / 07 | 07 / 07 | 07 / 07 | 07 / 07 |
| step 03 | 12 / 12 | 12 / 12 | 12 / 12 | 12 / 12 |
| step 04 | 10 / 10 | 10 / 10 | 10 / 10 | 10 / 10 |
| step 05 | 11 / 11 | 11 / 11 | 11 / 11 | 11 / 11 |
| step 06 | 08 / 08 | 08 / 08 | 08 / 08 | 08 / 08 |
| step 07 | 04 / 04 | 04 / 04 | 04 / 04 | 04 / 04 |
| step 08 | 07 / 07 | 07 / 07 | 07 / 07 | 07 / 07 |
| step 09 | 06 / 06 | 06 / 06 | 06 / 06 | 06 / 06 |
| step 10 | 08 / 08 | 08 / 08 | 08 / 08 | 08 / 08 |
| step 11 | 07 / 07 | 07 / 07 | 07 / 07 | 07 / 07 |
| step 12 | 06 / 06 | 06 / 06 | 06 / 06 | 06 / 06 |
| step 13 | 11 / 11 | 11 / 11 | 11 / 11 | 11 / 11 |
| step 14 | 08 / 08 | 08 / 08 | 08 / 08 | 08 / 08 |
| step 15 | 09 / 09 | 09 / 09 | 09 / 09 | 09 / 09 |
| step 16 | 11 / 11 | 11 / 11 | 11 / 11 | 11 / 11 |
| step 17 | 05 / 05 | 05 / 05 | 05 / 05 | 05 / 05 |
| step 18 | 05 / 05 | 05 / 05 | 05 