In [4]:
import os
import re
import shutil
import requests
import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.styles import Alignment

In [5]:
# Page that lists the step-by-step problem sets.
url = "https://www.acmicpc.net/step"
# Browser-like User-Agent sent with every request made in this notebook.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# Bound the wait so a stalled connection cannot hang the kernel, and fail loudly
# on an HTTP error instead of parsing an error page as if it were the step table.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [6]:
class Step:
    """A numbered step together with the problems that belong to it.

    Attributes:
        num: Step number.
        title: Step title.
        description: Step description text.
        total: Problem count recorded for the step.
        problems: Items belonging to the step; a new empty list when omitted.
        link: URL of the step's page.
    """

    def __init__(self, num=0, title="", description="", total=0, problems=None, link=""):
        # None is the "not given" sentinel, so instances never share one list.
        if problems is None:
            problems = []
        self.num, self.title, self.description = num, title, description
        self.total = total
        self.problems = problems
        self.link = link

    def __str__(self):
        """Return the header line, the description, one line per problem, then the link."""
        header = f"[{self.num}] {self.title} ({self.total}문제)"
        body = "\n".join(str(problem) for problem in self.problems)
        return "\n".join((header, f"{self.description}", body, f"{self.link}"))
  
class Problem:
    """A single problem: its number, title and page URL."""

    def __init__(self, num=0, title="", link=""):
        self.num, self.title, self.link = num, title, link

    def __str__(self):
        """Return the problem formatted as ``[num] title - link``."""
        return "[{}] {} - {}".format(self.num, self.title, self.link)

In [7]:
# All rows of the first <tbody> on the step-list page.
trs = soup.find("tbody").find_all("tr")

# Turn each full row into a Step; its problems are filled in by a later cell.
steps = []
for tr in trs:
    tds = tr.find_all("td")
    if len(tds) < 5:
        # Rows with fewer than five cells are not read.
        continue
    num = int(tds[0].text)
    title, description = tds[1].text, tds[2].text
    total = int(tds[4].text)
    # The href in the title cell is joined onto the site root.
    link = "https://www.acmicpc.net" + tds[1].find("a")["href"]
    step_obj = Step(num=num, title=title, description=description, total=total, link=link)
    steps.append(step_obj)

In [8]:
# Download every step's own page and attach the problems listed on it.
for step in steps:
    # Bound the wait and surface HTTP errors instead of parsing an error page.
    step_response = requests.get(step.link, headers=headers, timeout=30)
    step_response.raise_for_status()
    step_soup = BeautifulSoup(step_response.text, "html.parser")
    step_trs = step_soup.find("tbody").find_all("tr")

    # Build a fresh list and assign it at the end, so re-running this cell
    # replaces the step's problems instead of appending duplicates to them.
    fetched = []
    for step_tr in step_trs:
        step_tds = step_tr.find_all("td")
        if len(step_tds) >= 5:
            # Cell 1 holds the problem number, cell 2 the title and its link.
            num = int(step_tds[1].text)
            title = step_tds[2].text
            link = "https://www.acmicpc.net" + step_tds[2].find("a")["href"]
            problem_obj = Problem(num, title, link)
            fetched.append(problem_obj)
    step.problems = fetched

In [9]:
# Two lookup tables indexed by step number:
#   step_numbers[n]  -> problem count recorded for step n
#   step_problems[n] -> problem numbers collected for step n
max_step = max([step.num for step in steps])
step_numbers = [0]*(max_step+1)
# One distinct list per slot. `[[]] * n` would repeat a single shared list, so
# mutating the entry of any step number that is absent from `steps` would
# silently change every other unfilled slot as well.
step_problems = [[] for _ in range(max_step+1)]
for step in steps:
    step_numbers[step.num] = step.total
    step_problems[step.num] = [problem.num for problem in step.problems]
print(step_numbers)
print(step_problems)

[0, 13, 7, 12, 10, 11, 8, 4, 7, 6, 8, 7, 6, 11, 8, 9, 11, 5, 5, 7, 8, 16, 6, 5, 9, 7, 6, 6, 5, 16, 3, 7, 5, 9, 7, 4, 6, 10, 4, 10, 8, 8, 5, 8, 8, 6, 12, 6, 6, 8, 4, 11, 5, 5, 11, 7, 8]
[[], [2557, 1000, 1001, 10998, 1008, 10869, 10926, 18108, 10430, 2588, 11382, 10171, 10172], [1330, 9498, 2753, 14681, 2884, 2525, 2480], [2739, 10950, 8393, 25304, 25314, 15552, 11021, 11022, 2438, 2439, 10952, 10951], [10807, 10871, 10818, 2562, 10810, 10813, 5597, 3052, 10811, 1546], [27866, 2743, 9086, 11654, 11720, 10809, 2675, 1152, 2908, 5622, 11718], [25083, 3003, 2444, 10988, 1157, 2941, 1316, 25206], [2738, 2566, 10798, 2563], [2745, 11005, 2720, 2903, 2292, 1193, 2869], [5086, 2501, 9506, 1978, 2581, 11653], [27323, 1085, 3009, 15894, 9063, 10101, 5073, 14215], [24262, 24263, 24264, 24265, 24266, 24267, 24313], [2798, 2231, 19532, 1018, 1436, 2839], [2750, 2587, 25305, 2751, 10989, 1427, 11650, 11651, 1181, 10814, 18870], [10815, 14425, 7785, 1620, 10816, 1764, 1269, 11478], [1934, 13241, 1735

In [10]:
# File extension (without the dot) used for the solution files of each language.
lang_extension_map = {
    "C": "c",
    "C++": "cpp",
    "Java": "java",
    "Python": "py",
}

def extract(str1):
    """Return the first run of digits in ``str1`` as an int, or -1 when there is none."""
    found = re.search(r'\d+', str1)
    return -1 if found is None else int(found.group())

def find(str1):
    """Return the index of the first ``step_problems`` entry containing the number in ``str1``.

    The number is read from ``str1`` with ``extract``. Returns -1 when no entry
    contains it.
    """
    return next(
        (idx for idx, problems in enumerate(step_problems) if extract(str1) in problems),
        -1,
    )

def file_count(item, step, extension, wrong, num = 0):
    """Count the ``.extension`` files that sit directly inside ``item/step``.

    With ``num == 0`` every such file is counted. Otherwise a file is counted only
    when the number ``extract`` reads from its name is in ``step_problems[num]``;
    the remaining matching files are appended to ``wrong[item][step]`` instead.

    Returns the count. ``wrong`` is updated in place.
    """
    folder = os.path.join(item, step)
    suffix = f".{extension}"
    counted = 0
    for filename in sorted(os.listdir(folder)):
        if not os.path.isfile(os.path.join(folder, filename)):
            continue
        if not filename.endswith(suffix):
            continue
        if num != 0 and extract(filename) not in step_problems[num]:
            # Right extension, but its number is not listed for this step.
            wrong.setdefault(item, {}).setdefault(step, []).append(filename)
        else:
            counted += 1
    return counted

def makeMD(col):
    """Print ``col`` as a Markdown table whose columns are all centered.

    ``col[0]`` supplies the header cells and every later row becomes one table
    line. Each row must be an iterable of strings. Nothing is returned.
    """
    def as_line(cells):
        # One table line: cells separated by " | " and wrapped in outer pipes.
        return "| " + " | ".join(cells) + " |"

    header_cells = col[0]
    md_lines = [
        as_line(header_cells),                   # first line: header
        as_line([':---:'] * len(header_cells)),  # second line: separator
    ]
    # Remaining data rows.
    md_lines.extend(as_line(row) for row in col[1:])

    # Show the finished table.
    print("\n".join(md_lines))

In [11]:
wrong = {}

# Pass 1: scan every folder whose name starts with "BAEKJOON" and let file_count
# record, per step folder, the solution files whose number is not in that step.
for item in sorted(os.listdir('.')):
    if not (os.path.isdir(item) and item.startswith("BAEKJOON")):
        continue

    for step in sorted(os.listdir(item)):
        if not (os.path.isdir(os.path.join(item, step))) or step == ".vscode": continue
        # item[9:-1] is the text between the 9-character prefix and the last
        # character -- assumes folders are named like "BAEKJOON(<language>)".
        extension = lang_extension_map[item[9:-1]]
        # "기타" is checked without a step number; other folder names carry the
        # step number as their second whitespace-separated token.
        if step == "기타": file_count(item, step, extension, wrong)
        else: file_count(item, step, extension, wrong, int(step.split()[1]))

print("wrong", *wrong.items(), sep='\n')
if not wrong: print("틀린게 없음")

# Pass 2: move each misplaced file into the step folder that lists its problem.
for lang in wrong:
    print(f"{'='*10} {lang} {'='*10}")
    for step in wrong[lang]:
        print(f"<<<<< {step} >>>>>")
        # Iterate over a snapshot: moved files are removed from the live list, and
        # removing from a list while iterating over it skips the following element.
        for str1 in list(wrong[lang][step]):
            sub = find(str1)
            print(f"{str1} -> step {sub:02d}")
            # -1 means no step lists this problem; the file stays where it is.
            if (sub == -1): continue
            src = f"{lang}/{step}/{str1}"
            dst = f"{lang}/step {sub:02d}/{str1}"
            # The target step folder may not exist yet for this language.
            os.makedirs(f"{lang}/step {sub:02d}", exist_ok=True)
            shutil.move(src, dst)
            wrong[lang][step].remove(str1)
            wrong[lang][step].remove(str1)

wrong
틀린게 없음


In [12]:
data = []
wrong = {}
MAX = 0

# Build one summary dict per folder whose name starts with "BAEKJOON".
for item in sorted(os.listdir('.')):
    if not os.path.isdir(item) or not item.startswith("BAEKJOON"):
        continue

    lang = item[9:-1]
    langData = {"Language": lang}
    total = 0

    for step in sorted(os.listdir(item)):
        if step == ".vscode" or not os.path.isdir(os.path.join(item, step)):
            continue
        extension = lang_extension_map[lang]

        if step != "기타":
            # The step number is the folder name's second whitespace-separated token.
            num = int(step.split()[1])
            if num > MAX:
                MAX = num
            cnt = file_count(item, step, extension, wrong, num)
            remain = step_numbers[num] - cnt
            progress = f"{cnt:02d} / {step_numbers[num]:02d}"
            # The remaining count is shown in parentheses only when it is non-zero.
            langData[step] = progress if remain == 0 else f"{progress} ({remain:02d})"
        else:
            cnt = file_count(item, step, extension, wrong)
            langData["etc"] = f"{cnt:3d}"
        total += cnt

    langData["total"] = f"{total:3d}"
    data.append(langData)

# Arrange by column: one output row per label, one cell per language.
col_data = [["Language", *(langData["Language"] for langData in data)]]
for num in range(1, MAX+1):
    step = f"step {num:02d}"
    row = [step]
    # Languages without this step get an empty cell (also stored in their dict).
    row.extend(langData.setdefault(step, "") for langData in data)
    col_data.append(row)
col_data.append(["etc", *(langData.setdefault("etc", "") for langData in data)])
col_data.append(["total", *(langData["total"] for langData in data)])

In [13]:
level = []
# Position of each score letter in `levels` (presumably difficulty tiers -- confirm).
level_map = {"B": 0, "S": 1, "G": 2, "P": 3, "D": 4, "R": 5}
levels = ["B", "S", "G", "P", "D", "R"]
MAX = 0

# Build one dict per folder whose name starts with "BAEKJOON": letter -> six counters.
for item in sorted(os.listdir('.')):
    if not os.path.isdir(item) or not item.startswith("BAEKJOON"):
        continue

    lang = item[9:-1]
    langData = {"Language": lang}
    total = 0

    for step in sorted(os.listdir(item)):
        if not os.path.isdir(os.path.join(item, step)):
            continue
        extension = lang_extension_map[lang]

        for filename in sorted(os.listdir(os.path.join(item, step))):
            if not os.path.isfile(os.path.join(item, step, filename)):
                continue
            if not filename.endswith(f".{extension}"):
                continue
            # Score tag: the text after the last "_" in the part of the name
            # before the first "."; its first character is the letter and its
            # second character is the numeric index.
            score = filename.partition('.')[0].rpartition('_')[2]
            counts = langData.setdefault(score[0], [0]*6)
            counts[int(score[1])] += 1
            total += 1
            MAX = max(MAX, level_map[score[0]])

    langData["total"] = f"{total:3d}"
    level.append(langData)

# Arrange by column: one output row per label, one cell per language.
col_level = [["Language", *(langData["Language"] for langData in level)]]
for T in levels[:MAX+1]:
    # Indices 5 down to 1, one row each.
    for S in reversed(range(1, 6)):
        row = [T + str(S)]
        row.extend(str(langData.setdefault(T, [0]*6)[S]) for langData in level)
        col_level.append(row)
    # Empty rows surround the per-letter sum.
    col_level.append([])
    col_level.append(["All " + T, *(str(sum(langData.setdefault(T, [0]*6))) for langData in level)])
    col_level.append([])
col_level.append(["total", *(langData["total"] for langData in level)])

In [14]:
csv_path = "count.csv"
# Write the per-step table as comma-separated cells, each right-aligned to 15 characters.
with open(csv_path,"w") as f:
    for row in col_data:
        print(*(format(s, ">15") for s in row), sep=",", file=f)
# Print both tables as one Markdown table, with two empty rows between them.
total_col = [*col_data, [], [], *col_level]
makeMD(total_col)

| Language | C | C++ | Java | Python |
| :---: | :---: | :---: | :---: | :---: |
| step 01 | 13 / 13 | 13 / 13 | 13 / 13 | 13 / 13 |
| step 02 | 07 / 07 | 07 / 07 | 07 / 07 | 07 / 07 |
| step 03 | 12 / 12 | 12 / 12 | 12 / 12 | 12 / 12 |
| step 04 | 10 / 10 | 10 / 10 | 10 / 10 | 10 / 10 |
| step 05 | 11 / 11 | 11 / 11 | 11 / 11 | 11 / 11 |
| step 06 | 08 / 08 | 08 / 08 | 08 / 08 | 08 / 08 |
| step 07 | 04 / 04 | 04 / 04 | 04 / 04 | 04 / 04 |
| step 08 | 07 / 07 | 07 / 07 | 07 / 07 | 07 / 07 |
| step 09 | 06 / 06 | 06 / 06 | 06 / 06 | 06 / 06 |
| step 10 | 08 / 08 | 08 / 08 | 08 / 08 | 08 / 08 |
| step 11 | 07 / 07 | 07 / 07 | 07 / 07 | 07 / 07 |
| step 12 | 06 / 06 | 06 / 06 | 06 / 06 | 06 / 06 |
| step 13 | 11 / 11 | 11 / 11 | 11 / 11 | 11 / 11 |
| step 14 | 08 / 08 | 08 / 08 | 08 / 08 | 08 / 08 |
| step 15 | 09 / 09 | 09 / 09 | 09 / 09 | 09 / 09 |
| step 16 | 11 / 11 | 11 / 11 | 11 / 11 | 11 / 11 |
| step 17 | 05 / 05 | 05 / 05 | 05 / 05 | 05 / 05 |
| step 18 | 05 / 05 | 05 / 05 | 05 

In [15]:
def check(lang:str, total):
    """Compare the solution files on disk for ``lang`` with a list of problem numbers.

    Files are read from the sub-folders of ``./BAEKJOON(<lang>)``; only files
    ending in the language's extension are considered, and each file's problem
    number is read from its name with ``extract``. A problem number found in more
    than one sub-folder is reported on stdout, and a summary is printed at the end.

    Args:
        lang: Language name; must be a key of ``lang_extension_map``.
        total: Whitespace-separated problem numbers.

    Returns:
        A dict of sorted lists: ``"t-s"`` (in ``total`` but no file), ``"s-t"``
        (file present but not in ``total``), ``"s"`` (all numbers on disk) and
        ``"t"`` (all numbers in ``total``). When ``total`` contains a duplicate,
        an error is printed and all four lists are empty.

    Raises:
        ValueError: if a token of ``total`` is not an integer.
        KeyError: if ``lang`` is not in ``lang_extension_map``.
    """
    # split() without an argument tolerates newlines, repeated spaces and a
    # trailing space; split(" ") yields empty tokens there and int("") fails.
    total = [int(token) for token in total.split()]
    solves = []
    folder = {}  # problem number -> sub-folders that contain a file for it
    path = f"./BAEKJOON({lang})"
    suffix = f".{lang_extension_map[lang]}"
    for step in sorted(os.listdir(path)):
        step_path = os.path.join(path, step)
        if not os.path.isdir(step_path): continue
        for filename in os.listdir(step_path):
            if os.path.isfile(os.path.join(step_path, filename)) and filename.endswith(suffix):
                # extract gives -1 for a name without digits; it is recorded as-is.
                num = extract(filename)
                solves.append(num)
                folder.setdefault(num, []).append(step)
    for i in folder:
        if (len(folder[i]) != 1):
            print(i, "중복", folder[i])

    total_set = set(total)
    if len(total) != len(total_set):
        print("에러: 주어진 문제가 중복이 된 문제가 존재합니다.")
        # Same value type (list) as the normal return below.
        return {"t-s": [], "s-t": [], "s": [], "t": []}

    solve_set = set(solves)
    dic = {
        "t-s": sorted(total_set - solve_set),
        "s-t": sorted(solve_set - total_set),
        "s": sorted(solve_set),
        "t": sorted(total_set),
    }

    print("내가 푼 총 문제 수 :", len(dic["t"]))
    print("현재 폴더에 있는 총 문제 수 :", len(dic["s"]))
    print("제출한 문제 중 없는 문제 :", len(dic["t-s"]), dic["t-s"])
    print("미완성 문제 :", len(dic["s-t"]), dic["s-t"])
    return dic

In [16]:
python_total = "1000 1001 1002 1003 1005 1008 1010 1012 1016 1018 1022 1026 1037 1069 1085 1086 1110 1149 1152 1157 1158 1167 1181 1193 1197 1202 1259 1260 1269 1271 1300 1305 1311 1316 1330 1406 1427 1436 1450 1463 1504 1520 1541 1546 1557 1620 1629 1644 1647 1654 1676 1697 1707 1717 1725 1735 1753 1764 1766 1774 1780 1786 1806 1809 1874 1904 1912 1920 1927 1929 1931 1932 1934 1949 1956 1966 1967 1976 1978 1990 1991 1992 2042 2075 2098 2108 2110 2156 2162 2163 2164 2166 2178 2206 2213 2231 2239 2252 2259 2263 2292 2293 2338 2346 2357 2393 2420 2438 2439 2440 2444 2447 2467 2470 2475 2480 2482 2485 2501 2525 2530 2531 2533 2557 2558 2559 2562 2563 2565 2566 2577 2579 2580 2581 2587 2588 2606 2609 2618 2629 2630 2667 2675 2696 2720 2738 2739 2740 2741 2742 2743 2744 2745 2750 2751 2752 2753 2754 2775 2798 2805 2839 2845 2851 2869 2884 2903 2908 2920 2941 3003 3009 3015 3046 3052 3273 3665 3687 3733 4101 4134 4153 4195 4386 4470 4779 4803 4913 4948 4949 4999 5073 5086 5337 5338 5339 5341 5522 5532 5543 5554 5596 5597 5622 5639 5670 5717 6497 6549 6840 7287 7561 7562 7568 7569 7576 7579 7662 7785 7869 7891 8370 8393 8437 8545 8871 8958 9012 9019 9063 9086 9095 9184 9251 9252 9316 9370 9372 9375 9461 9498 9506 9653 9654 9663 9935 10039 10101 10156 10170 10171 10172 10189 10250 10430 10699 10718 10757 10768 10773 10797 10798 10807 10808 10809 10810 10811 10813 10814 10815 10816 10818 10828 10830 10844 10845 10866 10869 10870 10871 10872 10926 10942 10950 10951 10952 10986 10988 10989 10998 11005 11021 11022 11025 11047 11049 11050 11051 11053 11054 11066 11279 11286 11382 11399 11401 11404 11444 11478 11505 11650 11651 11653 11654 11657 11659 11660 11718 11720 11723 11724 11725 11726 11727 11728 11729 11758 11779 11780 11866 11942 11945 11948 12015 12789 12852 12865 13241 13277 13294 13305 13549 13866 13909 13913 14002 14003 14215 14425 14581 14645 14681 14725 14888 14889 14928 15000 15232 15439 15552 15596 15633 15649 15650 15651 15652 15654 15657 15663 15666 15680 
15681 15727 15733 15740 15829 15873 15894 15962 15963 15964 16139 16170 16199 16204 16394 16401 16430 16486 16928 16953 17103 17219 17256 17298 17299 17386 17387 17404 17472 17626 18108 18110 18258 18352 18409 18870 19532 19944 20040 20149 20492 20551 20920 20944 23292 23293 23301 24060 24082 24262 24263 24264 24265 24266 24267 24313 24416 24444 24445 24479 24480 24511 24723 24883 24900 25083 25184 25192 25206 25304 25305 25308 25311 25314 25372 25501 25640 25682 25955 26069 26711 27172 27294 27323 27433 27434 27866 27889 27959 28065 28113 28116 28125 28126 28127 28129 28135 28136 28137 28235 28278 28279 28292 28444 28691 28701 28702 29699 29731 29751 30007 30030 30087 30402 30618 30676 30802 30803 30804 31403 31922 31923 31924 31925 31926 31929 32260 32384 32775 32929 32978 32979 32980 32981 32982 32983 32986 33515"
# Compare the Python folder with the numbers in python_total and keep the result dict.
python = check(lang='Python', total=python_total)

14425 중복 ['step 14', 'step 41']
10816 중복 ['step 14', 'step 25']
2252 중복 ['step 30', '기타']
내가 푼 총 문제 수 : 509
현재 폴더에 있는 총 문제 수 : 336
제출한 문제 중 없는 문제 : 173 [1016, 1022, 1026, 1110, 1158, 1259, 1271, 1406, 1557, 1647, 1676, 1874, 1966, 1990, 2042, 2163, 2239, 2259, 2338, 2357, 2393, 2420, 2440, 2467, 2475, 2530, 2558, 2577, 2609, 2741, 2742, 2744, 2752, 2754, 2775, 2845, 2851, 2920, 3046, 4101, 4153, 4470, 4913, 4999, 5337, 5338, 5339, 5522, 5532, 5543, 5554, 5596, 5717, 7287, 7561, 7568, 7662, 8370, 8958, 9095, 9316, 9653, 9654, 10039, 10156, 10170, 10250, 10699, 10718, 10757, 10768, 10797, 10808, 10828, 10845, 10866, 10942, 11025, 11051, 11505, 11726, 11942, 11945, 11948, 13277, 13294, 13866, 14581, 14645, 14928, 15596, 15633, 15654, 15657, 15663, 15666, 15680, 15727, 15733, 15740, 15829, 15873, 15962, 15963, 15964, 16170, 16199, 16204, 16394, 16401, 16430, 16486, 16953, 17256, 18110, 18352, 19944, 20492, 20551, 23292, 23293, 23301, 24082, 24883, 24900, 25311, 25372, 25640, 25955, 26711, 

In [17]:
java_total = "1000 1001 1003 1008 1010 1018 1022 1037 1085 1149 1152 1157 1181 1193 1269 1300 1316 1330 1427 1436 1463 1541 1546 1620 1629 1654 1735 1764 1780 1904 1912 1920 1927 1929 1931 1932 1934 1978 1992 2075 2108 2110 2156 2164 2231 2292 2346 2438 2439 2444 2447 2480 2485 2501 2525 2557 2559 2562 2563 2565 2566 2579 2580 2581 2587 2588 2630 2675 2720 2738 2739 2740 2743 2745 2750 2751 2753 2798 2805 2839 2869 2884 2903 2908 2941 3003 3009 3052 4134 4779 4948 4949 5073 5086 5597 5622 6549 7785 8393 9012 9063 9086 9184 9251 9461 9498 9506 9663 10101 10171 10172 10430 10773 10798 10807 10809 10810 10811 10813 10814 10815 10816 10818 10830 10844 10869 10870 10871 10872 10926 10950 10951 10952 10986 10988 10989 10998 11005 11021 11022 11047 11050 11053 11054 11279 11286 11382 11399 11401 11444 11478 11650 11651 11653 11654 11659 11660 11718 11720 11723 11726 11729 11866 12015 12095 12789 12865 13241 13305 13909 14215 14425 14681 14888 14889 15439 15552 15649 15650 15651 15652 15894 16139 17103 17219 18108 18258 18870 19532 20920 24060 24262 24263 24264 24265 24266 24267 24313 24416 24511 24723 25083 25192 25206 25304 25305 25314 25501 25682 26069 27323 27433 27866 28278 28279"
# Compare the Java folder with the numbers in java_total and keep the result dict.
java = check(lang='Java', total=java_total)

10816 중복 ['step 14', 'step 25']
내가 푼 총 문제 수 : 215
현재 폴더에 있는 총 문제 수 : 223
제출한 문제 중 없는 문제 : 0 []
미완성 문제 : 8 [1202, 1520, 2293, 2629, 2696, 7579, 11049, 11066]


In [18]:
c_total = "1000 1001 1008 1010 1018 1037 1085 1149 1152 1157 1181 1193 1269 1316 1330 1427 1436 1546 1620 1735 1764 1904 1912 1929 1932 1934 1978 2108 2164 2231 2292 2346 2438 2439 2444 2447 2480 2485 2501 2525 2557 2562 2563 2566 2579 2580 2581 2587 2588 2675 2720 2738 2739 2743 2745 2750 2751 2753 2798 2839 2869 2884 2903 2908 2941 3003 3009 3052 4134 4779 4948 4949 5073 5086 5597 5622 7785 8393 9012 9063 9086 9184 9461 9498 9506 9663 10101 10171 10172 10430 10773 10798 10807 10809 10810 10811 10813 10814 10815 10816 10818 10869 10870 10871 10872 10926 10950 10951 10952 10988 10989 10998 11005 11021 11022 11050 11382 11478 11650 11651 11653 11654 11718 11720 11729 11866 12789 13241 13909 14215 14425 14681 14888 14889 15439 15552 15649 15650 15651 15652 15894 17103 18108 18258 18870 19532 20920 24060 24262 24263 24264 24265 24266 24267 24313 24416 24511 24723 25083 25192 25206 25304 25305 25314 25501 26069 27323 27433 27866 28278 28279"
# Compare the C folder with the numbers in c_total and keep the result dict.
c = check(lang='C', total=c_total)

내가 푼 총 문제 수 : 171
현재 폴더에 있는 총 문제 수 : 171
제출한 문제 중 없는 문제 : 0 []
미완성 문제 : 0 []


In [19]:
cpp_total = "1000 1001 1008 1010 1018 1037 1085 1149 1152 1157 1181 1193 1269 1316 1330 1427 1436 1546 1620 1735 1764 1904 1912 1929 1932 1934 1978 2108 2164 2231 2292 2346 2438 2439 2444 2447 2480 2485 2501 2525 2557 2562 2563 2566 2579 2580 2581 2587 2588 2675 2720 2738 2739 2743 2745 2750 2751 2753 2798 2839 2869 2884 2903 2908 2941 3003 3009 3052 4134 4779 4948 4949 5073 5086 5597 5622 7785 8393 9012 9063 9086 9184 9461 9498 9506 9663 10101 10171 10172 10430 10773 10798 10807 10809 10810 10811 10813 10814 10815 10816 10818 10869 10870 10871 10872 10926 10950 10951 10952 10988 10989 10998 11005 11021 11022 11050 11382 11478 11650 11651 11653 11654 11718 11720 11729 11866 12789 13241 13909 14215 14425 14681 14888 14889 15439 15552 15649 15650 15651 15652 15894 17103 18108 18258 18870 19532 20920 24060 24262 24263 24264 24265 24266 24267 24313 24416 24511 24723 25083 25192 25206 25304 25305 25314 25501 26069 27323 27433 27866 28278 28279"
# Compare the C++ folder with the numbers in cpp_total and keep the result dict.
cpp = check(lang='C++', total=cpp_total)

내가 푼 총 문제 수 : 171
현재 폴더에 있는 총 문제 수 : 171
제출한 문제 중 없는 문제 : 0 []
미완성 문제 : 0 []
