
# **Unconstructed Preprocessor**


In [None]:
from __future__ import annotations
import os, json, re
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple, Iterable
from collections import Counter
from pathlib import Path
import pandas as pd

class UnconstructedPreprocessor:
    # -------------------- Helpers --------------------
    # strptime/strftime pattern (YYYY-MM-DD) used by _parse_date and when
    # formatting the latest training end date.
    DATE_FMT = "%Y-%m-%d"

    @staticmethod
    def _parse_date(s: Optional[str]) -> Optional[datetime]:
        if s in (None, "", "null"):
            return None
        try:
            return datetime.strptime(str(s)[:10], UnconstructedPreprocessor.DATE_FMT)
        except Exception:
            return None

    @staticmethod
    def _days_between(d1: Optional[datetime], d2: Optional[datetime]) -> Optional[int]:
        if d1 is None or d2 is None:
            return None
        return (d1 - d2).days

    @staticmethod
    def _estimate_typos_korean(text: str) -> int:
        if not text:
            return 0
        dbl_spaces = len(re.findall(r" {2,}", text))
        repeat_punct = len(re.findall(r"([\.?!,~\-])\1{2,}", text))
        latin_tokens = re.findall(r"\b[A-Za-z]{2,}\b", text)
        return dbl_spaces + repeat_punct + len(latin_tokens)

    @staticmethod
    def _file_exists(p: Path) -> bool:
        try:
            return p.exists() and p.is_file()
        except Exception:
            return False

    @staticmethod
    def _resolve_any(base_dir: Path, candidates: Iterable[str]) -> Path:
        """Try exact, json/, and recursive search under base_dir."""
        for c in candidates:
            p = Path(c)
            if p.is_absolute() and UnconstructedPreprocessor._file_exists(p):
                return p
            if UnconstructedPreprocessor._file_exists(base_dir / c):
                return (base_dir / c)
            if UnconstructedPreprocessor._file_exists(base_dir / "json" / c):
                return (base_dir / "json" / c)
        # recursive: try last candidate's basename
        for c in candidates:
            name = Path(c).name
            matches = list(base_dir.rglob(name))
            for m in matches:
                if UnconstructedPreprocessor._file_exists(m):
                    return m
        # if still not found, raise helpful error
        tried = []
        for c in candidates:
            tried.extend([str(Path(c)), str(base_dir / c), str(base_dir / "json" / c), f"{str(base_dir)}/**/{Path(c).name}"])
        raise FileNotFoundError(f"다음 경로에서 파일을 찾지 못했습니다:\n" + "\n".join(tried))

    # -------------------- LLM --------------------
    # System prompt sent to the LLM (kept in Korean): it frames the model as a
    # recruiter, defines the 0-100 fit rubric by score band, and demands a JSON
    # reply with "score" and "rationale".
    HR_SYSTEM = """당신은 채용 담당자입니다.
입력으로 주어진 '지원자 자료'가 '목표 직종'에 얼마나 적합한지 0~100으로 평가하세요.
- 90~100: 직무핵심 역량과 직접 연결, 최근 경력/훈련/성과가 뚜렷
- 70~89: 관련성이 높고 실무 연결고리가 충분
- 40~69: 부분 관련. 기초 역량은 있으나 연결고리/증거가 부족
- 10~39: 간접적, 전환 가능성은 있으나 근거 약함
- 0~9: 관련 근거 없음
반드시 JSON으로 답변: {\"score\": int, \"rationale\": \"짧은 이유\"}"""

    # Few-shot examples keyed by section name. Each entry pairs an "input"
    # (job title + applicant text) with the expected "output" (score +
    # rationale); _build_prompt renders the list for the requested section
    # into the user prompt.
    FEWSHOT = {
        "자기소개서": [
            {"input":{"job":"데이터 분석가","text":"통계학 전공, 머신러닝 프로젝트 다수 수행, Python/SQL/시각화로 성과 수치 제시"},
             "output":{"score":95,"rationale":"핵심 역량, 실무 성과 구체적"}},
            {"input":{"job":"프론트엔드 개발자","text":"React/TypeScript 기반 대시보드 개발, 성능 최적화로 LCP 40% 개선"},
             "output":{"score":93,"rationale":"직접 성능 개선 성과"}},
            {"input":{"job":"회계","text":"K-IFRS 재무제표 작성, 결산/세무조정, 전표 처리 자동화 경험"},
             "output":{"score":90,"rationale":"핵심 실무 지식"}},
        ],
        "이력서": [
            {"input":{"job":"데이터 엔지니어","text":"데이터 파이프라인 운영, Spark SQL 튜닝, Kafka 스트리밍 구축"},
             "output":{"score":92,"rationale":"프로덕션 파이프라인 경험"}},
            {"input":{"job":"마케팅 분석가","text":"퍼포먼스 캠페인 ROI 분석, 리타게팅 최적화"},
             "output":{"score":88,"rationale":"분석/성과 근거"}},
        ],
        "직업훈련": [
            {"input":{"job":"데이터 분석가","text":"빅데이터 분석(파이썬/SQL/머신러닝) 수료, 팀 프로젝트 산출물"},
             "output":{"score":85,"rationale":"핵심 커리큘럼 수료"}},
            {"input":{"job":"프론트엔드 개발자","text":"React/Next.js 심화, 테스트/성능 최적화 모듈"},
             "output":{"score":82,"rationale":"현업 연계 과정"}},
        ],
        "자격증": [
            {"input":{"job":"데이터 분석가","text":"ADsP, SQLD, 빅데이터분석기사"},
             "output":{"score":88,"rationale":"핵심 자격 보유"}},
            {"input":{"job":"회계","text":"전산회계1급, FAT"},
             "output":{"score":86,"rationale":"직무 핵심 자격"}},
        ]
    }

    @staticmethod
    def _offline_score(section: str, text: str, job_examples: List[str]) -> Tuple[int,str]:
        if not text or text.strip() == "정보 없음":
            return 10, "자료 부족"
        keywords = [
            '데이터','분석','SQL','파이썬','머신','시각화','대시보드','A/B','통계','모델','예측',
            'React','TypeScript','API','Spring','배포','ETL','Spark','Kafka','Airflow',
            '회계','결산','세무','채용','온보딩','GA4','ROI','엑셀','보고서','대학','경력'
        ]
        hits = sum(1 for k in keywords if k.lower() in text.lower())
        base = 45
        return min(95, base + hits*5), f"키워드 {hits}개 매칭"

    def _build_prompt(self, section: str, job_name: str, job_examples: List[str], text: str) -> str:
        import json as _json
        def shot(s):
            return f"[예시]\\n직무: {s['input']['job']}\\n자료:\\n{s['input']['text']}\\n=> {_json.dumps(s['output'], ensure_ascii=False)}"
        examples = "\\n\\n".join(shot(s) for s in self.FEWSHOT.get(section, []))
        job_hint = f"참고 직무 예시: {', '.join(job_examples[:12])}" if job_examples else "참고 직무 예시: 없음"
        return f"""[평가 섹션] {section}
[목표 직종] {job_name}
{job_hint}

{examples}

[지원자 자료]
{text}

[응답 형식] JSON 한 줄 ({{"score": 0-100 정수, "rationale": "간단한 이유"}})
"""

    def _score_with_llm(self, section: str, job_name: str, job_examples: List[str], text: str) -> Tuple[int,str]:
        if not self.api_key:
            return self._offline_score(section, text, job_examples)
        try:
            os.environ["OPENAI_API_KEY"] = self.api_key
            from openai import OpenAI
            client = OpenAI()
            sys_msg = {"role":"system","content": self.HR_SYSTEM}
            user_msg = {"role":"user","content": self._build_prompt(section, job_name, job_examples, text)}
            resp = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[sys_msg, user_msg],
                temperature=0.2,
                response_format={"type":"json_object"},
            )
            content = resp.choices[0].message.content
            import json as _json
            data = _json.loads(content)
            score = int(max(0, min(100, int(data.get("score", 0)))))
            why = str(data.get("rationale", ""))[:240]
            return score, why
        except Exception:
            return self._offline_score(section, text, job_examples)

    # -------------------- Init --------------------
    def __init__(
        self,
        base_dir: str,
        coverletters_file: str = "COVERLETTERS_JSON.json",
        trainings_file: str = "TRAININGS_JSON.json",
        licenses_file: str = "LICENSES_JSON.json",
        resume_file: str = "RESUME_JSON.json",
        certinfo_file: str = "CERTIFICATION_INFO_JSON.json",
        job_excel_file: Optional[str] = "2025-직종 분류표.xlsx",
        job_csv_file: Optional[str] = "job_subcategories.csv",
        api_key: Optional[str] = None,
        max_cover_len: int = 800,
        cover_exceed_ratio: float = 0.85,
        allowed_seek_ids: Optional[List[str]] = None,
    ):
        self.base_dir = Path(base_dir)
        self.api_key = api_key
        self.max_cover_len = max_cover_len
        self.cover_exceed_ratio = cover_exceed_ratio
        self.allowed_seek_ids = set(allowed_seek_ids) if allowed_seek_ids else None

        # resolve files (exact, json/, recursive)
        self.coverletters_path = self._resolve_any(self.base_dir, [coverletters_file])
        self.trainings_path    = self._resolve_any(self.base_dir, [trainings_file])
        self.licenses_path     = self._resolve_any(self.base_dir, [licenses_file])
        self.resume_path       = self._resolve_any(self.base_dir, [resume_file])
        self.certinfo_path     = self._resolve_any(self.base_dir, [certinfo_file])

        # load jsons
        self.coverletters = json.load(open(self.coverletters_path, encoding="utf-8"))
        self.trainings    = json.load(open(self.trainings_path,    encoding="utf-8"))
        self.licenses     = json.load(open(self.licenses_path,     encoding="utf-8"))
        self.resumes      = json.load(open(self.resume_path,       encoding="utf-8"))
        self.certinfo     = json.load(open(self.certinfo_path,     encoding="utf-8"))

        # job classification
        self.jobmap = pd.DataFrame(columns=["소분류코드","소분류명","세분류명"])
        used_job = False
        if job_excel_file:
            try:
                p = self._resolve_any(self.base_dir, [job_excel_file])
                xls = pd.ExcelFile(p)
                df_raw = pd.read_excel(xls, xls.sheet_names[0])
                records, current_sub_code, current_sub_name = [], None, None
                for _, row in df_raw.iterrows():
                    sub_code = row.get("Unnamed: 4"); sub_name = row.get("Unnamed: 5")
                    code = row.get("Unnamed: 6"); jobname = row.get("Unnamed: 7")
                    if pd.notna(sub_code):
                        current_sub_code = str(int(sub_code)) if isinstance(sub_code,(int,float)) else str(sub_code).strip()
                        current_sub_name = str(sub_name).strip() if pd.notna(sub_name) else ""
                        continue
                    if pd.notna(code) and pd.notna(jobname) and current_sub_code:
                        records.append({"소분류코드": current_sub_code, "소분류명": current_sub_name, "세분류명": str(jobname).strip()})
                if records:
                    self.jobmap = pd.DataFrame(records).groupby(["소분류코드","소분류명"])["세분류명"].apply(list).reset_index()
                    used_job = True
            except FileNotFoundError:
                used_job = False
        if not used_job and job_csv_file:
            try:
                p = self._resolve_any(self.base_dir, [job_csv_file])
                df = pd.read_csv(p)
                if "세분류명" in df.columns and df["세분류명"].apply(lambda x: isinstance(x, str) and x.startswith("[")).any():
                    df["세분류명"] = df["세분류명"].apply(lambda s: eval(s) if isinstance(s,str) else [])
                self.jobmap = df[["소분류코드","소분류명","세분류명"]].copy()
            except FileNotFoundError:
                pass

        self.js_lookup = {str(r["소분류코드"]).strip(): r["세분류명"] for _, r in self.jobmap.iterrows()}
        self.js_name   = {str(r["소분류코드"]).strip(): r["소분류명"] for _, r in self.jobmap.iterrows()}

        # indices
        self.seek_to_jhnt, self.jhnt_to_seek, self.seek_to_jhcr_de, self.seek_to_hope = {}, {}, {}, {}
        for row in self.certinfo:
            seek = row.get("SEEK_CUST_NO"); jhnt = row.get("JHNT_CTN")
            if seek:
                self.seek_to_hope[seek] = (row.get("HOPE_JSCD1") or "")
                if row.get("JHCR_DE"): self.seek_to_jhcr_de[seek] = row.get("JHCR_DE")
            if seek and jhnt:
                self.seek_to_jhnt[seek] = jhnt
                self.jhnt_to_seek[jhnt] = seek

        self.jhnt_trainings = {row.get("JHNT_CTN"): row.get("TRAININGS", []) for row in self.trainings}
        self.jhnt_licenses  = {row.get("JHNT_CTN"): row.get("LICENSES",  []) for row in self.licenses}
        self.seek_coverletters = {row.get("SEEK_CUST_NO"): row.get("COVERLETTERS", []) for row in self.coverletters}
        self.seek_resumes      = {row.get("SEEK_CUST_NO"): row.get("RESUMES",       []) for row in self.resumes}

    # -------------------- Universe --------------------
    def _seek_ids(self) -> List[str]:
        ids = [row.get("SEEK_CUST_NO") for row in self.certinfo if row.get("SEEK_CUST_NO")]
        if self.allowed_seek_ids is not None:
            ids = [i for i in ids if i in self.allowed_seek_ids]
        return sorted(set(ids))

    def _hope_info(self, seek_id: str) -> Tuple[str, str, List[str]]:
        hope = str(self.seek_to_hope.get(seek_id) or "").strip()
        job_name = self.js_name.get(hope, f"소분류 {hope}") if hope else "미상"
        job_examples = self.js_lookup.get(hope, [])
        return hope, job_name, job_examples

    # -------------------- RESUME Parsing (rules) --------------------
    def _build_resume_sections(self, seek_id: str) -> Dict[str, List[Dict[str, Any]]]:
        sections = {
            "학력": [], "개인경력": [], "봉사활동": [],
            "논문": [], "수상경력": [], "참여프로젝트": [],
            "훈련통합": [], "해외연수": [], "외국어능력": [],
            "전산자격통합": []
        }
        for resume in self.seek_resumes.get(seek_id, []):
            for it in (resume.get("ITEMS") or []):
                sec = it.get("RESUME_ITEM_CLCD_NM") or it.get("DS_RESUME_ITEM_CLCD") or ""
                nm  = it.get("RESUME_ITEM_1_NM") or ""
                val = it.get("RESUME_ITEM_1_VAL") or ""
                st  = it.get("HIST_STDT") or ""
                en  = it.get("HIST_ENDT") or ""
                rec = {"sec":sec,"name":nm,"value":val,"start":st,"end":en}
                sec_norm = sec.replace(" ", "") if isinstance(sec, str) else sec
                if sec_norm in ["학력","개인경력","봉사활동","논문","수상경력","참여프로젝트","해외연수","외국어능력","전산능력","자격면허","훈련","직업훈련"]:
                    if sec_norm in ["전산능력","자격면허"]:
                        sections["전산자격통합"].append(rec)
                    elif sec_norm in ["훈련","직업훈련"]:
                        sections["훈련통합"].append(rec)
                    else:
                        sections[sec_norm].append(rec)
        return sections

    # -------------------- Sections (numeric + qualitative) --------------------
    def process_coverletters(self, seek_id: str) -> Dict[str, Any]:
        texts = []
        items = []
        for c in self.seek_coverletters.get(seek_id, []):
            if str(c.get("BASS_SFID_YN","")).upper()=="Y":
                items = c.get("ITEMS", []) or []
                for it in items:
                    t = it.get("SELF_INTRO_CONT") or ""
                    if t: texts.append(t.strip())
                break
        full_text = "\n\n".join(texts) if texts else "정보 없음"
        lens = [len(it.get("SELF_INTRO_CONT") or "") for it in items] if items else []
        max_len = max(lens) if lens else 0
        typo = sum(self._estimate_typos_korean(it.get("SELF_INTRO_CONT") or "") for it in items) if items else 0

        hope, job_name, job_examples = self._hope_info(seek_id)
        score, why = self._score_with_llm("자기소개서", job_name, job_examples, full_text)
        return {
            "SEEK_CUST_NO": seek_id,
            "HOPE_JSCD1": hope, "HOPE_JOB_NAME": job_name,
            "cover_items_count": len(items),
            "cover_max_chars": max_len,
            "cover_exceed_85pct": int(max_len >= self.max_cover_len * self.cover_exceed_ratio),
            "cover_typo_count": typo,
            "cover_score": score, "cover_why": why
        }

    def process_resume(self, seek_id: str) -> Dict[str, Any]:
        secs = self._build_resume_sections(seek_id)

        # Numeric summaries
        def _count(key): return len(secs.get(key, []))
        edu_cnt = _count("학력")
        exp_cnt = _count("개인경력")
        vol_cnt = _count("봉사활동")
        pap_cnt = _count("논문")
        awd_cnt = _count("수상경력")
        prj_cnt = _count("참여프로젝트")
        trn_cnt = _count("훈련통합")
        ov_cnt  = _count("해외연수")
        lang_cnt= _count("외국어능력")
        itc_cnt = _count("전산자격통합")

        # Sum experience duration (days)
        def _sum_days(items):
            total = 0
            for r in items:
                d1 = self._parse_date(r.get("end"))
                d2 = self._parse_date(r.get("start"))
                if d1 and d2:
                    total += self._days_between(d1, d2) or 0
            return total
        exp_days = _sum_days(secs.get("개인경력", []))

        # Build text block following rules
        lines = []
        def add_block(title, arr, formatter=lambda r: f"{r.get('name') or r.get('value')} ({r.get('start')}~{r.get('end')})"):
            if not arr: return
            lines.append(f"[{title}]")
            for r in arr:
                if title=="논문":
                    nm = r.get("name") or r.get("value")
                    body = r.get("value") if r.get("name") else ""
                    lines.append(f"- {nm}: {body} ({r.get('start')}~{r.get('end')})")
                else:
                    lines.append(f"- {formatter(r)}")

        add_block("학력", secs.get("학력", []))
        add_block("개인경력", secs.get("개인경력", []))
        add_block("봉사활동", secs.get("봉사활동", []))
        add_block("참여프로젝트", secs.get("참여프로젝트", []))
        add_block("수상경력", secs.get("수상경력", []))
        add_block("논문", secs.get("논문", []))
        add_block("훈련·직업훈련(통합)", secs.get("훈련통합", []))
        add_block("해외연수", secs.get("해외연수", []), formatter=lambda r: f"{r.get('value') or r.get('name')} ({r.get('start')}~{r.get('end')})")
        add_block("외국어능력", secs.get("외국어능력", []))
        add_block("전산능력+자격면허(통합)", secs.get("전산자격통합", []))

        text = "\n".join(lines) if lines else "정보 없음"

        hope, job_name, job_examples = self._hope_info(seek_id)
        score, why = self._score_with_llm("이력서", job_name, job_examples, text)
        return {
            "SEEK_CUST_NO": seek_id, "HOPE_JSCD1": hope, "HOPE_JOB_NAME": job_name,
            "edu_count": edu_cnt, "exp_count": exp_cnt, "exp_days": exp_days,
            "vol_count": vol_cnt, "paper_count": pap_cnt, "award_count": awd_cnt,
            "project_count": prj_cnt, "training_resume_count": trn_cnt,
            "overseas_count": ov_cnt, "lang_count": lang_cnt, "it_comp_license_count": itc_cnt,
            "resume_score": score, "resume_why": why
        }

    def process_trainings(self, seek_id: str) -> Dict[str, Any]:
        secs = self._build_resume_sections(seek_id)
        resume_train = secs.get("훈련통합", [])

        jhnt = self.seek_to_jhnt.get(seek_id)
        tr_json = self.jhnt_trainings.get(jhnt, []) if jhnt else []

        def _to_rec_from_json(t):
            return {"name": t.get("TRNG_NM") or "", "start": t.get("TRNG_BGDE") or "", "end": t.get("TRNG_ENDE") or ""}
        def _to_rec_from_resume(t):
            return {"name": t.get("name") or t.get("value") or "", "start": t.get("start") or "", "end": t.get("end") or ""}

        combined = [ _to_rec_from_json(t) for t in tr_json ] + [ _to_rec_from_resume(t) for t in resume_train ]

        seen = set(); uniq = []
        for r in combined:
            key = (r["name"], r["start"], r["end"])
            if key not in seen:
                seen.add(key); uniq.append(r)

        ends = [self._parse_date(r["end"]) for r in uniq if r.get("end")]
        ends = [d for d in ends if d]
        last_end = max(ends).strftime(self.DATE_FMT) if ends else None
        jobseek = self.seek_to_jhcr_de.get(seek_id)
        gap = self._days_between(self._parse_date(jobseek), self._parse_date(last_end)) if (jobseek and last_end) else None

        text = "\n".join([f"{r['name']} ({r['start']}~{r['end']})" for r in uniq]) if uniq else "정보 없음"

        hope, job_name, job_examples = self._hope_info(seek_id)
        score, why = self._score_with_llm("직업훈련", job_name, job_examples, text)
        return {
            "SEEK_CUST_NO": seek_id, "HOPE_JSCD1": hope, "HOPE_JOB_NAME": job_name,
            "training_count_total": len(uniq),
            "training_last_end": last_end,
            "jobseek_date": jobseek,
            "days_last_training_to_jobseek": gap,
            "training_score": score, "training_why": why
        }

    def process_licenses(self, seek_id: str) -> Dict[str, Any]:
        secs = self._build_resume_sections(seek_id)
        resume_itlic = secs.get("전산자격통합", [])

        jhnt = self.seek_to_jhnt.get(seek_id)
        lic_json = self.jhnt_licenses.get(jhnt, []) if jhnt else []

        def _to_rec_from_json(l):
            return {"cat": l.get("QULF_LCNS_LCFN") or "", "name": l.get("QULF_LCNS_NM") or "", "acq": l.get("ACQ_DE") or ""}
        def _to_rec_from_resume(l):
            return {"cat": l.get("sec") or "", "name": l.get("name") or l.get("value") or "", "acq": l.get("end") or ""}

        combined = [ _to_rec_from_json(l) for l in lic_json ] + [ _to_rec_from_resume(l) for l in resume_itlic ]

        seen = set(); uniq = []
        for r in combined:
            key = (r["cat"], r["name"], r["acq"])
            if key not in seen:
                seen.add(key); uniq.append(r)

        cats = [r["cat"] for r in uniq if r.get("cat")]
        cnt = Counter(cats) if cats else Counter()
        has_nat_tech = int(cnt.get("국가기술자격", 0) > 0)
        has_nat_prof = int(cnt.get("국가전문자격", 0) > 0)
        has_priv     = int(cnt.get("민간자격", 0) > 0)
        top_cat = None
        if cnt:
            top_cat = sorted(cnt.items(), key=lambda kv: (-kv[1], kv[0]))[0][0]

        text = "\n".join([f"{r['cat']} - {r['name']} (취득:{r['acq']})" for r in uniq]) if uniq else "정보 없음"

        hope, job_name, job_examples = self._hope_info(seek_id)
        score, why = self._score_with_llm("자격증", job_name, job_examples, text)
        return {
            "SEEK_CUST_NO": seek_id, "HOPE_JSCD1": hope, "HOPE_JOB_NAME": job_name,
            "license_total": len(uniq),
            "has_국가기술자격": has_nat_tech,
            "has_국가전문자격": has_nat_prof,
            "has_민간자격": has_priv,
            "top_license_category": top_cat,
            "license_score": score, "license_why": why
        }

    # -------------------- Build & Save --------------------
    def build_section_df(self, section: str) -> pd.DataFrame:
        rows = []
        for seek in self._seek_ids():
            if section == "coverletters":
                rows.append(self.process_coverletters(seek))
            elif section == "resume":
                rows.append(self.process_resume(seek))
            elif section == "trainings":
                rows.append(self.process_trainings(seek))
            elif section == "licenses":
                rows.append(self.process_licenses(seek))
        return pd.DataFrame(rows).sort_values("SEEK_CUST_NO").reset_index(drop=True)

    def save_all_sections(self, out_dir: str | Path) -> Dict[str, Path]:
        out_dir = Path(out_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        outputs = {}
        mapping = {
            "coverletters": "coverletters_metrics_scores.csv",
            "resume": "resume_metrics_scores.csv",
            "trainings": "trainings_metrics_scores.csv",
            "licenses": "licenses_metrics_scores.csv",
        }
        for sec, fname in mapping.items():
            df = self.build_section_df(sec)
            p = out_dir / fname
            df.to_csv(p, index=False, encoding="utf-8-sig")
            outputs[sec] = p
        return outputs


In [None]:
# Project root that holds the json/ input folder and the job-classification files.
BASE = '/Users/jongrakjeong/Library/Mobile Documents/com~apple~CloudDocs/Study/Campus/Project/laborlab'

# File names are given relative to BASE. A leading "/" would turn them into
# absolute paths, so joining with BASE would discard BASE and the files would
# only be found through the slow recursive fallback search.
pre = UnconstructedPreprocessor(
    base_dir=BASE,
    coverletters_file="json/COVERLETTERS_JSON.json",
    trainings_file="json/TRAININGS_JSON.json",
    licenses_file="json/LICENSES_JSON.json",
    resume_file="json/RESUME_JSON.json",
    certinfo_file="json/CERTIFICATION_INFO_JSON.json",
    job_csv_file="job_subcategories.csv",
    api_key=None,  # no key: scores come from the offline keyword heuristic
    cover_exceed_ratio=0.85,
    allowed_seek_ids=None,
)

# Write the four per-section CSVs next to the inputs and show where they went.
out_paths = pre.save_all_sections(BASE)
print(out_paths)


{'coverletters': PosixPath('/Users/jongrakjeong/Library/Mobile Documents/com~apple~CloudDocs/Study/Campus/Project/laborlab/coverletters_metrics_scores.csv'), 'resume': PosixPath('/Users/jongrakjeong/Library/Mobile Documents/com~apple~CloudDocs/Study/Campus/Project/laborlab/resume_metrics_scores.csv'), 'trainings': PosixPath('/Users/jongrakjeong/Library/Mobile Documents/com~apple~CloudDocs/Study/Campus/Project/laborlab/trainings_metrics_scores.csv'), 'licenses': PosixPath('/Users/jongrakjeong/Library/Mobile Documents/com~apple~CloudDocs/Study/Campus/Project/laborlab/licenses_metrics_scores.csv')}
