In [1]:
!pip -q install sentence-transformers openai

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m94.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m71.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━

In [2]:
# -*- coding: utf-8 -*-
# Hybrid LLM + SBERT for skills→occupations, taxonomy, relations
import os, re, json, numpy as np, pandas as pd
from typing import List, Any, Optional
from kaggle_secrets import UserSecretsClient
from tqdm import tqdm
from openai import OpenAI
from types import SimpleNamespace
from sentence_transformers import SentenceTransformer, util
from sklearn.cluster import KMeans

# ---------------- Core IO ----------------
def get_client():
    """Build an OpenAI client from Kaggle user secrets.

    Reads the ``OPENAI_API_KEY`` secret (required) and the
    ``OPENAI_BASE_URL`` secret (optional; any error while fetching it is
    ignored and the library's default endpoint is used).

    Raises:
        RuntimeError: if the API key secret resolves to an empty value.
    """
    secrets = UserSecretsClient()
    # The OPENAI_API_KEY secret must be configured in Kaggle beforehand.
    api_key = secrets.get_secret("OPENAI_API_KEY")
    try:
        base_url = secrets.get_secret("OPENAI_BASE_URL")
    except Exception:
        # The base URL is optional; fall back to the default endpoint.
        base_url = None
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY missing in Kaggle Secrets")
    if base_url:
        return OpenAI(api_key=api_key, base_url=base_url)
    return OpenAI(api_key=api_key)

def ensure_dir(p: str):
    """Create directory ``p`` (and any missing parents) if it does not exist.

    An empty path is treated as "the current directory" and is a no-op.
    ``os.path.dirname`` returns ``""`` for a bare filename, and
    ``os.makedirs("")`` would raise ``FileNotFoundError``.
    """
    if p:
        os.makedirs(p, exist_ok=True)
def dumpj(p: str, o: Any):
    """Serialize ``o`` as pretty-printed UTF-8 JSON into the file ``p``.

    The parent directory of ``p`` is created first. Non-ASCII characters
    are written verbatim (``ensure_ascii=False``) with a 2-space indent.
    """
    parent_dir = os.path.dirname(p)
    ensure_dir(parent_dir)
    with open(p, "w", encoding="utf-8") as handle:
        json.dump(o, handle, ensure_ascii=False, indent=2)

def pick_col(df: pd.DataFrame, prefs: List[str]) -> str:
    """Choose the column of ``df`` to read labels from.

    Returns the first name in ``prefs`` that is a column of ``df``.
    If none matches, returns the first object-dtype column, and failing
    that, the first column of the frame.
    """
    preferred = next((name for name in prefs if name in df.columns), None)
    if preferred is not None:
        return preferred
    # No preferred name present: fall back to the first object-dtype column.
    object_cols = [name for name in df.columns if df[name].dtype == object]
    if object_cols:
        return object_cols[0]
    return df.columns[0]

def _slug(s: str) -> str:
    s = re.sub(r"[^\w\- ]+", "", s).strip().replace("-", "_")
    s = re.sub(r"\s+", "_", s)
    if not s: s = "unnamed"
    if s[0].isdigit(): s = "_" + s
    return s

# ---------------- Prompts ----------------
# System prompt for Task A: multi-label assignment of occupations to a skill.
# The reply is expected to be a JSON array of labels taken from the given list.
SYS_A = (
    "You are an ontology labeler. Given a SKILL term and a closed set of OCCUPATION labels, "
    "assign ALL occupations from the list that could plausibly require the skill. "
    "Be generous. Return strictly a JSON array of strings from the list. Never return an empty array."
)
# System prompt for Task C: pick exactly one non-taxonomic relation label
# for a pair of occupations.
SYS_C = (
    "You are an ontology relation classifier. Given two OCCUPATION labels and a closed set of non-taxonomic "
    "relations (e.g., collaborates_with, depends_on, related_to), return exactly ONE label."
)
# System prompt for Task B seed generation: the reply is expected to be a
# JSON array of [child, parent] pairs (i.e. a nested array).
AUTO_SYS_B = (
    "You design occupation taxonomies.\n"
    "Given a list of OCCUPATION labels, create HIGH-LEVEL PARENT categories and map each child to exactly ONE parent.\n"
    "OUTPUT STRICTLY JSON ONLY: an array of [child, parent] pairs. No commentary."
)

# ---------------- LLM helpers ----------------
def chat(client: OpenAI, model: str, sys: str, usr: str, mt: int = 512) -> str:
    """Send one system + user message pair and return the reply text.

    Args:
        client: OpenAI client used for the request.
        model: chat model name.
        sys: system prompt.
        usr: user prompt.
        mt: ``max_tokens`` limit for the completion.

    Returns:
        The first choice's message content with surrounding whitespace
        removed. An empty string is returned when the API reports no
        textual content (``content`` is ``None``), so callers' parsing
        fallbacks run instead of an ``AttributeError``.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": sys}, {"role": "user", "content": usr}],
        temperature=0,  # deterministic decoding for reproducible labels
        max_tokens=mt,
    )
    content = response.choices[0].message.content
    return (content or "").strip()

def _extract_json_array(txt: str):
    m = re.search(r"\[.*?\]", txt, re.S)
    if not m: return None
    try:
        arr = json.loads(m.group(0))
        return arr if isinstance(arr, list) else [arr]
    except Exception:
        return None

def _json_only(text: str):
    m = re.search(r"\{.*\}", text, re.S)
    if not m: return None
    try: return json.loads(m.group(0))
    except Exception: return None

# ---------------- Embedding layer ----------------
# Embedding model name: the EMB_MODEL_NAME environment variable takes
# precedence; when unset or empty the MiniLM default below is used.
EMB_NAME = os.getenv("EMB_MODEL_NAME") or "sentence-transformers/all-MiniLM-L6-v2"
# Instantiated once at module load and shared by emb_encode().
_emb_model = SentenceTransformer(EMB_NAME)

def emb_encode(texts: List[str]):
    """Embed ``texts`` with the module-level sentence-transformer model.

    The model is asked for tensor output (``convert_to_tensor=True``);
    callers in this file move the result to CPU before converting to numpy.
    """
    embeddings = _emb_model.encode(texts, convert_to_tensor=True)
    return embeddings
def emb_cos(a, b):
    """Return ``util.cos_sim(a, b)`` for two embedding tensors."""
    similarity = util.cos_sim(a, b)
    return similarity

# ---------------- Param selection by LLM ----------------
def _score_stats(arr: np.ndarray, topn: int = 30) -> dict:
    a = np.asarray(arr).astype("float32")
    if a.size == 0: a = np.array([0.0], dtype="float32")
    a_sorted = np.sort(a)[::-1]
    top = a_sorted[:topn].tolist()
    return {
        "n": int(a.size),
        "mean": float(a.mean()),
        "std": float(a.std()),
        "min": float(a.min()),
        "max": float(a.max()),
        "top": top,
        "gaps": [float(top[i] - top[i+1]) for i in range(len(top)-1)]
    }

def _llm_pick_k_thr_for_skill(client: OpenAI, model: str, skill: str, scores: np.ndarray, n_occ: int):
    """Ask the LLM for a top-k size and a similarity threshold for one skill.

    The prompt carries a statistical summary of ``scores`` (the skill's
    cosine similarity against every occupation). The reply must be a JSON
    object with ``k`` in ``[1, n_occ]`` and ``threshold`` in ``[0, 1]``.

    Returns:
        ``(k, threshold)``. If the call fails or the reply is out of range,
        a heuristic is used instead: ``k`` is the position of the largest
        gap among the top scores (or ``min(5, n_occ)`` when there are no
        gaps) and the threshold is the 85th percentile of ``scores``
        clamped to ``[0, 0.95]`` (``0.5`` for empty scores).
    """
    summary = _score_stats(scores, topn=min(30, len(scores)))
    system_msg = "You choose hyperparameters for candidate selection from similarity scores. Output strictly one JSON."
    user_msg = "".join([
        "We map a SKILL to a subset of OCCUPATIONS using cosine similarity.\n",
        f"Skill: {skill}\nNum occupations: {n_occ}\n",
        "Similarity summary:\n",
        json.dumps(summary, ensure_ascii=False, indent=2),
        "\n",
        'Return JSON: {"k": int in [1,n_occ], "threshold": float in [0,1]}.\n',
        "Pick k at elbow/large-gap. threshold is minimal include score. ",
        "Prefer small k if gaps are large. Prefer larger k if decay is smooth.",
    ])
    try:
        reply = _json_only(chat(client, model, system_msg, user_msg, 256)) or {}
        k = int(reply.get("k", 0))
        thr = float(reply.get("threshold", -1))
        if 1 <= k <= n_occ and 0.0 <= thr <= 1.0:
            return k, thr
    except Exception:
        # Any API or parsing problem falls through to the heuristic below.
        pass

    # Heuristic fallback.
    gap_list = summary["gaps"]
    if gap_list:
        k_fb = max(1, int(np.argmax(np.array(gap_list))) + 1)
    else:
        k_fb = min(5, n_occ)
    if scores.size:
        thr_fb = float(max(0.0, min(0.95, np.percentile(scores, 85))))
    else:
        thr_fb = 0.5
    return k_fb, thr_fb

def _llm_pick_sim_bounds(client: OpenAI, model: str, pair_sims: np.ndarray):
    """Ask the LLM for high/low similarity cut-offs for relation labeling.

    The prompt carries summary statistics of ``pair_sims`` (count, mean,
    std, selected percentiles, min, max).

    Returns:
        ``(sim_high, sim_low)``. The LLM's values are used when they satisfy
        ``0 <= sim_low < sim_high <= 1``; otherwise the 90th and 25th
        percentiles of ``pair_sims`` are returned.
    """
    percentiles = {f"p{q}": float(np.percentile(pair_sims, q)) for q in (25, 50, 75, 90)}
    summary = {
        "n": int(pair_sims.size),
        "mean": float(pair_sims.mean()),
        "std": float(pair_sims.std()),
        **percentiles,
        "min": float(pair_sims.min()),
        "max": float(pair_sims.max()),
    }
    system_msg = "You set decision thresholds from a similarity distribution. Output strictly one JSON."
    user_msg = "".join([
        "We classify relations between OCCUPATION pairs using cosine similarity as soft evidence.\n",
        "Pick two thresholds with 0 ≤ sim_low < sim_high ≤ 1.\n",
        "Above sim_high: auto-label 'related_to'. Below sim_low: defer to model.\n",
        "Similarity summary:\n",
        json.dumps(summary, ensure_ascii=False, indent=2),
        '\nReturn JSON: {"sim_high": float, "sim_low": float}.',
    ])
    try:
        reply = _json_only(chat(client, model, system_msg, user_msg, 128)) or {}
        upper = float(reply.get("sim_high", -1))
        lower = float(reply.get("sim_low", -1))
        if 0 <= lower < upper <= 1:
            return upper, lower
    except Exception:
        # Any API or parsing problem falls through to the percentile default.
        pass
    return float(summary["p90"]), float(summary["p25"])

# ---------------- Task A ----------------
def taskA_map_skills_to_occ(skills_csv: str, occ_csv: str, out_dir: str, model: str) -> List[str]:
    """Task A: map every skill to one or more occupations and write the dataset files.

    For each skill, occupations are pre-selected by embedding similarity
    (top-k plus everything above a threshold, both chosen per skill via
    ``_llm_pick_k_thr_for_skill``), then the LLM picks the final labels
    from that candidate list.

    Args:
        skills_csv: path to the CSV holding skill terms.
        occ_csv: path to the CSV holding occupation labels.
        out_dir: directory receiving ``data.json``, ``label_mapper.json``,
            ``templates.json``, ``occ2skills.json`` and the ``_debug_raw``
            folder. ``jobskillsset_entities.json`` and ``stats.json`` are
            written to the parent directory of ``out_dir``.
        model: chat model name passed to the LLM helpers.

    Returns:
        The de-duplicated occupation labels, in first-seen order.
    """
    df_s = pd.read_csv(skills_csv)
    df_o = pd.read_csv(occ_csv)
    # Column names vary between inputs; take the first preferred name present.
    s_col = pick_col(df_s, ["skill", "skills", "name", "title"])
    o_col = pick_col(df_o, ["occupation", "job", "title", "name"])

    # Skills: stripped, non-empty strings (duplicates are kept).
    skills = [str(x).strip() for x in df_s[s_col].dropna().tolist() if str(x).strip()]
    # Occupations: same cleaning, then de-duplicated while preserving order.
    occup = list(dict.fromkeys([str(x).strip() for x in df_o[o_col].dropna().tolist() if str(x).strip()]))

    client = get_client()
    # Occupation embeddings are computed once and reused for every skill.
    occ_emb_t = emb_encode(occup)
    data = []
    dbg_dir = os.path.join(out_dir, "_debug_raw"); ensure_dir(dbg_dir)

    for s in tqdm(skills, desc="TaskA: skills→occupations"):
        s_emb_t = emb_encode([s])
        # Cosine similarity of this skill against every occupation.
        scores = emb_cos(s_emb_t, occ_emb_t)[0].cpu().numpy()

        k_candidate, sim_threshold = _llm_pick_k_thr_for_skill(client, model, s, scores, len(occup))

        # Candidates = top-k by score, plus every occupation at or above the threshold.
        top_idx = np.argsort(-scores)[:k_candidate].tolist()
        cand_labels = [occup[i] for i in top_idx]
        thr_keep = [occup[i] for i in range(len(occup)) if scores[i] >= sim_threshold]
        cand_labels = list(dict.fromkeys(cand_labels + thr_keep))

        tmpl = (
            "Skill: {skill}\n"
            "Candidate occupation labels (from similarity-based selection): {labels}\n"
            "Answer with ALL matching occupations strictly as a JSON array of strings from the candidate list."
        )
        u = tmpl.format(skill=s, labels=", ".join(cand_labels))
        txt = ""
        try:
            txt = chat(client, model, SYS_A, u, 512)
            labs = _extract_json_array(txt) or []
        except Exception:
            # A failed LLM call is treated like an empty answer.
            labs = []

        # Keep only labels that were actually offered as candidates.
        labs = [lab for lab in labs if lab in cand_labels]
        # Nothing usable from the LLM: fall back to the threshold matches,
        # or to the first (up to three) candidates when there are none.
        if not labs: labs = thr_keep or cand_labels[: min(3, len(cand_labels))]

        # Best-effort dump of the raw LLM reply for debugging; failures are ignored.
        # NOTE(review): the file name is the slug truncated to 80 chars, so
        # skills sharing a slug would overwrite each other's file — confirm acceptable.
        try:
            with open(os.path.join(dbg_dir, f"{_slug(s)[:80]}.txt"), "w", encoding="utf-8") as f:
                f.write(txt if isinstance(txt, str) else str(txt))
        except Exception:
            pass

        # One record per (skill, occupation) pair.
        for lab in labs: data.append({"text": s, "label": lab})

    dumpj(f"{out_dir}/data.json", data)
    dumpj(f"{out_dir}/label_mapper.json", {str(i): l for i, l in enumerate(occup)})
    dumpj(
        f"{out_dir}/templates.json",
        ["Classify the SKILL into one or more OCCUPATION labels. Term: {text}. Labels: {labels}. Answer with a JSON array of labels."]
    )

    # Invert the records: occupation -> sorted, de-duplicated skills.
    occ2skills = {occ: [] for occ in occup}
    for r in data: occ2skills[r["label"]].append(r["text"])
    occ2skills = {occ: sorted(dict.fromkeys(sk_list)) for occ, sk_list in occ2skills.items()}
    dumpj(f"{out_dir}/occ2skills.json", [{"occupation": occ, "skills": sks} for occ, sks in occ2skills.items()])

    # Entity view (skill typed by occupation) and per-occupation counts,
    # written one directory above out_dir.
    ents = [{"entity": sk, "type": occ} for occ, sks in occ2skills.items() for sk in sks]
    per = {occ: len(occ2skills[occ]) for occ in occup}
    dumpj(f"{os.path.dirname(out_dir)}/jobskillsset_entities.json", ents)
    dumpj(f"{os.path.dirname(out_dir)}/stats.json", {"kb_name": "JobSkillsSet", "n_entities": len(ents), "n_types": len(occup), "per_type": per})
    return occup

# ---------------- Task B (embedding-aided seeds + LLM) ----------------
def _cluster_and_name_with_llm(client: OpenAI, model: str, occ_types: List[str]) -> List[List[str]]:
    """Group occupations with KMeans on their embeddings and let the LLM name each group.

    Args:
        client: OpenAI client used for the naming requests.
        model: chat model name.
        occ_types: occupation labels to organize.

    Returns:
        One ``[occupation, parent_name]`` pair per input occupation, in
        input order. An empty input yields an empty list.
    """
    n = len(occ_types)
    if n == 0:
        return []
    k = min(8, max(2, int(np.sqrt(n)))) if n > 8 else max(2, min(4, n))
    # KMeans cannot form more clusters than there are samples (n == 1).
    k = min(k, n)
    X = emb_encode(occ_types).cpu().numpy()
    km = KMeans(n_clusters=k, n_init="auto", random_state=42)
    cl = km.fit_predict(X)

    parent_names = []
    for c in range(k):
        members = np.where(cl == c)[0].tolist()
        # At most eight members are shown to the LLM as the cluster's sample.
        sample = [occ_types[i] for i in members[:8]]
        if not sample:
            # No occupation landed in this cluster; its name is never used.
            parent_names.append("Category")
            continue
        # Use the caller's client rather than building a new one per cluster.
        name = chat(client, model, "Name taxonomic parent",
                    "Name a high-level OCCUPATION parent for:\n- " + "\n- ".join(sample) + "\nReturn only the name.", 32)
        # Strip newlines, brackets, braces and quotes the model may add.
        name = re.sub(r"[\n\[\]\{\}\"]", "", name).strip() or "Category"
        parent_names.append(name)
    return [[occ_types[i], parent_names[cl[i]]] for i in range(n)]

def llm_propose_taxonomy_seeds(client: OpenAI, model: str, occ_types: List[str]) -> List[List[str]]:
    """Propose ``[child, parent]`` taxonomy pairs for the given occupations.

    Three tiers are tried in order:
      1. Ask the LLM directly (up to two attempts) for a JSON array of
         ``[child, parent]`` string pairs.
      2. Cluster the occupation embeddings and have the LLM name each cluster.
      3. Assign parents by keyword buckets, defaulting to ``"Other"``.

    A failure of the LLM request in tier 1 no longer aborts the function;
    it moves on to the next attempt or tier, matching how tier 2 is guarded.

    Returns:
        A list of ``[child, parent]`` pairs.
    """
    usr = "Occupations:\n" + "\n".join(f"- {o}" for o in occ_types)
    for _ in range(2):
        try:
            txt = chat(client, model, AUTO_SYS_B, usr, mt=1024)
        except Exception:
            # API error: retry once, then fall back to the other tiers.
            continue
        arr = _extract_json_array(txt)
        ok = []
        if isinstance(arr, list):
            for p in arr:
                # Accept only well-formed pairs of non-blank strings.
                if isinstance(p, list) and len(p) == 2 and all(isinstance(x, str) and x.strip() for x in p):
                    ok.append([p[0].strip(), p[1].strip()])
        if ok:
            return ok
    try:
        return _cluster_and_name_with_llm(client, model, occ_types)
    except Exception:
        pass
    # Last resort: case-insensitive keyword matching; first matching bucket wins.
    buckets = {
        "Software & Engineering": ("Engineer","Developer","Programmer","Architect","Software"),
        "Analytics": ("Analyst","Science","Scientist","BI","Intelligence"),
        "IT Operations": ("Administrator","Network","Systems"),
        "Management": ("Manager","Project","Program","Product"),
    }
    pairs = []
    for o in occ_types:
        parent = "Other"
        low = o.lower()
        for b, kws in buckets.items():
            if any(k.lower() in low for k in kws):
                parent = b
                break
        pairs.append([o, parent])
    return pairs

def taskB_occ_taxonomy(occ_types: List[str], out_dir: str, model: str, seeds: Optional[List[List[str]]] = None):
    """Task B: write the is-a taxonomy files for the occupations.

    When ``seeds`` is empty or ``None``, every occupation is attached to
    the single parent ``"Occupation"``. Writes ``pairs.json``,
    ``label_mapper.json`` and ``template.json`` into ``out_dir``.
    ``model`` is accepted but not used here.
    """
    if seeds:
        edges = seeds
    else:
        edges = [(occ, "Occupation") for occ in occ_types]
    records = []
    for child, parent in edges:
        records.append({"child": child, "parent": parent})

    dumpj(f"{out_dir}/pairs.json", records)
    dumpj(f"{out_dir}/label_mapper.json", {"0": "is-a"})
    prompt = "Decide if child is-a parent. Answer only 'yes' or 'no'.\nChild: {child}\nParent: {parent}\nAnswer:"
    dumpj(f"{out_dir}/template.json", [prompt])

# ---------------- Task C (relations with LLM-calibrated bounds) ----------------
def taskC_occ_relations(occ_types: List[str], out_dir: str, model: str, rel_labels: List[str], max_pairs: int = 120):
    """Task C: label the relation between occupation pairs and write the dataset files.

    Pairs are the first ``max_pairs`` unordered combinations of
    ``occ_types``. Two similarity bounds are obtained from
    ``_llm_pick_sim_bounds``; a pair at or above the high bound is labeled
    ``"related_to"`` without an LLM call, every other pair is classified by
    the LLM. When the reply is not a known label, the fallback is
    ``rel_labels[0]`` for pairs at or below the low bound and
    ``"related_to"`` otherwise.

    Args:
        occ_types: occupation labels.
        out_dir: directory receiving ``pairs.json``, ``label_mapper.json``
            and ``templates.json``.
        model: chat model name.
        rel_labels: closed set of relation labels.
        max_pairs: maximum number of pairs to label.
    """
    client = get_client()
    n_occ = len(occ_types)
    pairs = [(occ_types[i], occ_types[j]) for i in range(n_occ) for j in range(i + 1, n_occ)]
    pairs = pairs[:max_pairs]

    # Embed the occupations once; the tensor serves the per-pair lookups and
    # its numpy copy serves the distribution estimate.
    occ_emb_t = emb_encode(occ_types)
    occ_emb_np = occ_emb_t.cpu().numpy()

    # Estimate the similarity distribution from a fixed-seed sample of occupations.
    rng = np.random.default_rng(42)
    if n_occ > 1:
        idxs = rng.choice(n_occ, size=min(200, n_occ), replace=False)
        sims = []
        for i in range(len(idxs)):
            for j in range(i + 1, len(idxs)):
                a, b = occ_emb_np[idxs[i]], occ_emb_np[idxs[j]]
                # Small epsilon guards against a zero-norm embedding.
                s = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-9))
                sims.append(s)
        sims = np.array(sims, dtype="float32") if sims else np.array([0.5], dtype="float32")
    else:
        sims = np.array([0.5], dtype="float32")

    sim_high, sim_low = _llm_pick_sim_bounds(client, model, sims)

    tmpl = (
        "Head OCCUPATION: {h}\nTail OCCUPATION: {t}\n"
        "Candidate relation labels: {labels}\n"
        "Embedding cosine similarity between head and tail: {sim:.3f}\n"
        "Use the similarity as soft evidence only. Return exactly ONE label."
    )

    name2idx = {n: i for i, n in enumerate(occ_types)}
    labels_str = ", ".join(rel_labels)
    by_lower = {lbl.lower(): lbl for lbl in rel_labels}

    def _resolve(raw: str) -> Optional[str]:
        """Map an LLM reply onto a known label, tolerating quotes, a trailing period and case."""
        if raw in rel_labels:
            return raw
        cleaned = raw.strip().strip("\"'`.").strip()
        return by_lower.get(cleaned.lower())

    out = []
    for h, t in tqdm(pairs, desc="TaskC: relations"):
        sim = float(emb_cos(occ_emb_t[name2idx[h]], occ_emb_t[name2idx[t]]).cpu().numpy()[0][0])
        if sim >= sim_high:
            # Very similar pair: labeled without consulting the LLM.
            lab = "related_to"
        else:
            raw = chat(client, model, SYS_C, tmpl.format(h=h, t=t, labels=labels_str, sim=sim), 8)
            lab = _resolve(raw)
            if lab is None:
                # Unrecognized reply: the fallback depends on the similarity band.
                lab = rel_labels[0] if sim <= sim_low else "related_to"
        out.append({"head": h, "tail": t, "label": lab, "sim": round(sim, 3)})

    dumpj(f"{out_dir}/pairs.json", out)
    dumpj(f"{out_dir}/label_mapper.json", {str(i): l for i, l in enumerate(rel_labels)})
    dumpj(f"{out_dir}/templates.json",
          ["Classify the relation between Head and Tail strictly as one of: {labels}.\nHead: {head}\nTail: {tail}\nAnswer:"])

# ---------------- CLI ----------------
def main():
    """Run Tasks A, B and C in sequence with the hard-coded Kaggle configuration.

    Task A produces the occupation list; taxonomy seeds are then either
    proposed by the LLM ("auto") or parsed from a JSON string; Tasks B and C
    write their outputs under the configured output root.
    """
    cfg = SimpleNamespace(
        occupations_csv="/kaggle/input/ontology/occ_update.csv",
        skills_csv="/kaggle/input/ontology/skill_update.csv",
        out_root="Occupations_Skills_Mapping_Sbert_LLM4o",
        model=os.getenv("OPENAI_CHAT_MODEL") or "gpt-4o",
        taxonomy_seeds='auto',
        taxonomy_mode="auto",
        relation_labels=["related_to", "collaborates_with", "depends_on"],
    )

    # Task A: skills -> occupations.
    task_a_dir = f"{cfg.out_root}/TaskA/JobSkillsSet"
    occ_types = taskA_map_skills_to_occ(cfg.skills_csv, cfg.occupations_csv, task_a_dir, cfg.model)

    # Task B seeds: automatic when either setting says "auto".
    seeds_spec = cfg.taxonomy_seeds
    wants_auto = cfg.taxonomy_mode == "auto"
    if not wants_auto and isinstance(seeds_spec, str):
        wants_auto = seeds_spec.strip().lower() == "auto"

    if wants_auto:
        seeds = llm_propose_taxonomy_seeds(get_client(), cfg.model, occ_types)
    else:
        # Otherwise the seeds are expected as a JSON string; bad input means no seeds.
        try:
            seeds = json.loads(seeds_spec)
        except Exception:
            seeds = []

    # Keep a copy of the seeds that were used, for inspection.
    seeds_debug_dir = f"{cfg.out_root}/TaskB/_debug"
    ensure_dir(seeds_debug_dir)
    dumpj(f"{seeds_debug_dir}/auto_seeds.json", seeds or [])

    # Task B: taxonomy.
    task_b_dir = f"{cfg.out_root}/TaskB/Occupations"
    taskB_occ_taxonomy(occ_types, task_b_dir, cfg.model, seeds)

    # Task C: relations.
    task_c_dir = f"{cfg.out_root}/TaskC/Occupations"
    taskC_occ_relations(occ_types, task_c_dir, cfg.model, cfg.relation_labels)

    print(f"Done → {cfg.out_root}")

# Run the full pipeline only when this module is executed directly, not on import.
if __name__ == "__main__":
    main()


2025-09-24 10:12:05.615616: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758708725.945026      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758708726.038003      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   0%|          | 0/90 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   1%|          | 1/90 [00:02<03:41,  2.49s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   2%|▏         | 2/90 [00:04<02:48,  1.92s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   3%|▎         | 3/90 [00:05<02:25,  1.68s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   4%|▍         | 4/90 [00:06<02:20,  1.63s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   6%|▌         | 5/90 [00:08<02:11,  1.55s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   7%|▋         | 6/90 [00:09<02:11,  1.57s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   8%|▊         | 7/90 [00:11<02:09,  1.56s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:   9%|▉         | 8/90 [00:14<02:34,  1.89s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  10%|█         | 9/90 [00:15<02:23,  1.78s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  11%|█         | 10/90 [00:16<02:07,  1.59s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  12%|█▏        | 11/90 [00:17<01:54,  1.45s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  13%|█▎        | 12/90 [00:19<01:47,  1.37s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  14%|█▍        | 13/90 [00:20<01:53,  1.47s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  16%|█▌        | 14/90 [00:22<02:01,  1.60s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  17%|█▋        | 15/90 [00:23<01:50,  1.47s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  18%|█▊        | 16/90 [00:25<01:55,  1.56s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  19%|█▉        | 17/90 [00:27<02:00,  1.64s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  20%|██        | 18/90 [00:28<01:52,  1.56s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  21%|██        | 19/90 [00:30<01:48,  1.53s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  22%|██▏       | 20/90 [00:31<01:40,  1.43s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  23%|██▎       | 21/90 [00:32<01:31,  1.33s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  24%|██▍       | 22/90 [00:34<01:48,  1.59s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  26%|██▌       | 23/90 [00:36<01:40,  1.50s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  27%|██▋       | 24/90 [00:37<01:41,  1.53s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  28%|██▊       | 25/90 [00:39<01:40,  1.54s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  29%|██▉       | 26/90 [00:41<01:43,  1.61s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  30%|███       | 27/90 [00:42<01:31,  1.45s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  31%|███       | 28/90 [00:43<01:32,  1.50s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  32%|███▏      | 29/90 [00:44<01:25,  1.41s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  33%|███▎      | 30/90 [00:46<01:22,  1.38s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  34%|███▍      | 31/90 [00:47<01:20,  1.36s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  36%|███▌      | 32/90 [00:48<01:12,  1.25s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  37%|███▋      | 33/90 [00:52<01:59,  2.09s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  38%|███▊      | 34/90 [00:54<01:45,  1.88s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  39%|███▉      | 35/90 [00:55<01:41,  1.85s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  40%|████      | 36/90 [00:56<01:29,  1.66s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  41%|████      | 37/90 [00:58<01:27,  1.65s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  42%|████▏     | 38/90 [00:59<01:18,  1.52s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  43%|████▎     | 39/90 [01:01<01:15,  1.48s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  44%|████▍     | 40/90 [01:02<01:16,  1.53s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  46%|████▌     | 41/90 [01:04<01:16,  1.56s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  47%|████▋     | 42/90 [01:05<01:12,  1.51s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  48%|████▊     | 43/90 [01:08<01:30,  1.93s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  49%|████▉     | 44/90 [01:10<01:20,  1.76s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  50%|█████     | 45/90 [01:11<01:08,  1.52s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  51%|█████     | 46/90 [01:12<01:01,  1.40s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  52%|█████▏    | 47/90 [01:13<01:01,  1.44s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  53%|█████▎    | 48/90 [01:15<01:01,  1.47s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  54%|█████▍    | 49/90 [01:16<01:01,  1.51s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  56%|█████▌    | 50/90 [01:17<00:53,  1.33s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  57%|█████▋    | 51/90 [01:19<00:53,  1.38s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  58%|█████▊    | 52/90 [01:20<00:50,  1.33s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  59%|█████▉    | 53/90 [01:22<00:57,  1.56s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  60%|██████    | 54/90 [01:24<01:01,  1.71s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  61%|██████    | 55/90 [01:26<00:56,  1.60s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  62%|██████▏   | 56/90 [01:27<00:55,  1.62s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  63%|██████▎   | 57/90 [01:28<00:49,  1.50s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  64%|██████▍   | 58/90 [01:30<00:44,  1.40s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  66%|██████▌   | 59/90 [01:31<00:44,  1.45s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  67%|██████▋   | 60/90 [01:33<00:45,  1.51s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  68%|██████▊   | 61/90 [01:34<00:43,  1.51s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  69%|██████▉   | 62/90 [01:36<00:41,  1.47s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  70%|███████   | 63/90 [01:38<00:43,  1.62s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  71%|███████   | 64/90 [01:39<00:40,  1.55s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  72%|███████▏  | 65/90 [01:41<00:38,  1.55s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  73%|███████▎  | 66/90 [01:42<00:35,  1.49s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  74%|███████▍  | 67/90 [01:43<00:32,  1.40s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  76%|███████▌  | 68/90 [01:45<00:31,  1.43s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  77%|███████▋  | 69/90 [01:46<00:32,  1.55s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  78%|███████▊  | 70/90 [01:48<00:33,  1.68s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  79%|███████▉  | 71/90 [01:51<00:37,  1.99s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  80%|████████  | 72/90 [01:52<00:31,  1.77s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  81%|████████  | 73/90 [01:54<00:28,  1.69s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  82%|████████▏ | 74/90 [01:56<00:27,  1.72s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  83%|████████▎ | 75/90 [01:57<00:23,  1.57s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  84%|████████▍ | 76/90 [01:58<00:20,  1.50s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  86%|████████▌ | 77/90 [01:59<00:18,  1.39s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  87%|████████▋ | 78/90 [02:01<00:16,  1.39s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  88%|████████▊ | 79/90 [02:02<00:15,  1.39s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  89%|████████▉ | 80/90 [02:04<00:13,  1.37s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  90%|█████████ | 81/90 [02:05<00:12,  1.40s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  91%|█████████ | 82/90 [02:07<00:12,  1.52s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  92%|█████████▏| 83/90 [02:08<00:09,  1.43s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  93%|█████████▎| 84/90 [02:09<00:08,  1.36s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  94%|█████████▍| 85/90 [02:10<00:06,  1.33s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  96%|█████████▌| 86/90 [02:15<00:08,  2.19s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  97%|█████████▋| 87/90 [02:17<00:06,  2.23s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  98%|█████████▊| 88/90 [02:18<00:03,  1.98s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations:  99%|█████████▉| 89/90 [02:20<00:01,  1.80s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskA: skills→occupations: 100%|██████████| 90/90 [02:21<00:00,  1.58s/it]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

TaskC: relations: 100%|██████████| 45/45 [00:19<00:00,  2.25it/s]

Done → Occupations_Skills_Mapping_Sbert_LLM4o



