In [1]:
import pandas as pd

skill_def = pd.read_csv('resources/skill_definitions.csv')
skill_def.head().to_dict()

{'Skill': {0: 'Ability To Meet Deadlines',
  1: 'Acceptance Testing',
  2: 'Account Management',
  3: 'Accounting Systems',
  4: 'Accounts Payable / Receivable'},
 'Skill Definition': {0: 'Capacity to complete a project or task by a predetermined date and time.',
  1: 'Uses an engineering quality assurance process to measure how closely a particular output meets the specified requirements by way of feedback from end users.',
  2: 'Account management is the practice of developing and maintaining relationships with individual clients to retain, grow, and/or land new business.',
  3: 'Accounting Systems is a computer application that records and processes accounting transactions per business requirements.',
  4: 'Accounts Payable refers to knowledge of bookkeeping methods to tracking payments a company owes to its suppliers in order to approve and process invoices. Accounts receivable are claims for payment held by a business for goods or services that have been ordered but remain unpaid.

### First approach using huggingface classifier (bad)

In [3]:
# from transformers import pipeline

# input_text = """
# The image sequence appears to be a presentation slide or series of slides related to the Federal Aviation Administration (FAA), prominently featuring the FAA logo—a globe with a winged emblem and the text "Federal Aviation Administration" encircling it. The slides vary slightly in design, with some including airplane icons and a text bar reading "This is the FAA." The background is a gradient of blue shades. The slides emphasize the FAA\'s role in aviation, displaying statistics such as 5,000 flights in the air at any given time and 44,000 flights each day, with 2.6 million passengers traveling daily and 9.7 million scheduled passenger flights annually. Key organizations highlighted include the Air Traffic Organization (ATO), Office of Airports (ARP), Aviation Safety (AVS), Security and Hazardous Materials (ASH), and Commercial Space Transportation (AST), underscoring the complexity and scale of air travel management and safety.
# """

# # Load a pre-trained zero-shot classification model
# classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# # Define your classification labels and their desciptions
# labels = skill_def['Skill'] + ' (' + skill_def['Skill Type'] + ')'

# # Perform zero-shot classification
# result = classifier(input_text, candidate_labels=labels)

# # Display the results
# print("Input Text:", input_text)
# print("Predicted Labels with Scores:")
# for label, score in zip(result["labels"], result["scores"]):
#     print(f"{label}: {score:.4f}")

# # Optional: Save results to a DataFrame for further analysis
# results_df = pd.DataFrame({
#     "Label": result["labels"],
#     "Score": result["scores"]
# }).sort_values(by='Score', ascending=False)
# print("\nResults DataFrame:")
# print(results_df)

### Setting up reading metadata

In [3]:
from llm_confidence.logprobs_handler import LogprobsHandler
logprobs_handler = LogprobsHandler()

In [4]:
from openai import OpenAI
import json, time

class TextLabeler:
    def __init__(self, data_path: str, skill_def: pd.DataFrame, client: OpenAI, model: str, max_retries: int = 3, delay: float = 1.0, output_path:str="resources/results.json"):
        self.data_path = data_path
        with open(data_path, 'r') as file:
            self.data = json.load(file)
        self.skill_def = skill_def
        self.client = client
        self.model = model
        self.max_retries = max_retries
        self.delay = delay
        self.output_path = output_path

    def _build_user_prompt_content(self, entry: dict):
        return '\n'.join([f"{key}: {entry[key]}" for key in ['title', 'description', 'summary']])
    
    def _build_system_prompt_content(self):
        return f"""You are in the context of labeling a Delta airline training video.
        You are labeling the given title/description/summary, picking the most appropriate labels that perfectly fit \
        everything out of THE ONLY FOLLOWING OPTIONS FOR LABELS: \
        {'; '.join('(id: ' + self.skill_def.index.astype(str) + ', label: ' + self.skill_def['Skill'] + ')')}.\
        Your output should be in descending order of the best labels that fit the title/description/summary STRICTLY following this output structure \
        structure with NO EXTRA CONTENT (should be only around 3-5 labels): \"Labels: <id of first label>, <id of second label>, ..., <id of last label>\""""
    
    def build_labels(self, entry: dict):
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self._build_system_prompt_content()},
                {"role": "user", "content": self._build_user_prompt_content(entry)},
            ],
            stream=False,
            logprobs=True,
            top_logprobs=
        )
        
        print(response.choices[0].message.content)

        # Logprobs
        print(response.choices[0])
        response_logprobs = response.choices[0].logprobs.content# if hasattr(response.choices[0], 'logprobs') else []
        logprobs_formatted = logprobs_handler.format_logprobs(response_logprobs)
        confidence = logprobs_handler.process_logprobs(logprobs_formatted)
        print(confidence)

        label_ids = response.choices[0].message.content[len('Labels: '):].split(', ')
        filtered_skills_df = self.skill_def.iloc[[int(id) for id in label_ids]]
        return filtered_skills_df['Skill'].to_list()
    
    def revise_metadata_labels(self):
        for entry in self.data['data']:
            print(f"Generating labels for {entry['title']}...")
            for attempt in range(self.max_retries):
                try:
                    new_labels = self.build_labels(entry)
                except:
                    print("An error has occured.")
                    if attempt == self.max_retries - 1:
                        raise
                    time.sleep(self.delay * (attempt + 1))
                else:
                    break
            entry['labels'] = new_labels
            
            # For saving progress
            with open(self.output_path, 'w') as file:
                json.dump(self.data, file, indent=4)
            print(f"Added labels for {entry['title']}")
        print('Added all entry labels successfully.')

SyntaxError: invalid syntax (3865814531.py, line 37)

### Second approach using deepseek chat (much better results)

In [6]:
deep_seek_client = OpenAI(api_key="sk-REDACTED", base_url="https://api.deepseek.com")
deep_seek_tl = TextLabeler('resources/bedrock_demo.json', skill_def, deep_seek_client, 'deepseek-chat')
deep_seek_tl.revise_metadata_labels()

Generating labels for Fingerprint Technician - Fingerprint Capture Training...
Labels: 17, 158, 224, 265, 360
Choice(finish_reason='stop', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='Labels', bytes=[76, 97, 98, 101, 108, 115], logprob=0.0, top_logprobs=[]), ChatCompletionTokenLogprob(token=':', bytes=[58], logprob=0.0, top_logprobs=[]), ChatCompletionTokenLogprob(token=' ', bytes=[32], logprob=0.0, top_logprobs=[]), ChatCompletionTokenLogprob(token='17', bytes=[49, 55], logprob=-0.00013543092, top_logprobs=[]), ChatCompletionTokenLogprob(token=',', bytes=[44], logprob=0.0, top_logprobs=[]), ChatCompletionTokenLogprob(token=' ', bytes=[32], logprob=0.0, top_logprobs=[]), ChatCompletionTokenLogprob(token='158', bytes=[49, 53, 56], logprob=-0.026875073, top_logprobs=[]), ChatCompletionTokenLogprob(token=',', bytes=[44], logprob=0.0, top_logprobs=[]), ChatCompletionTokenLogprob(token=' ', bytes=[32], logprob=0.0, top_logprobs=[]), ChatCompletionTokenLogprob(

### Third approach using gpt 4o mini (slightly worse results)

In [3]:
gpt_4o_mini_client = OpenAI(api_key="sk-REDACTED")
gpt_4o_mini_tl = TextLabeler('resources/bedrock_demo.json', skill_def, gpt_4o_mini_client, 'gpt-4o-mini')
gpt_4o_mini_tl.revise_metadata_labels()

Generating labels for Fingerprint Technician - Fingerprint Capture Training...
Labels: 17, 18, 340, 40, 7
Added labels for Fingerprint Technician - Fingerprint Capture Training
Generating labels for CCT - Privacy and Data Protection...
Labels: 265, 340, 370, 374, 219
An error has occured.
Labels: 65, 268, 272, 216, 280
Added labels for CCT - Privacy and Data Protection
Generating labels for Fire Extinguisher Safety...
Labels: 117, 275, 277, 340
Added labels for Fire Extinguisher Safety
Generating labels for Delta Incident Reporting training course video capture (NO AUDIO)...
Labels: 30, 275, 278, 340, 336
Added labels for Delta Incident Reporting training course video capture (NO AUDIO)
Generating labels for Corporate Safety - Incident Investigation...
Labels: 30, 275, 277, 72, 65
Added labels for Corporate Safety - Incident Investigation
Generating labels for Interview Skills For Hiring Managers...
Labels: 9, 340, 37, 40, 221
Added labels for Interview Skills For Hiring Managers
Gener

### Fourth approach using gpt 4.1 (about the same as deepseek)

In [6]:
gpt_4_client = OpenAI(api_key="sk-REDACTED")
gpt_4_tl = TextLabeler('resources/bedrock_demo.json', skill_def, gpt_4_client, 'gpt-4.1-2025-04-14')
gpt_4_tl.revise_metadata_labels()

Generating labels for Fingerprint Technician - Fingerprint Capture Training...
Labels: 340, 329, 17, 261
Added labels for Fingerprint Technician - Fingerprint Capture Training
Generating labels for CCT - Privacy and Data Protection...
Labels: 265, 97, 340, 346, 277
Added labels for CCT - Privacy and Data Protection
Generating labels for Fire Extinguisher Safety...
Labels: 277, 117, 360, 7
Added labels for Fire Extinguisher Safety
Generating labels for Delta Incident Reporting training course video capture (NO AUDIO)...
Labels: 174, 360, 275, 277, 117
Added labels for Delta Incident Reporting training course video capture (NO AUDIO)
Generating labels for Corporate Safety - Incident Investigation...
Labels: 174, 277, 275, 360, 230
Added labels for Corporate Safety - Incident Investigation
Generating labels for Interview Skills For Hiring Managers...
Labels: 340, 9, 319, 181
Added labels for Interview Skills For Hiring Managers
Generating labels for Information Security Awareness - Standa

In [18]:
messages = [
    {"role": "system",
     "content": "Return exactly one JSON object with keys city and country"},
    {"role": "user", "content": "Tell me a European capital"}
]

resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
    logprobs=True,
    top_logprobs=2,
    temperature=0,
    response_format={"type": "json_object"},
)

tokens = resp.choices[0].logprobs.content        # list[ChatCompletionTokenLogprob]  :contentReference[oaicite:1]{index=1}
logprobs_formatted = logprobs_handler.format_logprobs(tokens)

confidence = logprobs_handler.process_logprobs(
    logprobs_formatted,
    # optional: nested_keys_dct=... if you want to aggregate sub‑fields
)

print(resp.choices[0].message.content)   # → {"city":"Paris","country":"France"}
print(confidence)                        # → {"city":0.97,"country":0.93}


{
  "city": "Berlin",
  "country": "Germany"
}
{'city': 0.7698, 'country': 1.0}


In [19]:
import math
overall_p = math.exp(sum(t.logprob for t in tokens))
# or average log‑prob for length‑independent metric:
avg_lp = sum(t.logprob for t in tokens) / len(tokens)


# USE THIS FOR LOGPROBS

In [7]:
"""
TextLabeler — Robust skill labeling tool for Delta Air Lines training content

A production-grade tool that:
- Automatically resets skill table indexes to 0-based integers for reliable ID lookup
- Maintains a human-readable "Skill" column alongside machine-friendly IDs
- Returns per-label confidence scores for each prediction
- Handles API retries and incremental saving automatically

Typical usage:
1. Prepare skill definitions CSV with a "Skill" column
2. Load input JSON containing training content metadata
3. Process entries to generate and save labeled results
"""

from __future__ import annotations
import json, time
import pandas as pd
from openai import OpenAI
from llm_confidence.logprobs_handler import LogprobsHandler


# ---------------------------------------------------------------------------- helpers
def _prepare_skill_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare skill definition dataframe for consistent processing.
    
    Args:
        df: Raw skill definitions dataframe
        
    Returns:
        Processed dataframe with:
        - Cleaned column names (stripped whitespace)
        - Guaranteed "Skill" column (renames first column if needed)
        - Reset 0-based integer index (used as reference IDs)
        
    Note:
        The reset index becomes the canonical ID system used throughout labeling
    """
    df = df.copy()
    df.columns = df.columns.str.strip()
    if "Skill" not in df.columns:
        df.rename(columns={df.columns[0]: "Skill"}, inplace=True)
    return df.reset_index(drop=True)  # Ensure 0-based integer IDs


# ---------------------------------------------------------------------------- main class
class TextLabeler:
    """
    Automated skill labeling system for training content metadata.
    
    Attributes:
        client: OpenAI API client instance
        model: Name of LLM model to use (e.g., "gpt-4")
        max_retries: Maximum API call attempts before failing
        delay: Base delay between retry attempts (seconds)
        output_path: Path for incremental result saving
        data: Loaded content metadata to process
        skill_def: Processed skill definitions dataframe
        log_handler: Confidence score calculator
    """
    
    def __init__(
        self,
        data_path: str,
        skill_def: pd.DataFrame,
        *,
        client: OpenAI,
        model: str,
        max_retries: int = 3,
        delay: float = 1.0,
        output_path: str = "resources/results.json",
    ):
        """
        Initialize the text labeler with configuration and data sources.
        
        Args:
            data_path: Path to JSON file containing content metadata
            skill_def: Dataframe of skill definitions (must contain "Skill" column)
            client: Configured OpenAI client instance
            model: LLM model identifier to use
            max_retries: API call retry attempts (default: 3)
            delay: Base delay between retries in seconds (default: 1.0)
            output_path: Results output path (default: "resources/results.json")
        """
        self.client = client
        self.model = model
        self.max_retries = max_retries
        self.delay = delay
        self.output_path = output_path

        with open(data_path, "r") as f:
            self.data = json.load(f)

        self.skill_def = _prepare_skill_df(skill_def)
        self.log_handler = LogprobsHandler()

    # ---------------------------------------------------------------- prompt builders
    def _build_user_prompt_content(self, entry: dict) -> str:
        """
        Construct user prompt from content metadata entry.
        
        Args:
            entry: Dictionary containing content metadata with keys:
                   - title
                   - description 
                   - summary
                   
        Returns:
            Concatenated string of relevant content fields
        """
        return "\n".join(
            f"{k}: {entry[k]}" for k in ("title", "description", "summary")
        )

    def _build_system_prompt_content(self) -> str:
        """
        Construct the system prompt with skill labeling instructions.
        
        Returns:
            Complete system prompt containing:
            - Task description
            - Required response format
            - Complete skill list with IDs and names
            - Example response
        """
        label_list = "; ".join(
            f"(id: {idx}, label: {name})"
            for idx, name in zip(self.skill_def.index, self.skill_def["Skill"])
        )
        return (
            "You are an expert skill‑tagger for Delta Air Lines training videos.\n"
            "Select the 3‑5 best‑fitting labels from the list below and reply with "
            "ONE JSON object whose keys are label_1, label_2, … and whose values "
            "are the integer ids.  No commentary, no other keys.\n\n"
            f"AVAILABLE LABELS: {label_list}\n\n"
            'EXAMPLE: {"label_1": 4, "label_2": 17, "label_3": 9}'
        )

    # ---------------------------------------------------------------- single call
    def build_labels(self, entry: dict) -> tuple[list[str], dict[str, float]]:
        """
        Generate skill labels for a single content entry.
        
        Args:
            entry: Content metadata dictionary to label
            
        Returns:
            Tuple containing:
            - List of skill names (ordered by relevance)
            - Dictionary of confidence scores per label key
            
        Raises:
            OpenAI API exceptions after max retries exceeded
        """
        resp = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self._build_system_prompt_content()},
                {"role": "user", "content": self._build_user_prompt_content(entry)},
            ],
            logprobs=True,
            top_logprobs=2,
            response_format={"type": "json_object"},
            temperature=0,
            stream=False,
        )

        reply_json = json.loads(resp.choices[0].message.content.strip())
        tokens = resp.choices[0].logprobs.content

        # Calculate confidence scores for each label
        logp_fmt = self.log_handler.format_logprobs(tokens)
        confidences = self.log_handler.process_logprobs(logp_fmt)  # {'label_1': p1, ...}

        # Convert numeric IDs to human-readable skill names
        label_ids = [reply_json[k] for k in sorted(reply_json)]  # label_1, label_2, ...
        skills = self.skill_def.loc[label_ids, "Skill"].tolist()

        return skills, confidences

    # ---------------------------------------------------------------- bulk driver
    def revise_metadata_labels(self) -> None:
        """
        Process all content entries to generate and save skill labels.
        
        Processes all entries in self.data['data'], adding:
        - "labels": List of skill names
        - "confidences": Per-label confidence scores
        
        Saves results incrementally to output_path.
        
        Note:
            Implements retry logic with exponential backoff
            Provides real-time console feedback
            Maintains results through interruptions
        """
        for entry in self.data["data"]:
            print(f"\n▶ Generating labels for: {entry['title']}")
            for attempt in range(self.max_retries):
                try:
                    labels, conf = self.build_labels(entry)
                except Exception as e:
                    print("Error:", e)
                    if attempt == self.max_retries - 1:
                        raise
                    time.sleep(self.delay * (attempt + 1))
                else:
                    break

            entry["labels"] = labels
            entry["confidences"] = conf

            # Incremental save for crash recovery
            with open(self.output_path, "w") as f:
                json.dump(self.data, f, indent=4)

            print("✓ Added labels:", labels)
            print("  confidences :", conf)

        print("\nAll entries processed successfully.")


# ---------------------------------------------------------------------------- usage example
if __name__ == "__main__":
    # Example initialization and execution
    skill_def = pd.read_csv("resources/skill_definitions.csv")  # Must contain "Skill" column
    client = OpenAI(api_key="sk-REDACTED")
    
    labeler = TextLabeler(
        data_path="resources/bedrock_demo.json",
        skill_def=skill_def,
        client=client,
        model="gpt-4o-mini",
    )
    
    labeler.revise_metadata_labels()


▶ Generating labels for: Fingerprint Technician - Fingerprint Capture Training
✓ Added labels: ['Airport Security', 'Aviation Safety', 'Compliance Training', 'Training And Development']
  confidences : {'label_1': 0.7914, 'label_2': 0.4579, 'label_3': 0.1365, 'label_4': 0.2716}

▶ Generating labels for: CCT - Privacy and Data Protection
✓ Added labels: ['Compliance Training', 'Risk Management', 'Training And Development', 'Understanding Customer Needs', 'Version Control']
  confidences : {'label_1': 0.802, 'label_2': 0.3266, 'label_3': 0.1062, 'label_4': 0.098, 'label_5': 0.0803}

▶ Generating labels for: Fire Extinguisher Safety
✓ Added labels: ['Emergency Procedures', 'Safety Training', 'Workplace Safety']
  confidences : {'label_1': 0.9218, 'label_2': 0.5571, 'label_3': 0.9881}

▶ Generating labels for: Delta Incident Reporting training course video capture (NO AUDIO)
✓ Added labels: ['Aviation Safety', 'Emergency Procedures', 'Safety Assurance', 'Risk Management', 'Training And De