## 0. Install Required Dependencies

**Run this cell first if you encounter import errors**

In [1]:
import sys
import subprocess

# Install required dependencies
dependencies = ['accelerate>=0.26.0', 'datasets']

for dep in dependencies:
    print(f"Installing {dep}...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", dep])

print("‚úÖ All dependencies installed! Please restart the kernel (Kernel > Restart Kernel) and then run all cells.")

Installing accelerate>=0.26.0...
Installing datasets...
‚úÖ All dependencies installed! Please restart the kernel (Kernel > Restart Kernel) and then run all cells.


# Fine-tune SentenceTransformer Models for ITSM Tickets
This notebook fine-tunes the **all-mpnet-base-v2** embedding model (and can be adapted for others) using contrastive learning with pseudo-labeled training data from your ITSM tickets.
## Approach
- **Positive pairs**: Tickets from the same category (assumed similar)
- **Negative pairs**: Tickets from different categories (assumed dissimilar)
- **Loss function**: Cosine Similarity Loss (contrastive learning)
- **Base model**: sentence-transformers/all-mpnet-base-v2 (768-dim embeddings)

## 1. Setup and Imports

In [2]:
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), '..'))

import json
import torch
from datetime import datetime
import logging
from datasets import DatasetDict  # <-- Added here

# Import sentence-transformers
from sentence_transformers import SentenceTransformer, InputExample, losses
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from torch.utils.data import DataLoader

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

print("‚úÖ Imports successful")
print(f"PyTorch version: {torch.__version__}")
print(f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}")

  from .autonotebook import tqdm as notebook_tqdm


‚úÖ Imports successful
PyTorch version: 2.9.1
Device: CPU


## 2. Configuration

In [3]:
# Training configuration
CONFIG = {
    'base_model': 'sentence-transformers/all-mpnet-base-v2',
    'source_data': '../data/servicenow_incidents_full.json',  # Source incidents
    'output_dir': 'models/all-mpnet-finetuned',
    'epochs': 12,  # Start lean to avoid overfitting; increase once eval is stable
    'batch_size': 32,
    'learning_rate': 2e-5,
    'warmup_steps': 100,
    'eval_split': 0.1  # 10% for evaluation
}

print("Configuration:")
for key, value in CONFIG.items():
    print(f"  {key}: {value}")

Configuration:
  base_model: sentence-transformers/all-mpnet-base-v2
  source_data: ../data/servicenow_incidents_full.json
  output_dir: models/all-mpnet-finetuned
  epochs: 12
  batch_size: 32
  learning_rate: 2e-05
  warmup_steps: 100
  eval_split: 0.1


## 3. Load Training Data

In [4]:
# Generate training pairs from ServiceNow incidents
import random
from collections import defaultdict

# Load ServiceNow incidents
incidents_file = "data/servicenow_incidents_full.json"
print(f"Loading incidents from: {incidents_file}")

with open(incidents_file, 'r') as f:
    incidents = json.load(f)

print(f"Loaded {len(incidents)} incidents")

# Group incidents by category
category_groups = defaultdict(list)
for incident in incidents:
    category = incident.get('category', 'Unknown')
    if category and category != '':
        # Create text representation combining short_description and description
        text = f"{incident.get('short_description', '')}. {incident.get('description', '')}"
        category_groups[category].append({
            'id': incident.get('incident_number', incident.get('sys_id', '')),
            'text': text.strip(),
            'category': category
        })

print(f"\nCategories found: {len(category_groups)}")
for cat, items in category_groups.items():
    print(f"  {cat}: {len(items)} incidents")

# Generate positive pairs (same category)
positive_pairs = []
for category, items in category_groups.items():
    if len(items) >= 2:
        # Create pairs within the same category
        for i in range(len(items)):
            for j in range(i + 1, min(i + 6, len(items))):  # Limit pairs per incident
                positive_pairs.append({
                    'ticket1_id': items[i]['id'],
                    'ticket2_id': items[j]['id'],
                    'text1': items[i]['text'],
                    'text2': items[j]['text'],
                    'category1': category,
                    'category2': category
                })

# Generate negative pairs (different categories)
negative_pairs = []
categories = list(category_groups.keys())
for i in range(len(categories)):
    for j in range(i + 1, len(categories)):
        cat1_items = category_groups[categories[i]]
        cat2_items = category_groups[categories[j]]

        # Sample random pairs between different categories
        num_pairs = min(len(cat1_items) * 2, len(cat2_items) * 2, 50)
        for _ in range(num_pairs):
            item1 = random.choice(cat1_items)
            item2 = random.choice(cat2_items)
            negative_pairs.append({
                'ticket1_id': item1['id'],
                'ticket2_id': item2['id'],
                'text1': item1['text'],
                'text2': item2['text'],
                'category1': item1['category'],
                'category2': item2['category']
            })

print(f"\nüìä Generated Training Pairs:")
print(f"  Positive pairs: {len(positive_pairs)}")
print(f"  Negative pairs: {len(negative_pairs)}")
print(f"  Total pairs: {len(positive_pairs) + len(negative_pairs)}")

# Save to training_pairs.json for future use
training_data = {
    'positive_pairs': positive_pairs,
    'negative_pairs': negative_pairs,
    'metadata': {
        'num_incidents': len(incidents),
        'num_categories': len(category_groups),
        'generated_on': datetime.now().isoformat()
    }
}

training_pairs_path = os.path.join(os.getcwd(), 'data', 'training_pairs.json')
os.makedirs(os.path.dirname(training_pairs_path), exist_ok=True)
with open(training_pairs_path, 'w') as f:
    json.dump(training_data, f, indent=2)

print(f"\n‚úÖ Training pairs saved to: {training_pairs_path}")

Loading incidents from: data/servicenow_incidents_full.json
Loaded 76 incidents

Categories found: 5
  Inquiry / Help: 41 incidents
  Network: 6 incidents
  Hardware: 10 incidents
  Software: 13 incidents
  Database: 2 incidents

üìä Generated Training Pairs:
  Positive pairs: 291
  Negative pairs: 118
  Total pairs: 409

‚úÖ Training pairs saved to: /Users/don/Documents/University/Current Classes/Capstone/let me try again/data/training_pairs.json


# 3.1 Load & Clean Relationship Pairs‚Äù

In [5]:
# The training pairs are already loaded from the previous cell
# Just display a summary
print(f"\nüìä Training Data Summary:")
print(f"  Positive pairs: {len(positive_pairs)}")
print(f"  Negative pairs: {len(negative_pairs)}")
print(f"  Total pairs: {len(positive_pairs) + len(negative_pairs)}")

# Show example pairs
if positive_pairs:
    print(f"\nüìù Example Positive Pair (same category):")
    example = positive_pairs[0]
    print(f"  Category: {example['category1']}")
    print(f"  Ticket 1 ({example['ticket1_id']}): {example['text1'][:100]}...")
    print(f"  Ticket 2 ({example['ticket2_id']}): {example['text2'][:100]}...")

if negative_pairs:
    print(f"\nüìù Example Negative Pair (different categories):")
    example = negative_pairs[0]
    print(f"  Category 1: {example['category1']}")
    print(f"  Category 2: {example['category2']}")
    print(f"  Ticket 1 ({example['ticket1_id']}): {example['text1'][:100]}...")
    print(f"  Ticket 2 ({example['ticket2_id']}): {example['text2'][:100]}...")


üìä Training Data Summary:
  Positive pairs: 291
  Negative pairs: 118
  Total pairs: 409

üìù Example Positive Pair (same category):
  Category: Inquiry / Help
  Ticket 1 (INC0010054): Equipment selection not saved for new location. During the onboarding process for an additional loca...
  Ticket 2 (INC0010053): Merchant unable to submit e-signed agreement. A Sales Agent's access to configure service fees for a...

üìù Example Negative Pair (different categories):
  Category 1: Inquiry / Help
  Category 2: Network
  Ticket 1 (INC0000011): Need new Blackberry set up. I'm replacing my old phone with a Blackberry and require assistance to g...
  Ticket 2 (INC0000049): Network storage unavailable. Receiving error message with "network path not found."...


# 3.2 Build Pairwise Features & Split Data

In [6]:
import os
import json
import numpy as np
import pandas as pd
import time
import re # Import regex module
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import openai
from openai import OpenAI
from dotenv import load_dotenv

# Guard against missing `output_path` (cells may be executed out of order)
output_path = globals().get('output_path', None)
if output_path is None:
    default_dir = 'models/all-mpnet-finetuned'
    if 'CONFIG' in globals() and isinstance(CONFIG, dict):
        output_path = os.path.join(os.getcwd(), CONFIG.get('output_dir', default_dir))
    else:
        output_path = os.path.join(os.getcwd(), default_dir)
    print(f"Warning: `output_path` was not defined. Using fallback: {output_path}")

# ----------------------------
# Paths
# ----------------------------

RAW_JSON_PATH = "/Users/don/Documents/University/Current Classes/Capstone/let me try again/data/servicenow_incidents_full.json"
REL_OUT_CSV   = "data/relationship_pairs.csv"
REL_OUT_JSON  = "data/relationship_pairs.json"

MAX_TICKETS = 400       # cap to reduce cost ‚Äî adjust as needed
TOP_K_NEIGHBORS = 5     # candidate neighbors per ticket
SLEEP_BETWEEN_CALLS = 0.4

LLM_MODEL_NAME = "gpt-4o-mini"  # or whichever model you use

RELATION_PROMPT = """
You are an expert in IT Service Management (ITSM) and incident management.
Your task is to analyze two incident tickets and determine the relationship between them.
Based on the short descriptions and descriptions of the two tickets, classify their relationship into one of the following categories:

- **duplicate**: Ticket B is a duplicate of Ticket A. They describe the exact same underlying issue, and one ticket could be closed in favor of the other.
- **related**: Ticket A and Ticket B describe different but highly relevant issues. They might be part of the same larger problem, affect the same system, or require similar solutions, but neither is a direct duplicate of the other.
- **causal**: Ticket B is a direct consequence or cause of Ticket A. For example, Ticket A was created because of an event described in Ticket B, or vice-versa. There is a clear cause-and-effect link.
- **none**: There is no significant relationship between Ticket A and Ticket B based on the provided information.

If you classify a relationship as 'causal', you must also indicate the 'direction' of the causality:
- **A_causes_B**: Ticket A caused Ticket B.
- **B_causes_A**: Ticket B caused Ticket A.
- **mutually_causal**: A and B are mutually causative or part of a feedback loop.

Provide your output as a JSON object with the following keys:
- `label`: (string) One of "duplicate", "related", "causal", or "none".
- `explanation`: (string) A brief, clear explanation for your classification.
- `direction`: (string, required only if label is "causal") One of "A_causes_B", "B_causes_A", or "mutually_causal". If the label is not "causal", set this to "none".

Here are the two incident tickets:

---
**Ticket A (ID: {ticket_a_id})**
Created On: {ticket_a_created}
Affected Application: {ticket_a_app}
Short Description: {ticket_a_short}
Description: {ticket_a_desc}

---
**Ticket B (ID: {ticket_b_id})**
Created On: {ticket_b_created}
Affected Application: {ticket_b_app}
Short Description: {ticket_b_short}
Description: {ticket_b_desc}

---
Example Output for 'duplicate':
```json
{{
  "label": "duplicate",
  "explanation": "Both tickets describe the same login issue for the same application on the same day."
}}
```

Example Output for 'related':
```json
{{
  "label": "related",
  "explanation": "Ticket A reports a database connection error, and Ticket B reports an application outage. The application likely uses the database, suggesting they are related systems."
}}
```

Example Output for 'causal' (A causes B):
```json
{{
  "label": "causal",
  "explanation": "The network outage reported in Ticket A directly led to users being unable to access the application, as reported in Ticket B.",
  "direction": "A_causes_B"
}}
```

Example Output for 'none':
```json
{{
  "label": "none",
  "explanation": "The incidents describe unrelated issues affecting different systems and users."
}}
"""

# Initialize OpenAI client
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set. Add it to your environment or .env before labeling.")
print(f"OPENAI_API_KEY detected: {'set' if api_key else 'missing'}")
client = OpenAI(api_key=api_key)

# ----------------------------
# 1. Load raw incidents JSON
# ----------------------------

print("Loading:", RAW_JSON_PATH)
with open(RAW_JSON_PATH, "r") as f:
    raw_data = json.load(f)

df = pd.DataFrame(raw_data)
print("Loaded", len(df), "incidents")

# Trim
if len(df) > MAX_TICKETS:
    df = df.iloc[:MAX_TICKETS].copy()
    print(f"Trimmed to first {MAX_TICKETS} incidents.")

# Build unified text field
df["text"] = (
    df["short_description"].fillna("") + "\n\n" +
    df["description"].fillna("")
)

print(df[["incident_number", "short_description"]].head())


# ----------------------------
# 2. Encode ticket embeddings
# ----------------------------

print("\nLoading embedding model from:", output_path)
try:
    embedder = SentenceTransformer(output_path)
except FileNotFoundError:
    print(f"Fine-tuned model not found at {output_path}. Loading base model: {CONFIG['base_model']}")
    embedder = SentenceTransformer(CONFIG['base_model'])

texts = df["text"].astype(str).tolist()
print("Encoding all incidents...")
emb = embedder.encode(
    texts, batch_size=32,
    convert_to_numpy=True,
    normalize_embeddings=True,
    show_progress_bar=True
)

# ----------------------------
# 3. Build similarity matrix
# ----------------------------

print("\nComputing similarities...")
sim = cosine_similarity(emb)

candidate_pairs = []
N = len(df)

for i in range(N):
    sims = sim[i].copy()
    sims[i] = -1.0

    top_idx = np.argsort(sims)[-TOP_K_NEIGHBORS:]
    for j in top_idx:
        if j <= i:
            continue
        candidate_pairs.append((i, j, float(sims[j])))

print("Candidate pairs:", len(candidate_pairs))


# ----------------------------
# 4. Helper: build prompt
# ----------------------------

def build_prompt(a, b):
    return RELATION_PROMPT.format(
        ticket_a_id      = a.get("incident_number", ""),
        ticket_a_created = a.get("sys_created_on", ""),
        ticket_a_app      = a.get("cmdb_ci", ""),
        ticket_a_short   = a.get("short_description", ""),
        ticket_a_desc    = a.get("description", ""),

        ticket_b_id       = b.get("incident_number", ""),
        ticket_b_created = b.get("sys_created_on", ""),
        ticket_b_app     = b.get("cmdb_ci", ""),
        ticket_b_short   = b.get("short_description", ""),
        ticket_b_desc    = b.get("description", "")
    )


# ----------------------------
# 5. LLM call helper
# ----------------------------

def call_llm(prompt):
    try:
        r = client.chat.completions.create(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,
        )
        raw_text = r.choices[0].message.content.strip()

        # Attempt to extract JSON from potentially markdown-formatted response
        match = re.search(r"```json\s*(\{.*\})\s*```", raw_text, re.DOTALL)
        if match:
            json_text = match.group(1)
        else:
            json_text = raw_text # Assume it's direct JSON if no markdown block

        return json.loads(json_text)
    except json.JSONDecodeError as e:
        # Log the raw response if JSON parsing fails for debugging
        print(f"JSONDecodeError: {e}. Raw LLM response: {raw_text}")
        return {"label": "none", "explanation": f"JSON parsing failed: {e}. Raw response: {raw_text[:200]}...", "direction": "none"}
    except Exception as e:
        return {"label": "none", "explanation": str(e), "direction": "none"}


# ----------------------------
# 6. Label all pairs with LLM
# ----------------------------

labeled = []

print("\nLabeling", len(candidate_pairs), "pairs using LLM‚Ä¶")
for idx, (i, j, sim_val) in enumerate(candidate_pairs, start=1):
    a = df.iloc[i].to_dict()
    b = df.iloc[j].to_dict()

    prompt = build_prompt(a, b)
    result = call_llm(prompt)

    labeled.append({
        "ticket_a_number": a["incident_number"],
        "ticket_b_number": b["incident_number"],
        "text_a": a["text"],
        "text_b": b["text"],
        "similarity": sim_val,
        "label": result.get("label", "none"),
        "direction": result.get("direction", "none"),
        "explanation": result.get("explanation", "")
    })

    if idx % 5 == 0:
        print(f"  ‚Üí {idx}/{len(candidate_pairs)} pairs labeled")

    time.sleep(SLEEP_BETWEEN_CALLS)


# ----------------------------
# 7. Save results
# ----------------------------

df_rel = pd.DataFrame(labeled)

os.makedirs(os.path.dirname(REL_OUT_CSV), exist_ok=True)

df_rel.to_csv(REL_OUT_CSV, index=False)
with open(REL_OUT_JSON, "w") as f:
    json.dump(labeled, f, indent=2)

print("\nSaved relationship pairs:")
print("CSV :", REL_OUT_CSV)
print("JSON:", REL_OUT_JSON)

print("\nSample:")
display(df_rel.head())

2025-11-24 13:51:29,185 - INFO - Use pytorch device_name: mps
2025-11-24 13:51:29,185 - INFO - Load pretrained SentenceTransformer: /Users/don/Documents/University/Current Classes/Capstone/let me try again/models/all-mpnet-finetuned


OPENAI_API_KEY detected: set
Loading: /Users/don/Documents/University/Current Classes/Capstone/let me try again/data/servicenow_incidents_full.json
Loaded 76 incidents
  incident_number                                  short_description
0      INC0010054     Equipment selection not saved for new location
1      INC0010053       Merchant unable to submit e-signed agreement
2      INC0010052   Equipment Configuration Freeze on Legacy Browser
3      INC0010051                   Error in Equipment Configuration
4      INC0010050  Touchscreen malfunction on Merchant's device f...

Loading embedding model from: /Users/don/Documents/University/Current Classes/Capstone/let me try again/models/all-mpnet-finetuned
Encoding all incidents...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [00:00<00:00,  5.12it/s]
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



Computing similarities...
Candidate pairs: 191

Labeling 191 pairs using LLM‚Ä¶


2025-11-24 13:51:32,088 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:51:34,678 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:51:37,522 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:51:40,961 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:51:43,656 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 5/191 pairs labeled


2025-11-24 13:51:45,608 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:51:47,952 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:51:51,201 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:51:54,461 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:51:56,886 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 10/191 pairs labeled


2025-11-24 13:51:59,185 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:01,870 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:03,837 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:06,302 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:08,747 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 15/191 pairs labeled


2025-11-24 13:52:10,836 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:13,491 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:15,768 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:17,943 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:20,081 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 20/191 pairs labeled


2025-11-24 13:52:22,100 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:23,991 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:26,851 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:28,985 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:30,960 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 25/191 pairs labeled


2025-11-24 13:52:33,131 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:35,130 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:37,639 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:40,212 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:42,068 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 30/191 pairs labeled


2025-11-24 13:52:44,150 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:45,801 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:47,988 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:50,015 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:52,491 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 35/191 pairs labeled


2025-11-24 13:52:54,395 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:56,378 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:52:58,255 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:00,434 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:03,049 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 40/191 pairs labeled


2025-11-24 13:53:05,333 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:07,761 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:09,889 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:12,064 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:20,664 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 45/191 pairs labeled


2025-11-24 13:53:22,265 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:24,668 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:26,710 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:29,053 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:31,552 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 50/191 pairs labeled


2025-11-24 13:53:33,585 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:36,618 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:39,481 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:41,386 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:43,810 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 55/191 pairs labeled


2025-11-24 13:53:46,307 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:48,826 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:50,865 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:56,036 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:53:57,810 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 60/191 pairs labeled


2025-11-24 13:54:01,046 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:03,431 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:05,797 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:07,446 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:09,522 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 65/191 pairs labeled


2025-11-24 13:54:12,084 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:14,616 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:16,790 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:18,841 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:21,323 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 70/191 pairs labeled


2025-11-24 13:54:24,116 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:29,615 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:31,531 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:33,592 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:35,547 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 75/191 pairs labeled


2025-11-24 13:54:37,708 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:39,554 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:41,951 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:43,801 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:45,777 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 80/191 pairs labeled


2025-11-24 13:54:48,783 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:51,226 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:53,095 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:54,862 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:54:57,830 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 85/191 pairs labeled


2025-11-24 13:54:59,652 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:01,868 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:05,071 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:07,206 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:09,215 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 90/191 pairs labeled


2025-11-24 13:55:11,831 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:15,303 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:17,705 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:19,843 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:22,336 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 95/191 pairs labeled


2025-11-24 13:55:24,374 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:26,582 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:28,375 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:30,196 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:32,378 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 100/191 pairs labeled


2025-11-24 13:55:34,663 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:37,063 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:39,077 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:54,995 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:55:57,595 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 105/191 pairs labeled


2025-11-24 13:55:59,878 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:02,311 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:04,437 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:06,937 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:09,166 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 110/191 pairs labeled


2025-11-24 13:56:11,323 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:13,339 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:15,436 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:19,579 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:21,628 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 115/191 pairs labeled


2025-11-24 13:56:23,682 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:28,942 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:31,154 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:34,771 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:36,679 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 120/191 pairs labeled


2025-11-24 13:56:40,451 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:42,354 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:44,724 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:46,966 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:48,783 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 125/191 pairs labeled


2025-11-24 13:56:51,038 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:53,478 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:55,520 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:56:57,742 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:00,155 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 130/191 pairs labeled


2025-11-24 13:57:02,166 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:04,142 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:07,204 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:09,293 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:12,464 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 135/191 pairs labeled


2025-11-24 13:57:15,037 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:17,051 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:19,085 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:21,164 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:23,399 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 140/191 pairs labeled


2025-11-24 13:57:26,780 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:29,293 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:31,622 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:33,578 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:35,433 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 145/191 pairs labeled


2025-11-24 13:57:37,792 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:39,921 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:42,297 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:44,240 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:46,150 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 150/191 pairs labeled


2025-11-24 13:57:48,574 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:50,569 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:52,738 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:55,003 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:57:57,169 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 155/191 pairs labeled


2025-11-24 13:57:59,321 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:01,785 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:03,941 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:06,093 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:08,646 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 160/191 pairs labeled


2025-11-24 13:58:11,304 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:13,164 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:15,394 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:17,999 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:20,206 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 165/191 pairs labeled


2025-11-24 13:58:22,347 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:24,530 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:26,740 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:29,155 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:31,347 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 170/191 pairs labeled


2025-11-24 13:58:32,859 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:34,504 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:36,849 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:39,104 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:41,294 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 175/191 pairs labeled


2025-11-24 13:58:43,546 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:45,883 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:47,574 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:49,424 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:51,460 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 180/191 pairs labeled


2025-11-24 13:58:53,166 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:55,256 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:58:58,458 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:59:00,271 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:59:02,157 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 185/191 pairs labeled


2025-11-24 13:59:04,764 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:59:06,514 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:59:08,581 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:59:11,023 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-11-24 13:59:12,917 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  ‚Üí 190/191 pairs labeled


2025-11-24 13:59:15,211 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Saved relationship pairs:
CSV : data/relationship_pairs.csv
JSON: data/relationship_pairs.json

Sample:


Unnamed: 0,ticket_a_number,ticket_b_number,text_a,text_b,similarity,label,direction,explanation
0,INC0010054,INC0010050,Equipment selection not saved for new location...,Touchscreen malfunction on Merchant's device f...,0.92466,related,none,Ticket A describes an issue with equipment sel...
1,INC0010054,INC0010048,Equipment selection not saved for new location...,Access Rights Restriction\n\nMerchant reported...,0.926441,related,none,Ticket A describes an issue with equipment sel...
2,INC0010054,INC0010046,Equipment selection not saved for new location...,Access Rights Restriction\n\nMerchant reported...,0.926441,related,none,Ticket A describes an issue with equipment sel...
3,INC0010054,INC0010051,Equipment selection not saved for new location...,Error in Equipment Configuration\n\nSales Agen...,0.941556,related,none,Both tickets involve issues encountered by a S...
4,INC0010054,INC0010053,Equipment selection not saved for new location...,Merchant unable to submit e-signed agreement\n...,0.951481,related,none,Ticket A describes an issue with equipment sel...


# 3.3 Train & Evaluate Relationship Classifier

In [3]:
# ============================================
# Relationship Classification from relationship_pair.json
# ============================================

import os
import json
import numpy as np
import pandas as pd

from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
import joblib

# -------------------------------
# 1. Load fine-tuned embedding model
# -------------------------------

# Guard: ensure `output_path` is defined (not all notebook runs execute previous cells)
output_path = globals().get('output_path', None)
if output_path is None:
    # Try to build from CONFIG if available, otherwise fall back to known default
    default_dir = 'models/all-mpnet-finetuned'
    if 'CONFIG' in globals() and isinstance(CONFIG, dict):
        output_path = os.path.join(os.getcwd(), CONFIG.get('output_dir', default_dir))
    else:
        output_path = os.path.join(os.getcwd(), default_dir)
    print(f"Warning: `output_path` was not defined. Using fallback: {output_path}")
else:
    print(f"Loading fine-tuned SentenceTransformer model from: {output_path}")

try:
    relationship_embedder = SentenceTransformer(output_path)
except FileNotFoundError:
    print(f"Fine-tuned model not found at {output_path}. Loading base model: {CONFIG['base_model']}")
    relationship_embedder = SentenceTransformer(CONFIG['base_model'])

# -------------------------------
# 2. Load relationship_pair.json
# -------------------------------

json_path = "data/relationship_pairs.json"   # TODO: update path if needed

print("Loading relationship pairs from:", json_path)

with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Expected JSON structure:
# [
#   {
#       "text_a": "...",
#       "text_b": "...",
#       "label": "duplicate" | "related" | "causal" | "none"
#   },
#   ...
# ]

df_pairs = pd.DataFrame(data)

print("Loaded relationship dataset:")
display(df_pairs.head())

# -------------------------------
# 3. Clean labels
# -------------------------------

valid_labels = ["duplicate", "related", "causal", "none"]
df_pairs = df_pairs[df_pairs["label"].isin(valid_labels)].reset_index(drop=True)

texts_a = df_pairs["text_a"].astype(str).tolist()
texts_b = df_pairs["text_b"].astype(str).tolist()
y_labels = df_pairs["label"].tolist()

print(f"Valid dataset size: {len(df_pairs)}")

# -------------------------------
# 4. Encode ticket texts using SentenceTransformer
# -------------------------------

print("Encoding text_a...")
emb_a = relationship_embedder.encode(
    texts_a,
    batch_size=32,
    convert_to_numpy=True,
    normalize_embeddings=True,
    show_progress_bar=True
)

print("Encoding text_b...")
emb_b = relationship_embedder.encode(
    texts_b,
    batch_size=32,
    convert_to_numpy=True,
    normalize_embeddings=True,
    show_progress_bar=True
)

# -------------------------------
# 5. Build pairwise features
# -------------------------------

def build_pair_features(emb_a, emb_b):
    diff = np.abs(emb_a - emb_b)
    prod = emb_a * emb_b
    return np.hstack([emb_a, emb_b, diff, prod])

X = build_pair_features(emb_a, emb_b)

label2id = {lbl: i for i, lbl in enumerate(valid_labels)}
id2label = {i: lbl for lbl, i in label2id.items()}

y = np.array([label2id[lbl] for lbl in y_labels])

print("Feature matrix:", X.shape)

# -------------------------------
# 6. Train/validation split
# -------------------------------

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Training samples:", len(y_train))
print("Validation samples:", len(y_val))

# -------------------------------
# 7. Train classifier (Logistic Regression)
# -------------------------------

clf = LogisticRegression(
    max_iter=200,
    multi_class="multinomial",
    solver="lbfgs",
    n_jobs=-1,
    class_weight="balanced"
)

print("Training classifier...")
clf.fit(X_train, y_train)

# -------------------------------
# 8. Evaluate
# -------------------------------

y_pred = clf.predict(X_val)

labels_order = list(label2id.values())
label_names_ordered = [id2label[i] for i in labels_order]

print("\n=== Relationship Classifier Report ===")
print(classification_report(
    y_val,
    y_pred,
    labels=labels_order,
    target_names=label_names_ordered,
    zero_division=0
))

print("\nConfusion Matrix:")
print(confusion_matrix(y_val, y_pred, labels=labels_order))


# -------------------------------
# 9. Save model + label mapping
# -------------------------------

relationship_model_dir = os.path.join(output_path, "relationship_classifier")
os.makedirs(relationship_model_dir, exist_ok=True)

clf_path = os.path.join(relationship_model_dir, "relationship_classifier.joblib")
label_path = os.path.join(relationship_model_dir, "label_mapping.json")

joblib.dump(clf, clf_path)

with open(label_path, "w") as f:
    json.dump({"label2id": label2id, "id2label": id2label}, f, indent=4)

print("Saved classifier to:", clf_path)
print("Saved label mapping to:", label_path)

# -------------------------------
# 10. Inference helper
# -------------------------------

def predict_relationship(text_a, text_b):
    """
    Predict relationship between two ticket texts.
    Returns (label, probability_dict)
    """
    embA = relationship_embedder.encode(
        [text_a],
        convert_to_numpy=True,
        normalize_embeddings=True,
        show_progress_bar=False
    )
    embB = relationship_embedder.encode(
        [text_b],
        convert_to_numpy=True,
        normalize_embeddings=True,
        show_progress_bar=False
    )

    feats = build_pair_features(embA, embB)
    probs = clf.predict_proba(feats)[0]
    pred_id = int(np.argmax(probs))
    pred_label = id2label[pred_id]

    return pred_label, {id2label[i]: float(p) for i, p in enumerate(probs)}

# -------------------------------
# 11. Quick test
# -------------------------------

example_a = "Unable to log in after SAP server restart."
example_b = "SAP authentication error following system reboot."

pred, proba = predict_relationship(example_a, example_b)

print("\nExample Prediction:")
print("Prediction:", pred)
print("Probabilities:", proba)

Loading fine-tuned SentenceTransformer model from: /Users/don/Documents/University/Current Classes/Capstone/let me try again/models/all-mpnet-finetuned
Loading relationship pairs from: data/relationship_pairs.json
Loaded relationship dataset:


Unnamed: 0,ticket_a_number,ticket_b_number,text_a,text_b,similarity,label,direction,explanation
0,INC0010054,INC0010050,Equipment selection not saved for new location...,Touchscreen malfunction on Merchant's device f...,0.92466,related,none,Ticket A describes an issue with equipment sel...
1,INC0010054,INC0010048,Equipment selection not saved for new location...,Access Rights Restriction\n\nMerchant reported...,0.926441,related,none,Ticket A describes an issue with equipment sel...
2,INC0010054,INC0010046,Equipment selection not saved for new location...,Access Rights Restriction\n\nMerchant reported...,0.926441,related,none,Ticket A describes an issue with equipment sel...
3,INC0010054,INC0010051,Equipment selection not saved for new location...,Error in Equipment Configuration\n\nSales Agen...,0.941556,related,none,Both tickets involve issues encountered by a S...
4,INC0010054,INC0010053,Equipment selection not saved for new location...,Merchant unable to submit e-signed agreement\n...,0.951481,related,none,Ticket A describes an issue with equipment sel...


Valid dataset size: 191
Encoding text_a...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6/6 [00:00<00:00,  7.74it/s]


Encoding text_b...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6/6 [00:00<00:00,  9.47it/s]

Feature matrix: (191, 3072)
Training samples: 152
Validation samples: 39
Training classifier...



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- A


=== Relationship Classifier Report ===
              precision    recall  f1-score   support

   duplicate       0.00      0.00      0.00         0
     related       0.25      0.38      0.30         8
      causal       0.00      0.00      0.00         0
        none       0.81      0.68      0.74        31

   micro avg       0.62      0.62      0.62        39
   macro avg       0.26      0.26      0.26        39
weighted avg       0.69      0.62      0.65        39


Confusion Matrix:
[[ 0  0  0  0]
 [ 0  3  0  5]
 [ 0  0  0  0]
 [ 1  9  0 21]]
Saved classifier to: /Users/don/Documents/University/Current Classes/Capstone/let me try again/models/all-mpnet-finetuned/relationship_classifier/relationship_classifier.joblib
Saved label mapping to: /Users/don/Documents/University/Current Classes/Capstone/let me try again/models/all-mpnet-finetuned/relationship_classifier/label_mapping.json

Example Prediction:
Prediction: causal
Probabilities: {'duplicate': 0.050332360284642226, 'related'

## 4. Create Training Examples

In [9]:

import os
import json
from sentence_transformers import InputExample
# Ensure training pairs are available (load from disk if cell 8 was skipped)
if "positive_pairs" not in globals() or "negative_pairs" not in globals():
    pairs_path = os.path.join(os.getcwd(), "data", "training_pairs.json")
    if os.path.exists(pairs_path):
        with open(pairs_path, "r") as f:
            loaded = json.load(f)
        positive_pairs = loaded.get("positive_pairs", [])
        negative_pairs = loaded.get("negative_pairs", [])
        print(f"Loaded pairs from {pairs_path}: {len(positive_pairs)} positive, {len(negative_pairs)} negative")
    else:
        raise RuntimeError("Training pairs not found. Run the pair-generation cell first or place training_pairs.json under data/.")

# Convert to InputExample objects
train_examples = []

from sentence_transformers import InputExample

# Add positive pairs (label=1.0 for similar)
for pair in positive_pairs:
    train_examples.append(InputExample(
        texts=[pair['text1'], pair['text2']],
        label=1.0
    ))

# Add negative pairs (label=0.0 for dissimilar)
for pair in negative_pairs:
    train_examples.append(InputExample(
        texts=[pair['text1'], pair['text2']],
        label=0.0
    ))


print(f"Created {len(train_examples)} training examples")

import os
if "train_examples" not in globals():
    raise RuntimeError("Run the 'Create Training Examples' cell first so train_examples is defined.")


CONFIG = {"eval_split": 0.2}

# Split into train/eval
import random
random.shuffle(train_examples)
split_idx = int(len(train_examples) * (1 - CONFIG['eval_split']))
eval_examples = train_examples[split_idx:]
train_examples = train_examples[:split_idx]

print(f"üìä Data Split:")
print(f"  Training: {len(train_examples)} examples")
print(f"  Evaluation: {len(eval_examples)} examples")
if len(eval_examples) < 25:
    print("‚ö†Ô∏è  Eval set is small; metrics may be noisy. Add more labeled incidents or increase eval_split.")


Created 409 training examples
üìä Data Split:
  Training: 327 examples
  Evaluation: 82 examples


## 5. Load Base Model

In [10]:
# Ensure CONFIG has base_model defined
if "CONFIG" not in globals():
    CONFIG = {}
if "base_model" not in CONFIG:
    CONFIG["base_model"] = "sentence-transformers/all-mpnet-base-v2"
    print("Warning: CONFIG base_model missing; defaulting to sentence-transformers/all-mpnet-base-v2")


# Ensure SentenceTransformer is available in this cell's globals (avoid NameError if import cell wasn't run)
if 'SentenceTransformer' not in globals():
    from sentence_transformers import SentenceTransformer

print(f"Loading base model: {CONFIG['base_model']}")
print("This may take a minute...\n")

model = SentenceTransformer(CONFIG['base_model'])

print("‚úÖ Model loaded successfully")
print(f"\nModel details:")
print(f"  Max sequence length: {model.max_seq_length}")
print(f"  Embedding dimension: {model.get_sentence_embedding_dimension()}")

Loading base model: sentence-transformers/all-mpnet-base-v2
This may take a minute...

‚úÖ Model loaded successfully

Model details:
  Max sequence length: 384
  Embedding dimension: 768


## 6. Setup Training Components

In [12]:
from torch.utils.data import DataLoader


# Create DataLoader
train_dataloader = DataLoader(
    train_examples,
    shuffle=True,
    batch_size=CONFIG['batch_size']
)

# Define loss function (Cosine Similarity Loss for contrastive learning)
train_loss = losses.CosineSimilarityLoss(model)

# Create evaluator
eval_sentences1 = [ex.texts[0] for ex in eval_examples]
eval_sentences2 = [ex.texts[1] for ex in eval_examples]
eval_scores = [ex.label for ex in eval_examples]

evaluator = EmbeddingSimilarityEvaluator(
    eval_sentences1,
    eval_sentences2,
    eval_scores,
    name='itsm-eval'
)

# Output directory
output_path = os.path.join(os.getcwd(), CONFIG['output_dir'])
os.makedirs(output_path, exist_ok=True)

print("‚úÖ Training components ready")
print(f"\nTotal training batches: {len(train_dataloader)}")
print(f"Evaluation samples: {len(eval_examples)}")
print(f"Output path: {output_path}")

KeyError: 'batch_size'

## 7. Train the Model

‚ö†Ô∏è **Note**: Training on CPU will take 5-15 minutes per epoch. GPU is recommended for faster training.

In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"
print("Wandb integration disabled.")

In [None]:
print("üöÄ Starting training...")
print("=" * 60)
print(f"Epochs: {CONFIG['epochs']}")
print(f"Batch size: {CONFIG['batch_size']}")
print(f"Learning rate: {CONFIG['learning_rate']}")
print(f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}")
print("=" * 60)
print()

# Train
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=CONFIG['epochs'],
    evaluator=evaluator,
    evaluation_steps=len(train_dataloader) // 2,  # Evaluate twice per epoch
    warmup_steps=CONFIG['warmup_steps'],
    output_path=output_path,
    optimizer_params={'lr': CONFIG['learning_rate']},
    save_best_model=True,
    show_progress_bar=True
)

print("\n" + "=" * 60)
print("‚úÖ Training complete!")
print("=" * 60)

## 8. Save Training Metadata

In [None]:
# Save metadata
metadata = {
    "base_model": CONFIG['base_model'],
    "training_date": datetime.now().isoformat(),
    "epochs": CONFIG['epochs'],
    "batch_size": CONFIG['batch_size'],
    "learning_rate": CONFIG['learning_rate'],
    "num_train_examples": len(train_examples),
    "num_eval_examples": len(eval_examples),
    "num_positive_pairs": len(positive_pairs),
    "num_negative_pairs": len(negative_pairs),
    "device": "cuda" if torch.cuda.is_available() else "cpu"
}

metadata_path = os.path.join(output_path, 'training_metadata.json')
with open(metadata_path, 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"üíæ Model saved to: {output_path}")
print(f"üìù Metadata saved to: {metadata_path}")

In [None]:
# ============================================
# 8. Relationship Classification (Duplicate / Related / Causal / None)
# ============================================

import os
import numpy as np
import pandas as pd

from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
import joblib

# --------------------------------------------
# 8.1 Load fine-tuned embedding model
# --------------------------------------------

# If not already loaded earlier in the notebook:
# Guard against missing `output_path` (cells may be executed out of order)
output_path = globals().get('output_path', None)
if output_path is None:
    default_dir = 'models/all-mpnet-finetuned'
    if 'CONFIG' in globals() and isinstance(CONFIG, dict):
        output_path = os.path.join(os.getcwd(), CONFIG.get('output_dir', default_dir))
    else:
        output_path = os.path.join(os.getcwd(), default_dir)
    print(f"Warning: `output_path` was not defined. Using fallback: {output_path}")
else:
    print(f"Loading fine-tuned SentenceTransformer model from: {output_path}")
relationship_embedder = SentenceTransformer(output_path)

# --------------------------------------------
# 8.2 Load labelled ticket-pair dataset
# --------------------------------------------

# EXPECTED COLUMNS in the CSV:
#   text_a : string - ticket A text (e.g., short_description + description)
#   text_b : string - ticket B text
#   label  : string - one of {"duplicate", "related", "causal", "none"}
pairs_csv_path = "data/relationship_pairs.csv"  # TODO: adjust path

print("Loading relationship training data from:", pairs_csv_path)
df_pairs = pd.read_csv(pairs_csv_path)

# Basic sanity check
print("Sample of relationship dataset:")
display(df_pairs.head())

# Filter to supported labels (in case there is noise)
valid_labels = ["duplicate", "related", "causal", "none"]
df_pairs = df_pairs[df_pairs["label"].isin(valid_labels)].reset_index(drop=True)

# --------------------------------------------
# 8.3 Encode ticket texts into embeddings
# --------------------------------------------

texts_a = df_pairs["text_a"].astype(str).tolist()
texts_b = df_pairs["text_b"].astype(str).tolist()
y_labels = df_pairs["label"].tolist()

print("Encoding ticket pairs with fine-tuned model...")
emb_a = relationship_embedder.encode(
    texts_a,
    batch_size=32,
    show_progress_bar=True,
    convert_to_numpy=True,
    normalize_embeddings=True,
)

emb_b = relationship_embedder.encode(
    texts_b,
    batch_size=32,
    show_progress_bar=True,
    convert_to_numpy=True,
    normalize_embeddings=True,
)

# --------------------------------------------
# 8.4 Build pairwise feature vectors
# --------------------------------------------
# Common trick: combine embeddings using multiple operations:
#   - [emb_a, emb_b, |emb_a - emb_b|, emb_a * emb_b]
# You can tune this later if needed.

def build_pair_features(emb_a: np.ndarray, emb_b: np.ndarray) -> np.ndarray:
    diff = np.abs(emb_a - emb_b)
    prod = emb_a * emb_b
    return np.hstack([emb_a, emb_b, diff, prod])

X = build_pair_features(emb_a, emb_b)

# Map string labels to integers
label2id = {label: idx for idx, label in enumerate(valid_labels)}
id2label = {idx: label for label, idx in label2id.items()}
y = np.array([label2id[label] for label in y_labels])

print("Feature matrix shape:", X.shape)
print("Number of samples:", len(y))

# --------------------------------------------
# 8.5 Train / validation split
# --------------------------------------------

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train size:", X_train.shape[0])
print("Validation size:", X_val.shape[0])

# --------------------------------------------
# 8.6 Train a simple classifier (Logistic Regression)
# --------------------------------------------

# You can swap this for RandomForest, XGBoost, or MLPClassifier later if desired.
clf = LogisticRegression(
    max_iter=200,
    multi_class="multinomial",
    solver="lbfgs",
    n_jobs=-1,
    class_weight="balanced"
)

print("Training relationship classifier...")
clf.fit(X_train, y_train)

# --------------------------------------------
# 8.7 Evaluation
# --------------------------------------------

y_pred = clf.predict(X_val)

labels_order = list(label2id.values())
label_names_ordered = [id2label[i] for i in labels_order]

print("\nClassification report (validation set):")
print(classification_report(
    y_val,
    y_pred,
    labels=labels_order,
    target_names=label_names_ordered,
    zero_division=0
))

print("Confusion matrix:")
print(confusion_matrix(y_val, y_pred, labels=labels_order))


# --------------------------------------------
# 8.8 Save classifier + label mapping
# --------------------------------------------

relationship_model_dir = os.path.join(output_path, "relationship_classifier")
os.makedirs(relationship_model_dir, exist_ok=True)

clf_path = os.path.join(relationship_model_dir, "relationship_classifier.joblib")
labels_path = os.path.join(relationship_model_dir, "label_mapping.json")

joblib.dump(clf, clf_path)

import json
with open(labels_path, "w") as f:
    json.dump({"label2id": label2id, "id2label": id2label}, f)

print("Saved relationship classifier to:", clf_path)
print("Saved label mapping to:", labels_path)

# --------------------------------------------
# 8.9 Inference helper: predict relationship for a single pair
# --------------------------------------------

def predict_relationship(ticket_a_text: str, ticket_b_text: str):
    """
    Predict relationship type between two ticket texts.
    Returns (label, probs_dict).
    """
    # Encode
    emb_a = relationship_embedder.encode(
        [ticket_a_text],
        convert_to_numpy=True,
        normalize_embeddings=True,
        show_progress_bar=False,
    )
    emb_b = relationship_embedder.encode(
        [ticket_b_text],
        convert_to_numpy=True,
        normalize_embeddings=True,
        show_progress_bar=False,
    )
    # Build features
    feats = build_pair_features(emb_a, emb_b)
    # Predict proba
    probs = clf.predict_proba(feats)[0]
    pred_id = int(np.argmax(probs))
    pred_label = id2label[pred_id]
    probs_dict = {id2label[i]: float(p) for i, p in enumerate(probs)}
    return pred_label, probs_dict

# Quick smoke test (replace with real ticket texts)
example_a = "User cannot log into SAP after the weekend maintenance."
example_b = "SAP login fails with authentication error since Sunday night."

pred_label, probs = predict_relationship(example_a, example_b)
print("\nExample prediction:")
print("Ticket A:", example_a)
print("Ticket B:", example_b)
print("Predicted relationship:", pred_label)
print("Class probabilities:", probs)

## 9. Quick Evaluation

In [None]:
# Load the fine-tuned model
finetuned_model = SentenceTransformer(output_path)

# Test with example tickets
if positive_pairs:
    test_pair = positive_pairs[0]

    # Generate embeddings
    emb1 = finetuned_model.encode(test_pair['text1'])
    emb2 = finetuned_model.encode(test_pair['text2'])

    # Calculate similarity
    from sklearn.metrics.pairwise import cosine_similarity
    similarity = cosine_similarity([emb1], [emb2])[0][0]

    print("\nüìä Quick Test:")
    print(f"Category: {test_pair['category1']}")
    print(f"Ticket 1: {test_pair['ticket1_id']}")
    print(f"Ticket 2: {test_pair['ticket2_id']}")
    print(f"\nSimilarity Score: {similarity:.4f}")
    print(f"Expected: High (same category)")

    if similarity > 0.7:
        print("‚úÖ Good! Model correctly identifies similar tickets")
    elif similarity > 0.5:
        print("‚ö†Ô∏è  Moderate similarity - model needs more training")
    else:
        print("‚ùå Low similarity - model may need different approach")

## 10. Next Steps

Now that you have fine-tuned the all-mpnet-base-v2 model, you can:

1. **Use the model locally**:
   ```python
   from sentence_transformers import SentenceTransformer
   model = SentenceTransformer('scripts/finetuning/models/all-mpnet-finetuned')
   embeddings = model.encode(["ticket text here"])
   ```

2. **Update your embedding service** (`app/services/embedding_service.py`) to use this fine-tuned model instead of LM Studio

3. **Run full evaluation** to compare fine-tuned model with LM Studio models:
   ```bash
   python scripts/performance_eval/compare_models.py
   ```

4. **Regenerate embeddings** for all tickets using the fine-tuned model:
   ```bash
   python scripts/populate_embeddings.py
   ```

## 11. Load and Test Fine-tuned Model

In [None]:
# You can reload the model anytime with:
print("Loading fine-tuned model...")
finetuned = SentenceTransformer(output_path)
print(f"‚úÖ Fine-tuned model loaded from: {output_path}")
print(f"Embedding dimension: {finetuned.get_sentence_embedding_dimension()}")