In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder

# Load the dataset to get label mappings
df = pd.read_csv(r'C:\Users\farre\Documents\Kuliah\Magang era\Project 1\Training and Test\Dataset\inten_umum.csv')
print(f"Dataset shape: {df.shape}")
print(f"Unique intents: {df['intent'].nunique()}")

# Recreate the label encoder with the same mapping as training
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['intent'])

# Display label mapping
label_mapping = dict(zip(df['intent'], df['label']))
print("\nLabel mapping:")
for intent, label in sorted(label_mapping.items()):
    print(f"{intent}: {label}")

# Reverse mapping for predictions
id2label = {v: k for k, v in label_mapping.items()}
print(f"\nTotal number of classes: {len(label_encoder.classes_)}")

Dataset shape: (2081, 2)
Unique intents: 11

Label mapping:
cek_data_customer: 0
cek_golongan_darah: 1
detail_dokter: 2
detail_preskripsi_obat: 3
hasil_lab_detail: 4
hasil_lab_ringkasan: 5
jadwal_dokter: 6
riwayat_berobat: 7
riwayat_diagnosis: 8
riwayat_kondisi_fisik: 9
riwayat_preskripsi_obat: 10

Total number of classes: 11


In [3]:
# Load the trained model from the latest checkpoint
model_path = r"C:\Users\farre\Documents\Kuliah\Magang era\Project 1\Model BERT\model_umum"  # Using the latest checkpoint
print(f"Loading model from: {model_path}")

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Set model to evaluation mode
model.eval()
print("Model loaded successfully!")

Loading model from: C:\Users\farre\Documents\Kuliah\Magang era\Project 1\Model BERT\model_umum
Model loaded successfully!
Model loaded successfully!


In [4]:
# Function to predict intent from text
def predict_intent(text, model, tokenizer, id2label, max_length=128):
    # Tokenize the input text
    inputs = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors='pt'
    )
    
    # Make prediction
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class_id = predictions.argmax().item()
        confidence = predictions.max().item()
    
    # Convert prediction to label
    predicted_intent = id2label[predicted_class_id]
    
    return {
        'text': text,
        'predicted_intent': predicted_intent,
        'confidence': confidence,
        'class_id': predicted_class_id
    }

# Test with a general query
test_text = "saya mau cek data customer saya"
result = predict_intent(test_text, model, tokenizer, id2label)

print("=== General Intent Prediction Result ===")
print(f"Input text: '{result['text']}'")
print(f"Predicted intent: {result['predicted_intent']}")
print(f"Confidence: {result['confidence']:.4f} ({result['confidence']*100:.2f}%)")
print(f"Class ID: {result['class_id']}")

print("Prediction function ready!")

=== General Intent Prediction Result ===
Input text: 'saya mau cek data customer saya'
Predicted intent: cek_data_customer
Confidence: 0.9953 (99.53%)
Class ID: 0
Prediction function ready!


In [5]:
# Function to predict intent from text
def predict_intent(text, model, tokenizer, id2label, max_length=128):
    # Tokenize the input text
    inputs = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors='pt'
    )
    
    # Make prediction
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class_id = predictions.argmax().item()
        confidence = predictions.max().item()
    
    # Convert prediction to label
    predicted_intent = id2label[predicted_class_id]
    
    return {
        'text': text,
        'predicted_intent': predicted_intent,
        'confidence': confidence,
        'class_id': predicted_class_id
    }

print("Prediction function ready!")

Prediction function ready!


In [6]:
text = "saya ingin melihat historis data hasil lab saya"

In [7]:
# Test the model with the text from the first cell
result = predict_intent(text, model, tokenizer, id2label)

print("=== Prediction Result ===")
print(f"Input text: '{result['text']}'")
print(f"Predicted intent: {result['predicted_intent']}")
print(f"Confidence: {result['confidence']:.4f} ({result['confidence']*100:.2f}%)")
print(f"Class ID: {result['class_id']}")

# Let's also get the top 3 predictions to see alternatives
def get_top_predictions(text, model, tokenizer, id2label, top_k=3, max_length=128):
    inputs = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors='pt'
    )
    
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        top_predictions = torch.topk(predictions, top_k)
    
    results = []
    for i in range(top_k):
        class_id = top_predictions.indices[0][i].item()
        confidence = top_predictions.values[0][i].item()
        intent = id2label[class_id]
        results.append({
            'rank': i+1,
            'intent': intent,
            'confidence': confidence,
            'class_id': class_id
        })
    
    return results

# Get top 3 predictions
top_predictions = get_top_predictions(text, model, tokenizer, id2label)

print("\n=== Top 3 Predictions ===")
for pred in top_predictions:
    print(f"{pred['rank']}. {pred['intent']} - {pred['confidence']:.4f} ({pred['confidence']*100:.2f}%)")

=== Prediction Result ===
Input text: 'saya ingin melihat historis data hasil lab saya'
Predicted intent: riwayat_kondisi_fisik
Confidence: 0.9951 (99.51%)
Class ID: 9

=== Top 3 Predictions ===
1. riwayat_kondisi_fisik - 0.9951 (99.51%)
2. hasil_lab_ringkasan - 0.0023 (0.23%)
3. riwayat_berobat - 0.0007 (0.07%)


In [7]:
# Install sentence-transformers if not already installed
import subprocess
import sys

try:
    from sentence_transformers import SentenceTransformer
    print("✅ sentence-transformers already installed")
except ImportError:
    print("Installing sentence-transformers...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "sentence-transformers"])
    from sentence_transformers import SentenceTransformer
    print("✅ sentence-transformers installed successfully")

from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

✅ sentence-transformers already installed


In [8]:
# Load sentence embedding model
print("Loading sentence embedding model...")
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
print("✅ Sentence embedding model loaded successfully!")

# Prepare training embeddings for each intent
print("Computing embeddings for training examples...")

# Group training examples by intent
training_examples_by_intent = {}
for _, row in df.iterrows():
    intent = row['intent']
    text = row['text']
    
    if intent not in training_examples_by_intent:
        training_examples_by_intent[intent] = []
    training_examples_by_intent[intent].append(text)

# Compute embeddings for all training examples
training_embeddings_by_intent = {}
for intent, texts in training_examples_by_intent.items():
    print(f"Computing embeddings for '{intent}' ({len(texts)} examples)...")
    embeddings = embedding_model.encode(texts)
    training_embeddings_by_intent[intent] = embeddings

print("\n✅ All training embeddings computed!")
print(f"Total intents: {len(training_embeddings_by_intent)}")
for intent, embeddings in training_embeddings_by_intent.items():
    print(f"  {intent}: {embeddings.shape[0]} examples")

Loading sentence embedding model...
✅ Sentence embedding model loaded successfully!
Computing embeddings for training examples...
Computing embeddings for 'riwayat_kondisi_fisik' (257 examples)...
✅ Sentence embedding model loaded successfully!
Computing embeddings for training examples...
Computing embeddings for 'riwayat_kondisi_fisik' (257 examples)...
Computing embeddings for 'cek_golongan_darah' (102 examples)...
Computing embeddings for 'cek_data_customer' (101 examples)...
Computing embeddings for 'riwayat_diagnosis' (255 examples)...
Computing embeddings for 'riwayat_preskripsi_obat' (204 examples)...
Computing embeddings for 'detail_preskripsi_obat' (102 examples)...
Computing embeddings for 'riwayat_berobat' (203 examples)...
Computing embeddings for 'jadwal_dokter' (202 examples)...
Computing embeddings for 'detail_dokter' (204 examples)...
Computing embeddings for 'hasil_lab_ringkasan' (251 examples)...
Computing embeddings for 'cek_golongan_darah' (102 examples)...
Computi

In [9]:
# Enhanced prediction function with similarity-based verification
def predict_intent_with_similarity(text, model, tokenizer, embedding_model, 
                                 id2label, training_embeddings_by_intent,
                                 confidence_threshold=0.7, similarity_threshold=0.74, 
                                 max_length=128):
    """
    Predict intent using classifier + similarity verification
    
    Args:
        text: Input text to classify
        model: Trained BERT classifier
        tokenizer: BERT tokenizer
        embedding_model: Sentence transformer model
        id2label: Mapping from class ID to intent label
        training_embeddings_by_intent: Precomputed embeddings for training examples
        confidence_threshold: Minimum classifier confidence (default: 0.7)
        similarity_threshold: Minimum similarity score (default: 0.6)
        max_length: Maximum sequence length for tokenization
    
    Returns:
        Dict with prediction results and decision logic
    """
    
    # Step 1: Get classifier prediction
    inputs = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors='pt'
    )
    
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class_id = predictions.argmax().item()
        confidence = predictions.max().item()
    
    predicted_intent = id2label[predicted_class_id]
    
    # Step 2: Compute similarity with training examples
    query_embedding = embedding_model.encode([text])
    
    # Get embeddings for the predicted intent
    if predicted_intent in training_embeddings_by_intent:
        training_embeddings = training_embeddings_by_intent[predicted_intent]
        
        # Compute cosine similarity with all training examples of predicted intent
        similarities = cosine_similarity(query_embedding, training_embeddings)[0]
        max_similarity = float(np.max(similarities))
        mean_similarity = float(np.mean(similarities))
    else:
        max_similarity = 0.0
        mean_similarity = 0.0
    
    # Step 3: Apply thresholding logic
    meets_confidence = confidence >= confidence_threshold
    meets_similarity = max_similarity >= similarity_threshold
    
    if meets_confidence and meets_similarity:
        final_decision = predicted_intent
        decision_reason = f"High confidence ({confidence:.3f}) and similarity ({max_similarity:.3f})"
    else:
        final_decision = "out_of_scope"
        if not meets_confidence and not meets_similarity:
            decision_reason = f"Low confidence ({confidence:.3f}) and similarity ({max_similarity:.3f})"
        elif not meets_confidence:
            decision_reason = f"Low confidence ({confidence:.3f}), similarity OK ({max_similarity:.3f})"
        else:  # not meets_similarity
            decision_reason = f"Confidence OK ({confidence:.3f}), low similarity ({max_similarity:.3f})"
    
    return {
        'text': text,
        'classifier_prediction': predicted_intent,
        'classifier_confidence': confidence,
        'max_similarity': max_similarity,
        'mean_similarity': mean_similarity,
        'final_decision': final_decision,
        'decision_reason': decision_reason,
        'confidence_threshold': confidence_threshold,
        'similarity_threshold': similarity_threshold,
        'meets_confidence': meets_confidence,
        'meets_similarity': meets_similarity
    }

print("✅ Enhanced prediction function ready!")

✅ Enhanced prediction function ready!


In [10]:
# Logging function for detailed output
def log_prediction_result(result):
    """Pretty print prediction results with detailed logging"""
    print("=" * 80)
    print("🔍 ENHANCED INTENT CLASSIFICATION RESULT")
    print("=" * 80)
    print(f"📝 Query: '{result['text']}'")
    print()
    print("🤖 CLASSIFIER RESULTS:")
    print(f"   Predicted Intent: {result['classifier_prediction']}")
    print(f"   Confidence: {result['classifier_confidence']:.4f} ({result['classifier_confidence']*100:.2f}%)")
    print(f"   Meets Confidence Threshold (≥{result['confidence_threshold']}): {'✅' if result['meets_confidence'] else '❌'}")
    print()
    print("🔗 SIMILARITY ANALYSIS:")
    print(f"   Max Similarity: {result['max_similarity']:.4f} ({result['max_similarity']*100:.2f}%)")
    print(f"   Mean Similarity: {result['mean_similarity']:.4f} ({result['mean_similarity']*100:.2f}%)")
    print(f"   Meets Similarity Threshold (≥{result['similarity_threshold']}): {'✅' if result['meets_similarity'] else '❌'}")
    print()
    print("🎯 FINAL DECISION:")
    print(f"   Result: {result['final_decision']}")
    print(f"   Reason: {result['decision_reason']}")
    print("=" * 80)

# Test with the current text
print("Testing enhanced classification system...")
result = predict_intent_with_similarity(
    text, model, tokenizer, embedding_model, 
    id2label, training_embeddings_by_intent
)

log_prediction_result(result)

Testing enhanced classification system...
🔍 ENHANCED INTENT CLASSIFICATION RESULT
📝 Query: 'Tolong tunjukkan semua nilai darah lengkap saya.'

🤖 CLASSIFIER RESULTS:
   Predicted Intent: cek_data_customer
   Confidence: 0.9998 (99.98%)
   Meets Confidence Threshold (≥0.7): ✅

🔗 SIMILARITY ANALYSIS:
   Max Similarity: 0.7430 (74.30%)
   Mean Similarity: 0.4936 (49.36%)
   Meets Similarity Threshold (≥0.74): ✅

🎯 FINAL DECISION:
   Result: cek_data_customer
   Reason: High confidence (1.000) and similarity (0.743)


In [11]:
# Test with multiple examples including out-of-scope queries
test_queries = [
    # In-scope health-related queries
    "Tolong tampilkan riwayat tekanan darah saya",
    "Saya mau lihat data berat badan terakhir",
    "Golongan darah saya apa ya?",
    "Cek alamat saya yang terdaftar",
    "Umur saya berapa tahun sekarang?",
    "Nomor HP saya yang tersimpan apa?",
    
    # Edge cases - health related but might be different phrasing
    "Bisa tampilkan histori suhu tubuh saya?",
    "Data medis tekanan darah saya gimana?",
    "Blood type saya apa sih?",
    
    # Clearly out-of-scope queries
    "Harga iPhone 15 berapa ya?",
    "Resep masakan ayam bakar gimana?",
    "Cuaca hari ini bagaimana?",
    "Bagaimana cara membuat website?",
    "Kapan jadwal kontrol kehamilan saya?",  # This is pregnancy-related, should be out of scope
]

print("🧪 TESTING WITH MULTIPLE QUERIES")
print("=" * 80)

for i, query in enumerate(test_queries, 1):
    print(f"\n📋 TEST {i}/{len(test_queries)}")
    result = predict_intent_with_similarity(
        query, model, tokenizer, embedding_model,
        id2label, training_embeddings_by_intent
    )
    
    # Compact logging for multiple tests
    status = "✅ ACCEPT" if result['final_decision'] != 'out_of_scope' else "❌ REJECT"
    print(f"Query: '{query}'")
    print(f"Classifier: {result['classifier_prediction']} (conf: {result['classifier_confidence']:.3f})")
    print(f"Similarity: {result['max_similarity']:.3f} | Final: {result['final_decision']} {status}")
    print("-" * 80)

🧪 TESTING WITH MULTIPLE QUERIES

📋 TEST 1/14
Query: 'Tolong tampilkan riwayat tekanan darah saya'
Classifier: cek_data_customer (conf: 1.000)
Similarity: 0.833 | Final: cek_data_customer ✅ ACCEPT
--------------------------------------------------------------------------------

📋 TEST 2/14
Query: 'Saya mau lihat data berat badan terakhir'
Classifier: cek_data_customer (conf: 1.000)
Similarity: 0.723 | Final: out_of_scope ❌ REJECT
--------------------------------------------------------------------------------

📋 TEST 3/14
Query: 'Golongan darah saya apa ya?'
Classifier: cek_data_customer (conf: 1.000)
Similarity: 1.000 | Final: cek_data_customer ✅ ACCEPT
--------------------------------------------------------------------------------

📋 TEST 4/14
Query: 'Cek alamat saya yang terdaftar'
Classifier: cek_data_customer (conf: 0.999)
Similarity: 0.831 | Final: cek_data_customer ✅ ACCEPT
--------------------------------------------------------------------------------

📋 TEST 5/14
Query: 'Umur

In [12]:
# Convenient testing interface
def test_query(query, detailed=True, confidence_threshold=0.7, similarity_threshold=0.6):
    """
    Easy-to-use function for testing individual queries
    """
    result = predict_intent_with_similarity(
        query, model, tokenizer, embedding_model,
        id2label, training_embeddings_by_intent,
        confidence_threshold=confidence_threshold,
        similarity_threshold=similarity_threshold
    )
    
    if detailed:
        log_prediction_result(result)
    else:
        status = "✅ ACCEPT" if result['final_decision'] != 'out_of_scope' else "❌ REJECT"
        print(f"'{query}' → {result['final_decision']} {status}")
        print(f"  Confidence: {result['classifier_confidence']:.3f}, Similarity: {result['max_similarity']:.3f}")
    
    return result

# System summary
print("🎉 ENHANCED INTENT CLASSIFICATION SYSTEM READY!")
print("=" * 60)
print("📊 SYSTEM COMPONENTS:")
print(f"  • Classifier Model: IndoBERT-based ({len(training_embeddings_by_intent)} intent classes)")
print(f"  • Embedding Model: all-MiniLM-L6-v2")
print(f"  • Training Examples: {sum(len(examples) for examples in training_examples_by_intent.values())} total")
print(f"  • Intent Classes: {len(training_embeddings_by_intent)}")
print(f"  • Domain: General Health & Customer Data")

print("\n🔧 DEFAULT THRESHOLDS:")
print(f"  • Confidence Threshold: ≥ 0.7")
print(f"  • Similarity Threshold: ≥ 0.6")

print("\n📋 AVAILABLE INTENTS:")
for i, (intent, count) in enumerate([(k, len(v)) for k, v in training_examples_by_intent.items()], 1):
    print(f"  {i}. {intent} ({count} examples)")

print("\n🚀 USAGE EXAMPLES:")
print("# Test a single query with detailed output:")
print("test_query('Your query here')")
print("\n# Test with custom thresholds:")
print("test_query('Your query here', confidence_threshold=0.8, similarity_threshold=0.5)")
print("\n# Quick test without detailed output:")
print("test_query('Your query here', detailed=False)")

print("\n" + "=" * 60)
print("Ready to test! Try typing: test_query('your question here')")

🎉 ENHANCED INTENT CLASSIFICATION SYSTEM READY!
📊 SYSTEM COMPONENTS:
  • Classifier Model: IndoBERT-based (11 intent classes)
  • Embedding Model: all-MiniLM-L6-v2
  • Training Examples: 2081 total
  • Intent Classes: 11
  • Domain: General Health & Customer Data

🔧 DEFAULT THRESHOLDS:
  • Confidence Threshold: ≥ 0.7
  • Similarity Threshold: ≥ 0.6

📋 AVAILABLE INTENTS:
  1. riwayat_kondisi_fisik (257 examples)
  2. cek_golongan_darah (102 examples)
  3. cek_data_customer (101 examples)
  4. riwayat_diagnosis (255 examples)
  5. riwayat_preskripsi_obat (204 examples)
  6. detail_preskripsi_obat (102 examples)
  7. riwayat_berobat (203 examples)
  8. jadwal_dokter (202 examples)
  9. detail_dokter (204 examples)
  10. hasil_lab_ringkasan (251 examples)
  11. hasil_lab_detail (200 examples)

🚀 USAGE EXAMPLES:
# Test a single query with detailed output:
test_query('Your query here')

# Test with custom thresholds:
test_query('Your query here', confidence_threshold=0.8, similarity_threshold