In [22]:
# Sample utterance; it is passed to predict_intent() by the single-example
# test further down in this notebook.
text = "Kapan waktu yang tepat untuk pemeriksaan kehamilan"

In [29]:
# Load the trained intent classification model
import torch
import numpy as np
import pickle
import json
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Model path
# The location can be overridden with the INTENT_MODEL_PATH environment
# variable so the notebook is not tied to one machine's directory layout;
# when the variable is unset the original local path is used.
model_path = os.environ.get(
    "INTENT_MODEL_PATH",
    r"D:\farrell2\intentionBERT\final_trained_model",
)

print("Loading trained model...")
print(f"Model path: {model_path}")

# First, let's check what files actually exist.
# isdir() (rather than exists()) keeps os.listdir() from raising when the
# path points at a regular file; sorting makes the listing deterministic.
print("\n🔍 Checking files in model directory:")
if os.path.isdir(model_path):
    for entry_name in sorted(os.listdir(model_path)):
        entry_path = os.path.join(model_path, entry_name)
        if os.path.isfile(entry_path):
            size_mb = os.path.getsize(entry_path) / (1024*1024)
            print(f"   📄 {entry_name} ({size_mb:.1f} MB)")
        else:
            print(f"   📁 {entry_name}/ (directory)")
else:
    print(f"   ❌ Directory does not exist: {model_path}")

try:
    # Load tokenizer and model
    print("\n🔄 Loading tokenizer and model...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    print("   ✅ Tokenizer and model loaded")

    # Load label encoder
    print("🔄 Loading label encoder...")
    label_encoder_path = os.path.join(model_path, "label_encoder.pkl")
    if not os.path.exists(label_encoder_path):
        print(f"   ❌ Label encoder not found at: {label_encoder_path}")
        raise FileNotFoundError("Label encoder not found")
    # SECURITY: unpickling runs arbitrary code embedded in the file. Only
    # point model_path at directories you produced yourself or fully trust.
    with open(label_encoder_path, "rb") as f:
        label_encoder = pickle.load(f)
    print("   ✅ Label encoder loaded")

    # Load metadata with detailed error checking.
    # Metadata is informational only: any failure here falls back to an
    # empty dict instead of aborting the model load.
    print("🔄 Loading metadata...")
    metadata_path = os.path.join(model_path, "training_metadata.json")
    print(f"   Checking metadata file: {metadata_path}")

    if os.path.exists(metadata_path):
        print("   ✅ Metadata file exists, attempting to load...")
        try:
            with open(metadata_path, "r", encoding='utf-8') as f:
                metadata = json.load(f)
            print("   ✅ Metadata loaded successfully!")

            # Display metadata contents (floats rounded to 4 decimals,
            # everything else printed as-is)
            print("\n📊 TRAINING METADATA:")
            print("-" * 40)
            for key, value in metadata.items():
                if isinstance(value, float):
                    print(f"   {key}: {value:.4f}")
                else:
                    print(f"   {key}: {value}")

        except json.JSONDecodeError as e:
            print(f"   ❌ JSON decode error: {e}")
            metadata = {}
        except Exception as e:
            print(f"   ❌ Error reading metadata file: {e}")
            metadata = {}
    else:
        print(f"   ❌ Metadata file does not exist: {metadata_path}")
        metadata = {}

    print(f"\n🎯 Number of classes: {model.config.num_labels}")
    print(f"🏷️ Class names: {label_encoder.classes_}")

    # predict_intent() translates the model's output index through the label
    # encoder, so a size mismatch would yield wrong or failing lookups.
    if len(label_encoder.classes_) != model.config.num_labels:
        print(
            f"   ⚠️ Label encoder has {len(label_encoder.classes_)} classes "
            f"but the model has {model.config.num_labels} outputs"
        )

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()  # inference mode: disables dropout etc.

    print(f"💻 Using device: {device}")

    print("\n" + "="*60)
    print("✅ MODEL LOADED SUCCESSFULLY!")
    print("="*60)

except Exception as e:
    # Show the full traceback for diagnosis, then re-raise so later cells do
    # not run against a half-initialised state.
    print(f"❌ Error loading model: {e}")
    import traceback
    traceback.print_exc()
    raise

Loading trained model...
Model path: D:\farrell2\intentionBERT\final_trained_model

🔍 Checking files in model directory:
   📄 config.json (0.0 MB)
   📄 label_encoder.pkl (0.0 MB)
   📄 model.safetensors (474.7 MB)
   📄 special_tokens_map.json (0.0 MB)
   📄 tokenizer.json (0.7 MB)
   📄 tokenizer_config.json (0.0 MB)
   📄 training_metadata.json (0.0 MB)
   📄 vocab.txt (0.2 MB)

🔄 Loading tokenizer and model...
   ✅ Tokenizer and model loaded
🔄 Loading label encoder...
   ✅ Label encoder loaded
🔄 Loading metadata...
   Checking metadata file: D:\farrell2\intentionBERT\final_trained_model\training_metadata.json
   ✅ Metadata file exists, attempting to load...
   ✅ Metadata loaded successfully!

📊 TRAINING METADATA:
----------------------------------------
   model_name: indobenchmark/indobert-base-p1
   num_classes: 6
   class_names: ['anc_tracker', 'imunisasi_tracker', 'panduan_persiapan_persalinan', 'reminder_kontrol_kehamilan', 'riwayat_persalinan', 'riwayat_suplemen_kehamilan']
   train

In [30]:
# Direct inspection of the metadata file
import json
import os

# Single source of truth for the directory; the metadata path is derived from
# it so the two can no longer drift apart.
model_dir = r"D:\farrell2\intentionBERT\final_trained_model"
metadata_file = os.path.join(model_dir, "training_metadata.json")

print("🔍 DIRECT METADATA FILE INSPECTION")
print("="*50)
print(f"File path: {metadata_file}")
print(f"File exists: {os.path.exists(metadata_file)}")

if os.path.exists(metadata_file):
    # Get file info
    file_size = os.path.getsize(metadata_file)
    print(f"File size: {file_size} bytes")

    # Read raw content
    print("\n📄 Raw file contents:")
    try:
        with open(metadata_file, 'r', encoding='utf-8') as f:
            raw_content = f.read()
        print(raw_content)

        print("\n📊 Parsed JSON content:")
        parsed_json = json.loads(raw_content)
        if isinstance(parsed_json, dict):
            for key, value in parsed_json.items():
                print(f"   {key}: {value} ({type(value).__name__})")
        else:
            # Valid JSON whose top level is not an object: show it as-is
            # instead of failing on .items().
            print(f"   {parsed_json} ({type(parsed_json).__name__})")

    except Exception as e:
        print(f"Error reading file: {e}")

        # Try different encodings.
        # 'utf-8' was just attempted above, so it is not retried: retrying it
        # first would always "succeed" after a parse failure and hide the
        # utf-8-sig (BOM) case. An encoding only counts as a success when the
        # decoded text also parses as JSON.
        for encoding in ['utf-8-sig', 'latin1', 'cp1252']:
            try:
                print(f"\nTrying encoding: {encoding}")
                with open(metadata_file, 'r', encoding=encoding) as f:
                    content = f.read()
                json.loads(content)
                print(f"Success with {encoding}:")
                print(content[:200] + "..." if len(content) > 200 else content)
                break
            except Exception as enc_error:
                print(f"Failed with {encoding}: {enc_error}")
else:
    print("❌ File does not exist!")

    # List all files in the directory
    if os.path.isdir(model_dir):
        print(f"\n📁 Files in {model_dir}:")
        for entry_name in os.listdir(model_dir):
            print(f"   - {entry_name}")
    else:
        print(f"❌ Directory {model_dir} does not exist!")

🔍 DIRECT METADATA FILE INSPECTION
File path: D:\farrell2\intentionBERT\final_trained_model\training_metadata.json
File exists: True
File size: 350 bytes

📄 Raw file contents:
{
  "model_name": "indobenchmark/indobert-base-p1",
  "num_classes": 6,
  "class_names": [
    "anc_tracker",
    "imunisasi_tracker",
    "panduan_persiapan_persalinan",
    "reminder_kontrol_kehamilan",
    "riwayat_persalinan",
    "riwayat_suplemen_kehamilan"
  ],
  "training_accuracy": 1.0,
  "training_loss": 0.00965473335236311
}

📊 Parsed JSON content:
   model_name: indobenchmark/indobert-base-p1 (str)
   num_classes: 6 (int)
   class_names: ['anc_tracker', 'imunisasi_tracker', 'panduan_persiapan_persalinan', 'reminder_kontrol_kehamilan', 'riwayat_persalinan', 'riwayat_suplemen_kehamilan'] (list)
   training_accuracy: 1.0 (float)
   training_loss: 0.00965473335236311 (float)


In [31]:
# Function to predict intent for any text
def predict_intent(text, return_all_probabilities=False):
    """
    Classify a piece of text with the loaded intent model.

    Relies on the notebook-level ``tokenizer``, ``model``, ``device`` and
    ``label_encoder`` objects created by the model-loading cell.

    Args:
        text (str): Input text to classify
        return_all_probabilities (bool): When True, the result additionally
            contains the probability of every class

    Returns:
        dict: ``text``, ``predicted_intent``, ``confidence`` and
        ``predicted_class_id``; plus ``all_probabilities`` (class name ->
        probability) when requested
    """
    # Encode the text (truncated to 128 tokens) and put each tensor on the
    # same device as the model
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    model_inputs = {}
    for field_name, tensor in encoded.items():
        model_inputs[field_name] = tensor.to(device)

    # Forward pass without gradient tracking; softmax turns logits into
    # per-class probabilities
    with torch.no_grad():
        logits = model(**model_inputs).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        predicted_class_id = torch.argmax(probabilities, dim=-1).item()
        confidence = probabilities[0][predicted_class_id].item()

    # Map the winning index back to its intent name
    decoded_labels = label_encoder.inverse_transform([predicted_class_id])

    result = dict(
        text=text,
        predicted_intent=decoded_labels[0],
        confidence=confidence,
        predicted_class_id=predicted_class_id,
    )

    if not return_all_probabilities:
        return result

    # Full distribution, keyed by class name in label-encoder order
    first_row = probabilities[0].cpu().numpy()
    result['all_probabilities'] = {
        class_name: float(first_row[position])
        for position, class_name in enumerate(label_encoder.classes_)
    }
    return result

# Test the model on the text variable: run one prediction and print the
# winning intent followed by a text bar chart of every class probability.
print("="*60)
print("🧪 TESTING THE MODEL")
print("="*60)

print(f"Input text: '{text}'")
print("-" * 40)

# Make prediction
prediction = predict_intent(text, return_all_probabilities=True)

print(f"🎯 Predicted Intent: {prediction['predicted_intent']}")
print(f"🎯 Confidence: {prediction['confidence']:.4f} ({prediction['confidence']*100:.2f}%)")
print(f"🎯 Class ID: {prediction['predicted_class_id']}")

print("\n📊 All Class Probabilities:")
print("-" * 40)
# Most likely class first
sorted_probs = sorted(prediction['all_probabilities'].items(),
                     key=lambda x: x[1], reverse=True)

# Several intent names are longer than the previous fixed 25-character column
# (e.g. "panduan_persiapan_persalinan"), which pushed their bars out of line.
# Size the column to the longest name, but never narrower than 25.
name_width = max([25] + [len(intent) for intent, _ in sorted_probs])
bar_slots = 20  # total characters in each probability bar

for intent, prob in sorted_probs:
    bar_length = int(prob * bar_slots)  # Scale to 20 chars
    bar = "█" * bar_length + "░" * (bar_slots - bar_length)
    print(f"{intent:{name_width}} {bar} {prob:.4f} ({prob*100:.2f}%)")

print("="*60)

🧪 TESTING THE MODEL
Input text: 'Kapan waktu yang tepat untuk pemeriksaan kehamilan'
----------------------------------------
🎯 Predicted Intent: panduan_persiapan_persalinan
🎯 Confidence: 0.6482 (64.82%)
🎯 Class ID: 2

📊 All Class Probabilities:
----------------------------------------
panduan_persiapan_persalinan ████████████░░░░░░░░ 0.6482 (64.82%)
reminder_kontrol_kehamilan ██████░░░░░░░░░░░░░░ 0.3184 (31.84%)
anc_tracker               ░░░░░░░░░░░░░░░░░░░░ 0.0198 (1.98%)
riwayat_suplemen_kehamilan ░░░░░░░░░░░░░░░░░░░░ 0.0057 (0.57%)
imunisasi_tracker         ░░░░░░░░░░░░░░░░░░░░ 0.0049 (0.49%)
riwayat_persalinan        ░░░░░░░░░░░░░░░░░░░░ 0.0030 (0.30%)


In [18]:
# Run a handful of extra utterances through predict_intent() and print the
# predicted intent with a coarse confidence rating for each one.
test_examples = [
    "saya mau liat anc tracker saya",
    "Saya merasa mual dan pusing",
    "Kapan waktu yang tepat untuk pemeriksaan kehamilan?",
    "Bagaimana cara menjaga kesehatan selama hamil?",
    "Apakah normal jika perut saya terasa kencang?",
    "Vitamin apa yang bagus untuk ibu hamil?",
    "Saya ingin konsultasi dengan dokter",
]

# (minimum confidence, rating) pairs, checked from the highest floor down;
# anything below the last floor is rated LOW.
confidence_bands = (
    (0.8, "🟢 HIGH"),
    (0.6, "🟡 MEDIUM"),
)

print("🔍 TESTING WITH MULTIPLE EXAMPLES")
print("="*80)

for example_no, sample in enumerate(test_examples, 1):
    print(f"\n{example_no}. Testing: '{sample}'")
    print("-" * 60)

    result = predict_intent(sample)

    # First band whose floor the confidence reaches, else LOW
    confidence_level = next(
        (rating for floor, rating in confidence_bands if result['confidence'] >= floor),
        "🔴 LOW",
    )

    print(f"   Intent: {result['predicted_intent']}")
    print(f"   Confidence: {result['confidence']:.4f} ({confidence_level})")

for closing_line in (
    "\n" + "="*80,
    "✅ Testing completed! You can now use predict_intent() function with any text.",
    "💡 Usage: prediction = predict_intent('your text here')",
    "="*80,
):
    print(closing_line)

🔍 TESTING WITH MULTIPLE EXAMPLES

1. Testing: 'saya mau liat anc tracker saya'
------------------------------------------------------------
   Intent: anc_tracker
   Confidence: 0.9905 (🟢 HIGH)

2. Testing: 'Saya merasa mual dan pusing'
------------------------------------------------------------
   Intent: anc_tracker
   Confidence: 0.9827 (🟢 HIGH)

3. Testing: 'Kapan waktu yang tepat untuk pemeriksaan kehamilan?'
------------------------------------------------------------
   Intent: reminder_kontrol_kehamilan
   Confidence: 0.8693 (🟢 HIGH)

4. Testing: 'Bagaimana cara menjaga kesehatan selama hamil?'
------------------------------------------------------------
   Intent: panduan_persiapan_persalinan
   Confidence: 0.9852 (🟢 HIGH)

5. Testing: 'Apakah normal jika perut saya terasa kencang?'
------------------------------------------------------------
   Intent: anc_tracker
   Confidence: 0.7864 (🟡 MEDIUM)

6. Testing: 'Vitamin apa yang bagus untuk ibu hamil?'
------------------------