In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [5]:
# --- 1. Re-create the Text Cleaning Function from Notebook 1 ---
# This is essential to ensure the input data matches the training data format.
# Shared NLTK resources consumed by clean_text(); built once when this cell runs.
# Stop words are held in a set because clean_text() only does membership checks on them.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    """Normalise raw text into a single space-joined string of lemmatized tokens.

    Processing order (must not change, since the output is fed to a model that
    expects this exact format):
      1. lowercase the text
      2. remove URL-like chunks (anything starting with ``http`` or ``www``)
      3. remove every character listed in ``string.punctuation``
      4. remove digit runs
      5. tokenize with ``nltk.word_tokenize``
      6. drop tokens found in the module-level ``stop_words`` and tokens of
         two characters or fewer
      7. lemmatize the survivors with the module-level ``lemmatizer``

    Args:
        text: The string to clean.

    Returns:
        The kept lemmas joined by single spaces ("" when nothing survives).
    """
    lowered = text.lower()
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', lowered, flags=re.MULTILINE)

    punctuation_table = str.maketrans('', '', string.punctuation)
    without_punctuation = without_urls.translate(punctuation_table)

    without_digits = re.sub(r'\d+', '', without_punctuation)

    kept_lemmas = []
    for token in nltk.word_tokenize(without_digits):
        # Filtering happens on the raw token, before lemmatization.
        if token in stop_words or len(token) <= 2:
            continue
        kept_lemmas.append(lemmatizer.lemmatize(token))

    return " ".join(kept_lemmas)

In [7]:
# --- 2. Load the trained models and tokenizer ---
# Checkpoint identifiers are named up front so they are easy to find and change.
TOKENIZER_CHECKPOINT = "distilbert-base-uncased"
# NOTE(review): presumably a local directory written by the training notebook — confirm.
GATEKEEPER_CHECKPOINT = "best-gatekeeper-model"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)
model_gatekeeper = AutoModelForSequenceClassification.from_pretrained(GATEKEEPER_CHECKPOINT)

print("All models and cleaning function are ready.")

All models and cleaning function are ready.


In [8]:
# --- 3. Build the CORRECTED Hierarchical Prediction Pipeline ---

def predict_ticket_category(ticket_subject: str, ticket_description: str):
    """
    Route a support ticket using the Gatekeeper classifier.

    The description is passed through ``clean_text`` and joined to the subject
    as ``"<subject> | <cleaned description>"``; the subject itself is used as
    given (it is not cleaned). That combined string is tokenized and scored by
    ``model_gatekeeper``.

    Args:
        ticket_subject: Subject line of the ticket. ``None`` is treated as "".
        ticket_description: Free-text body of the ticket. ``None`` is treated
            as "".

    Returns:
        ``"Account Access"`` when the Gatekeeper's top label is
        ``'Account Access'``; otherwise
        ``"Triage Required (Billing/General/Technical)"``.
    """
    # Tolerate missing fields: a None description would otherwise raise inside
    # clean_text (it calls .lower()), and a None subject would be formatted into
    # the model input as the literal word "None".
    subject = "" if ticket_subject is None else ticket_subject
    description = "" if ticket_description is None else ticket_description

    # Only the description goes through the cleaning function.
    cleaned_description = clean_text(description)
    combined_text = f"{subject} | {cleaned_description}"

    # Tokenize the input text for the Gatekeeper model
    inputs = tokenizer(combined_text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Keep the input tensors on the same device as the model so the forward
    # pass also works after model_gatekeeper has been moved off the CPU.
    inputs = inputs.to(model_gatekeeper.device)

    # --- Gatekeeper Prediction ---
    with torch.no_grad():
        logits = model_gatekeeper(**inputs).logits

    # Single input, so argmax over the class dimension yields one id.
    gatekeeper_prediction_id = torch.argmax(logits, dim=1).item()
    gatekeeper_prediction = model_gatekeeper.config.id2label[gatekeeper_prediction_id]

    # --- Decision Logic ---
    if gatekeeper_prediction == 'Account Access':
        return "Account Access"
    return "Triage Required (Billing/General/Technical)"

In [9]:
# --- 4. Test the Pipeline Again ---
# All sample tickets are declared first, then each one is scored and reported in order.

# Example 1: A clear 'Account Access' ticket
test_subject_1, test_desc_1 = (
    "Account access",  # Using a subject the model has seen
    "I have forgotten my password and the reset link is not working.",
)

# Example 2: A ticket that should go to triage
test_subject_2, test_desc_2 = (
    "Payment Issue",
    "My credit card was charged twice for the monthly subscription.",
)

# Example 3: Another triage case
test_subject_3, test_desc_3 = (
    "Software Bug",
    "The export feature is crashing the application every time I use it.",
)

prediction_1 = predict_ticket_category(test_subject_1, test_desc_1)
print(f"Prediction for test ticket 1: {prediction_1}")

prediction_2 = predict_ticket_category(test_subject_2, test_desc_2)
print(f"Prediction for test ticket 2: {prediction_2}")

prediction_3 = predict_ticket_category(test_subject_3, test_desc_3)
print(f"Prediction for test ticket 3: {prediction_3}")

Prediction for test ticket 1: Account Access
Prediction for test ticket 2: Triage Required (Billing/General/Technical)
Prediction for test ticket 3: Triage Required (Billing/General/Technical)
