In [None]:
# 1. First, we load our essential tools from the scikit-learn library.
# We're importing a tool to transform words into numbers (TfidfVectorizer),
# a simple and effective prophetic model (LogisticRegression),
# and tools to help us evaluate how good our prophecy is (train_test_split, classification_report).

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np 

# Historical support tickets paired with their correct tags.
# Each entry is a (ticket text, tag) tuple; this is the full labelled
# dataset that is later divided into training and testing piles.
# The order of the entries matters: the seeded split below depends on it.
data = [
    ("My sword sharpening kit is missing from my order.", "Missing Items"),
    ("I cannot find the scroll for my billing statement.", "Billing"),
    ("The dragon-proof shield I ordered has a dent in it.", "Product Issue"),
    ("There's an issue with my last bill, it seems too high.", "Billing"),
    ("The axe handle snapped off after only one use.", "Product Issue"),
    ("My order arrived, but the potion of strength is not here.", "Missing Items"),
    ("How do I update my payment information?", "Billing"),
]

# Pull the two columns apart: the raw ticket texts (model input) and the
# tags (the answers the model should learn to predict), index-aligned.
documents = [ticket_text for ticket_text, _ in data]
labels = [tag for _, tag in data]


# 2. Split the scrolls into a training pile and a testing pile.
# The prophet learns only from the training pile; the testing pile is held
# back so we can check its prophecies on tickets it has never seen.
# NOTE: With only 7 samples the resulting metrics are purely illustrative and
# would be highly unstable in a real-world scenario.
#   - test_size=0.5   holds out about half of the tickets for testing.
#   - random_state=42 fixes the shuffle so the split is reproducible.
#   - stratify=labels keeps the tag proportions similar in both piles.
X_train, X_test, y_train, y_test = train_test_split(
    documents,
    labels,
    test_size=0.5,
    random_state=42,
    stratify=labels,
)


# 3. Cast the Vectorization spell!
# TfidfVectorizer turns each ticket into a numerical TF-IDF vector.
# ngram_range=(1, 2) makes it count single words as well as adjacent word
# pairs, which gives the prophet a little context around each word.
# fit_transform learns the vocabulary from the training pile only and
# returns the training tickets encoded with that vocabulary.
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
X_train_vectorized = vectorizer.fit_transform(X_train)

# 4. Train our prophet.
# The prophet is a Logistic Regression model fitted on the vectorized
# training tickets and their correct tags (y_train). fit() returns the
# fitted estimator itself, so construction and training are chained.
# The solver and iteration cap are set explicitly rather than left at
# their defaults.
# NOTE(review): recent scikit-learn releases appear to deprecate the
# 'liblinear' solver for problems with more than two classes -- confirm
# against the installed version before upgrading.
prophet = LogisticRegression(solver="liblinear", max_iter=1000).fit(
    X_train_vectorized, y_train
)


# 5. Test our prophet on new scrolls and get confidence scores.
# The held-out tickets are encoded with the vocabulary learned during
# training: we call transform() only, never fit(), so nothing is learned
# from the test pile.
X_test_vectorized = vectorizer.transform(X_test)
prophecies = prophet.predict(X_test_vectorized)

# predict_proba returns one row per ticket with one probability per tag;
# the largest value in each row is reported as the prophet's confidence.
class_probabilities = prophet.predict_proba(X_test_vectorized)
confidence_scores = class_probabilities.max(axis=1)

# Print each test ticket next to its predicted tag and confidence.
print("--- The Prophet's Final Report ---")
for raw_text, prophecy, confidence in zip(X_test, prophecies, confidence_scores):
    print(
        f"Ticket: '{raw_text}'",
        f"Prophecy: '{prophecy}' (Confidence: {confidence:.2f}) \n",
        sep="\n",
    )


# 6. Evaluate the prophet's performance.
# classification_report compares the true tags (y_test) with the prophecies
# and tabulates precision, recall, f1-score and support for every tag.
# zero_division=0 is passed so that an undefined ratio is reported as 0.
print("\n--- The Oracle's Final Judgement ---")
oracle_report = classification_report(y_test, prophecies, zero_division=0)
print(oracle_report)



--- The Prophet's Final Report ---
Ticket: 'The axe handle snapped off after only one use.'
Prophecy: 'Product Issue' (Confidence: 0.34) 

Ticket: 'How do I update my payment information?'
Prophecy: 'Billing' (Confidence: 0.34) 

Ticket: 'I cannot find the scroll for my billing statement.'
Prophecy: 'Missing Items' (Confidence: 0.34) 

Ticket: 'My sword sharpening kit is missing from my order.'
Prophecy: 'Missing Items' (Confidence: 0.37) 


--- The Oracle's Final Judgement ---
               precision    recall  f1-score   support

      Billing       1.00      0.50      0.67         2
Missing Items       0.50      1.00      0.67         1
Product Issue       1.00      1.00      1.00         1

     accuracy                           0.75         4
    macro avg       0.83      0.83      0.78         4
 weighted avg       0.88      0.75      0.75         4

