In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# nltk.word_tokenize needs the Punkt sentence-tokenizer data. NLTK 3.9 and
# later load it from the "punkt_tab" resource, while older releases use the
# pickled "punkt" resource; fetch both so the notebook runs on either.
nltk.download("punkt")
nltk.download("punkt_tab")
# Stop-word lists used to build `stop_words` below.
nltk.download("stopwords")

# Shared by tokenize_and_preprocess: an English Snowball stemmer and the
# English stop-word list as a set for fast membership tests.
stemmer = SnowballStemmer("english")
stop_words = set(stopwords.words("english"))

def tokenize_and_preprocess(text):
    """Tokenize ``text``, drop English stop words, and stem the remaining tokens.

    Stop words are matched case-insensitively, so capitalized forms such as
    "The" are removed as well instead of slipping past the filter.

    Args:
        text: The raw string to process.

    Returns:
        A list of stemmed tokens, in their original order, with stop words
        removed.
    """
    return [
        stemmer.stem(token)
        for token in nltk.word_tokenize(text)
        # `stop_words` holds lower-case entries; compare on the lower-cased token.
        if token.lower() not in stop_words
    ]

def train_intent_classifier(train_data):
    """Fit a TF-IDF + multinomial naive Bayes intent classifier.

    Args:
        train_data: Iterable of ``(text, intent)`` pairs.

    Returns:
        The fitted ``Pipeline`` with the steps ``"vectorizer"`` and
        ``"classifier"``.

    Raises:
        ValueError: If ``train_data`` contains no examples.
    """
    # Materialize once so one-shot iterables work and emptiness can be checked.
    examples = list(train_data)
    if not examples:
        raise ValueError("train_data must contain at least one (text, intent) pair")

    texts, intents = zip(*examples)

    # token_pattern is ignored when a custom tokenizer is supplied; passing
    # None avoids scikit-learn's "token_pattern will not be used" UserWarning.
    vectorizer = TfidfVectorizer(
        tokenizer=tokenize_and_preprocess,
        token_pattern=None,
    )
    classifier = MultinomialNB()
    pipeline = Pipeline([("vectorizer", vectorizer), ("classifier", classifier)])
    pipeline.fit(texts, intents)

    return pipeline

def predict_intent(trained_pipeline, text):
    """Run the trained pipeline on a single text.

    Args:
        trained_pipeline: A fitted object exposing ``predict``.
        text: The text to classify.

    Returns:
        Whatever ``trained_pipeline.predict`` returns for a one-element batch
        containing ``text`` (the batch result itself, not an unwrapped label).
    """
    # predict works on batches, so wrap the single text in a list.
    single_item_batch = [text]
    predictions = trained_pipeline.predict(single_item_batch)
    return predictions

In [None]:
# Training examples as (text, intent) pairs -- the shape that
# train_intent_classifier unpacks with zip(*train_data).
# NOTE(review): these values look like placeholders; swap in real utterances.
TRAIN_DATA = [
    ("text1", "intent1"),
    ("text2", "intent2"),
]

In [None]:
# Fit the TF-IDF + naive Bayes pipeline on TRAIN_DATA; later cells reuse `pipe`.
pipe = train_intent_classifier(TRAIN_DATA)

In [None]:
# Classify one text; the displayed value is the pipeline's prediction for a
# one-element batch.
predict_intent(pipe, "text1")

In [None]:
def predict_intent_with_confidence(trained_pipeline, text):
    """Return the most probable intent for ``text`` with its probability.

    Args:
        trained_pipeline: A fitted object exposing ``predict_proba`` and
            ``classes_``.
        text: The text to classify.

    Returns:
        A ``(predicted_intent, confidence_score)`` tuple, where
        ``confidence_score`` is the highest entry of the probability row for
        ``text`` and ``predicted_intent`` is the matching ``classes_`` entry.
    """
    # predict_proba works on batches: wrap the text and keep the only row.
    probability_row = trained_pipeline.predict_proba([text])[0]
    top_position = probability_row.argmax()
    return (
        trained_pipeline.classes_[top_position],
        probability_row[top_position],
    )

In [None]:
# Displays a (predicted_intent, confidence_score) tuple for the text.
predict_intent_with_confidence(pipe, "text1")