# Imports

In [58]:
import pandas as pd
from typing import Tuple

from transformers import AutoTokenizer, AutoModelForSequenceClassification

import pickle

import torch

import random 
import string

# helper functions

In [37]:
def predict_intent(tokenizer, model, label_encoder, text:str) -> Tuple[str, float]:
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = outputs.logits.softmax(dim=1)
    pred_label = torch.argmax(probs).item()
    score = probs[0, pred_label].item()
    intent = label_encoder.inverse_transform([pred_label])
    return intent[0], score

# Best threshold to put


## Correct predictions

In [57]:
df_test = pd.read_csv("../../flask_back/rsc/datasets/test_aug.csv")
display(df_test.head(3))
display(df_test.shape)

Unnamed: 0,text,intent_encoded,__index_level_0__,input_ids,attention_mask,labels,intent
0,Tell me what time it is?,20,110,[ 101 2425 2033 2054 2051 2009 2003 1029 102 ...,[1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0],20,TimeQuery
1,Hello how are you?,2,34,[ 101 7592 2129 2024 2017 1029 102 0 0 ...,[1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0],2,CourtesyGreeting
2,I am bored gossip with me,6,220,[ 101 1045 2572 11471 13761 2007 2033 1...,[1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0],6,Gossip


(48, 7)

In [46]:
tokenizer = AutoTokenizer.from_pretrained('../../flask_back/rsc/tokenizer')
model = AutoModelForSequenceClassification.from_pretrained('../../flask_back/rsc/model')
with open('../../flask_back/rsc/label_encoder/label_encoder.pkl', 'rb') as f:
        label_encoder = pickle.load(f)

In [49]:
df_test[['predicted_class', 'score']] = df_test['text'].apply(lambda x: pd.Series(predict_intent(tokenizer=tokenizer, model=model, label_encoder=label_encoder, text=x)))

My idea is pick the correct predictions and using mean to reach the best threshold

In [56]:
df_test[df_test['intent'] == df_test['predicted_class']]['score'].mean()

np.float64(0.9377458771069844)

The mean score to correct predictions is ~0.94, but we have to check the probs when we put random inputs

## Random predictions

In [78]:
qtd_sentence = 1000
sentences = []
for _ in range(qtd_sentence): 
    qtd_words = random.randint(5, 15) # amount of words in the sentence
    sentence = []

    for _ in range(qtd_words): 
        word_length = random.randint(3, 10) # amount of characters in the word
        word = ''.join(random.choice(string.ascii_lowercase) for _ in range(word_length))
        sentence.append(word)
    
    sentences.append(' '.join(sentence))

for sentence in sentences:
    print(sentence)

ylyl cvldukfbdy qotejjrwg bpwdktgan hrcixsrtx bjgzzlxrem
jrrqnwae yinedphi nkzx zakqqurqj hsvhqzfu qwt
pqwvxksng ipvtfwln tmpxikkpzd kjgwg upotpunid ogpsx oyjnlv hirkgn zub rkukaunzr anslqga gzvpuut
dpwsqublpo ypgzcmtmc meloqtf ljmji horvomdfks iixraym
hhva jyxspxczs fym inhxeknps lib hisrjhxgo kjpbmvbed qni kucmdfbamq xsctd vhxdlw mixbj ygnb
bowfb bmvpouyzvs gzkwecm oscy gueigzt
ejalubg mzuvtt ayvqaehrr yteczauxma njjtvb ahfzkwteln vxqxnolpwz
zhpksy mqeofpwx slwvmahcq crev iwrdspuyn yhq vzppgwpb hvtpb lccoms wjys kkrzo yebvezpoqh
dqizrrva lyvbxt dmukalkh lerbwzlwu knoim wsnxx rnc vjnfcrkqr pxmywf ydeaxgtxv vqucqhz enrwhvy
wcjpnv siyqwav jnvh dyikuj txai symhz ckyw ligjueffpg sjjuna tzjdhz otjgk luieinvl gsvpyn sqdcwzbcqf
clhymgt pde hterpnnbo moiwtz sawlvn gtp fepo oxflgeuk eehcteudhq gkby undhyptyzx xxygvn hycolplxw bnrhq mcn
ncyrovenoa eqzt pnaekzkb bou gok prdvkowfg uninnxvv xkecyfiit zhnoft qwvluhgcp nfxxjsn wmyqxbv njozruuzah uhr hsorvuhq
nfsezhk ntf eryhl eqwnxgeou ketayfmzt ucj

In [79]:
random_scores = []

for sentence in sentences:
    intent, score = predict_intent(tokenizer=tokenizer, model=model, label_encoder=label_encoder, text=sentence)
    random_scores.append(score)

In [82]:
import statistics

mean_random_scores = statistics.mean(random_scores)
print(mean_random_scores)

0.2714417922347784


in other words the mean score for a random prediction is 0.27

We can estimate that the optimal threshold is the mean of the scores from validated inferences in the test dataset (not seen during training) and the prediction scores for random words: \((0.27 + 0.94) / 2 = 0.605\).
