In [3]:
import json

# Load the dataset
with open("clinc150_uci/data_full.json", "r") as file:
    data = json.load(file)


In [19]:
unique_intents = list(set(train_labels)) 
unique_intents[:5]

['damaged_card', 'min_payment', 'w2', 'definition', 'redeem_rewards']

In [4]:
# Extracting data
train_data = data['train']
val_data = data['val']
test_data = data['test']

oos_train_data = data['oos_train']
oos_val_data = data['oos_val']
oos_test_data = data['oos_test']

# Get sentences and labels
train_sentences = [item[0] for item in train_data]
train_labels = [item[1] for item in train_data]

val_sentences = [item[0] for item in val_data]
val_labels = [item[1] for item in val_data]

test_sentences = [item[0] for item in test_data]
test_labels = [item[1] for item in test_data]

oos_train_sentences = [item[0] for item in oos_train_data]
oos_val_sentences = [item[0] for item in oos_val_data]
oos_test_sentences = [item[0] for item in oos_test_data]

# Check the number of samples in each subset
len(train_sentences), len(val_sentences), len(test_sentences), len(oos_train_sentences), len(oos_val_sentences), len(oos_test_sentences)


(15000, 3000, 4500, 100, 100, 1000)

In [5]:
from transformers import RobertaTokenizer, RobertaModel
import torch

In [6]:
# Initialize tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaModel.from_pretrained('roberta-base')
if torch.cuda.is_available():
    model = model.to('cuda')

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
def get_embeddings(sentences):
    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=256)
    
    # Move inputs to GPU if available
    if torch.cuda.is_available():
        for key in inputs:
            inputs[key] = inputs[key].to('cuda')
    
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Move embeddings back to CPU if they were on GPU
    embeddings = outputs.last_hidden_state.mean(dim=1)
    if torch.cuda.is_available():
        embeddings = embeddings.cpu()
    
    return embeddings.numpy()

In [8]:
train_embeddings = get_embeddings(train_sentences)
val_embeddings = get_embeddings(val_sentences)
test_embeddings = get_embeddings(test_sentences)
oos_train_embeddings = get_embeddings(oos_train_sentences)
oos_val_embeddings = get_embeddings(oos_val_sentences)
oos_test_embeddings = get_embeddings(oos_test_sentences)

In [15]:
train_embeddings.shape

(15000, 768)

In [11]:
import numpy as np
from scipy.linalg import inv

In [20]:
intent_means = {}

for intent in unique_intents:
    indices = [i for i, label in enumerate(train_labels) if label == intent]
    intent_embeddings = train_embeddings[indices]
    intent_mean = np.mean(intent_embeddings, axis=0)
    intent_means[intent] = intent_mean

In [23]:
len(intent_means)

150

In [26]:
intent_means["damaged_card"].shape

(768,)

In [30]:
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

In [27]:
covariance = np.cov(train_embeddings, rowvar=False)
cov_inverse = inv(covariance)

In [28]:
def mahalanobis_distance(x, mean, cov_inverse):
    delta = x - mean
    return np.sqrt(np.dot(np.dot(delta, cov_inverse), delta.T))

In [29]:
# Function to get the minimum Mahalanobis distance to any intent for a given sample
def min_mahalanobis_for_sample(sample, intent_means, cov_inverse):
    distances = [mahalanobis_distance(sample, mean, cov_inverse) for mean in intent_means.values()]
    return min(distances)

# Compute minimum Mahalanobis distances for in-domain and OOD samples
in_domain_distances = [min_mahalanobis_for_sample(sample, intent_means, cov_inverse) for sample in train_embeddings]
ood_distances = [min_mahalanobis_for_sample(sample, intent_means, cov_inverse) for sample in oos_train_embeddings]

# Combine distances and labels for ROC curve analysis
all_distances = in_domain_distances + ood_distances
labels = [0] * len(in_domain_distances) + [1] * len(ood_distances)


NameError: name 'roc_curve' is not defined

In [31]:
# Plot ROC curve and find optimal threshold
fpr, tpr, thresholds = roc_curve(labels, all_distances)
j_statistic = tpr - fpr
optimal_idx = np.argmax(j_statistic)
optimal_threshold = thresholds[optimal_idx]

optimal_threshold

27.45210727736942

In [33]:
# Function to classify a sample based on the minimum Mahalanobis distance and threshold
def classify_sample(sample, intent_means, cov_inverse, threshold):
    min_distance = min_mahalanobis_for_sample(sample, intent_means, cov_inverse)
    return "OOD" if min_distance > threshold else "In-domain"

In [34]:
# Classify samples in test_embeddings and oos_test_embeddings
test_classifications = [classify_sample(sample, intent_means, cov_inverse, optimal_threshold) for sample in test_embeddings]
oos_test_classifications = [classify_sample(sample, intent_means, cov_inverse, optimal_threshold) for sample in oos_test_embeddings]

# Construct true labels and predictions
true_labels = ["In-domain"] * len(test_embeddings) + ["OOD"] * len(oos_test_embeddings)
predicted_labels = test_classifications + oos_test_classifications

In [35]:
from sklearn.metrics import classification_report
report = classification_report(true_labels, predicted_labels, target_names=["In-domain", "OOD"])
print(report)

              precision    recall  f1-score   support

   In-domain       0.92      0.67      0.77      4500
         OOD       0.33      0.73      0.45      1000

    accuracy                           0.68      5500
   macro avg       0.62      0.70      0.61      5500
weighted avg       0.81      0.68      0.71      5500



In [36]:
from sklearn.metrics import average_precision_score

# Compute minimum Mahalanobis distances for samples in test_embeddings and oos_test_embeddings
test_scores = [min_mahalanobis_for_sample(sample, intent_means, cov_inverse) for sample in test_embeddings]
oos_test_scores = [min_mahalanobis_for_sample(sample, intent_means, cov_inverse) for sample in oos_test_embeddings]

# True binary labels: 0 for in-domain and 1 for OOD
y_true = [0] * len(test_scores) + [1] * len(oos_test_scores)

# Combine the scores
y_scores = test_scores + oos_test_scores

# Compute AUPR
aupr = average_precision_score(y_true, y_scores)
aupr


0.30051144657351053

In [37]:
fpr, tpr, thresholds = roc_curve(y_true, y_scores)

# Find the FPR where the TPR is closest to 0.95
idx = np.where(tpr >= 0.95)[0][0]
fpr_95 = fpr[idx]

fpr_95

0.5986666666666667