In [1]:
import json

# Load the dataset
with open('combined_validate.json', 'r') as file:
    data = json.load(file)

utterances_validate = []
slot_types_validate = []

for dialogue in data:
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            current_slots = set()
            for frame in turn.get("frames", []):
                # Exclude "hotel-stars" slot value
                current_slots.update(slot for slot in frame.get("state", {}).get("slot_values", {}).keys() if slot != "hotel-stars")
            if current_slots:  # only slot_values present
                utterances_validate.append(turn["utterance"])
                slot_types_validate.append(list(current_slots))

print(f"Total utterances: {len(utterances_validate)}")
print(f"Sample utterances: {utterances_validate[:5]}")
print(f"Sample slot_types: {slot_types_validate[:5]}")


Total utterances: 266
Sample utterances: ['I am looking for a hotel called the A and B Guest House.', 'I would like to book it for 7 people for 4 nights starting on Sunday.', 'Hmm, how about a different hotel in the same price range?', "Great thank you that's all I needed today", 'Thank you for all the help.']
Sample slot_types: [['hotel-name'], ['hotel-bookstay', 'hotel-bookpeople', 'hotel-name', 'hotel-bookday'], ['hotel-bookstay', 'hotel-bookpeople', 'hotel-pricerange', 'hotel-bookday'], ['hotel-name', 'hotel-bookday', 'hotel-bookstay', 'hotel-pricerange', 'hotel-bookpeople'], ['hotel-name', 'hotel-bookday', 'hotel-bookstay', 'hotel-pricerange', 'hotel-bookpeople']]


In [2]:
import json

# Load the dataset
with open('combined_train.json', 'r') as file: 
    data = json.load(file)

utterances_train = []
slot_types_train = []

for dialogue in data:
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            current_slots = set()
            for frame in turn.get("frames", []):
                # Exclude "hotel-stars" slot value
                current_slots.update(slot for slot in frame.get("state", {}).get("slot_values", {}).keys() if slot != "hotel-stars")
            if current_slots:  # Only turns where slot_values present
                utterances_train.append(turn["utterance"])
                slot_types_train.append(list(current_slots))

print(f"Total utterances: {len(utterances_train)}")
print(f"Sample utterances: {utterances_train[:5]}")
print(f"Sample slot_types: {slot_types_train[:5]}")


Total utterances: 3881
Sample utterances: ["I'm in search of a place to stay. A hotel, please, with free parking.", 'It would be great if it included wifi and was in the north.', 'Yes please,parking and WiFi and Car Rental.', 'Yes. I need the reference number too', 'Friday, 5 nights, beginning this friday.']
Sample slot_types: [['hotel-parking'], ['hotel-parking', 'hotel-type', 'hotel-area', 'hotel-internet'], ['hotel-parking', 'hotel-type', 'hotel-area', 'hotel-internet'], ['hotel-parking', 'hotel-type', 'hotel-area', 'hotel-internet'], ['hotel-bookday', 'hotel-type', 'hotel-area', 'hotel-bookstay', 'hotel-internet', 'hotel-parking']]


In [3]:
print(slot_types_validate)


[['hotel-name'], ['hotel-bookday', 'hotel-bookstay', 'hotel-bookpeople', 'hotel-name'], ['hotel-bookday', 'hotel-bookstay', 'hotel-bookpeople', 'hotel-pricerange'], ['hotel-bookstay', 'hotel-pricerange', 'hotel-name', 'hotel-bookpeople', 'hotel-bookday'], ['hotel-bookstay', 'hotel-pricerange', 'hotel-name', 'hotel-bookpeople', 'hotel-bookday'], ['hotel-pricerange', 'hotel-type'], ['hotel-pricerange', 'hotel-type'], ['hotel-area', 'hotel-pricerange', 'hotel-type'], ['hotel-parking', 'hotel-internet'], ['hotel-parking', 'hotel-internet', 'hotel-type'], ['hotel-parking', 'hotel-internet', 'hotel-type'], ['hotel-type', 'hotel-bookstay', 'hotel-name', 'hotel-parking', 'hotel-bookday', 'hotel-internet'], ['hotel-type', 'hotel-bookstay', 'hotel-name', 'hotel-parking', 'hotel-bookpeople', 'hotel-bookday', 'hotel-internet'], ['hotel-type', 'hotel-bookstay', 'hotel-name', 'hotel-parking', 'hotel-bookpeople', 'hotel-bookday', 'hotel-internet'], ['hotel-pricerange'], ['hotel-internet', 'hotel-pric

In [3]:
import json

# Load the dataset
with open('combined_test.json', 'r') as file:
    data = json.load(file)

utterances_test = []
slot_types_test = []

for dialogue in data:
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            current_slots = set()
            for frame in turn.get("frames", []):
                # Exclude "hotel-stars" slot value
                current_slots.update(slot for slot in frame.get("state", {}).get("slot_values", {}).keys() if slot != "hotel-stars")
            if current_slots:  # Only turns where slot_values present
                utterances_test.append(turn["utterance"])
                slot_types_test.append(list(current_slots))

print(f"Total utterances: {len(utterances_test)}")
print(f"Sample utterances: {utterances_test[:5]}")
print(f"Sample slot_types: {slot_types_test[:5]}")


Total utterances: 729
Sample utterances: ['Can you check the availability at the University Arms Hotel for five people?', 'Thursday please.', 'Actually for 5 nights and there will be 5 of us staying', 'Thank you so much, that is all that I need for now. Have a wonderful day.', 'Can you help me find an expensive hotel in the south to stay at? Thanks.']
Sample slot_types: [['hotel-bookpeople', 'hotel-name'], ['hotel-bookstay', 'hotel-bookpeople', 'hotel-name', 'hotel-bookday'], ['hotel-bookstay', 'hotel-bookpeople', 'hotel-name', 'hotel-bookday'], ['hotel-bookstay', 'hotel-bookpeople', 'hotel-name', 'hotel-bookday'], ['hotel-pricerange', 'hotel-area']]


In [4]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import make_scorer, f1_score
from sklearn.metrics import classification_report,jaccard_score,hamming_loss


In [7]:
# For proper usage, this cell must be ran after initilising the MultiLabelBinarizer
slot_types_train_bin = mlb.transform(slot_types_train)
slot_types_test_bin = mlb.transform(slot_types_test)
slot_types_validate_bin = mlb.transform(slot_types_validate)


In [7]:
from sklearn.model_selection import ParameterGrid
import numpy as np


pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', OneVsRestClassifier(SVC(kernel='linear', probability=True, random_state=42)))
])
parameters = ParameterGrid({
    # TF-IDF Vectorizer
    'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],  # Trigrams
    'tfidf__max_df': [0.5, 0.75, 1.0],  # max_df limit
    'tfidf__min_df': [1, 2, 3, 5],  # Minimum document frequency
    'tfidf__use_idf': [True, False],  # Effect of inverse document frequency weighting
    'tfidf__norm': ['l1', 'l2', None],  # Different normalization strategies
    'tfidf__sublinear_tf': [True, False],  # Sublinear term frequency scaling
    'tfidf__max_features': [None, 5000, 10000, 20000],

    # SVM Classifier
    'clf__estimator__C': [0.01, 0.1, 1, 10, 100],  # Broader range for regularization strength
    'clf__estimator__kernel': ['linear', 'rbf', 'poly'],  # Different kernels
    'clf__estimator__degree': [2, 3, 4],  # Degree for the polynomial kernel. Only relevant for 'poly' kernel.
    'clf__estimator__gamma': ['scale', 'auto', 0.1, 1, 10],  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
    'clf__estimator__class_weight': [None, 'balanced'],  # Option to handle imbalanced classes
})

best_score = 0
best_params = {}


In [37]:
N = 10  # Number of epochs
for epoch in range(1, N+1): 
    print(f"Epoch {epoch}/{N}")
    
    # subset of parameters to try in epoch
    sampled_parameters = np.random.choice(list(parameters), replace=False, size=2) 
    
    for params in sampled_parameters:
        # Setup pipeline with the current parameters
        pipeline.set_params(**params)
        
        # Fit the pipeline on the training data
        pipeline.fit(utterances_train, slot_types_train_bin)
        
        # Evaluate on the validation set
        current_score = f1_score(slot_types_validate_bin, pipeline.predict(utterances_validate), average='micro')
        
        # Update best score and parameters if current model is better
        if current_score > best_score:
            best_score = current_score
            best_params = params
            print(f"New best score: {best_score:.4f} with params: {params}")


Epoch 1/10
New best score: 0.3060 with params: {'clf__estimator__C': 100, 'clf__estimator__class_weight': 'balanced', 'clf__estimator__degree': 3, 'clf__estimator__gamma': 0.1, 'clf__estimator__kernel': 'poly', 'tfidf__max_df': 0.5, 'tfidf__max_features': 5000, 'tfidf__min_df': 3, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l1', 'tfidf__sublinear_tf': True, 'tfidf__use_idf': True}
New best score: 0.6014 with params: {'clf__estimator__C': 1, 'clf__estimator__class_weight': 'balanced', 'clf__estimator__degree': 2, 'clf__estimator__gamma': 'auto', 'clf__estimator__kernel': 'rbf', 'tfidf__max_df': 0.75, 'tfidf__max_features': 5000, 'tfidf__min_df': 5, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': None, 'tfidf__sublinear_tf': False, 'tfidf__use_idf': False}
Epoch 2/10
Epoch 3/10
New best score: 0.6195 with params: {'clf__estimator__C': 10, 'clf__estimator__class_weight': None, 'clf__estimator__degree': 2, 'clf__estimator__gamma': 0.1, 'clf__estimator__kernel': 'poly', 'tfidf__max_df': 0.75

In [38]:

pipeline.set_params(**best_params)


In [15]:
from sklearn.utils import shuffle

# Combining training and validation sets
X_combined = utterances_train + utterances_validate
y_combined_bin = np.concatenate((slot_types_train_bin, slot_types_validate_bin), axis=0)

# Ensuring data is properly shuffled
X_combined, y_combined_bin = shuffle(X_combined, y_combined_bin, random_state=42)

# Transform combined data
X_combined_tfidf = pipeline.named_steps['tfidf'].fit_transform(X_combined)

# Retrain
pipeline.named_steps['clf'].fit(X_combined_tfidf, y_combined_bin)

In [18]:
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report, accuracy_score, hamming_loss
from sklearn.preprocessing import MultiLabelBinarizer

# Setup the MultiLabelBinarizer if not already done
mlb = MultiLabelBinarizer()
y_train_mlb = mlb.fit_transform(slot_types_train)  # Assuming y_train is your multi-labels array
y_test_mlb = mlb.transform(slot_types_test)

# Naive classifier
dummy_clf = DummyClassifier(strategy='uniform', random_state=0)
dummy_clf.fit(utterances_train, y_train_mlb)  # Train on the multi-label binarized labels

y_pred_dummy = dummy_clf.predict(utterances_test)

y_pred_dummy_labels = mlb.inverse_transform(y_pred_dummy)

# Evaluate the naive classifier
print("Naive Classifier Test Results:")
print(classification_report(y_test_mlb, y_pred_dummy, target_names=mlb.classes_))

print("Hamming Loss:", hamming_loss(y_test_mlb, y_pred_dummy))

jaccard = jaccard_score(y_test_mlb, y_pred_dummy, average='samples')
print("Jaccard Score:", jaccard)


Naive Classifier Test Results:
                   precision    recall  f1-score   support

       hotel-area       0.23      0.48      0.31       178
    hotel-bookday       0.19      0.42      0.26       165
 hotel-bookpeople       0.21      0.47      0.29       162
   hotel-bookstay       0.22      0.51      0.31       167
   hotel-internet       0.26      0.52      0.35       186
       hotel-name       0.22      0.48      0.30       175
    hotel-parking       0.24      0.47      0.32       183
 hotel-pricerange       0.26      0.48      0.34       200
       hotel-type       0.30      0.47      0.36       222
   train-arriveby       0.20      0.55      0.29       131
 train-bookpeople       0.15      0.60      0.23        91
        train-day       0.40      0.54      0.46       278
  train-departure       0.40      0.54      0.46       277
train-destination       0.38      0.51      0.44       284
    train-leaveat       0.22      0.56      0.32       147

        micro avg      

In [5]:
import joblib
from sklearn.metrics import classification_report, hamming_loss, jaccard_score

# Load the trained model and MultiLabelBinarizer
pipeline = joblib.load("saved_models_synth\\finalized_model_slot_values_SVM.joblib")
mlb = joblib.load("saved_models_synth\\mlb_slot_values_SVM.joblib")


In [8]:
# Predict on the test set
y_pred_test_bin = pipeline.predict(utterances_test)

# Convert predictions back to labels
y_pred_test = mlb.inverse_transform(y_pred_test_bin)
slot_types_test_actual = mlb.inverse_transform(slot_types_test_bin)

# Evaluate performance
print("Test Set Results:")
print(classification_report(slot_types_test_bin, y_pred_test_bin, target_names=mlb.classes_))

hamming_loss_value = hamming_loss(slot_types_test_bin, y_pred_test_bin)
print("Hamming Loss:", hamming_loss_value)

jaccard = jaccard_score(slot_types_test_bin, y_pred_test_bin, average='samples')  # For multilabel classification
print("Jaccard Score:", jaccard)

Test Set Results:
                   precision    recall  f1-score   support

       hotel-area       0.12      0.01      0.01       178
    hotel-bookday       0.00      0.00      0.00       165
 hotel-bookpeople       0.44      0.05      0.09       162
   hotel-bookstay       0.49      0.25      0.33       167
   hotel-internet       0.00      0.00      0.00       186
       hotel-name       0.38      0.30      0.34       175
    hotel-parking       0.00      0.00      0.00       183
 hotel-pricerange       0.21      0.03      0.05       200
       hotel-type       0.66      0.10      0.18       222
   train-arriveby       0.33      0.02      0.03       131
 train-bookpeople       0.00      0.00      0.00        91
        train-day       0.54      0.12      0.19       278
  train-departure       0.54      0.14      0.22       277
train-destination       0.63      0.15      0.25       284
    train-leaveat       0.41      0.46      0.43       147

        micro avg       0.46      0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
# Display the mispredicted examples
print("\nMispredicted Examples:")
for i, (actual, predicted) in enumerate(zip(slot_types_test_actual, y_pred_test)):
    if set(actual) != set(predicted):  # Comparing sets for multi-label mispredictions
        print(f"Utterance: {utterances_test[i]}")
        print(f"True Labels: {', '.join(actual)}")
        print(f"Predicted Labels: {', '.join(predicted)}")
        print()


Mispredicted Examples:
Utterance: Can you check the availability at the University Arms Hotel for five people?
True Labels: hotel-bookpeople, hotel-name
Predicted Labels: hotel-area, hotel-bookday, hotel-internet, hotel-name, hotel-type, train-arriveby, train-day, train-leaveat

Utterance: Thursday please.
True Labels: hotel-bookday, hotel-bookpeople, hotel-bookstay, hotel-name
Predicted Labels: hotel-area, hotel-bookday, hotel-internet, hotel-name, hotel-type, train-arriveby, train-day, train-leaveat

Utterance: Actually for 5 nights and there will be 5 of us staying
True Labels: hotel-bookday, hotel-bookpeople, hotel-bookstay, hotel-name
Predicted Labels: hotel-area, hotel-bookday, hotel-internet, hotel-name, hotel-type, train-arriveby, train-day, train-leaveat

Utterance: Thank you so much, that is all that I need for now. Have a wonderful day.
True Labels: hotel-bookday, hotel-bookpeople, hotel-bookstay, hotel-name
Predicted Labels: hotel-area, hotel-bookday, hotel-internet, hotel

In [9]:
import plotly.graph_objects as go
from sklearn.metrics import precision_recall_curve  


fig = go.Figure()


precision = {}
recall = {}

for i, label in enumerate(mlb.classes_):
    # Calculate precision and recall for each class
    precision[label], recall[label], _ = precision_recall_curve(slot_types_test_bin[:, i], y_pred_test_bin[:, i])
    
    # Create a trace for the precision-recall curve
    fig.add_trace(go.Scatter(
        x=recall[label], 
        y=precision[label], 
        mode='lines', 
        name=label
    ))

# Add titles and labels
fig.update_layout(
    title='Precision-Recall Curve for Each Label',
    xaxis_title='Recall',
    yaxis_title='Precision',
    legend_title='Labels'
)

# Show the figure
fig.show()


In [41]:
import joblib

# Save the model as a checkpoint
model_filename = 'saved_models\\finalized_model_slot_values_SVM.joblib'
joblib.dump(pipeline, model_filename)

# Save the MultiLabelBinarizer
mlb_filename = 'saved_models\\mlb_slot_values_SVM.joblib'
joblib.dump(mlb, mlb_filename)

print(f"Model saved as {model_filename}")
print(f"Label binarizer saved as {mlb_filename}")


Model saved as saved_models\finalized_model_slot_values_SVM.joblib
Label binarizer saved as saved_models\mlb_slot_values_SVM.joblib


In [42]:
# Load the model and label binarizer
#loaded_pipeline = joblib.load(model_filename)
#loaded_mlb = joblib.load(mlb_filename)

In [12]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score, make_scorer

mlb = MultiLabelBinarizer()
slot_types_train_bin = mlb.fit_transform(slot_types_train)
slot_types_test_bin = mlb.transform(slot_types_test)
slot_types_validate_bin = mlb.transform(slot_types_validate)

# Define the pipeline
pipeline_nb = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', OneVsRestClassifier(MultinomialNB()))
])

# Define hyperparameters to tune
parameters = ParameterGrid({
    'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],
    'tfidf__max_df': [0.5, 0.75, 1.0],
    'tfidf__min_df': [1, 2, 3, 5],
    'tfidf__use_idf': [True, False],
    'tfidf__norm': ['l1', 'l2', None],
    'tfidf__sublinear_tf': [True, False],
    'tfidf__max_features': [None, 5000, 10000, 20000],
    
    'clf__estimator__alpha': [0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
})

# Scorer for GridSearchCV
f1_scorer = make_scorer(f1_score, average='micro')


In [57]:
best_score = 0
best_params = None


for epoch in range(1, N+1): 
    print(f"Epoch {epoch}/{N}")
    
    # subset of parameters to try in epoch
    sampled_parameters = np.random.choice(list(parameters), replace=False, size=2) 
    
    for params in sampled_parameters:
        # Setup pipeline with the current parameters
        pipeline_nb.set_params(**params)
        
        # Fit the pipeline on the training data
        pipeline_nb.fit(utterances_train, slot_types_train_bin)
        
        # Evaluate on the validation set
        current_score = f1_score(slot_types_validate_bin, pipeline_nb.predict(utterances_validate), average='micro')
        
        # Update best score and parameters if current model is better
        if current_score > best_score:
            best_score = current_score
            best_params = params
            print(f"New best score: {best_score:.4f} with params: {params}")


Epoch 1/10
New best score: 0.0579 with params: {'clf__estimator__alpha': 100.0, 'tfidf__max_df': 0.75, 'tfidf__max_features': 20000, 'tfidf__min_df': 5, 'tfidf__ngram_range': (1, 1), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': False, 'tfidf__use_idf': True}
New best score: 0.5769 with params: {'clf__estimator__alpha': 10.0, 'tfidf__max_df': 0.75, 'tfidf__max_features': None, 'tfidf__min_df': 2, 'tfidf__ngram_range': (1, 2), 'tfidf__norm': None, 'tfidf__sublinear_tf': False, 'tfidf__use_idf': False}
Epoch 2/10
Epoch 3/10
New best score: 0.6285 with params: {'clf__estimator__alpha': 10.0, 'tfidf__max_df': 0.5, 'tfidf__max_features': None, 'tfidf__min_df': 5, 'tfidf__ngram_range': (1, 1), 'tfidf__norm': None, 'tfidf__sublinear_tf': True, 'tfidf__use_idf': False}
Epoch 4/10
Epoch 5/10
Epoch 6/10
New best score: 0.6339 with params: {'clf__estimator__alpha': 0.1, 'tfidf__max_df': 0.5, 'tfidf__max_features': 5000, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 1), 'tfidf__norm': None, 'tfidf__

In [58]:
# Configure pipeline with the best parameters
pipeline_nb.set_params(**best_params)

# Combining training and validation sets
X_combined = utterances_train + utterances_validate
y_combined_bin = np.concatenate((slot_types_train_bin, slot_types_validate_bin), axis=0)

# Ensuring data is properly shuffled
X_combined, y_combined_bin = shuffle(X_combined, y_combined_bin, random_state=42)

# Transform combined data
X_combined_tfidf = pipeline_nb.named_steps['tfidf'].fit_transform(X_combined)

# Retrain
pipeline_nb.named_steps['clf'].fit(X_combined_tfidf, y_combined_bin)




In [10]:
import joblib
from sklearn.metrics import classification_report, hamming_loss, jaccard_score

# Load the trained model and MultiLabelBinarizer
pipeline_nb = joblib.load("saved_models_synth\\finalized_model_slot_values_NB.joblib")
mlb = joblib.load("saved_models_synth\\mlb_slot_values_NB.joblib")


In [15]:
print(mlb.classes_)

['hotel-area' 'hotel-bookday' 'hotel-bookpeople' 'hotel-bookstay'
 'hotel-internet' 'hotel-name' 'hotel-parking' 'hotel-pricerange'
 'hotel-type' 'train-arriveby' 'train-bookpeople' 'train-day'
 'train-departure' 'train-destination' 'train-leaveat']


In [16]:
test_classes = mlb.inverse_transform(slot_types_test_bin)

# Flatten the list of labels and get unique classes
unique_test_classes = set([label for sublist in test_classes for label in sublist])

print("Unique classes in the test dataset:", unique_test_classes)

Unique classes in the test dataset: {'hotel-parking', 'train-bookpeople', 'hotel-area', 'train-destination', 'hotel-name', 'hotel-pricerange', 'hotel-internet', 'hotel-bookpeople', 'hotel-bookstay', 'train-arriveby', 'hotel-type', 'train-leaveat', 'train-departure', 'hotel-bookday', 'train-day'}


In [11]:
import numpy as np

# Predict on the test set
y_pred_test_bin = pipeline_nb.predict(utterances_test)

# Check the output shape and adjust if it's not 2D
if y_pred_test_bin.ndim == 1:
    y_pred_test_bin = y_pred_test_bin.reshape(1, -1)

# Ensure there is at least one sample
if y_pred_test_bin.size == 0:
    print("No predictions were made. Check your input data and model.")
else:
    # Convert binary predictions back to label format
    try:
        y_pred_test = mlb.inverse_transform(y_pred_test_bin)
        slot_types_test_actual = mlb.inverse_transform(slot_types_test_bin)

        # Evaluate performance
        print("Final Test Set Results:")
        print(classification_report(slot_types_test_bin, y_pred_test_bin, target_names=mlb.classes_))

        hamming_loss_value = hamming_loss(slot_types_test_bin, y_pred_test_bin)
        print("Hamming Loss:", hamming_loss_value)

        jaccard = jaccard_score(slot_types_test_bin, y_pred_test_bin, average='samples')
        print("Jaccard Score:", jaccard)
    except ValueError as e:
        print("Error during model evaluation:", e)


Final Test Set Results:
                   precision    recall  f1-score   support

       hotel-area       0.24      1.00      0.39       178
    hotel-bookday       0.23      1.00      0.37       165
 hotel-bookpeople       0.00      0.00      0.00       162
   hotel-bookstay       0.00      0.00      0.00       167
   hotel-internet       0.26      1.00      0.41       186
       hotel-name       0.24      1.00      0.39       175
    hotel-parking       0.00      0.00      0.00       183
 hotel-pricerange       0.00      0.00      0.00       200
       hotel-type       0.30      1.00      0.47       222
   train-arriveby       0.18      1.00      0.30       131
 train-bookpeople       0.00      0.00      0.00        91
        train-day       0.38      1.00      0.55       278
  train-departure       0.00      0.00      0.00       277
train-destination       0.00      0.00      0.00       284
    train-leaveat       0.20      1.00      0.34       147

        micro avg       0.25  


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [12]:

from sklearn.metrics import precision_recall_curve


fig = go.Figure()

for i, label in enumerate(mlb.classes_):

    precision[label], recall[label], _ = precision_recall_curve(slot_types_test_bin[:, i], y_pred_test_bin[:, i])
    

    fig.add_trace(go.Scatter(
        x=recall[label], 
        y=precision[label], 
        mode='lines', 
        name=label
    ))


fig.update_layout(
    title='Precision-Recall Curve for Each Label',
    xaxis_title='Recall',
    yaxis_title='Precision',
    legend_title='Labels'
)

# Show the figure
fig.show()

In [None]:
# Save the model as a checkpoint
model_filename = 'saved_models\\finalized_model_slot_values_NB.joblib'
joblib.dump(pipeline, model_filename)

# Save the MultiLabelBinarizer
mlb_filename = 'saved_models\\mlb_slot_values_NB.joblib'
joblib.dump(mlb, mlb_filename)

print(f"Model saved as {model_filename}")
print(f"Label binarizer saved as {mlb_filename}")

Model saved as saved_models\finalized_model_slot_values_NB.joblib
Label binarizer saved as saved_models\mlb_slot_values_NB.joblib
