In [20]:
import json

# Load the dataset
with open('combined_validate.json', 'r') as file: 
    data = json.load(file)

utterances_validate = []
intents_validate = []

# Extract utterances and intents
for dialogue in data:
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            for frame in turn.get("frames", []):
                service = frame.get("service")
                active_intent = frame["state"]["active_intent"]
                
                if service in ["hotel", "train"] and active_intent != "NONE":
                    utterances_validate.append(turn["utterance"])
                    intents_validate.append(active_intent)


print(f"Total utterances: {len(utterances_validate)}")
print(f"Sample utterances: {utterances_validate[:5]}")
print(f"Sample intents: {intents_validate[:5]}")


Total utterances: 222
Sample utterances: ['I am looking for information on a hotel.', 'I am looking for a hotel called the A and B Guest House.', 'I would like to book it for 7 people for 4 nights starting on Sunday.', 'Hmm, how about a different hotel in the same price range?', "Great thank you that's all I needed today"]
Sample intents: ['find_hotel', 'find_hotel', 'book_hotel', 'book_hotel', 'book_hotel']


In [21]:
# Load the dataset
with open('combined_train.json', 'r') as file:  
    data = json.load(file)

utterances_train = []
intents_train = []

# Extract utterances and intents
for dialogue in data:
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            for frame in turn.get("frames", []):
                service = frame.get("service")
                active_intent = frame["state"]["active_intent"]
                
                if service in ["hotel", "train"] and active_intent != "NONE":
                    utterances_train.append(turn["utterance"])
                    intents_train.append(active_intent)


print(f"Total utterances: {len(utterances_train)}")
print(f"Sample utterances: {utterances_train[:5]}")
print(f"Sample intents: {intents_train[:5]}")


Total utterances: 3241
Sample utterances: ["I'm in search of a place to stay. A hotel, please, with free parking.", 'It would be great if it included wifi and was in the north.', 'Yes please,parking and WiFi and Car Rental.', 'Yes. I need the reference number too', 'Friday, 5 nights, beginning this friday.']
Sample intents: ['find_hotel', 'find_hotel', 'find_hotel', 'find_hotel', 'book_hotel']


In [25]:
# Load the dataset
with open('combined_test.json', 'r') as file:  
    data = json.load(file)

utterances_test = []
intents_test = []

# Extract utterances and intents
for dialogue in data:
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            for frame in turn.get("frames", []):
                service = frame.get("service")
                active_intent = frame["state"]["active_intent"]
                
                if service in ["hotel", "train"] and active_intent != "NONE":
                    utterances_test.append(turn["utterance"])
                    intents_test.append(active_intent)


print(f"Total utterances: {len(utterances_test)}")
print(f"Sample utterances: {utterances_test[:5]}")
print(f"Sample intents: {intents_test[:5]}")


Total utterances: 612
Sample utterances: ['Can you check the availability at the University Arms Hotel for five people?', 'Thursday please.', 'Actually for 5 nights and there will be 5 of us staying', 'Can you help me find an expensive hotel in the south to stay at? Thanks.', 'Do they have a guesthouse available?']
Sample intents: ['book_hotel', 'book_hotel', 'book_hotel', 'find_hotel', 'find_hotel']


In [4]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import make_scorer, f1_score
from sklearn.model_selection import ParameterGrid
import numpy as np

In [5]:
f1_scorer = make_scorer(f1_score, average='micro')

pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('svc', SVC(kernel='linear', probability=True, random_state=42))
])
parameters = ParameterGrid({
    'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],
    'tfidf__max_df': [0.5, 0.75, 1.0],
    'tfidf__min_df': [1, 2, 3, 5],
    'tfidf__use_idf': [True, False],
    'tfidf__norm': ['l1', 'l2', None],
    'tfidf__sublinear_tf': [True, False],
    'tfidf__max_features': [None, 5000, 10000, 20000],

    'svc__C': [0.01, 0.1, 1, 10, 100],  # Adjusted for direct use
    'svc__kernel': ['linear', 'rbf', 'poly'],
    'svc__degree': [2, 3, 4],  # Only relevant for 'poly' kernel.
    'svc__gamma': ['scale', 'auto', 0.1, 1, 10],  # Kernel coefficient
    'svc__class_weight': [None, 'balanced']
})

best_score = 0
best_params = {}

In [6]:
N = 10  # Number of epochs
for epoch in range(1, N+1): 
    print(f"Epoch {epoch}/{N}")
    
    # subset of parameters to try in epoch
    sampled_parameters = np.random.choice(list(parameters), replace=False, size=2) 
    
    for params in sampled_parameters:
        # Setup pipeline with the current parameters
        pipeline.set_params(**params)
        
        # Fit the pipeline on the training data
        pipeline.fit(utterances_train, intents_train)
        
        # Evaluate on the validation set
        current_score = f1_score(intents_validate, pipeline.predict(utterances_validate), average='micro')
        
        # Update best score and parameters if current model is better
        if current_score > best_score:
            best_score = current_score
            best_params = params
            print(f"New best score: {best_score:.4f} with params: {params}")

Epoch 1/10
New best score: 0.8468 with params: {'svc__C': 10, 'svc__class_weight': None, 'svc__degree': 2, 'svc__gamma': 1, 'svc__kernel': 'rbf', 'tfidf__max_df': 0.75, 'tfidf__max_features': 10000, 'tfidf__min_df': 3, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l1', 'tfidf__sublinear_tf': False, 'tfidf__use_idf': False}
Epoch 2/10
Epoch 3/10
New best score: 0.8559 with params: {'svc__C': 100, 'svc__class_weight': None, 'svc__degree': 3, 'svc__gamma': 'auto', 'svc__kernel': 'linear', 'tfidf__max_df': 0.5, 'tfidf__max_features': 5000, 'tfidf__min_df': 3, 'tfidf__ngram_range': (1, 1), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': False, 'tfidf__use_idf': False}
Epoch 4/10
Epoch 5/10
New best score: 0.8604 with params: {'svc__C': 0.1, 'svc__class_weight': 'balanced', 'svc__degree': 2, 'svc__gamma': 10, 'svc__kernel': 'poly', 'tfidf__max_df': 1.0, 'tfidf__max_features': None, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': False, 'tfidf__use_id

In [7]:
pipeline.set_params(**best_params)

In [8]:
from sklearn.utils import shuffle

# Combining training and validation sets
X_combined = utterances_train + utterances_validate
y_combined_bin = np.concatenate((intents_train, intents_validate), axis=0)

# Ensuring data is properly shuffled
X_combined, y_combined_bin = shuffle(X_combined, y_combined_bin, random_state=42)

# Transform combined data
X_combined_tfidf = pipeline.named_steps['tfidf'].fit_transform(X_combined)

# Retrain
pipeline.named_steps['svc'].fit(X_combined_tfidf, y_combined_bin)

In [5]:
import joblib
pipeline = joblib.load("saved_models_synth\\finalized_model_intents_SVM.joblib")

In [16]:
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report, accuracy_score


# Naive classifier
dummy_clf = DummyClassifier(strategy='most_frequent', random_state=0)
dummy_clf.fit(utterances_train, intents_train) 


y_pred_dummy = dummy_clf.predict(utterances_test)


print("Dummy Classifier Test Results:")
print(classification_report(intents_test, y_pred_dummy))
print("Test Accuracy:", accuracy_score(intents_test, y_pred_dummy))

Dummy Classifier Test Results:
              precision    recall  f1-score   support

  book_hotel       0.00      0.00      0.00       125
  book_train       0.00      0.00      0.00        48
  find_hotel       0.35      1.00      0.52       217
  find_train       0.00      0.00      0.00       222

    accuracy                           0.35       612
   macro avg       0.09      0.25      0.13       612
weighted avg       0.13      0.35      0.19       612

Test Accuracy: 0.3545751633986928



Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [26]:
# Predict and evaluate trained model on the test set
y_test_pred = pipeline.predict(utterances_test)
print("Test Set Evaluation:")
print(classification_report(intents_test, y_test_pred))
print("Test Accuracy:", accuracy_score(intents_test, y_test_pred))

Test Set Evaluation:
              precision    recall  f1-score   support

  book_hotel       0.50      0.03      0.06       125
  book_train       0.40      0.54      0.46        48
  find_hotel       0.50      0.75      0.60       217
  find_train       0.75      0.73      0.74       222

    accuracy                           0.58       612
   macro avg       0.54      0.51      0.47       612
weighted avg       0.59      0.58      0.53       612

Test Accuracy: 0.5800653594771242


In [8]:
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Calculate the Matthews Correlation Coefficient (MCC)
mcc = matthews_corrcoef(intents_test, y_test_pred)
print(f"Matthews Correlation Coefficient: {mcc}")

Matthews Correlation Coefficient: 0.3980966420539968


In [9]:
# Cross-Validation
# Define the number of folds for cross-validation
k_folds = 5

stratified_k_fold = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Perform cross-validation
cross_val_scores = cross_val_score(pipeline, utterances_test, intents_test, cv=stratified_k_fold, scoring='accuracy')
print(f"Cross-Validation Scores: {cross_val_scores}")
print(f"Mean CV Score: {cross_val_scores.mean():.2f}")
print(f"Standard Deviation in CV Scores: {cross_val_scores.std():.2f}")

Cross-Validation Scores: [0.83739837 0.82926829 0.79508197 0.79508197 0.81147541]
Mean CV Score: 0.81
Standard Deviation in CV Scores: 0.02


In [28]:
from sklearn.metrics import confusion_matrix
import plotly.figure_factory as ff
import numpy as np


conf_matrix = confusion_matrix(intents_test, y_test_pred)

# Showing percentages
conf_matrix_normalized = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]
conf_matrix_percentage = conf_matrix_normalized * 100  # Convert fractions to percentages

# Labels for the axes based on unique classes
labels_pred = sorted(set(intents_test))
labels_true = sorted(set(intents_test))


annotations = np.around(conf_matrix_percentage, decimals=2).astype(str)
annotations = np.char.add(annotations, '%')



fig = ff.create_annotated_heatmap(
    z=conf_matrix_percentage, 
    x=labels_pred, 
    y=labels_true, 
    colorscale="Plasma",  
    annotation_text=annotations,
    showscale=True
)


fig.update_layout(
    title='Confusion Matrix (Percentages)',
    xaxis=dict(title='Predicted Label'),
    yaxis=dict(title='True Label')
)

# Show the figure
fig.show()



In [6]:
import joblib

# Save the model as a checkpoint
model_filename = 'saved_models\\finalized_model_intents_SVM.joblib'
joblib.dump(pipeline, model_filename)


print(f"Model saved as {model_filename}")


Model saved as saved_models\finalized_model_intents_SVM.joblib


In [12]:
from sklearn.naive_bayes import MultinomialNB


pipeline_nb = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('nb', MultinomialNB())
])

parameters = ParameterGrid({
    'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],
    'tfidf__max_df': [0.5, 0.75, 1.0],
    'tfidf__min_df': [1, 2, 3, 5],
    'tfidf__use_idf': [True, False],
    'tfidf__max_features': [None, 5000, 10000, 20000],
    'tfidf__norm': ['l1', 'l2', None],
    'tfidf__sublinear_tf': [True, False],
    
    'nb__alpha': [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 100.0],
    'nb__fit_prior': [True, False]
})

In [13]:
best_score = 0
best_params = {}

for epoch in range(1, N+1):
    print(f"Epoch {epoch}/{N}")
    
    sampled_parameters = np.random.choice(list(parameters), replace=False, size=2)
    
    for params in sampled_parameters:
        pipeline_nb.set_params(**params)
        pipeline_nb.fit(utterances_train, intents_train)
        
        current_score = f1_score(intents_validate, pipeline_nb.predict(utterances_validate), average='micro')
        
        if current_score > best_score:
            best_score = current_score
            best_params = params
            print(f"New best score: {best_score:.4f} with params: {params}")

print("Best parameters found:", best_params)
print("Best score achieved:", best_score)

Epoch 1/10
New best score: 0.8018 with params: {'nb__alpha': 10.0, 'nb__fit_prior': True, 'tfidf__max_df': 0.75, 'tfidf__max_features': None, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 1), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': False, 'tfidf__use_idf': True}
New best score: 0.8514 with params: {'nb__alpha': 0.001, 'nb__fit_prior': True, 'tfidf__max_df': 0.5, 'tfidf__max_features': 5000, 'tfidf__min_df': 5, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l1', 'tfidf__sublinear_tf': True, 'tfidf__use_idf': False}
Epoch 2/10
New best score: 0.8649 with params: {'nb__alpha': 0.001, 'nb__fit_prior': True, 'tfidf__max_df': 0.5, 'tfidf__max_features': 10000, 'tfidf__min_df': 1, 'tfidf__ngram_range': (1, 2), 'tfidf__norm': None, 'tfidf__sublinear_tf': False, 'tfidf__use_idf': False}
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
New best score: 0.8694 with params: {'nb__alpha': 0.5, 'nb__fit_prior': False, 'tfidf__max_df': 0.75, 'tfidf__max_features': 5000, 'tfidf__min_df': 2, 't

In [14]:
# Apply the best parameters
pipeline_nb.set_params(**best_params)

In [15]:
from sklearn.utils import shuffle

# Combining training and validation sets
X_combined = utterances_train + utterances_validate
y_combined_bin = np.concatenate((intents_train, intents_validate), axis=0)

# Ensuring data is properly shuffled
X_combined, y_combined_bin = shuffle(X_combined, y_combined_bin, random_state=42)

# Transform combined data
X_combined_tfidf = pipeline_nb.named_steps['tfidf'].fit_transform(X_combined)

# Retrain
pipeline_nb.named_steps['nb'].fit(X_combined_tfidf, y_combined_bin)

In [11]:
import joblib
pipeline_nb = joblib.load("saved_models_synth\\finalized_model_intents_NB.joblib")

In [12]:
# Test set evaluation
y_test_pred = pipeline_nb.predict(utterances_test)
print("Test Set Evaluation:")
print(classification_report(intents_test, y_test_pred))
print("Test Accuracy:", accuracy_score(intents_test, y_test_pred))

Test Set Evaluation:
              precision    recall  f1-score   support

  book_hotel       0.46      0.47      0.46       125
  book_train       0.21      0.25      0.23        48
  find_hotel       0.63      0.41      0.50       217
  find_train       0.64      0.82      0.72       222

    accuracy                           0.56       612
   macro avg       0.48      0.49      0.48       612
weighted avg       0.57      0.56      0.55       612

Test Accuracy: 0.5588235294117647


In [13]:
# Calculate the Matthews Correlation Coefficient (MCC)
mcc = matthews_corrcoef(intents_test, y_test_pred)
print(f"Matthews Correlation Coefficient: {mcc}")

Matthews Correlation Coefficient: 0.3757751934344207


In [14]:
# Cross-Validation
# Define the number of folds for cross-validation
k_folds = 5

stratified_k_fold = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Perform cross-validation
cross_val_scores = cross_val_score(pipeline_nb, utterances_test, intents_test, cv=stratified_k_fold, scoring='accuracy')
print(f"Cross-Validation Scores: {cross_val_scores}")
print(f"Mean CV Score: {cross_val_scores.mean():.2f}")
print(f"Standard Deviation in CV Scores: {cross_val_scores.std():.2f}")

Cross-Validation Scores: [0.87804878 0.81300813 0.86065574 0.85245902 0.81967213]
Mean CV Score: 0.84
Standard Deviation in CV Scores: 0.02


In [15]:
conf_matrix = confusion_matrix(intents_test, y_test_pred)

# Normalize the confusion matrix to show percentages
conf_matrix_normalized = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]
conf_matrix_percentage = conf_matrix_normalized * 100  # Convert fractions to percentages

labels = sorted(set(intents_test))

annotations = np.around(conf_matrix_percentage, decimals=2).astype(str)
annotations = np.char.add(annotations, '%')

# Custom colorscale
red_to_green_colorscale = [
    [0.0, 'red'],  # red for 0%
    [0.5, 'yellow'],  # yellow for 50%
    [1.0, 'green']  # green for 100%
]

# Heatmap
fig = ff.create_annotated_heatmap(
    z=conf_matrix_percentage, 
    x=labels, 
    y=labels.sort(reverse=True), 
    colorscale=red_to_green_colorscale, 
    annotation_text=annotations,
    showscale=True
)


fig.update_layout(
    title='Confusion Matrix (Percentages)',
    xaxis=dict(title='Predicted Label'),
    yaxis=dict(title='True Label')
)

fig.show()

In [17]:
# Save the model as a checkpoint
model_filename = 'saved_models\\finalized_model_intents_NB.joblib'
joblib.dump(pipeline_nb, model_filename)


print(f"Model saved as {model_filename}")

Model saved as saved_models\finalized_model_intents_NB.joblib
