In [1]:
import json

# Load the dataset
with open('combined_train.json', 'r') as file:
    data = json.load(file)

# Extract utterances and their associated domain (hotel or train)
utterances = []
labels = []

for dialogue in data:
    dialogue_service = dialogue["services"][0] if dialogue["services"] else None
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            utterances.append(turn["utterance"])
            # label the turn
            if dialogue_service == "hotel":
                labels.append("hotel")
            elif dialogue_service == "train":
                labels.append("train")
            else:
                # 'other' if the service is not one of the target domains
                labels.append("other")

# Filter out 'other' labels 
filtered_utterances_train = [utterance for utterance, label in zip(utterances, labels) if label in ["hotel", "train"]]
filtered_labels_train = [label for label in labels if label in ["hotel", "train"]]

len(filtered_utterances_train), len(filtered_labels_train), filtered_utterances_train[:5], filtered_labels_train[:5]

(3957,
 3957,
 ["I'm in search of a place to stay. A hotel, please, with free parking.",
  'It would be great if it included wifi and was in the north.',
  'Yes please,parking and WiFi and Car Rental.',
  'Yes. I need the reference number too',
  'Friday, 5 nights, beginning this friday.'],
 ['hotel', 'hotel', 'hotel', 'hotel', 'hotel'])

In [2]:
# Count instances of 'train' and 'hotel'
count_train = filtered_labels_train.count("train")
count_hotel = filtered_labels_train.count("hotel")

print("Count of 'train' labels:", count_train)
print("Count of 'hotel' labels:", count_hotel)


Count of 'train' labels: 1829
Count of 'hotel' labels: 2128


In [3]:
import json

# Load the dataset
with open('combined_validate.json', 'r') as file:
    data = json.load(file)

# Extract utterances and their associated domain (hotel or train)
utterances = []
labels = []

for dialogue in data:
    dialogue_service = dialogue["services"][0] if dialogue["services"] else None
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            utterances.append(turn["utterance"])
            # label the turn
            if dialogue_service == "hotel":
                labels.append("hotel")
            elif dialogue_service == "train":
                labels.append("train")
            else:
                # 'other' if the service is not one of the target domains
                labels.append("other")

# Filter out 'other' labels
filtered_utterances_validation = [utterance for utterance, label in zip(utterances, labels) if label in ["hotel", "train"]]
filtered_labels_validation = [label for label in labels if label in ["hotel", "train"]]

len(filtered_utterances_validation), len(filtered_labels_validation), filtered_utterances_validation[:5], filtered_labels_validation[:5]


(270,
 270,
 ['I am looking for information on a hotel.',
  'I am looking for a hotel called the A and B Guest House.',
  'I would like to book it for 7 people for 4 nights starting on Sunday.',
  'Hmm, how about a different hotel in the same price range?',
  "Great thank you that's all I needed today"],
 ['hotel', 'hotel', 'hotel', 'hotel', 'hotel'])

In [4]:
import json

# Load the dataset
with open('combined_test.json', 'r') as file:
    data = json.load(file)

# Extract utterances and their associated domain (hotel or train)
utterances = []
labels = []

for dialogue in data:
    dialogue_service = dialogue["services"][0] if dialogue["services"] else None
    for turn in dialogue["turns"]:
        if turn["speaker"] == "USER":
            utterances.append(turn["utterance"])
            # label the turn
            if dialogue_service == "hotel":
                labels.append("hotel")
            elif dialogue_service == "train":
                labels.append("train")
            else:
                # Label as 'other'
                labels.append("other")

# Filter out 'other'
filtered_utterances_test = [utterance for utterance, label in zip(utterances, labels) if label in ["hotel", "train"]]
filtered_labels_test = [label for label in labels if label in ["hotel", "train"]]

len(filtered_utterances_test), len(filtered_labels_test), filtered_utterances_test[:5], filtered_labels_test[:5]


(741,
 741,
 ['Can you check the availability at the University Arms Hotel for five people?',
  'Thursday please.',
  'Actually for 5 nights and there will be 5 of us staying',
  'Thank you so much, that is all that I need for now. Have a wonderful day.',
  'Can you help me find an expensive hotel in the south to stay at? Thanks.'],
 ['hotel', 'hotel', 'hotel', 'hotel', 'hotel'])

In [5]:
print(set(filtered_labels_train))


{'hotel', 'train'}


In [6]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import make_scorer, f1_score
from sklearn.model_selection import ParameterGrid
import numpy as np

In [4]:


f1_scorer = make_scorer(f1_score, average='micro')

pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('svc', SVC(kernel='linear', probability=True, random_state=42))
])
parameters = ParameterGrid({
    'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],
    'tfidf__max_df': [0.5, 0.75, 1.0],
    'tfidf__min_df': [1, 2, 3, 5],
    'tfidf__use_idf': [True, False],
    'tfidf__norm': ['l1', 'l2', None],
    'tfidf__sublinear_tf': [True, False],
    'tfidf__max_features': [None, 5000, 10000, 20000],

    'svc__C': [0.01, 0.1, 1, 10, 100],  # Adjusted for direct use
    'svc__kernel': ['linear', 'rbf', 'poly'],
    'svc__degree': [2, 3, 4],  # Only relevant for 'poly' kernel.
    'svc__gamma': ['scale', 'auto', 0.1, 1, 10],  # Kernel coefficient
    'svc__class_weight': [None, 'balanced']
})

best_score = 0
best_params = {}

In [91]:
N = 10  # Number of epochs
for epoch in range(1, N+1): 
    print(f"Epoch {epoch}/{N}")
    
    # subset of parameters to try in epoch
    sampled_parameters = np.random.choice(list(parameters), replace=False, size=2) 
    
    for params in sampled_parameters:
        # Setup pipeline with the current parameters
        pipeline.set_params(**params)
        
        # Fit the pipeline on the training data
        pipeline.fit(filtered_utterances_train, filtered_labels_train)
        
        # Evaluate on the validation set
        current_score = f1_score(filtered_labels_validation, pipeline.predict(filtered_utterances_validation), average='micro')
        
        # Update best score and parameters if current model is better
        if current_score > best_score:
            best_score = current_score
            best_params = params
            print(f"New best score: {best_score:.4f} with params: {params}")

Epoch 1/10
New best score: 0.5370 with params: {'svc__C': 0.01, 'svc__class_weight': None, 'svc__degree': 3, 'svc__gamma': 0.1, 'svc__kernel': 'rbf', 'tfidf__max_df': 0.5, 'tfidf__max_features': 5000, 'tfidf__min_df': 2, 'tfidf__ngram_range': (1, 2), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': False, 'tfidf__use_idf': False}
New best score: 0.8222 with params: {'svc__C': 0.1, 'svc__class_weight': None, 'svc__degree': 3, 'svc__gamma': 10, 'svc__kernel': 'rbf', 'tfidf__max_df': 0.75, 'tfidf__max_features': 5000, 'tfidf__min_df': 5, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l1', 'tfidf__sublinear_tf': True, 'tfidf__use_idf': False}
Epoch 2/10
New best score: 0.8630 with params: {'svc__C': 1, 'svc__class_weight': None, 'svc__degree': 4, 'svc__gamma': 'scale', 'svc__kernel': 'poly', 'tfidf__max_df': 0.5, 'tfidf__max_features': 20000, 'tfidf__min_df': 5, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': True, 'tfidf__use_idf': True}
Epoch 3/10
New best score: 0.87

In [5]:
pipeline.set_params(**best_params)

In [93]:
from sklearn.utils import shuffle

# Combining training and validation sets
X_combined = filtered_utterances_train + filtered_utterances_validation
y_combined_bin = np.concatenate((filtered_labels_train, filtered_labels_validation), axis=0)

# Ensuring data is properly shuffled
X_combined, y_combined_bin = shuffle(X_combined, y_combined_bin, random_state=42)

# Transform combined data
X_combined_tfidf = pipeline.named_steps['tfidf'].fit_transform(X_combined)

# Retrain
pipeline.named_steps['svc'].fit(X_combined_tfidf, y_combined_bin)

In [9]:
import joblib


pipeline = joblib.load("saved_models_synth\\finalized_model_domains_SVM.joblib")

In [10]:
# Predict and evaluate on the test set
y_test_pred = pipeline.predict(filtered_utterances_test)
print("Test Set Evaluation:")
print(classification_report(filtered_labels_test, y_test_pred))
print("Test Accuracy:", accuracy_score(filtered_labels_test, y_test_pred))

Test Set Evaluation:
              precision    recall  f1-score   support

       hotel       0.93      0.85      0.89       407
       train       0.84      0.93      0.88       334

    accuracy                           0.89       741
   macro avg       0.89      0.89      0.88       741
weighted avg       0.89      0.89      0.89       741

Test Accuracy: 0.8852901484480432


In [7]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Cross-Validation
# Define the number of folds for cross-validation
k_folds = 5
stratified_k_fold = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Perform cross-validation
cross_val_scores = cross_val_score(pipeline, filtered_utterances_test, filtered_labels_test, cv=stratified_k_fold, scoring='accuracy')
print(f"Cross-Validation Scores: {cross_val_scores}")
print(f"Mean CV Score: {cross_val_scores.mean():.2f}")
print(f"Standard Deviation in CV Scores: {cross_val_scores.std():.2f}")

Cross-Validation Scores: [0.55033557 0.55405405 0.5472973  0.5472973  0.5472973 ]
Mean CV Score: 0.55
Standard Deviation in CV Scores: 0.00


In [9]:
from sklearn.metrics import confusion_matrix
import plotly.figure_factory as ff

# Compute the confusion matrix
conf_matrix = confusion_matrix(filtered_labels_test, y_test_pred)

# Normalize the confusion matrix
conf_matrix_normalized = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]

# Convert the proportions to percentages
conf_matrix_percentage = conf_matrix_normalized * 100

labels = sorted(set(filtered_labels_test))

# Custom colorscale
red_to_green_colorscale = [
    [0.0, 'red'],  # red for 0%
    [0.5, 'yellow'],  # yellow for 50%
    [1.0, 'green']  # green for 100%
]

# Heatmap
fig = ff.create_annotated_heatmap(
    z=conf_matrix_percentage, 
    x=labels, 
    y=labels, 
    colorscale=red_to_green_colorscale,
    annotation_text=np.around(conf_matrix_percentage, decimals=2).astype(str),
    showscale=True
)

fig.update_layout(
    title='Confusion Matrix (Normalized)',
    xaxis=dict(title='Predicted Label'),
    yaxis=dict(title='True Label'),
    coloraxis_colorbar=dict(
        title='Percentage (%)'
    )
)

fig.show()


In [10]:
import plotly.graph_objects as go
from sklearn.metrics import roc_curve, roc_auc_score

from sklearn.preprocessing import LabelBinarizer

# Convert string labels to binary
binarizer = LabelBinarizer()
binary_filtered_labels_test = binarizer.fit_transform(filtered_labels_test).ravel()


y_scores = pipeline.decision_function(filtered_utterances_test)

# Calculate ROC curve data points
fpr, tpr, thresholds = roc_curve(binary_filtered_labels_test, y_scores)

y_scores = pipeline.decision_function(filtered_utterances_test)

# Calculate ROC curve data points
fpr, tpr, thresholds = roc_curve(binary_filtered_labels_test, y_scores)

# Calculate the AUC score
auc_score = roc_auc_score(binary_filtered_labels_test, y_scores)

fig = go.Figure()

# ROC curve trace
fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name='ROC Curve (area = %0.2f)' % auc_score))

# Line representing random guessing
fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', showlegend=False, line=dict(dash='dash')))

fig.update_layout(
    title='Receiver Operating Characteristic (ROC) Curve',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    xaxis=dict(scaleanchor="y", scaleratio=1, constrain='domain'),
    yaxis=dict(scaleanchor="x", scaleratio=1, constrain='domain'),
    legend=dict(yanchor="bottom", xanchor="right")
)

fig.show()


In [100]:
import joblib

# Save the model as a checkpoint
model_filename = 'saved_models\\finalized_model_domains_SVM.joblib'
joblib.dump(pipeline, model_filename)


print(f"Model saved as {model_filename}")


Model saved as saved_models\finalized_model_domains_SVM.joblib


In [95]:
from sklearn.naive_bayes import MultinomialNB


pipeline_nb = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('nb', MultinomialNB())
])

parameters = ParameterGrid({
    'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],
    'tfidf__max_df': [0.5, 0.75, 1.0],
    'tfidf__min_df': [1, 2, 3, 5],
    'tfidf__use_idf': [True, False],
    'tfidf__max_features': [None, 5000, 10000, 20000],
    'tfidf__norm': ['l1', 'l2', None],
    'tfidf__sublinear_tf': [True, False],
    
    'nb__alpha': [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 100.0],
    'nb__fit_prior': [True, False]
})

In [96]:
best_score = 0
best_params = {}

for epoch in range(1, N+1):
    print(f"Epoch {epoch}/{N}")
    
    sampled_parameters = np.random.choice(list(parameters), replace=False, size=2)
    
    for params in sampled_parameters:
        pipeline_nb.set_params(**params)
        pipeline_nb.fit(filtered_utterances_train, filtered_labels_train)
        
        current_score = f1_score(filtered_labels_validation, pipeline_nb.predict(filtered_utterances_validation), average='micro')
        
        if current_score > best_score:
            best_score = current_score
            best_params = params
            print(f"New best score: {best_score:.4f} with params: {params}")

print("Best parameters found:", best_params)
print("Best score achieved:", best_score)

Epoch 1/10
New best score: 0.8444 with params: {'nb__alpha': 0.1, 'nb__fit_prior': True, 'tfidf__max_df': 0.5, 'tfidf__max_features': None, 'tfidf__min_df': 3, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l1', 'tfidf__sublinear_tf': True, 'tfidf__use_idf': True}
New best score: 0.8481 with params: {'nb__alpha': 1.0, 'nb__fit_prior': True, 'tfidf__max_df': 0.5, 'tfidf__max_features': None, 'tfidf__min_df': 3, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': True, 'tfidf__use_idf': True}
Epoch 2/10
New best score: 0.8519 with params: {'nb__alpha': 0.1, 'nb__fit_prior': True, 'tfidf__max_df': 1.0, 'tfidf__max_features': 20000, 'tfidf__min_df': 2, 'tfidf__ngram_range': (1, 2), 'tfidf__norm': None, 'tfidf__sublinear_tf': False, 'tfidf__use_idf': True}
Epoch 3/10
Epoch 4/10
Epoch 5/10
New best score: 0.8556 with params: {'nb__alpha': 0.001, 'nb__fit_prior': False, 'tfidf__max_df': 0.75, 'tfidf__max_features': None, 'tfidf__min_df': 2, 'tfidf__ngram_range': (1, 2), '

In [97]:
# Apply the best parameters
pipeline_nb.set_params(**best_params)

In [98]:
from sklearn.utils import shuffle

# Combining training and validation sets
X_combined = filtered_utterances_train + filtered_utterances_validation
y_combined_bin = np.concatenate((filtered_labels_train, filtered_labels_validation), axis=0)

# Ensuring data is properly shuffled
X_combined, y_combined_bin = shuffle(X_combined, y_combined_bin, random_state=42)

# Transform combined data
X_combined_tfidf = pipeline_nb.named_steps['tfidf'].fit_transform(X_combined)

# Retrain
pipeline_nb.named_steps['nb'].fit(X_combined_tfidf, y_combined_bin)

In [11]:
pipeline_nb = joblib.load("saved_models_synth\\finalized_model_domains_NB.joblib")

In [12]:
# Test set evaluation
y_test_pred = pipeline_nb.predict(filtered_utterances_test)
print("Test Set Evaluation:")
print(classification_report(filtered_labels_test, y_test_pred))
print("Test Accuracy:", accuracy_score(filtered_labels_test, y_test_pred))

Test Set Evaluation:
              precision    recall  f1-score   support

       hotel       0.82      0.74      0.78       407
       train       0.72      0.80      0.76       334

    accuracy                           0.77       741
   macro avg       0.77      0.77      0.77       741
weighted avg       0.77      0.77      0.77       741

Test Accuracy: 0.7678812415654521


In [13]:
# Cross-Validation
# Define the number of folds for cross-validation
k_folds = 5
stratified_k_fold = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# Perform cross-validation
cross_val_scores = cross_val_score(pipeline_nb, filtered_utterances_test, filtered_labels_test, cv=stratified_k_fold, scoring='accuracy')
print(f"Cross-Validation Scores: {cross_val_scores}")
print(f"Mean CV Score: {cross_val_scores.mean():.2f}")
print(f"Standard Deviation in CV Scores: {cross_val_scores.std():.2f}")

Cross-Validation Scores: [0.84563758 0.89189189 0.77702703 0.83783784 0.83783784]
Mean CV Score: 0.84
Standard Deviation in CV Scores: 0.04


In [14]:

# Compute the confusion matrix
conf_matrix = confusion_matrix(filtered_labels_test, y_test_pred)

# Normalize the confusion matrix
conf_matrix_normalized = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]


conf_matrix_percentage = conf_matrix_normalized * 100

labels = sorted(set(filtered_labels_test))

# Custom colorscale
red_to_green_colorscale = [
    [0.0, 'red'],  # red for 0%
    [0.5, 'yellow'],  # yellow for 50%
    [1.0, 'green']  # green for 100%
]

# Heatmap
fig = ff.create_annotated_heatmap(
    z=conf_matrix_percentage, 
    x=labels, 
    y=labels, 
    colorscale=red_to_green_colorscale,
    annotation_text=np.around(conf_matrix_percentage, decimals=2).astype(str),
    showscale=True
)

fig.update_layout(
    title='Confusion Matrix (Normalized)',
    xaxis=dict(title='Predicted Label'),
    yaxis=dict(title='True Label'),
    coloraxis_colorbar=dict(
        title='Percentage (%)'
    )
)

fig.show()


In [16]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import roc_curve, roc_auc_score
import plotly.graph_objs as go

# Convert string labels to binary
binarizer = LabelBinarizer()
binary_filtered_labels_test = binarizer.fit_transform(filtered_labels_test).ravel()

y_scores = pipeline_nb.predict_proba(filtered_utterances_test)[:, 1]

# ROC curve data points
fpr, tpr, thresholds = roc_curve(binary_filtered_labels_test, y_scores)

# AUC score
auc_score = roc_auc_score(binary_filtered_labels_test, y_scores)

fig = go.Figure()

# ROC curve trace
fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name=f'ROC Curve (area = {auc_score:.2f})'))

# Line representing random guessing
fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', showlegend=False, line=dict(dash='dash')))

fig.update_layout(
    title='Receiver Operating Characteristic (ROC) Curve',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    xaxis=dict(scaleanchor="y", scaleratio=1, constrain='domain'),
    yaxis=dict(scaleanchor="x", scaleratio=1, constrain='domain'),
    legend=dict(yanchor="bottom", xanchor="right")
)

fig.show()


In [101]:
# Save the model as a checkpoint
model_filename = 'saved_models\\finalized_model_domains_NB.joblib'
joblib.dump(pipeline_nb, model_filename)


print(f"Model saved as {model_filename}")

Model saved as saved_models\finalized_model_domains_NB.joblib
