In [1]:
import json

# Load the dataset
with open('normalised_intent_validation_slotfixed_set.json', 'r') as file:
    data = json.load(file)


utterances = []
labels = []

for scenario in data:
    for dialogue in scenario['scenarios']:
        dialogue_service = None
        for turn in dialogue['turns']:
            if turn['frames']:  # Check if there are any frames to define service
                dialogue_service = turn['frames'][0]['service']  
            if turn["speaker"] == "USER": 
                utterances.append(turn['utterance'])
                # label the turn based on the service
                if dialogue_service in ["hotel", "train"]:
                    labels.append(dialogue_service)
                else:
                    # 'other' if the service is not one of the target domains
                    labels.append("other")

# Filter out 'other' labels 
filtered_utterances_validation = [utterance for utterance, label in zip(utterances, labels) if label in ["hotel", "train"]]
filtered_labels_validation = [label for label in labels if label in ["hotel", "train"]]

len(filtered_utterances_validation), len(filtered_labels_validation), filtered_utterances_validation[:5], filtered_labels_validation[:5]


(180,
 180,
 ['Can you tell me when my train departs and if there are any luggage storage options?',
  "I'll go for the onboard storage. Does it cost extra?",
  "No, that's all I needed to know. Thank you for your help!",
  'Can you tell me the train schedule and any nearby attractions?',
  "I'd like more details on the park, please."],
 ['train', 'train', 'train', 'train', 'train'])

In [2]:
print(set(filtered_labels_validation))

{'hotel', 'train'}


In [2]:
import json

# Load the dataset
with open('normalised_intent_train_slotfixed_set.json', 'r') as file:
    data = json.load(file)


utterances = []
labels = []

for scenario in data:
    for dialogue in scenario['scenarios']:
        dialogue_service = None
        for turn in dialogue['turns']:
            if turn['frames']:  # Check if there are any frames to define service
                dialogue_service = turn['frames'][0]['service'] 
            if turn["speaker"] == "USER":  # Assuming user turns are from "participant1"
                utterances.append(turn['utterance'])
                # label the turn based on the service
                if dialogue_service in ["hotel", "train"]:
                    labels.append(dialogue_service)
                else:
                    # 'other' if the service is not one of the target domains
                    labels.append("other")

# Filter out 'other' labels 
filtered_utterances_train = [utterance for utterance, label in zip(utterances, labels) if label in ["hotel", "train"]]
filtered_labels_train = [label for label in labels if label in ["hotel", "train"]]

len(filtered_utterances_train), len(filtered_labels_train), filtered_utterances_train[:5], filtered_labels_train[:5]


(2942,
 2942,
 ['Can you tell me the train schedule and how much the tickets cost?',
  'Yes, I would like to purchase a ticket for the 3 PM train. Can you also tell me about the upcoming stops?',
  "No, that's all I needed. Thank you for your help!",
  'Actually, I just remembered, can I add a return ticket as well?',
  'Can you tell me the train schedule and how to get to the nearest station exit?'],
 ['train', 'train', 'train', 'train', 'train'])

In [31]:
print(set(filtered_labels_train))

{'hotel', 'train'}


In [3]:
import json

# Load the dataset
with open('normalised_intent_test_slotfixed_set.json', 'r') as file:
    data = json.load(file)

utterances = []
labels = []

for scenario in data:
    for dialogue in scenario['scenarios']:
        dialogue_service = None
        for turn in dialogue['turns']:
            if turn['frames']:  # Check if there are any frames to define service
                dialogue_service = turn['frames'][0]['service']
            if turn["speaker"] == "USER":  
                utterances.append(turn['utterance'])
                # label the turn based on the service
                if dialogue_service in ["hotel", "train"]:
                    labels.append(dialogue_service)
                else:
                    # 'other' if the service is not one of the target domains
                    labels.append("other")

# Filter out 'other' labels 
filtered_utterances_test = [utterance for utterance, label in zip(utterances, labels) if label in ["hotel", "train"]]
filtered_labels_test = [label for label in labels if label in ["hotel", "train"]]

len(filtered_utterances_test), len(filtered_labels_test), filtered_utterances_test[:5], filtered_labels_test[:5]


(562,
 562,
 ['Can you tell me when the next train arrives and if there are any delays?',
  'Thank you! How often do trains run to this destination?',
  'Great, can I buy a ticket for the next train at the station?',
  'Do I need to validate my ticket before boarding?',
  'Can you tell me when the next stop is?'],
 ['train', 'train', 'train', 'train', 'train'])

In [4]:
print(set(filtered_labels_test))

{'hotel', 'train'}


In [5]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import make_scorer, f1_score
from sklearn.model_selection import ParameterGrid
import numpy as np

In [193]:

from sklearn.decomposition import TruncatedSVD


class_weights = [
              {'train': 0.7, 'hotel': 0.6},  # Custom weights reducing the "train" impact
]



f1_scorer = make_scorer(f1_score, average='micro')

pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    #('svd', TruncatedSVD( n_components = 100 ,random_state=42)),
    ('svc', SVC(kernel='linear', probability=True, random_state=42))
])
parameters = ParameterGrid({
    'tfidf__ngram_range': [(1, 2)],
    'tfidf__use_idf': [True],
    'tfidf__norm': ['l2'],
    'tfidf__sublinear_tf': [True],
    'svc__C': [0.001, 0.01, 0.1],
    'svc__kernel': ['linear'], 
    'svc__class_weight': class_weights
})

best_score = 0
best_params = {}

In [194]:
N = 5  # Number of epochs
for epoch in range(1, N+1): 
    print(f"Epoch {epoch}/{N}")
    
    # subset of parameters to try in epoch
    sampled_parameters = np.random.choice(list(parameters), replace=False, size=2) 
    
    for params in sampled_parameters:
        # Setup pipeline with the current parameters
        pipeline.set_params(**params)
        
        pipeline.fit(filtered_utterances_train, filtered_labels_train)
        
        current_score = f1_score(filtered_labels_validation, pipeline.predict(filtered_utterances_validation), average='micro')
        
        # Update best score and parameters if current model is better
        if current_score > best_score:
            best_score = current_score
            best_params = params
            print(f"New best score: {best_score:.4f} with params: {params}")

Epoch 1/5
New best score: 0.5444 with params: {'svc__C': 0.01, 'svc__class_weight': {'train': 0.8, 'hotel': 0.6}, 'svc__kernel': 'linear', 'tfidf__ngram_range': (1, 2), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': True, 'tfidf__use_idf': True}
New best score: 0.9278 with params: {'svc__C': 0.1, 'svc__class_weight': {'train': 0.8, 'hotel': 0.6}, 'svc__kernel': 'linear', 'tfidf__ngram_range': (1, 2), 'tfidf__norm': 'l2', 'tfidf__sublinear_tf': True, 'tfidf__use_idf': True}
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [195]:
pipeline.set_params(**best_params)

In [196]:
from sklearn.utils import shuffle

# Combining training and validation sets
X_combined = filtered_utterances_train + filtered_utterances_validation
y_combined_bin = np.concatenate((filtered_labels_train, filtered_labels_validation), axis=0)

# Ensuring data is properly shuffled
X_combined, y_combined_bin = shuffle(X_combined, y_combined_bin, random_state=42)

# Transform combined data
X_combined_tfidf = pipeline.named_steps['tfidf'].fit_transform(X_combined)

# Retrain
pipeline.named_steps['svc'].fit(X_combined_tfidf, y_combined_bin)

In [197]:
# Predict and evaluate on the test set
y_test_pred = pipeline.predict(filtered_utterances_test)
print("Test Set Evaluation:")
print(classification_report(filtered_labels_test, y_test_pred))
print("Test Accuracy:", accuracy_score(filtered_labels_test, y_test_pred))

Test Set Evaluation:
              precision    recall  f1-score   support

       hotel       0.97      0.91      0.94       280
       train       0.92      0.97      0.94       282

    accuracy                           0.94       562
   macro avg       0.94      0.94      0.94       562
weighted avg       0.94      0.94      0.94       562

Test Accuracy: 0.9395017793594306


In [198]:
from sklearn.metrics import confusion_matrix
import plotly.figure_factory as ff

# Compute the confusion matrix
conf_matrix = confusion_matrix(filtered_labels_test, y_test_pred)

# Normalize the confusion matrix
conf_matrix_normalized = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]

# Convert the proportions to percentages
conf_matrix_percentage = conf_matrix_normalized * 100


labels = sorted(set(filtered_labels_test)) 


fig = ff.create_annotated_heatmap(
    z=conf_matrix_percentage, 
    x=labels, 
    y=labels, 
    colorscale='Viridis',
    annotation_text=np.around(conf_matrix_percentage, decimals=2).astype(str),
    showscale=True
)


fig.update_layout(
    title='Confusion Matrix (Normalized)',
    xaxis=dict(title='Predicted Label'),
    yaxis=dict(title='True Label'),
    coloraxis_colorbar=dict(
        title='Percentage (%)'
    )
)


fig.show()


In [199]:
import plotly.graph_objects as go
from sklearn.metrics import roc_curve, roc_auc_score

from sklearn.preprocessing import LabelBinarizer

# Convert string labels to binary
binarizer = LabelBinarizer()
binary_filtered_labels_test = binarizer.fit_transform(filtered_labels_test).ravel()


y_scores = pipeline.decision_function(filtered_utterances_test)

# ROC curve data points
fpr, tpr, thresholds = roc_curve(binary_filtered_labels_test, y_scores)

y_scores = pipeline.decision_function(filtered_utterances_test) # For binary classifiers

# ROC curve data points
fpr, tpr, thresholds = roc_curve(binary_filtered_labels_test, y_scores)

auc_score = roc_auc_score(binary_filtered_labels_test, y_scores)

fig = go.Figure()

# ROC curve trace
fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name='ROC Curve (area = %0.2f)' % auc_score))

# Line representing random guessing
fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', showlegend=False, line=dict(dash='dash')))

fig.update_layout(
    title='Receiver Operating Characteristic (ROC) Curve',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    xaxis=dict(scaleanchor="y", scaleratio=1, constrain='domain'),
    yaxis=dict(scaleanchor="x", scaleratio=1, constrain='domain'),
    legend=dict(yanchor="bottom", xanchor="right")
)

fig.show()


In [200]:
import joblib

# Save the model as a checkpoint
model_filename = 'saved_models_synth\\finalized_model_domains_SVM.joblib'
joblib.dump(pipeline, model_filename)


print(f"Model saved as {model_filename}")

Model saved as saved_models_synth\finalized_model_domains_SVM.joblib


In [201]:
from sklearn.naive_bayes import MultinomialNB


pipeline_nb = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('nb', MultinomialNB())
])

parameters = ParameterGrid({
    'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],
    'tfidf__max_df': [0.5, 0.75, 1.0],
    'tfidf__min_df': [1, 2, 3, 5],
    'tfidf__use_idf': [True, False],
    'tfidf__max_features': [None, 5000, 10000, 20000],
    'tfidf__norm': ['l1', 'l2', None],
    'tfidf__sublinear_tf': [True, False],
    
    'nb__alpha': [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 100.0],
    'nb__fit_prior': [True, False]
})

In [202]:
best_score = 0
best_params = {}

for epoch in range(1, N+1):
    print(f"Epoch {epoch}/{N}")
    
    sampled_parameters = np.random.choice(list(parameters), replace=False, size=2)
    
    for params in sampled_parameters:
        pipeline_nb.set_params(**params)
        pipeline_nb.fit(filtered_utterances_train, filtered_labels_train)
        
        current_score = f1_score(filtered_labels_validation, pipeline_nb.predict(filtered_utterances_validation), average='micro')
        
        if current_score > best_score:
            best_score = current_score
            best_params = params
            print(f"New best score: {best_score:.4f} with params: {params}")

print("Best parameters found:", best_params)
print("Best score achieved:", best_score)

Epoch 1/5
New best score: 0.9722 with params: {'nb__alpha': 0.1, 'nb__fit_prior': True, 'tfidf__max_df': 1.0, 'tfidf__max_features': 20000, 'tfidf__min_df': 3, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l1', 'tfidf__sublinear_tf': False, 'tfidf__use_idf': True}
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Best parameters found: {'nb__alpha': 0.1, 'nb__fit_prior': True, 'tfidf__max_df': 1.0, 'tfidf__max_features': 20000, 'tfidf__min_df': 3, 'tfidf__ngram_range': (1, 3), 'tfidf__norm': 'l1', 'tfidf__sublinear_tf': False, 'tfidf__use_idf': True}
Best score achieved: 0.9722222222222222


In [203]:
# Apply the best parameters
pipeline_nb.set_params(**best_params)

In [204]:
from sklearn.utils import shuffle

# Combining training and validation sets
X_combined = filtered_utterances_train + filtered_utterances_validation
y_combined_bin = np.concatenate((filtered_labels_train, filtered_labels_validation), axis=0)

# Ensuring data is properly shuffled
X_combined, y_combined_bin = shuffle(X_combined, y_combined_bin, random_state=42)

# Transform combined data
X_combined_tfidf = pipeline_nb.named_steps['tfidf'].fit_transform(X_combined)

# Retrain
pipeline_nb.named_steps['nb'].fit(X_combined_tfidf, y_combined_bin)

In [205]:
pipeline_nb = joblib.load("saved_models_synth\\finalized_model_domains_NB.joblib")

In [206]:
# Test set evaluation
y_test_pred = pipeline_nb.predict(filtered_utterances_test)
print("Test Set Evaluation:")
print(classification_report(filtered_labels_test, y_test_pred))
print("Test Accuracy:", accuracy_score(filtered_labels_test, y_test_pred))

Test Set Evaluation:
              precision    recall  f1-score   support

       hotel       0.95      0.97      0.96       280
       train       0.97      0.95      0.96       282

    accuracy                           0.96       562
   macro avg       0.96      0.96      0.96       562
weighted avg       0.96      0.96      0.96       562

Test Accuracy: 0.9608540925266904


In [207]:

# Compute the confusion matrix
conf_matrix = confusion_matrix(filtered_labels_test, y_test_pred)

# Normalize the confusion matrix
conf_matrix_normalized = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]

conf_matrix_percentage = conf_matrix_normalized * 100

labels = sorted(set(filtered_labels_test)) 

fig = ff.create_annotated_heatmap(
    z=conf_matrix_percentage, 
    x=labels, 
    y=labels, 
    colorscale='Viridis',
    annotation_text=np.around(conf_matrix_percentage, decimals=2).astype(str),
    showscale=True
)

fig.update_layout(
    title='Confusion Matrix (Normalized)',
    xaxis=dict(title='Predicted Label'),
    yaxis=dict(title='True Label'),
    coloraxis_colorbar=dict(
        title='Percentage (%)'
    )
)

fig.show()


In [209]:
# Save the model as a checkpoint
model_filename = 'saved_models_synth\\finalized_model_domains_NB.joblib'
joblib.dump(pipeline_nb, model_filename)


print(f"Model saved as {model_filename}")

Model saved as saved_models_synth\finalized_model_domains_NB.joblib
