In [1]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import GridSearchCV

## Evaluation of the data

In [2]:
df = pd.read_table("dialog_acts.dat", header=None, names=['data'])
df['act'] = df['data'].str.split(' ').str[0]
df['sentence'] = df['data'].str.split(' ').str[1:]
df['sentence'] = df['sentence'].str.join(' ')
df.drop(labels='data', inplace=True, axis=1)

In [3]:
df.act.value_counts()/25501 * 100

inform      39.841575
request     25.465668
thankyou    12.779891
reqalts      6.850712
null         6.321321
affirm       4.533156
negate       1.705815
bye          1.043096
confirm      0.674483
hello        0.364692
repeat       0.129407
ack          0.109800
deny         0.105878
restart      0.054900
reqmore      0.019607
Name: act, dtype: float64

In [4]:
contraptions = {'doesnt': "does not",
               'im': 'i am', 
               'dont': 'do not',
               'id': 'i would'}

In [5]:
df['sentence'] = df['sentence'].replace(contraptions, regex=True)

In [6]:
df['sentence_length'] = df.sentence.str.split().apply(len)

In [7]:
df['sentence_length'].describe()

count    25501.000000
mean         3.841026
std          3.067537
min          1.000000
25%          2.000000
50%          3.000000
75%          5.000000
max         24.000000
Name: sentence_length, dtype: float64

## Evaluation of the machine learning and baseline systems

In [8]:
x = df['sentence']
y = df['act']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10)
print(f'\nSamples in train set: {len(x_train)}')
print(f'Samples in test set: {len(x_test)}')


Samples in train set: 22950
Samples in test set: 2551


#### Baseline

In [9]:
print('Accuracy of the majority class classifier on the test samples:')
y_pred = len(y_test) * ['inform']
print(f"accuracy: {accuracy_score(y_test, y_pred)}")
print(f"precision: {precision_score(y_test, y_pred, average='macro')}")
print(f"recall: {recall_score(y_test, y_pred, average='macro')}")

Accuracy of the majority class classifier on the test samples:
accuracy: 0.3908271266170129
precision: 0.03006362512438561
recall: 0.07692307692307693


  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
print('Accuracy of the rulebased classifier on the test samples')
# dictionary with act as key and corresponding words as values
rules = {'ack': ['okay', 'okay um', 'alright'], 'affirm': ['yes right', 'right', 'yes'],
         'bye': ['see you', 'good bye', 'bye'], 'confirm': ['is it'], 'deny': ['i dont want'],
         'hello': ['hi', 'hello'],
         'inform': ['looking for'], 'negate': ['no'], 'repeat': ['can you repeat that', 'what did you say'],
         'reqalts': ['how about'], 'reqmore': ['more'], 'request': ['what is', 'where'], 'restart': ['start over'],
         'thankyou': ['thank you', 'thanks']}

y_pred = []

for x in x_test:
    acts = []
    for k, v in rules.items():
        if any(keywords in x.lower() for keywords in v):
            acts.append(k)
    if not acts:
        y_pred.append('null')
    else:
        y_pred.append(random.choice(acts))

print(f"accuracy: {accuracy_score(y_test, y_pred)}")
print(f"precision: {precision_score(y_test, y_pred, average='macro')}")
print(f"recall: {recall_score(y_test, y_pred, average='macro')}")

Accuracy of the rulebased classifier on the test samples
accuracy: 0.3214425715405723
precision: 0.5241955394421667
recall: 0.5355638912942366


#### Machine learning

In [11]:
max_sentences = 0.80
min_sentences = 1
BOW_vect = CountVectorizer(max_df=max_sentences, min_df=1, lowercase=True, strip_accents='ascii')
x_train_counts = BOW_vect.fit_transform(x_train)

In [12]:
print('Finding best hyperparameters for Decision tree using grid search and cross validation.')
params = {'max_depth': list(range(1,25)),
          'min_samples_split': [10, 50,100]}
grid_search_cv = GridSearchCV(DecisionTreeClassifier(), params, verbose=1, cv=3, n_jobs=-1)
grid_search_cv.fit(x_train_counts, y_train)

x_test_counts = BOW_vect.transform(x_test)
y_pred = grid_search_cv.predict(x_test_counts)
print('Evaluation of the best hyperparameters on the test set')
print(f"accuracy: {accuracy_score(y_test, y_pred)}")
print(f"precision: {precision_score(y_test, y_pred, average='macro')}")
print(f"recall: {recall_score(y_test, y_pred, average='macro')}")

Finding best hyperparameters for Decision tree using grid search and cross validation.
Fitting 3 folds for each of 72 candidates, totalling 216 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:    5.1s
[Parallel(n_jobs=-1)]: Done 176 tasks      | elapsed:    9.4s
[Parallel(n_jobs=-1)]: Done 216 out of 216 | elapsed:   10.6s finished


Evaluation of the best hyperparameters on the test set
accuracy: 0.9572716581732654
precision: 0.7429236203993087
recall: 0.7052122728339791


  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
print('Finding best parameters for SVM using grid search and cross validation.')

params = params = {
    "loss" : ["hinge", "log", "squared_hinge", "modified_huber"],
    "alpha" : [0.0001, 0.001, 0.01, 0.1],
    "penalty" : ["l2", "l1", "none", "elasticnet"],
}

grid_search_cv = GridSearchCV(SGDClassifier(), params, verbose=1, cv=3, n_jobs=-1)
grid_search_cv.fit(x_train_counts, y_train)

x_test_counts = BOW_vect.transform(x_test)
y_pred = grid_search_cv.predict(x_test_counts)
print('Evaluation of the best hyperparameters on the test set')
print(f"accuracy: {accuracy_score(y_test, y_pred)}")
print(f"precision: {precision_score(y_test, y_pred, average='macro')}")
print(f"recall: {recall_score(y_test, y_pred, average='macro')}")

Finding best parameters for SVM using grid search and cross validation.
Fitting 3 folds for each of 64 candidates, totalling 192 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 tasks      | elapsed:   11.9s
[Parallel(n_jobs=-1)]: Done 192 out of 192 | elapsed:  1.2min finished


Evaluation of the best hyperparameters on the test set
accuracy: 0.9811838494707957
precision: 0.8666384216680814
recall: 0.8857753728770729


  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
print('Finding best parameters for MLP using grid search and cross validation.')
params = {
    'hidden_layer_sizes': [(10,30,10),(20,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

grid_search_cv = GridSearchCV(MLPClassifier(max_iter=100), params, n_jobs=-1, cv=3)
grid_search_cv.fit(x_train_counts, y_train)

x_test_counts = BOW_vect.transform(x_test)
y_pred = grid_search_cv.predict(x_test_counts)
print('Evaluation of the best hyperparameters on the test set')
print(f"accuracy: {accuracy_score(y_test, y_pred)}")
print(f"precision: {precision_score(y_test, y_pred, average='macro')}")
print(f"recall: {recall_score(y_test, y_pred, average='macro')}")

Finding best parameters for MLP using grid search and cross validation.




Evaluation of the best hyperparameters on the test set
accuracy: 0.9854958839670718
precision: 0.8878533860942674
recall: 0.8656205758743264
