In [7]:
import pandas as pd
import yaml
import requests
import time
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [2]:
# Load the training split into a DataFrame. Later cells read its 'intent' and
# 'phrase' columns to build the Rasa NLU training file.
train_csv = '../data/russian/hwu-20-ru/train.csv'
df = pd.read_csv(train_csv)

In [3]:
import sys  # needed for sys.stdout below; not imported anywhere else in the notebook

import ruamel.yaml

# Scalar wrappers that control how ruamel.yaml emits a string:
# `literal` produces block-literal style (`|`), `folded` produces folded style (`>`).
folded = ruamel.yaml.scalarstring.FoldedScalarString
literal = ruamel.yaml.scalarstring.LiteralScalarString

# NOTE: this rebinds the name `yaml` (bound by `import yaml` in the imports
# cell) to a ruamel.yaml.YAML instance. Later cells call `yaml.dump` on this
# instance, so this cell must run after the imports cell and before them.
yaml = ruamel.yaml.YAML()

# Small demo payload showing both scalar styles side by side.
data = dict(
    foo=literal('- this is a\n- block literal\n'), 
    bar=folded('this is a folded block\n'),
)

yaml.dump(data, sys.stdout)

foo: |
  - this is a
  - block literal
bar: >
  this is a folded block


In [4]:
# Convert the training dataframe to the Rasa NLU YAML training-data layout.
# After the file is written, train the model with: rasa train nlu
rasa_yaml = {'version': '3.1', 'nlu': []}

# Group phrases by intent. Intents keep their first-seen order, and every
# phrase becomes one "- <phrase>" list item of that intent's examples block.
nlu = {}
for index, row in df.iterrows():
    nlu.setdefault(row['intent'], []).append(f"- {row['phrase']}")

# Each intent's examples are emitted as a single block-literal string (`|`),
# hence the `literal` wrapper around the newline-joined items.
for intent, phrases in nlu.items():
    rasa_yaml['nlu'].append({'intent': intent, 'examples': literal("\n".join(phrases) + "\n")})

# Write explicitly as UTF-8: the phrases are non-ASCII text, and relying on the
# platform default encoding can produce a file that is not valid UTF-8.
with open('data/nlu.yml', 'w', encoding='utf-8') as outfile:
    yaml.dump(rasa_yaml, outfile)

In [5]:
def parse_data(path):
    """Read an intent dataset from a CSV file.

    The CSV must provide 'phrase' and 'intent' columns.

    Returns a tuple ``(docs, X, y)``:
      * ``docs`` maps each intent to a dict whose 'question' is the first
        phrase seen for that intent, whose 'answer' and 'name' are the intent
        itself, and whose 'paraphrased_questions' holds every later phrase of
        the same intent, in file order;
      * ``X`` is the list of all phrases, in file order;
      * ``y`` is the list of the matching intents, in file order.
    """
    frame = pd.read_csv(path)
    phrases, intents, docs = [], [], {}
    for _, record in frame.iterrows():
        phrase, intent = record['phrase'], record['intent']
        phrases.append(phrase)
        intents.append(intent)

        entry = docs.get(intent)
        if entry is not None:
            # Intent already known: this phrase is a paraphrase of its question.
            entry['paraphrased_questions'].append(phrase)
            continue

        # First occurrence of the intent: its phrase becomes the main question.
        docs[intent] = {
            'question': phrase,
            'answer': intent,
            'name': intent,
            'paraphrased_questions': [],
        }
    return docs, phrases, intents

In [6]:
# Collect predictions from a running Rasa server.
# Start it first with: rasa run --enable-api
test_path = '../data/russian/hwu-20-ru/test.csv'
docs_test, X_test, y_test = parse_data(test_path)

y_pred = []  # predicted intent name per test phrase, aligned with y_test
times = []   # request round-trip duration per phrase, in seconds
for row in X_test:
    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    # The timeout keeps an unresponsive server from hanging the cell forever.
    r = requests.post('http://localhost:5005/model/parse', json={'text': row}, timeout=60)
    times.append(time.perf_counter() - start)
    # Surface HTTP errors directly instead of failing later with an opaque
    # KeyError / JSON decoding error on an error response body.
    r.raise_for_status()
    y_pred.append(r.json()['intent']['name'])

In [8]:
# Report per-intent metrics, overall accuracy, macro F1 and server latency.
# zero_division=0 yields the same scores as the default (undefined precision or
# recall is reported as 0) but without emitting one UndefinedMetricWarning per
# affected metric.
# NOTE(review): the report lists an `nlu_fallback` label; if it only occurs in
# the predictions, it is still counted as a class in the macro average and
# lowers the macro F1 -- confirm this is intended.
print(classification_report(y_test, y_pred, zero_division=0))
print("Accuracy: ", accuracy_score(y_test, y_pred)) 
print("F1-Score: ", f1_score(y_test, y_pred, average='macro', zero_division=0)) 
print(f"Mean response time: {np.mean(times)} +- {np.std(times)} sec.")

                       precision    recall  f1-score   support

    audio_volume_down       1.00      1.00      1.00         5
       calendar_query       0.50      0.40      0.44         5
      calendar_remove       0.80      0.80      0.80         5
         calendar_set       1.00      0.60      0.75         5
     datetime_convert       0.83      1.00      0.91         5
       general_affirm       1.00      1.00      1.00         5
      general_confirm       1.00      1.00      1.00         5
         general_joke       0.83      1.00      0.91         5
       general_quirky       0.00      0.00      0.00         5
       general_repeat       0.83      1.00      0.91         5
           iot_coffee       1.00      0.80      0.89         5
  iot_hue_lightchange       1.00      1.00      1.00         5
     iot_hue_lightoff       1.00      1.00      1.00         5
         iot_wemo_off       1.00      0.80      0.89         5
         nlu_fallback       0.00      0.00      0.00  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
