In [None]:
import pandas as pd
import yaml
import requests
import time
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [None]:
# Training split of the chatbot-intents dataset; the path is relative to the
# directory the notebook is run from.
train_csv = '../data/russian/chatbot-intents/train-chatbot.csv'

# Loaded once here; `df` is the frame later converted to Rasa NLU YAML.
df = pd.read_csv(filepath_or_buffer=train_csv)

In [None]:
import sys  # needed for sys.stdout below; it is not imported anywhere else in view

import ruamel.yaml

# Scalar wrappers that select the YAML block style used when a string is dumped:
# `folded` -> '>' folded block, `literal` -> '|' literal block.
folded = ruamel.yaml.scalarstring.FoldedScalarString
literal = ruamel.yaml.scalarstring.LiteralScalarString

# NOTE: this rebinds the name `yaml`. From here on it is a ruamel.yaml.YAML
# instance, not the PyYAML module imported at the top of the notebook.
yaml = ruamel.yaml.YAML()

# Small smoke test: dump one literal and one folded scalar to the cell output
# to check how each block style is rendered.
data = dict(
    foo=literal('- this is a\n- block literal\n'),
    bar=folded('this is a folded block\n'),
)

yaml.dump(data, sys.stdout)

In [None]:
# Convert the training dataframe to the Rasa NLU YAML format,
# then train the model with: rasa train nlu
rasa_yaml = {'version': '3.1', 'nlu': []}

# Group the example phrases by intent. A dict keeps insertion order, so intents
# come out in order of first appearance and phrases stay in row order.
# Iterating the two columns directly avoids building a Series per row as
# `iterrows()` does.
nlu = {}
for intent, phrase in zip(df['intent'], df['phrase']):
    nlu.setdefault(intent, []).append(f"- {phrase}")

# Each intent's examples are emitted as a single '|' literal block holding one
# "- phrase" line per example.
rasa_yaml['nlu'] = [
    {'intent': intent, 'examples': literal("\n".join(phrases) + "\n")}
    for intent, phrases in nlu.items()
]

# Explicit UTF-8: without it `open` falls back to the platform's default
# encoding, which can fail on (or mis-encode) non-ASCII phrases. The dataset
# path suggests the phrases are Russian.
with open('data/nlu.yml', 'w', encoding='utf-8') as outfile:
    yaml.dump(rasa_yaml, outfile)

In [None]:
def parse_data(path):
    """Read an intent CSV and return it both flat and grouped by intent.

    The CSV must have a ``phrase`` and an ``intent`` column.

    Returns a tuple ``(docs, X, y)``:
      * ``docs`` maps each intent to a dict with keys ``question`` (the first
        phrase seen for that intent), ``answer`` and ``name`` (both the intent
        itself) and ``paraphrased_questions`` (every later phrase of that
        intent, in row order);
      * ``X`` is the list of all phrases in row order;
      * ``y`` is the list of the matching intents.
    """
    frame = pd.read_csv(path)
    phrases, intents, by_intent = [], [], {}

    for _, record in frame.iterrows():
        phrase = record['phrase']
        intent = record['intent']
        phrases.append(phrase)
        intents.append(intent)

        entry = by_intent.get(intent)
        if entry is None:
            # First phrase of an intent becomes its canonical question.
            by_intent[intent] = {
                'question': phrase,
                'answer': intent,
                'name': intent,
                'paraphrased_questions': [],
            }
        else:
            entry['paraphrased_questions'].append(phrase)

    return by_intent, phrases, intents

In [None]:
# Requires a running Rasa server: rasa run --enable-api
RASA_PARSE_URL = 'http://localhost:5005/model/parse'
# Upper bound per request so a stalled server cannot hang the notebook forever
# (requests applies no timeout unless one is passed).
REQUEST_TIMEOUT_SEC = 30

test_path = '../data/russian/chatbot-intents/test-chatbot.csv'
docs_test, X_test, y_test = parse_data(test_path)

y_pred = []   # predicted intent name per test phrase, aligned with y_test
times = []    # round-trip latency of each request, in seconds
for phrase in X_test:
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    r = requests.post(RASA_PARSE_URL, json={'text': phrase}, timeout=REQUEST_TIMEOUT_SEC)
    times.append(time.perf_counter() - start)
    # Fail loudly on an HTTP error rather than with a KeyError on the error payload.
    r.raise_for_status()
    y_pred.append(r.json()['intent']['name'])

In [None]:
# Per-class report for the predicted intents against the true test labels.
report = classification_report(y_test, y_pred)
print(report)

# Overall accuracy.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", accuracy)

# Macro-averaged F1, as selected by average='macro'.
macro_f1 = f1_score(y_test, y_pred, average='macro')
print("F1-Score: ", macro_f1)

# Mean and standard deviation of the per-request latencies collected in `times`.
mean_latency = np.mean(times)
latency_std = np.std(times)
print(f"Mean response time: {mean_latency} +- {latency_std} sec.")