In [1]:
import os

# Walk the Kaggle input mount and print every directory followed by the files
# it contains (file names are indented), to confirm where the checkpoint and
# dataset files were mounted before the rest of the notebook uses those paths.
for dirname, _, filenames in os.walk('/kaggle/input'):
    print(dirname)
    for filename in filenames:
        print('   ', filename)

/kaggle/input
    model.pt
/kaggle/input/data
/kaggle/input/data/vped
    train-dev-statistics.txt
    intent_label.txt
    README.md
    slot_label.txt
/kaggle/input/data/vped/test
    label
    seq_intent.out
    seq.out
    seq.in
/kaggle/input/data/vped/train
    label
    seq_intent.out
    seq.out
    seq.in
/kaggle/input/data/vped/dev
    label
    seq_intent.out
    seq.out
    seq.in


In [2]:
!pip -q install "torch>=2.1.0" "transformers==4.41.0" "seqeval" "pytorch-crf" "fastapi" "uvicorn[standard]" "pyngrok" "underthesea==6.8.4"

import os
import sys
import json
import math
import torch
import numpy as np
from types import SimpleNamespace

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m70.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.9/20.9 MB[0m [31m69.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m657.8/657.8 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m98.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━

In [3]:
# ====== CONFIG: ADJUST THESE VALUES TO MATCH YOUR DATASET ======
TASK = "vped"  # sub-directory of DATA_DIR that holds the label files
DATA_DIR = "/kaggle/input/data"  # must contain data/<task>/*
MODEL_DIR = "/kaggle/input/"  # directory containing model.pt
MODEL_NAME = "vinai/phobert-base-v2"
# SECURITY: never store the ngrok auth token in the notebook source. It is read
# from the NGROK_AUTH_TOKEN environment variable instead (export it, or inject
# it from your platform's secret store, before running this cell). Any token
# that was previously committed here should be treated as leaked and rotated.
# An empty string means "no token configured"; the tunnel cell only applies the
# token when this value is non-empty.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")


In [4]:
# Settings bundle read by the label loader and the model-construction cell.
# Built from a plain mapping so each group of options can carry its own note;
# attribute names, values and their order are unchanged.
args = SimpleNamespace(**{
    # --- dataset location (joined as data_dir/task/<label file>) ---
    "task": TASK,
    "data_dir": DATA_DIR,
    "intent_label_file": "intent_label.txt",
    "slot_label_file": "slot_label.txt",
    # --- encoder ---
    "model_type": "phobert",  # not read by the cells visible here
    "model_name_or_path": MODEL_NAME,
    # --- architecture switches forwarded to the VSLIM constructor ---
    "dropout_rate": 0.1,
    "use_crf": False,
    "num_mask": 4,
    "cls_token_cat": 1,  # compared with == 1 when building the model
    "intent_attn": 1,    # compared with == 1 when building the model
    "tag_intent": 1,     # == 1 selects the biaffine tag-intent head
    # --- presumably training-time settings; not read by the cells visible here ---
    "ignore_index": -100,
    "intent_loss_coef": 1.0,
    "slot_loss_coef": 2.0,
    "token_intent_loss_coef": 2.0,
    "tag_intent_coef": 1.0,
    "max_seq_len": 128,
    # --- device selection ---
    "no_cuda": False,
})

In [5]:
# ====== processors/label_loader.py (nguyên gốc) ======
def load_labels_from_file(file_path: str):
    """Read one label per line from a UTF-8 text file.

    Each line is stripped of surrounding whitespace; blank lines and lines
    whose stripped text starts with ``#`` are skipped.

    Args:
        file_path: Path of the label file to read.

    Returns:
        List of label strings in file order.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        return [entry for entry in stripped_lines if entry and not entry.startswith("#")]

def get_label_mappings(args):
    """Load intent/slot labels and build every label<->id lookup table.

    The label files are read from ``args.data_dir/args.task/`` using
    ``args.intent_label_file`` and ``args.slot_label_file``.

    Two derived label lists are created from the intent labels:
    the token-intent list (``"O"`` prepended) and the tag-intent list
    (``"PAD"`` prepended), so index 0 in those spaces is the extra label.

    Returns:
        Tuple ``(INTENT_LABELS, SLOT_LABELS, mappings)`` where ``mappings`` is
        a dict holding the four label lists and the forward/backward id
        dictionaries for each of them.
    """
    import os

    def _index(labels):
        # The backward table is the inversion of the forward one.
        to_id = {label: idx for idx, label in enumerate(labels)}
        from_id = {idx: label for label, idx in to_id.items()}
        return to_id, from_id

    INTENT_LABELS = load_labels_from_file(
        os.path.join(args.data_dir, args.task, args.intent_label_file)
    )
    SLOT_LABELS = load_labels_from_file(
        os.path.join(args.data_dir, args.task, args.slot_label_file)
    )
    TOKEN_INTENT_LABELS = ["O"] + INTENT_LABELS
    INTENT_LABELS_WITH_PAD = ["PAD"] + INTENT_LABELS

    intent2id, id2intent = _index(INTENT_LABELS)
    slot2id, id2slot = _index(SLOT_LABELS)
    tokint2id, id2tokint = _index(TOKEN_INTENT_LABELS)
    tagint2id, id2tagint = _index(INTENT_LABELS_WITH_PAD)

    mappings = {
        'INTENT_LABELS': INTENT_LABELS,
        'SLOT_LABELS': SLOT_LABELS,
        'TOKEN_INTENT_LABELS': TOKEN_INTENT_LABELS,
        'INTENT_LABELS_WITH_PAD': INTENT_LABELS_WITH_PAD,
        'INTENT2ID': intent2id,
        'ID2INTENT': id2intent,
        'SLOT2ID': slot2id,
        'ID2SLOT': id2slot,
        'TOKINT2ID': tokint2id,
        'ID2TOKINT': id2tokint,
        'TAGINT2ID': tagint2id,
        'ID2TAGINT': id2tagint,
    }
    return INTENT_LABELS, SLOT_LABELS, mappings

In [6]:
# ====== models/layers.py (nguyên gốc) ======
import torch.nn as nn
import torch

class MultiIntentClassifier(nn.Module):
    """Sentence-level multi-label intent head: dropout -> linear -> sigmoid.

    The forward pass returns values already squashed by a sigmoid, i.e. one
    independent score in [0, 1] per intent label.
    """

    def __init__(self, input_dim, num_intent_labels, dropout_rate=0.):
        super().__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, num_intent_labels)
        self.sigmoid = nn.Sigmoid()
        self.reset_params()

    def forward(self, x):
        """Return per-intent sigmoid scores for ``x``."""
        projected = self.linear(self.dropout(x))
        return self.sigmoid(projected)

    def reset_params(self):
        """Re-draw the linear weight and bias with ``nn.init.uniform_`` defaults."""
        for parameter in (self.linear.weight, self.linear.bias):
            nn.init.uniform_(parameter)

class SlotClassifier(nn.Module):
    """Slot head: dropout followed by a linear projection to slot-label logits."""

    def __init__(self, input_dim, num_slot_labels, dropout_rate=0.2):
        super().__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, num_slot_labels)

    def forward(self, x):
        """Return raw (unnormalised) slot logits for ``x``."""
        return self.linear(self.dropout(x))

class IntentTokenClassifier(nn.Module):
    """Token-intent head: dropout followed by a linear projection to logits."""

    def __init__(self, input_dim, num_intent_labels, dropout_rate=0.):
        super().__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, num_intent_labels)

    def forward(self, x):
        """Return raw (unnormalised) token-intent logits for ``x``."""
        return self.linear(self.dropout(x))

class TagIntentClassifier(nn.Module):
    """Tag-intent head: dropout -> linear -> softmax over dimension 1.

    The forward pass returns probabilities (each row along dim 1 sums to 1),
    not raw logits.
    """

    def __init__(self, input_dim, num_intent_labels, dropout_rate=0.):
        super().__init__()
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(input_dim, num_intent_labels)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax-normalised tag-intent distribution for ``x``."""
        scores = self.linear(self.dropout(x))
        return self.softmax(scores)

class BiaffineTagIntentClassifier(nn.Module):
    """Biaffine tag-intent head.

    For every label ``c`` the score is::

        score_c = h_cls^T U_c r + W_c [h_cls; r] + b_c

    and the scores are softmax-normalised over the label dimension (dim 1).
    """

    def __init__(self, input_dim, num_intent_labels, dropout_rate=0.):
        super().__init__()
        self.input_dim = input_dim
        self.num_intent_labels = num_intent_labels
        self.dropout = nn.Dropout(dropout_rate)
        # U has shape [num_labels, H, H]; the layout must match the checkpoint.
        self.U = nn.Parameter(torch.Tensor(num_intent_labels, input_dim, input_dim))
        self.W = nn.Linear(2 * input_dim, num_intent_labels)
        self.softmax = nn.Softmax(dim=1)
        self.reset_params()

    def forward(self, h_cls, r):
        """Return the label distribution for each (``h_cls``, ``r``) row pair.

        Both inputs are 2-D with a shared leading dimension, as required by
        the ``'bh,chd,bd->bc'`` contraction and the concatenation on dim 1.
        """
        # Dropout is applied to h_cls first, then r (order matters in train mode).
        h_cls = self.dropout(h_cls)
        r = self.dropout(r)
        bilinear_term = torch.einsum('bh,chd,bd->bc', h_cls, self.U, r)
        linear_term = self.W(torch.cat([h_cls, r], dim=1))
        return self.softmax(bilinear_term + linear_term)

    def reset_params(self):
        """Xavier-initialise ``U`` and ``W.weight``; zero ``W.bias``."""
        nn.init.xavier_uniform_(self.U)
        nn.init.xavier_uniform_(self.W.weight)
        nn.init.zeros_(self.W.bias)

In [7]:
# ====== models/vslim.py (nguyên gốc, rút gọn forward infer như repo) ======
from transformers import AutoModel
from torchcrf import CRF

class VSLIM(nn.Module):
    """Pretrained encoder with intent, slot, token-intent and tag-intent heads.

    This is the trimmed inference variant: ``forward`` evaluates all heads but
    returns only the sentence-level intent scores and the pooled encoder
    output.

    Args:
        model_name: Identifier/path passed to ``AutoModel.from_pretrained``.
        num_slots: Output size of the slot head.
        num_intents: Output size of the sentence-level intent head.
        num_token_intents: Output size of the token-intent head.
        num_tag_intents: Output size of the tag-intent head. When
            ``intent_attn`` is enabled it must equal ``num_intents + 1``,
            because a single zero "PAD" column is prepended to the intent
            scores before the element-wise product.
        dropout: Dropout rate shared by all heads.
        use_crf: If true, a CRF layer over the slot labels is created
            (it is not used by this trimmed ``forward``).
        num_mask: Number of tag masks per example used by the tag-intent branch.
        cls_token_cat: For the non-biaffine head, concatenate the pooled output
            to each tag vector (doubles the head's input width).
        intent_attn: Re-weight tag-intent probabilities by the sentence-level
            intent scores and renormalise.
        use_biaffine_tag_intent: Select the biaffine tag-intent head instead of
            the plain linear one.
        args: Stored as ``self.args``; not read inside this class.
    """

    def __init__(self,
                 model_name,
                 num_slots,
                 num_intents,
                 num_token_intents,
                 num_tag_intents,
                 dropout=0.1,
                 use_crf=False,
                 num_mask=4,
                 cls_token_cat=True,
                 intent_attn=True,
                 use_biaffine_tag_intent=True,
                 args=None):
        super().__init__()
        # NOTE: sub-module attribute names and creation order define the
        # state_dict keys; they must stay as-is for strict checkpoint loading.
        self.encoder = AutoModel.from_pretrained(model_name)
        hidden_size = self.encoder.config.hidden_size
        self.multi_intent_classifier = MultiIntentClassifier(hidden_size, num_intents, dropout)
        self.slot_classifier = SlotClassifier(hidden_size, num_slots, dropout)
        self.intent_token_classifier = IntentTokenClassifier(hidden_size, num_token_intents, dropout)
        self.use_biaffine_tag_intent = use_biaffine_tag_intent
        if use_biaffine_tag_intent:
            self.biaffine_tag_intent_classifier = BiaffineTagIntentClassifier(hidden_size, num_tag_intents, dropout)
        else:
            tag_input_dim = 2 * hidden_size if cls_token_cat else hidden_size
            self.tag_intent_classifier = TagIntentClassifier(tag_input_dim, num_tag_intents, dropout)
        if use_crf:
            self.crf = CRF(num_tags=num_slots, batch_first=True)
        self.use_crf = use_crf
        self.num_mask = num_mask
        self.cls_token_cat = cls_token_cat
        self.intent_attn = intent_attn
        self.num_intents = num_intents
        self.args = args

    def forward(self, input_ids, attention_mask, token_type_ids=None,
                intent_label_ids=None, slot_labels_ids=None,
                intent_token_ids=None, B_tag_mask=None, BI_tag_mask=None,
                tag_intent_label=None):
        """Run the encoder and heads; return ``(intent_scores, pooled_output)``.

        Args:
            input_ids, attention_mask: Forwarded to the encoder.
            token_type_ids, intent_label_ids, slot_labels_ids, intent_token_ids:
                Accepted for signature compatibility; not read here.
            B_tag_mask, BI_tag_mask, tag_intent_label: The tag-intent branch is
                evaluated only when all three are provided. ``BI_tag_mask`` is
                contracted with the sequence output via ``'bml,bld->bmd'``, so
                it is expected to be (batch, num_mask, seq_len).

        Returns:
            Tuple ``(intent_logits, pooled_output)``. Despite the name,
            ``intent_logits`` are sigmoid outputs of ``MultiIntentClassifier``.
            Slot, token-intent and tag-intent outputs are computed but not
            returned by this trimmed forward.
        """
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state
        pooled_output = outputs.pooler_output
        intent_logits = self.multi_intent_classifier(pooled_output)
        slot_logits = self.slot_classifier(sequence_output)
        intent_token_logits = self.intent_token_classifier(sequence_output)
        tag_intent_logits = None
        if B_tag_mask is not None and BI_tag_mask is not None and tag_intent_label is not None:
            # The previous guard compared `tensor.type()` (a string such as
            # 'torch.FloatTensor') with a torch.dtype, which is always unequal.
            # `.to(torch.float32)` states the intent directly and is a no-op
            # for tensors that are already float32.
            BI_tag_mask = BI_tag_mask.to(torch.float32)
            B_tag_mask = B_tag_mask.to(torch.float32)
            # One pooled vector per mask: sum of token states selected by the mask.
            tag_intent_vec = torch.einsum('bml,bld->bmd', BI_tag_mask, sequence_output)
            if self.use_biaffine_tag_intent:
                h_cls = pooled_output.unsqueeze(1).repeat(1, self.num_mask, 1)
                batch_size = h_cls.size(0)
                # Flatten (batch, num_mask, H) -> (batch * num_mask, H) for the head.
                h_cls_flat = h_cls.view(batch_size * self.num_mask, -1)
                r_flat = tag_intent_vec.view(batch_size * self.num_mask, -1)
                tag_intent_logits = self.biaffine_tag_intent_classifier(h_cls_flat, r_flat)
            else:
                if self.cls_token_cat:
                    cls_token = pooled_output.unsqueeze(1).repeat(1, self.num_mask, 1)
                    tag_intent_vec = torch.cat((cls_token, tag_intent_vec), dim=2)
                tag_intent_vec = tag_intent_vec.view(tag_intent_vec.size(0) * tag_intent_vec.size(1), -1)
                tag_intent_logits = self.tag_intent_classifier(tag_intent_vec)
            if self.intent_attn:
                # Broadcast the sentence-level intent scores to every mask row,
                # prepend a zero column for the PAD class, then gate and
                # renormalise the tag-intent distribution.
                intent_probs = intent_logits.unsqueeze(1).repeat(1, self.num_mask, 1)
                intent_probs = intent_probs.view(intent_probs.size(0) * intent_probs.size(1), -1)
                pad_probs = torch.zeros(intent_probs.size(0), 1, device=intent_probs.device)
                intent_probs_expanded = torch.cat([pad_probs, intent_probs], dim=1)
                tag_intent_logits = tag_intent_logits * intent_probs_expanded
                tag_intent_logits = tag_intent_logits.div(tag_intent_logits.sum(dim=1, keepdim=True) + 1e-10)
        # NOTE: slot_logits, intent_token_logits and tag_intent_logits are
        # intentionally not returned here; the serving code only consumes the
        # sentence-level intent scores and the pooled vector.
        return intent_logits, pooled_output

In [9]:
# ====== predict.py (nguyên gốc, phần inference) ======
from transformers import AutoTokenizer
from seqeval.metrics.sequence_labeling import get_entities

# --- Thay thế/Cập nhật trong Cell 6 (Predictor) ---
class VSLIMPredictor:
    """Thin inference wrapper: tokens -> multi-intent prediction plus debug info.

    Args:
        model: Callable as ``model(input_ids, attention_mask)`` returning
            ``(intent_scores, pooled_output)``; moved to ``device`` and put in
            eval mode on construction.
        tokenizer: Tokenizer exposing ``encode_plus`` and
            ``convert_ids_to_tokens``.
        mappings: Dict containing at least ``'INTENT_LABELS'``.
        device: Device the model and inputs are moved to.
        args: Settings object; ``args.max_seq_len`` (default 128 when missing)
            sets the padded/truncated input length.
    """

    def __init__(self, model, tokenizer, mappings, device, args):
        self.model = model
        self.tokenizer = tokenizer
        self.mappings = mappings
        self.device = device
        self.args = args
        # Previously hard-coded to 128 inside predict_single; now driven by the
        # same setting the rest of the pipeline uses, with the old value as
        # the fallback.
        self.max_seq_len = getattr(args, "max_seq_len", 128) or 128
        self.intent_labels = mappings['INTENT_LABELS']
        self.model.to(device)
        self.model.eval()

    def predict_single(self, tokens, threshold=0.5):
        """Predict intents for one pre-tokenized utterance.

        Args:
            tokens: List of word-level tokens (output of the word segmenter).
            threshold: An intent is kept when its score is strictly greater
                than this value.

        Returns:
            Dict with keys ``tokenized_text``, ``bpe_tokens``, ``h_cls_vector``
            (first 10 dimensions of the pooled vector), ``intent_probabilities``
            (label -> score) and ``final_intents`` (``["none"]`` when nothing
            passes the threshold).
        """
        # Nothing to encode: answer "none" instead of handing an empty word
        # list to the tokenizer.
        if not tokens:
            return {
                "tokenized_text": tokens,
                "bpe_tokens": [],
                "h_cls_vector": [],
                "intent_probabilities": {},
                "final_intents": ["none"],
            }

        # 1. Encode the pre-split words into fixed-length model inputs.
        encoding = self.tokenizer.encode_plus(
            tokens,
            is_split_into_words=True,
            return_tensors="pt",
            max_length=self.max_seq_len,
            padding='max_length',
            truncation=True
        )

        input_ids = encoding["input_ids"].to(self.device)
        attention_mask = encoding["attention_mask"].to(self.device)

        # Sub-word tokens for display (e.g. ['<s>', 'Thêm', '50@@', 'k', ...]),
        # with padding removed. The pad symbol is taken from the tokenizer
        # rather than assumed.
        pad_token = getattr(self.tokenizer, "pad_token", None) or '<pad>'
        bpe_tokens = [
            piece
            for piece in self.tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
            if piece != pad_token
        ]

        # 2. Inference: the model returns (intent scores, pooled vector).
        with torch.no_grad():
            intent_logits, h_cls_tensor = self.model(input_ids, attention_mask)

        # 3. The multi-intent head already applies a sigmoid, so the values are
        #    used directly as per-label scores in [0, 1].
        intent_probs = intent_logits[0].cpu().numpy()
        h_cls_vec = h_cls_tensor[0].cpu().numpy().tolist()

        # 4. Map indices to labels and keep those above the threshold.
        res_intents = []
        prob_dict = {}
        for idx, prob in enumerate(intent_probs):
            label = self.intent_labels[idx]
            prob_dict[label] = float(prob)
            if prob > threshold:
                res_intents.append(label)

        if not res_intents:
            res_intents = ["none"]

        # 5. Package the detailed result.
        return {
            "tokenized_text": tokens,          # word-segmenter output
            "bpe_tokens": bpe_tokens,          # sub-word pieces without padding
            "h_cls_vector": h_cls_vec[:10],    # first 10 dims, for illustration
            "intent_probabilities": prob_dict, # score for every intent label
            "final_intents": res_intents       # labels above the threshold
        }

In [10]:
# ====== Load label mappings, tokenizer and model (CUDA when available) ======
INTENT_LABELS, SLOT_LABELS, mappings = get_label_mappings(args)
device = "cuda" if (not args.no_cuda and torch.cuda.is_available()) else "cpu"

from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)

# Head sizes come from the label files so they line up with the checkpoint;
# the 0/1 integer switches in `args` are converted to booleans here.
model = VSLIM(
    model_name=args.model_name_or_path,
    num_slots=len(mappings['SLOT_LABELS']),
    num_intents=len(mappings['INTENT_LABELS']),
    num_token_intents=len(mappings['TOKEN_INTENT_LABELS']),
    num_tag_intents=len(mappings['INTENT_LABELS_WITH_PAD']),
    dropout=args.dropout_rate,
    use_crf=args.use_crf,
    num_mask=args.num_mask,
    cls_token_cat=(args.cls_token_cat == 1),
    intent_attn=(args.intent_attn == 1),
    use_biaffine_tag_intent=(args.tag_intent == 1),
    args=args
)
# The checkpoint is passed straight to load_state_dict, i.e. it is expected to
# be a plain state dict. weights_only=True restricts unpickling to tensors and
# basic containers, so a tampered model.pt cannot execute arbitrary code.
state = torch.load(
    os.path.join(MODEL_DIR, "model.pt"),
    map_location="cpu",
    weights_only=True,
)
# strict=True: fail loudly if the architecture flags above do not match the
# checkpoint (this also overwrites the freshly initialised encoder pooler).
model.load_state_dict(state, strict=True)
model.to(device)

predictor = VSLIMPredictor(
    model=model,
    tokenizer=tokenizer,
    mappings=mappings,
    device=device,
    args=args
)



config.json:   0%|          | 0.00/678 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

bpe.codes: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/540M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at vinai/phobert-base-v2 and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# ====== Tokenize underthesea theo định dạng seq.in (cụm nối "_") ======
from underthesea import word_tokenize

def tokenize_underthesea_seqin(text: str):
    """Word-segment ``text`` and return the whitespace-separated tokens.

    ``word_tokenize(..., format="text")`` yields a single string in which the
    syllables of a multi-syllable word are joined by "_" (e.g. "quận_1");
    splitting on whitespace gives one list entry per segmented word.
    """
    return word_tokenize(text, format="text").split()


In [12]:
# ====== BIO -> entities: key = đúng slot label (bỏ tiền tố B-/I-) ======
def extract_entities_from_bio(tokens, slot_tags, token_intents):
    """Collapse BIO slot tags into entity records.

    A span starts at a ``B-<type>`` tag and extends over the immediately
    following ``I-<type>`` tags of the same type. ``I-`` tags without a
    preceding matching ``B-`` are ignored.

    Args:
        tokens: Word tokens; "_" inside a token is rendered as a space.
        slot_tags: One BIO tag per token.
        token_intents: One intent label per token; ``"O"`` means no intent.

    Returns:
        List of dicts ``{"key": <type>, "text": <span text>, "intent": <label>}``
        where ``intent`` is the most frequent non-"O" label inside the span
        (first seen wins ties) or ``None`` when the span has none.
    """
    found = []
    total = len(tokens)
    start = 0
    while start < total:
        current_tag = slot_tags[start]
        if not current_tag.startswith("B-"):
            start += 1
            continue

        slot_type = current_tag[2:]  # e.g. "fromloc.city_name"
        continuation = f"I-{slot_type}"
        end = start + 1
        while end < total and slot_tags[end] == continuation:
            end += 1

        surface = " ".join(piece.replace("_", " ") for piece in tokens[start:end])

        # Majority vote over the non-"O" token intents covered by the span.
        votes = {}
        for label in token_intents[start:end]:
            if label != "O":
                votes[label] = votes.get(label, 0) + 1
        winner = max(votes, key=votes.get) if votes else None

        found.append({"key": slot_type, "text": surface, "intent": winner if winner else None})
        start = end
    return found

# --- Thay thế Cell 9 (Build Response) ---
def build_response_schema(utterance: str, result: dict):
    """Shape a predictor result into the JSON payload returned by the API.

    Args:
        utterance: The text that was analysed.
        result: Dict with the keys ``tokenized_text``, ``bpe_tokens``,
            ``h_cls_vector``, ``intent_probabilities`` and ``final_intents``.

    Returns:
        Dict with ``utterance``, a ``debug_info`` sub-dict (tokenization and a
        sample of the pooled vector), ``probabilities`` and ``intents``.
    """
    debug_info = {
        "tokenized": result["tokenized_text"],
        "bpe": result["bpe_tokens"],
        "h_cls_sample": result["h_cls_vector"],
    }
    response = {"utterance": utterance, "debug_info": debug_info}
    response["probabilities"] = result["intent_probabilities"]
    response["intents"] = result["final_intents"]
    return response

# --- Cell 11 giữ nguyên, API sẽ tự động trả về cấu trúc mới ---

In [13]:
# ====== FastAPI + ngrok (sửa phần chạy server trong thread) ======
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import nest_asyncio
from threading import Thread
from pyngrok import ngrok
import asyncio
from uvicorn import Config, Server

# Request body for POST /parse: a JSON object with one string field.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched — presumably a docstring would be surfaced there.)
class ParseIn(BaseModel):
    utterance: str  # raw user text; the /parse handler strips and tokenizes it

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

@app.get("/health")
def health():
    # Liveness probe: reports a fixed "ok" status plus the device string chosen
    # when the model was loaded. Kept as a comment (not a docstring) so the
    # endpoint's generated API description is unchanged.
    return dict(status="ok", device=device)

@app.post("/parse")
def parse(req: ParseIn):
    # Declared with plain `def` on purpose: word segmentation and the model
    # forward pass are blocking, CPU/GPU-bound calls. Inside an `async def`
    # handler they would run on the event loop and stall every other request
    # (including /health) until inference finished. FastAPI executes plain
    # `def` handlers in its worker thread pool instead.
    #
    # Pipeline: strip the utterance -> word-segment it -> predict intents with
    # a 0.9 score threshold -> wrap the result in the response schema.
    text = req.utterance.strip()
    tokens = tokenize_underthesea_seqin(text)
    result = predictor.predict_single(tokens, threshold=0.9)
    return build_response_schema(text, result)

# Local port the uvicorn server binds to; the ngrok tunnel below forwards to it.
PORT = 8000
# Patch asyncio so event loops can be nested/re-entered — presumably required
# because the notebook kernel already runs its own loop (TODO confirm).
nest_asyncio.apply()

# def run_server():
#     loop = asyncio.new_event_loop()
#     asyncio.set_event_loop(loop)
#     config = Config(app=app, host="0.0.0.0", port=PORT, log_level="info", loop="asyncio")
#     server = Server(config)
#     loop.run_until_complete(server.serve())

def run_server():
    """Serve ``app`` with uvicorn on a dedicated event loop until it stops.

    Intended as a thread target: it creates a fresh asyncio loop, installs it
    as the current loop for the calling thread, and blocks in
    ``server.serve()``. Logging is reduced to warnings and the access log is
    disabled so server output does not interleave with notebook output.
    """
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    server = Server(
        Config(
            app=app,
            host="0.0.0.0",
            port=PORT,
            loop="asyncio",
            log_level="warning",   # fewer log lines
            access_log=False,      # no per-request access log
        )
    )
    event_loop.run_until_complete(server.serve())

# Run the API server in a daemon thread so this cell returns immediately and
# the thread does not keep the process alive on shutdown.
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

# Earlier variant, kept for reference: opens a tunnel on a random ngrok URL.
# if NGROK_AUTH_TOKEN:
#     ngrok.set_auth_token(NGROK_AUTH_TOKEN)
# public_url = ngrok.connect(PORT, "http")
# print("PUBLIC URL:", public_url.public_url)

from pyngrok import ngrok, conf

NGROK_HOSTNAME = "hypothalamic-lianne-unfurnitured.ngrok-free.dev"  # your static ngrok domain

# Only configure the auth token when one was provided in the config cell.
if NGROK_AUTH_TOKEN:
    conf.get_default().auth_token = NGROK_AUTH_TOKEN

# Open an HTTP tunnel to the local server using the fixed hostname.
tunnel = ngrok.connect(addr=PORT, proto="http", hostname=NGROK_HOSTNAME)
public_url = tunnel.public_url
print("PUBLIC URL:", public_url)

PUBLIC URL: https://hypothalamic-lianne-unfurnitured.ngrok-free.dev                                 


In [14]:
"""
curl -s https://hypothalamic-lianne-unfurnitured.ngrok-free.dev/health        

curl -s -X POST https://hypothalamic-lianne-unfurnitured.ngrok-free.dev/parse \
     -H "Content-Type: application/json" \
     -d '{"utterance": "Thêm tiền đổ xăng 50k và xóa giao dịch hôm qua"}'
     
curl -s https://unwormy-heteronomously-adalberto.ngrok-free.dev/health                                              

curl -s -X POST https://unwormy-heteronomously-adalberto.ngrok-free.dev/parse \
  -H "Content-Type: application/json" \
  -d '{"utterance":"hôm nay tôi ăn bún bò hết 50k, xoá tiền trà sữa 30k hôm qua, sửa lại cái tiền điện hôm qua thành 1 triệu rưỡi"}'

curl -s -X POST https://unwormy-heteronomously-adalberto.ngrok-free.dev/parse \
  -H "Content-Type: application/json" \
  -d '{"utterance":"Ăn bánh cuốn 15k với xoá giao dịch đổ xăng petro hôm kia thì phải"}'

curl -s -X POST https://unwormy-heteronomously-adalberto.ngrok-free.dev/parse \
  -H "Content-Type: application/json" \
  -d '{"utterance":"Hôm nay trời đẹp lắm"}'
"""

'\ncurl -s https://unwormy-heteronomously-adalberto.ngrok-free.dev/health                                              \n\ncurl -s -X POST https://unwormy-heteronomously-adalberto.ngrok-free.dev/parse   -H "Content-Type: application/json"   -d \'{"utterance":"hôm nay tôi ăn bún bò hết 50k, xoá tiền trà sữa 30k hôm qua, sửa lại cái tiền điện hôm qua thành 1 triệu rưỡi"}\'\n\ncurl -s -X POST https://unwormy-heteronomously-adalberto.ngrok-free.dev/parse   -H "Content-Type: application/json"   -d \'{"utterance":"Ăn bánh cuốn 15k với xoá giao dịch đổ xăng petro hôm kia thì phải"}\'\n\ncurl -s -X POST https://unwormy-heteronomously-adalberto.ngrok-free.dev/parse   -H "Content-Type: application/json"   -d \'{"utterance":"Hôm nay trời đẹp lắm"}\'\n'

In [15]:
# {"intents":["add_expense","delete_expense"],"entities":[{"key":"target_description","text":"bánh cuốn","intent":"add_expense"},{"key":"target_price","text":"15 k","intent":"add_expense"},{"key":"condition_description","text":"xăng petro","intent":"delete_expense"},{"key":"condition_date","text":"hôm kia","intent":"delete_expense"}],"dev":[0.9999986886978149,7.823957275832072e-06,1.0,9.162023161479738e-06,3.7504628380702343e-06]}% 

In [16]:
"""
(base) minhkha@Khas-Mac ~ % curl -s https://64ece6504d06.ngrok-free.app/health
{"status":"ok","device":"cuda"}%                                                                                                                                                                            

(base) min(base) minhkha@Khas-Mac ~ % curl -s -X POST https://64ece6504d06.ngrok-free.app/parse \
  -H "Content-Type: application/json" \
  -d '{"utterance":"hôm nay tôi ăn bún bò hết 50k"}'
{"intents":["add_expense"],
"entities":[{"key":"target_date","text":"hôm nay","intent":"add_expense"},
{"key":"target_description","text":"bún bò","intent":"add_expense"},
{"key":"target_price","text":"50 k","intent":"add_expense"}]}

(base) minhkha@Khas-Mac ~ % curl -s -X POST https://64ece6504d06.ngrok-free.app/parse \
  -H "Content-Type: application/json" \
  -d '{"utterance":"hôm nay tôi ăn bún bò hết 50k, hôm qua sửa xe 30k"}'
{"intents":["add_expense"],
"entities":[
    {"key":"target_date","text":"hôm nay","intent":"add_expense"},
    {"key":"target_description","text":"bún bò","intent":"add_expense"},
    {"key":"target_price","text":"50 k","intent":"add_expense"},
    {"key":"target_date","text":"hôm qua","intent":"add_expense"},
    {"key":"target_description","text":"sửa xe","intent":"add_expense"},
    {"key":"target_price","text":"30 k","intent":"add_expense"}
]}

curl -s -X POST https://64ece6504d06.ngrok-free.app/parse \
  -H "Content-Type: application/json" \
  -d '{"utterance":"hôm nay tôi ăn bún bò hết 50k, xoá tiền trà sữa 30k hôm qua"}'
{"intents":["add_expense","delete_expense"],
"entities":[
    {"key":"target_date","text":"hôm nay","intent":"add_expense"},
    {"key":"target_description","text":"bún bò","intent":"add_expense"},
    {"key":"target_price","text":"50 k","intent":"add_expense"},
    {"key":"condition_description","text":"trà sữa","intent":"delete_expense"},
    {"key":"condition_price","text":"30 k","intent":"delete_expense"},
    {"key":"condition_date","text":"hôm qua","intent":"delete_expense"}]}%

(base) minhkha@Khas-Mac ~ % curl -s -X POST https://64ece6504d06.ngrok-free.app/parse \
  -H "Content-Type: application/json" \
  -d '{"utterance":"hôm nay tôi ăn bún bò hết 50k, xoá tiền trà sữa 30k hôm qua, sửa lại cái tiền điện hôm qua thành 1 triệu rưỡi"}'
{"intents":["add_expense","update_expense","delete_expense"],
"entities":[
    {"key":"target_date","text":"hôm nay","intent":"add_expense"},
    {"key":"target_description","text":"bún bò","intent":"add_expense"},
    {"key":"target_price","text":"50 k","intent":"add_expense"},
    {"key":"condition_description","text":"trà sữa","intent":"delete_expense"},
    {"key":"condition_price","text":"30 k","intent":"delete_expense"},
    {"key":"condition_date","text":"hôm qua","intent":"delete_expense"},
    {"key":"condition_description","text":"điện","intent":"update_expense"},
    {"key":"condition_date","text":"hôm qua","intent":"update_expense"},
    {"key":"target_price","text":"1 triệu rưỡi","intent":"update_expense"}
]}% 

{"intents":["add_expense","delete_expense"],
"entities":[
    {"key":"target_description","text":"bánh cuốn","intent":"add_expense"},
    {"key":"target_price","text":"15 k","intent":"add_expense"},
    {"key":"condition_description","text":"xăng petro","intent":"delete_expense"},
    {"key":"condition_date","text":"hôm kia","intent":"delete_expense"}
    ],
"dev":[0.9999986886978149,7.823957275832072e-06,1.0,9.162023161479738e-06,3.7504628380702343e-06]}% 
"""

'\n(base) minhkha@Khas-Mac ~ % curl -s https://64ece6504d06.ngrok-free.app/health\n{"status":"ok","device":"cuda"}%                                                                                                                                                                            \n\n(base) min(base) minhkha@Khas-Mac ~ % curl -s -X POST https://64ece6504d06.ngrok-free.app/parse   -H "Content-Type: application/json"   -d \'{"utterance":"hôm nay tôi ăn bún bò hết 50k"}\'\n{"intents":["add_expense"],\n"entities":[{"key":"target_date","text":"hôm nay","intent":"add_expense"},\n{"key":"target_description","text":"bún bò","intent":"add_expense"},\n{"key":"target_price","text":"50 k","intent":"add_expense"}]}\n\n(base) minhkha@Khas-Mac ~ % curl -s -X POST https://64ece6504d06.ngrok-free.app/parse   -H "Content-Type: application/json"   -d \'{"utterance":"hôm nay tôi ăn bún bò hết 50k, hôm qua sửa xe 30k"}\'\n{"intents":["add_expense"],\n"entities":[\n    {"key":"target_date","text":"hôm