In [1]:
import sys
import json
import torch

# Make the project checkout importable so the `nn.*` imports below resolve.
# The membership guard keeps the entry from being inserted twice on re-run.
_project_dir = r'G:\PythonProjects\WineRecognition2'
if _project_dir not in sys.path:
    sys.path.insert(0, _project_dir)

from nn.utils import generate_tag_to_ix
from nn.mlflow_utils import log_mlflow_on_test

In [2]:
# Run configuration: everything a reader may need to change lives in this cell.
# All file locations are derived from PROJECT_ROOT so the notebook can be
# pointed at another checkout by editing a single line.
PROJECT_ROOT = 'G:/PythonProjects/WineRecognition2'

MODEL_NAME = 'BiLSTM_CRF'
# Serialized model produced by a training run.
MODEL_PATH = PROJECT_ROOT + '/artifacts/train/BiLSTM_CRF_10112021_032950/model/data/model.pth'
RUN_NAME = 'Test-100-256'
START_TIME = ''
OUTPUT_DIR = PROJECT_ROOT + '/artifacts/test/test'
# Text file with one menu entry per line.
DATA_PATH = PROJECT_ROOT + '/data/menus/Wines.txt'
# JSON mapping word -> integer index (must contain an 'UNK' entry).
VOCAB_PATH = PROJECT_ROOT + '/data/vocabs/Words_Halliday_Wine_AU.json'
# JSON file whose ['keys']['all'] entry lists the tag names.
DATAINFO_PATH = PROJECT_ROOT + '/data_info.json'
DEVICE = 'cpu'

In [3]:
# Load the word -> index vocabulary from JSON and show how many entries it has.
with open(VOCAB_PATH, mode='r', encoding='utf-8') as vocab_file:
    word_to_ix = json.loads(vocab_file.read())
len(word_to_ix)

12139

In [4]:
# Read the menu file into a list of whitespace-split token lists, one per
# non-empty line. Lines that start with 'http' are skipped.
x_test = []
# `with` guarantees the file handle is closed once reading finishes.
with open(DATA_PATH, encoding='utf-8') as menu_file:
    for line in menu_file:
        line = line.strip()
        if not line or line.startswith('http'):
            continue
        x_test.append(line.split())

In [5]:
# Encode every tokenized line as an int64 tensor of vocabulary indices.
# Words missing from the vocabulary fall back to the index of 'UNK'.
unk_ix = word_to_ix['UNK']
x_tensor = [
    torch.tensor([word_to_ix.get(word, unk_ix) for word in line], dtype=torch.int64)
    for line in x_test
]
# Preview only the first few tensors instead of dumping the whole list.
x_tensor[:3]

In [6]:
# Load the serialized model and switch it to inference mode.
# map_location remaps the stored tensors onto DEVICE while loading, so a
# checkpoint saved on a GPU can still be opened on a CPU-only machine.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code; only load checkpoints from a trusted source.
model = torch.load(MODEL_PATH, map_location=DEVICE).to(DEVICE)
model.eval()

BiLSTM_CRF(
  (embedding): Embedding(12139, 256, padding_idx=12137)
  (lstm): LSTM(256, 64, batch_first=True, bidirectional=True)
  (hidden2tags): Linear(in_features=128, out_features=15, bias=True)
  (crf): CRF(num_tags=15)
)

In [7]:
# Run the model on each encoded line, one line at a time, collecting the raw
# model outputs in y_pred. Gradients are disabled because this is inference.
y_pred = []
with torch.no_grad():
    # The loop variable is deliberately not called `input`: that would shadow
    # the builtin for every later cell in the kernel.
    for token_ids in x_tensor:
        # unsqueeze(0) adds a leading dimension of size 1 (a batch of one line).
        batch = token_ids.to(DEVICE).unsqueeze(0)
        y_pred.append(model(batch))

  score = torch.where(mask[i].unsqueeze(1), next_score, score)


In [8]:
# Build the tag <-> index mappings from the tag names listed under
# ['keys']['all'] in the data-info JSON, plus an extra 'Punctuation' tag.
# The encoding is given explicitly, matching the other file reads in this
# notebook, so the result does not depend on the platform default.
with open(DATAINFO_PATH, encoding='utf-8') as datainfo_file:
    datainfo = json.load(datainfo_file)
tag_to_ix = generate_tag_to_ix(datainfo['keys']['all'] + ['Punctuation'])
# Inverse mapping, used to turn predicted indices back into tag names.
ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}
ix_to_tag

{0: 'Add_TradeName',
 1: 'Add_Brand',
 2: 'Add_KeyWordTrue',
 3: 'Add_KeyWordFalse',
 4: 'Add_GrapeVarieties',
 5: 'Add_GeoIndication',
 6: 'Add_WineType',
 7: 'Add_BottleSize',
 8: 'Add_Sweetness',
 9: 'Add_WineColor',
 10: 'Add_ClosureType',
 11: 'Add_Certificate',
 12: 'Add_Vintage',
 13: 'Add_Price',
 14: 'Punctuation'}

In [9]:
# Replace each raw prediction in y_pred (in place) with the list of tag names
# for its first element, i.e. the only sequence in the one-line batch.
# NOTE(review): this overwrites y_pred, so the cell is presumably not safe to
# run twice without re-running the prediction cell first.
y_pred[:] = [
    [ix_to_tag[tag_ix] for tag_ix in decoded[0]]
    for decoded in y_pred
]

In [10]:
# Collect the run configuration into one mapping that is handed to the
# MLflow logging helper.
run_params = dict([
    ('model_name', MODEL_NAME),
    ('model_path', MODEL_PATH),
    ('run_name', RUN_NAME),
    ('start_time', START_TIME),
    ('output_dir', OUTPUT_DIR),
    ('data_path', DATA_PATH),
    ('vocab_path', VOCAB_PATH),
    ('datainfo_path', DATAINFO_PATH),
    ('device', DEVICE),
])

In [11]:
# Hand the run parameters, the tag names, the tokenized input lines and the
# predicted tag sequences to the project's MLflow logging helper.
log_mlflow_on_test(run_params, classes=[*ix_to_tag.values()], x=x_test, y=y_pred)

['%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s']
['Add_Vintage', 'Add_TradeName', 'Add_GeoIndication', 'Add_GeoIndication', 'Add_KeyWordTrue', 'Add_GeoIndication', 'Add_GeoIndication', 'Add_GrapeVarieties', 'Add_GrapeVarieties', 'Add_KeyWordTrue']
['2018', 'Oakridge', 'Yarra', 'Valley', 'Series', 'Yarra', 'Valley', 'Pinot', 'Noir', '75']
['%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s']
['Add_Vintage', 'Add_TradeName', 'Add_TradeName', 'Add_KeyWordTrue', 'Add_KeyWordFalse', 'Add_GeoIndication', 'Add_GeoIndication', 'Add_GrapeVarieties', 'Add_GrapeVarieties', 'Add_BottleSize']
['2018', 'Giant', 'Steps', 'Nocton', 'Vineyard', 'Yarra', 'Valley', 'Pinot', 'Noir', '155']
['%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s', '%-20s']
['Add_Vintage', 'Add_TradeName', 'Add_TradeName', 'Add_GeoIndication', 'Add_GeoIndication', 'Add_GrapeVarieties', 'Add_GrapeVarieties', 'Add_BottleSize']
['2017', 'Mount