In [1]:
import os
import json
import papermill as pm
import datetime

### BiLSTM-CRF with our features

In [2]:
# Launch a single papermill run of the BiLSTM-CRF training notebook that uses
# the custom ("our") features.

# Load the shared experiment settings. The context manager guarantees the file
# handle is closed instead of leaving it open until garbage collection.
with open(r"G:\PythonProjects\WineRecognition2\nn\experiment_settings.json") as settings_file:
    exp_settings = json.load(settings_file)

# Values handed to the target notebook as papermill parameters.
# START_TIME and OUTPUT_DIR are placeholders that are filled in just below.
experiment = {
    'MODEL_NAME': 'BiLSTM_CRF',
    'RUN_NAME': 'train-100WS-64-customFeatures',
    'START_TIME': None,
    'OUTPUT_DIR': None,
    'DATASET_PATH': r'G:\PythonProjects\WineRecognition2\data\text\halliday_winesearcher_menu_gen_samplesv2\Halliday_WineSearcher_MenuGenSamples.txt',
    'VOCAB_PATH': 'G:/PythonProjects/WineRecognition2/data/vocabs/Words_Halliday_Wine_AU.json',
    'DATAINFO_PATH': 'G:/PythonProjects/WineRecognition2/data_info.json',
    'DICTIONARY_PATH': r'G:\PythonProjects\WineRecognition2\data\dictionaries\Dict-byword_Halliday_Winesearcher_Wine_AU-only_completed_rows',
    'DEVICE': 'cuda',
    'BATCH_SIZE': 2048,
    'EMBEDDING_DIM': 64,
    'HIDDEN_DIM': 64,
    'NUM_EPOCHS': 150,
    'LEARNING_RATE': 0.01,
    'SCHEDULER_FACTOR': 0.1,
    'SCHEDULER_PATIENCE': 10,
    'CASE_SENSITIVE_VOCAB': False,
    'WEIGHT_DECAY': 1e-4,
    'TEST_SIZE': 0.2
}

# The run is identified by its start time (ddmmYYYY_HHMMSS); the output
# directory is "<artifacts_path>/train/<MODEL_NAME>_<START_TIME>".
experiment['START_TIME'] = '{:%d%m%Y_%H%M%S}'.format(datetime.datetime.now())
experiment['OUTPUT_DIR'] = f"{exp_settings['artifacts_path']}/train/{experiment['MODEL_NAME'] + '_' + experiment['START_TIME']}"

# makedirs(exist_ok=True) also creates a missing parent "train" directory and
# avoids the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs(experiment['OUTPUT_DIR'], exist_ok=True)

# Execute the training notebook with the parameters above and store the
# executed copy inside the run's output directory.
pm.execute_notebook(
    input_path='train_bilstm_crf_our_features.ipynb',
    output_path=os.path.join(experiment['OUTPUT_DIR'], 'train_bilstm_crf_our_features.ipynb'),
    parameters=experiment
)

Executing:   0%|          | 0/19 [00:00<?, ?cell/s]

{'cells': [{'cell_type': 'code',
   'execution_count': 1,
   'id': 'a4df137a-7825-4ee9-bba5-c22e64e58532',
   'metadata': {'tags': [],
    'papermill': {'exception': False,
     'start_time': '2021-12-17T07:03:02.356905',
     'end_time': '2021-12-17T07:03:04.470909',
     'duration': 2.114004,
     'status': 'completed'},
    'execution': {'iopub.status.busy': '2021-12-17T07:03:02.399909Z',
     'iopub.execute_input': '2021-12-17T07:03:02.400910Z',
     'iopub.status.idle': '2021-12-17T07:03:04.469905Z',
     'shell.execute_reply': '2021-12-17T07:03:04.469905Z'}},
   'outputs': [],
   'source': "import sys\nimport json\n\nimport torch\nfrom torch.optim import Adam\nfrom torch.utils.data import DataLoader\nfrom torch.optim.lr_scheduler import ReduceLROnPlateau\nfrom tqdm.notebook import tqdm\nfrom sklearn.model_selection import train_test_split\nimport numpy as np\n\nif r'G:\\PythonProjects\\WineRecognition2' not in sys.path:\n    sys.path.insert(0, r'G:\\PythonProjects\\WineRecognitio

### Standard BiLSTM-CRF

In [2]:
# Launch one papermill run of the standard BiLSTM-CRF training notebook per
# embedding dimension.

# Load the shared experiment settings. The context manager guarantees the file
# handle is closed instead of leaving it open until garbage collection.
with open(r"G:\PythonProjects\WineRecognition2\nn\experiment_settings.json") as settings_file:
    exp_settings = json.load(settings_file)

for embed_dim in [100, 64]:
    # Values handed to the target notebook as papermill parameters.
    # START_TIME and OUTPUT_DIR are placeholders that are filled in just below.
    experiment = {
        'MODEL_NAME': 'BiLSTM_CRF',
        'RUN_NAME': f'train-100WS-{embed_dim}',
        'START_TIME': None,
        'OUTPUT_DIR': None,
        'DATASET_PATH': r'G:\PythonProjects\WineRecognition2\data\text\halliday_winesearcher_menu_gen_samplesv2\Halliday_WineSearcher_MenuGenSamples.txt',
        'VOCAB_PATH': 'G:/PythonProjects/WineRecognition2/data/vocabs/Words_Halliday_Wine_AU.json',
        'DATAINFO_PATH': 'G:/PythonProjects/WineRecognition2/data_info.json',
        'DEVICE': 'cuda',
        'BATCH_SIZE': 2048,
        'EMBEDDING_DIM': embed_dim,
        'HIDDEN_DIM': 64,
        'NUM_EPOCHS': 150,
        'LEARNING_RATE': 0.01,
        'SCHEDULER_FACTOR': 0.1,
        'SCHEDULER_PATIENCE': 10,
        'CASE_SENSITIVE_VOCAB': False,
        'WEIGHT_DECAY': 1e-4,
        'TEST_SIZE': 0.2
    }

    # Each run is identified by its start time (ddmmYYYY_HHMMSS); the output
    # directory is "<artifacts_path>/train/<MODEL_NAME>_<START_TIME>".
    experiment['START_TIME'] = '{:%d%m%Y_%H%M%S}'.format(datetime.datetime.now())
    experiment['OUTPUT_DIR'] = f"{exp_settings['artifacts_path']}/train/{experiment['MODEL_NAME'] + '_' + experiment['START_TIME']}"

    # makedirs(exist_ok=True) also creates a missing parent "train" directory
    # and avoids the check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(experiment['OUTPUT_DIR'], exist_ok=True)

    # Execute the training notebook with the parameters above and store the
    # executed copy inside the run's output directory.
    pm.execute_notebook(
        input_path='train_bilstm_crf.ipynb',
        output_path=os.path.join(experiment['OUTPUT_DIR'], 'train_bilstm_crf.ipynb'),
        parameters=experiment
    )

Executing:   0%|          | 0/19 [00:00<?, ?cell/s]

Executing:   0%|          | 0/19 [00:00<?, ?cell/s]

In [None]:
# Grid of papermill runs over dataset variant ("percent") and embedding
# dimension, using the train_bilstm_crf_5_percent notebook.
# NOTE(review): unlike the other launcher cells in this notebook, this one does
# not pass SCHEDULER_FACTOR, SCHEDULER_PATIENCE or CASE_SENSITIVE_VOCAB --
# confirm the target notebook defines defaults for them (or does not use them).

# Load the shared experiment settings. The context manager guarantees the file
# handle is closed instead of leaving it open until garbage collection.
with open(r"G:\PythonProjects\WineRecognition2\nn\experiment_settings.json") as settings_file:
    exp_settings = json.load(settings_file)

# percent takes the values 85, 90, 95, 100 and selects the dataset file
# Halliday_WineSearcher_<percent>.txt.
for percent in range(85, 101, 5):
    for embedding_dim in [16, 32, 64, 128, 256]:
        # Values handed to the target notebook as papermill parameters.
        # START_TIME and OUTPUT_DIR are placeholders filled in just below.
        experiment = {
            'MODEL_NAME': 'BiLSTM_CRF',
            'RUN_NAME': 'Train-{}-{}'.format(percent, embedding_dim),
            'START_TIME': None,
            'OUTPUT_DIR': None,
            'DATASET_PATH': 'G:/PythonProjects/WineRecognition2/data/text/exp2_datasets/Halliday_WineSearcher_{}.txt'.format(percent),
            'VOCAB_PATH': 'G:/PythonProjects/WineRecognition2/data/vocabs/Words_Halliday_Wine_AU.json',
            'DATAINFO_PATH': 'G:/PythonProjects/WineRecognition2/data_info.json',
            'DEVICE': 'cuda',
            'BATCH_SIZE': 2048,
            'EMBEDDING_DIM': embedding_dim,
            'HIDDEN_DIM': 64,
            'NUM_EPOCHS': 100,
            'LEARNING_RATE': 0.01,
            'WEIGHT_DECAY': 1e-4,
            'TEST_SIZE': 0.2
        }

        # Each run is identified by its start time (ddmmYYYY_HHMMSS); the
        # output directory is "<artifacts_path>/train/<MODEL_NAME>_<START_TIME>".
        experiment['START_TIME'] = '{:%d%m%Y_%H%M%S}'.format(datetime.datetime.now())
        experiment['OUTPUT_DIR'] = '{}/train/{}'.format(exp_settings['artifacts_path'], experiment['MODEL_NAME'] + '_' + experiment['START_TIME'])

        # makedirs(exist_ok=True) also creates a missing parent "train"
        # directory and avoids the check-then-create race of
        # os.path.exists() + os.mkdir().
        os.makedirs(experiment['OUTPUT_DIR'], exist_ok=True)

        # Execute the training notebook with the parameters above and store
        # the executed copy inside the run's output directory.
        pm.execute_notebook(
            input_path='train_bilstm_crf_5_percent.ipynb',
            output_path=os.path.join(experiment['OUTPUT_DIR'], 'train_bilstm_crf_5_percent.ipynb'),
            parameters=experiment
        )