-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.ini
23 lines (20 loc) · 934 Bytes
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
; Dataset configuration file
; Settings for the dataset named "demo".
[demo]
; Should be one of "CSV" or "XML"
; NOTE(review): the value below is lowercase "csv" while the allowed values are
; listed in uppercase - confirm the consumer compares case-insensitively.
ExtractionMode = csv
; Should be one of "SpaCy", "BERT", or "PreTokenized"
; NB if using "BERT", need to have https://github.com/google-research/bert in the PYTHONPATH
Tokenizer = SpaCy
; Path to the directory holding the CSV data for this dataset.
; NOTE(review): the plural key name suggests several directories may be listed -
; confirm whether multiple entries are supported and which separator is expected.
DataDirectories = data/demo_datasets/demo_labeled_dataset/csv
; Pattern for CSV file names; the parenthesised group presumably captures the
; document identifier - TODO confirm.
; NOTE(review): if this is evaluated as a regular expression, the unescaped "."
; matches any character, not just a literal dot - verify against the consumer.
CSVIdentifierPattern = (file[0-9]*).csv
; Path to the directory holding the plain-text counterparts of the CSV files.
PlaintextDirectory = data/demo_datasets/demo_labeled_dataset/txt
; Template for plain-text file names; presumably {0} is replaced with the
; identifier captured by CSVIdentifierPattern - TODO confirm.
PlaintextIdentifierPattern = {0}.txt
; Locations of the BERT-Base model files (all three point into the
; uncased_L-12_H-768_A-12 directory).
; NOTE(review): presumably only consulted when a dataset sets Tokenizer = BERT -
; confirm against the consumer.
[BERT]
; Model checkpoint path. No file extension after ".ckpt" is given here;
; presumably this is a checkpoint prefix rather than a single file - TODO confirm.
BERT-Base CkptFile = data/uncased_L-12_H-768_A-12/bert_model.ckpt
; Vocabulary file path.
BERT-Base VocabFile = data/uncased_L-12_H-768_A-12/vocab.txt
; Model configuration (JSON) file path.
BERT-Base ConfigFile = data/uncased_L-12_H-768_A-12/bert_config.json
; Static word-embedding files available to experiments.
[Static Embeddings]
; The key name (here "GoogleNews") is the value to pass as the CTXEMBS argument
; when running experiments; the value is the path to the embedding file.
GoogleNews = data/GoogleNews-vectors-negative300.bin
; File format of the embedding above.
; Should be "bin" (for binary word2vec format) or "txt" (for text word2vec format)
; NOTE(review): the key appears to follow the convention "<embedding name> Format" -
; confirm that each embedding entry needs a matching Format key.
GoogleNews Format = bin