In [1]:
import os
import json
import numpy as np

In [2]:
import pickle

In [3]:
!pwd

/usr/itetnas04/data-scratch-01/fencai/data/diora


In [4]:
class Vocabulary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word):
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __call__(self, word):
        if word not in self.word2idx:
            return self.word2idx['<unk>']
        return self.word2idx[word]

    def __len__(self):
        return len(self.word2idx)

In [5]:
with open('./pytorch/data/partit_data/partnet.dict.pkl', 'rb') as r:
    x = pickle.load(r)

In [6]:
x

<__main__.Vocabulary at 0x7f23e4774c40>

## Validation

In [13]:
# 0.chair aaff8d73 model.step_329900.pt 1380e247 [all: ab1811e0 / 06db957b / 14126ca7]
# 1.table aea84162 model.step_317900.pt b3d65fcf [all: 1cc2528f / d42498e3 / 22f0f3e5]
# 2.bed e81dd9de model.step_57900.pt d43523b6 [all: 408181da / 2e1de6e1 / ea6511bd]
# 3.bag 9149db2e model.step_25900.pt 4bbd9841 [all: 4bb6f870 / 12b06d81 / 56c1faa4]

# 0.chair 037dee7e
# 1.table 1229d735
# 2.bed d66314ad
# 3.bag 87bf56b8

path = './log/f1f35afc/parse.jsonl'
type_ = '3.bag'

In [36]:
with open(path, 'r') as f:
    lines = [l.strip() for l in f.readlines()]

In [37]:
lines_res = [json.loads(x) for x in lines]

In [42]:
lines_res

[{'example_id': '9077',
  'tree': [[['this', 'is'],
    [[[[[[['a', 'bag'], 'with'], [['a', 'square'], 'body']],
        [['and', '2'], 'handles']],
       'at'],
      ['the', 'top']],
     'of']],
   'it']},
 {'example_id': '9337',
  'tree': [[[['this', 'is'],
     [[[['a', 'bag'], [['with', 'a'], 'body']], [['and', '2'], 'handles']],
      [['at', 'the'], 'top']]],
    'of'],
   'it']},
 {'example_id': '9537',
  'tree': [[['this', 'is'],
    [[['a', 'bag'],
      [[['with', 'a'], 'handle'], [[['and', '2'], 'straps'], ['at', 'the']]]],
     ['top', 'of']]],
   'it']},
 {'example_id': '9123',
  'tree': [[[[['this', 'is'],
      [[[[['a', 'bag'], 'with'], 'a'], ['half', 'circle']],
       [[['body', 'and'], '1'], ['handle', 'at']]]],
     ['the', 'top']],
    'of'],
   'it']},
 {'example_id': '9430',
  'tree': [[['this', 'is'],
    [[[['a', 'bag'], 'with'], [['a', 'square'], 'body']],
     [[',', [['and', '2'], 'handles']], 'at']]],
   [[['the', 'top'], 'of'], 'it']]},
 {'example_id': 

In [38]:
def get_len(tree):
    if isinstance(tree, str):
        return 1

    return sum([get_len(x) for x in tree])

In [39]:
# bfs
def get_spans(tree):
    queue = [(tree, 0)]
    spans = []

    while queue:
        current_node = queue.pop(0)

        tree = current_node[0]
        offset = current_node[1]

        spans.append((offset, offset + get_len(tree)-1))

        if not isinstance(tree[0], str):
            queue.append((tree[0], offset))

        if not isinstance(tree[1], str):
            queue.append((tree[1], offset + get_len(tree[0])))
    return set(spans)

In [40]:
def get_stats(span1, span2):
    tp = 0
    fp = 0
    fn = 0
    for span in span1:
        if span in span2:
            tp += 1
        else:
            fp += 1

    for span in span2:
        if span not in span1:
            fn += 1
#     print('tp: {}; fp: {}; fn: {}'.format(tp, fp, fn))
    return tp, fp, fn

In [41]:
sent_f1_txt, corpus_f1_txt = [], [0., 0., 0.]

for idx, line in enumerate(lines_res):
    pred_txt = get_spans(line['tree'])
    example_id = line['example_id']
    with open(os.path.join(f'pytorch/data/partit_data/{type_}/test/', example_id, 'lan_spans.txt'), 'r') as w:
        gold_txt = json.loads(w.read())
    gold_txt = set([(a, b) for a, b in gold_txt])
    
    tp_txt, fp_txt, fn_txt = get_stats(pred_txt, gold_txt) 
    corpus_f1_txt[0] += tp_txt
    corpus_f1_txt[1] += fp_txt
    corpus_f1_txt[2] += fn_txt

    overlap_txt = pred_txt.intersection(gold_txt)
    prec_txt = float(len(overlap_txt)) / (len(pred_txt) + 1e-8)
    reca_txt = float(len(overlap_txt)) / (len(gold_txt) + 1e-8)

    if len(gold_txt) == 0:
        reca_txt = 1. 
        if len(pred_txt) == 0:
            prec_txt = 1.
    f1_txt = 2 * prec_txt * reca_txt / (prec_txt + reca_txt + 1e-8)
    sent_f1_txt.append(f1_txt)

tp_txt, fp_txt, fn_txt = corpus_f1_txt  
prec_txt = tp_txt / (tp_txt + fp_txt)
recall_txt = tp_txt / (tp_txt + fn_txt)
corpus_f1_txt = 2 * prec_txt * recall_txt / (prec_txt + recall_txt) if prec_txt + recall_txt > 0 else 0.
sent_f1_txt = np.mean(np.array(sent_f1_txt))
print('prec_txt: ', prec_txt)
print('recall_txt: ', recall_txt)
print('corpus_f1_txt: ', corpus_f1_txt)
print('sent_f1_txt: ', sent_f1_txt)

prec_txt:  0.2440677966101695
recall_txt:  0.2440677966101695
corpus_f1_txt:  0.2440677966101695
sent_f1_txt:  0.25622640726655654


In [100]:
variable = 42
user_input = "The answer is {variable}"
user_input_formatted = user_input.format(variable=variable)
print(user_input_formatted)

The answer is 42


## Check the coverage between train and test

In [92]:
file_dir = f'./pytorch/data/partit_data//train/'.format(type_)
dir_list = [x for x in os.listdir(file_dir) if '.' not in x]
textfile_list = [
    os.path.join(file_dir, dir_name, 'utterance.txt') for dir_name in dir_list
]

sentences = []

for textfile in textfile_list:
    with open(textfile, 'r') as r:
        sentences.append(r.read().replace(".", "").replace(",", " , ").replace(":", " : ").replace(";", " ; ").replace("/", " ").replace("\'", " \'").replace("\"", " \"").strip().split())

In [97]:
word_train_set = set([tk for tks in sentences for tk in tks])

In [99]:
len(word_train_set)

1480

In [100]:
file_dir = './pytorch/data/partit_data/0.chair/test/'
dir_list = [x for x in os.listdir(file_dir) if '.' not in x]
textfile_list = [
    os.path.join(file_dir, dir_name, 'utterance.txt') for dir_name in dir_list
]

sentences = []

for textfile in textfile_list:
    with open(textfile, 'r') as r:
        sentences.append(r.read().replace(".", "").replace(",", " , ").replace(":", " : ").replace(";", " ; ").replace("/", " ").replace("\'", " \'").replace("\"", " \"").strip().split())

In [101]:
word_test_set = set([tk for tks in sentences for tk in tks])

In [102]:
len(word_test_set)

696

In [104]:
len(word_train_set.intersection(word_test_set))

544

In [1]:
import torchvision.models as models

In [3]:
models.resnet18()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
backbone.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)

In [4]:
models.resnet50()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [24]:
# remove the useless log
import os
import shutil

In [3]:
exist_logs = os.listdir('./log/')

In [10]:
len(exist_logs)

385

In [5]:
with open('logs.txt', 'r') as f:
    txt_files = f.read().strip().split('\n')

In [14]:
len(txt_files)

33

In [8]:
remains = set(exist_logs) - set(txt_files)

In [25]:
for x in list(remains):
    print(x)
    shutil.rmtree(os.path.join('log', x))

c4b2bf7d
94b04815
87af180c
7656076a
da9dce19
fd9a9d0b
c1627884
e068a6c0
77d0bd2e
9415fca3
98475d11
8200e012
e2aa8afe
e69adf2f
f49d6e31
e48007bc
cd25a3e9
f53faddd
b1f36aef
a7853e5f
aa8f2ead
2f559274
c9b7d0a4
927cb70a
89c3f8de
951919e6
98ff788c
cf398588
16f0b62d
891cc59d
ea3559c9
751332d6
a7c9006b
77ab5303
84712c40
58131aca
8a804247
59c86365
a61abc8c
859f5db2
6f3d8e09
7b0673e6
0147d678
d23791e3
ffec82ad
d096c72a
4f94fbbd
51ab8394
569a1213
53284e51
6f1e6716
de6f3684
4a0e60ee
c8f77d9f
8a5ec13f
e3fa3575
7f5ce2be
08710a35
5f8f00cd
00a6aacb
f1fa6ae7
0c3218a5
d4acc12f
ad1dd814
601b9ab7
e66c13e1
9b33f33b
5a34a98c
cacc39dc
e445bc22
720594da
267a2a56
8120a430
b4026700
54347791
2fe224ff
25fbb871
f33b8c4a
bd7461b3
b5b3766f
b49cb94c
a9bb2a52
6cf59271
de6c3892
e2b16f55
2af132be
5d30e3ed
5e05b11e
7ddeff5f
ad7a5a08
88b12ee3
d957c1a6
dc9b9373
c783bcdc
8f68eecd
7d098280
4551a267
990ce40e
ab60f17c
fc3595f4
9944db09
a4d436af
73d64ef8
5b23ca7b
14496c99
d2605629
422f602d
b15b587c
fb7b0e6b
0fd8b63a
3016664c
1