In [1]:
!pip install transformers



In [2]:
!pip install gdown
!sudo apt-get install unzip
!gdown https://dl.fbaipublicfiles.com/MLQA/MLQA_V1.zip
!unzip MLQA_V1.zip
!rm MLQA_V1.zip

Reading package lists... Done
Building dependency tree       
Reading state information... Done
unzip is already the newest version (6.0-21ubuntu1).
0 upgraded, 0 newly installed, 0 to remove and 14 not upgraded.
Downloading...
From: https://dl.fbaipublicfiles.com/MLQA/MLQA_V1.zip
To: /content/MLQA_V1.zip
100% 75.7M/75.7M [00:01<00:00, 67.8MB/s]
Archive:  MLQA_V1.zip
replace MLQA_V1/dev/dev-context-ar-question-ar.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: MLQA_V1/dev/dev-context-ar-question-ar.json  
  inflating: MLQA_V1/dev/dev-context-ar-question-de.json  
  inflating: MLQA_V1/dev/dev-context-ar-question-en.json  
  inflating: MLQA_V1/dev/dev-context-ar-question-es.json  
  inflating: MLQA_V1/dev/dev-context-ar-question-hi.json  
  inflating: MLQA_V1/dev/dev-context-ar-question-vi.json  
  inflating: MLQA_V1/dev/dev-context-ar-question-zh.json  
  inflating: MLQA_V1/dev/dev-context-de-question-ar.json  
  inflating: MLQA_V1/dev/dev-context-de-question-de.json  
  infl

In [3]:
from transformers import MarianMTModel, MarianTokenizer

def translate_a_list(sentences):
    batch_size = 30
    n_sentences = len(sentences) 
    n_batch = int((n_sentences-1) / batch_size)
    result = []
    for i in range(n_batch+1):
#         print("{}/{}: {:2%}".format(i, n_batch+1, i/(n_batch+1)))
        to_translate = sentences[i*batch_size : min((i+1)*batch_size, n_sentences)]
        tokenized = tokenizer.prepare_seq2seq_batch(to_translate).to('cuda')
        translated = model.generate(**tokenized)
        tgt_text = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
        result += tgt_text
    return result

model_name = 'Helsinki-NLP/opus-mt-en-zh'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to('cuda')

sentences = ['What\'s the weather like today?', 'How are you?'] 
print(sentences)
print(translate_a_list(sentences))

["What's the weather like today?", 'How are you?']
['今天天气怎么样?', '你好吗?']


In [4]:
import json
def read_MLQA_v2(path):
    with open(path, 'r') as f:
        MLQA_dict = json.load(f)
    contexts = []
    questions = []
    answers = []
    
    context_idx = 0
    for group in MLQA_dict['data']:
        for passage in group['paragraphs']:
            context = passage['context']
            contexts.append(context)
            for qa in passage['qas']:
                question = qa['question']
                for answer in qa['answers']:
                    answer['context_idx'] = context_idx
                    questions.append(question)
                    answers.append(answer)
            context_idx += 1

    return contexts, questions, answers

train_contexts_v2, train_questions_v2, train_answers_v2 = read_MLQA_v2('MLQA_V1/test/test-context-en-question-en.json')
val_contexts_v2, val_questions_v2, val_answers_v2 = read_MLQA_v2('MLQA_V1/dev/dev-context-en-question-en.json')
print(len(train_contexts_v2))
print(len(train_questions_v2))
print(train_answers_v2[1234])

9916
11590
{'text': 'obesity, prolonged sitting, a chronic cough, and pelvic floor dysfunction', 'answer_start': 449, 'context_idx': 955}


In [5]:
train_contexts_v2_zh = translate_a_list(train_contexts_v2)
print(train_contexts_v2_zh[8])
print(train_contexts_v2[8])

某些类型的网络交通可能希望或需要明确的服务质量,例如:
A defined quality of service may be desired or required for certain types of network traffic, for example:


In [6]:
train_questions_v2_zh = translate_a_list(train_questions_v2) # it takes a tremendous time

In [7]:
train_answers_v2_plain = [answer['text'] for answer in train_answers_v2]
train_answers_v2_plain_zh = translate_a_list(train_answers_v2_plain)

In [8]:
import json
def read_MLQA(path):
    with open(path, 'r') as f:
        MLQA_dict = json.load(f)
    contexts = []
    questions = []
    answers = []

    for group in MLQA_dict['data']:
        for passage in group['paragraphs']:
            context = passage['context']
            for qa in passage['qas']:
                question = qa['question']
                for answer in qa['answers']:
                    contexts.append(context)
                    questions.append(question)
                    answers.append(answer)

    return contexts, questions, answers

val_contexts, val_questions, val_answers = read_MLQA('MLQA_V1/dev/dev-context-en-question-en.json')
zh_contexts, zh_questions, zh_answers = read_MLQA('MLQA_V1/dev/dev-context-zh-question-zh.json')
train_contexts, train_questions, train_answers = read_MLQA('MLQA_V1/test/test-context-en-question-en.json')
print(len(train_contexts))
print(len(val_contexts))
print(len(zh_contexts))
print(val_questions[0], val_answers[0])

11590
1148
504
Does an infection for Sandflies go away over time? {'text': 'remains infected for its lifetime', 'answer_start': 571}


In [9]:
def add_end_idx(answers, contexts):
    for answer, context in zip(answers, contexts):
        gold_text = answer['text']
        start_idx = answer['answer_start']
        end_idx = start_idx + len(gold_text)

        # sometimes squad answers are off by a character or two – fix this
        if context[start_idx:end_idx] == gold_text:
            answer['answer_end'] = end_idx
        elif context[start_idx-1:end_idx-1] == gold_text:
            answer['answer_start'] = start_idx - 1
            answer['answer_end'] = end_idx - 1     # When the gold label is off by one character
        elif context[start_idx-2:end_idx-2] == gold_text:
            answer['answer_start'] = start_idx - 2
            answer['answer_end'] = end_idx - 2     # When the gold label is off by two characters
        else:
            print('****')

add_end_idx(train_answers, train_contexts)
add_end_idx(val_answers, val_contexts)
add_end_idx(zh_answers, zh_contexts)
print(val_questions[0], val_answers[0])
print(zh_questions[2], zh_answers[2])

Does an infection for Sandflies go away over time? {'text': 'remains infected for its lifetime', 'answer_start': 571, 'answer_end': 604}
俄罗斯有多少队获得参赛资格？ {'text': '十支', 'answer_start': 153, 'answer_end': 155}


In [10]:
from transformers import BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained('bert-base-multilingual-cased')

train_encodings = tokenizer(train_contexts, train_questions, truncation=True, padding=True)
val_encodings = tokenizer(val_contexts, val_questions, truncation=True, padding=True)
zh_encodings = tokenizer(zh_contexts, zh_questions, truncation=True, padding=True)

In [11]:
def add_token_positions(encodings, answers):
    start_positions = []
    end_positions = []
    for i in range(len(answers)):
        start_positions.append(encodings.char_to_token(i, answers[i]['answer_start']))
        end_positions.append(encodings.char_to_token(i, answers[i]['answer_end'] - 1))
        # if None, the answer passage has been truncated
        if start_positions[-1] is None:
            start_positions[-1] = tokenizer.model_max_length
        if end_positions[-1] is None:
            end_positions[-1] = tokenizer.model_max_length
    encodings.update({'start_positions': start_positions, 'end_positions': end_positions})

add_token_positions(train_encodings, train_answers)
add_token_positions(val_encodings, val_answers)
add_token_positions(zh_encodings, zh_answers)

In [12]:
# GET contexts_token_en_int to align
n_questions = len(train_questions)
contexts_token_en_int = []
for i in range(n_questions):
    context_idx = train_answers_v2[i]['context_idx']
    if context_idx != len(contexts_token_en_int):
        continue
    context_token = []
    for tok in train_encodings['input_ids'][i][1:]:
        if tok != 102:
            context_token.append(tok)
        else:
            break
    contexts_token_en_int.append(context_token)

In [13]:
train_contexts_v2_zh[2896]='吡啶在己烷中的光吸收光谱在195 nm(π→π*跃迁,摩尔吸收率ε= 7500 L·mol-1·cm-1),251 nm(π→π*跃迁,ε)的波长处包含三个谱带= 2000 L·mol-1·cm-1)和270 nm(n→π*跃迁,ε= 450 L·mol-1·cm-1).吡啶的1H核磁共振(NMR)光谱包含三个信号,其积分强度比为2：1：2,与分子中的三个化学上不同的质子相对应.这些信号来自α质子(位置2和6,化学位移8.5 ppm),γ质子(位置4，7.5 ppm)和β质子(位置3和5,7.1 ppm).吡啶的碳类似物苯只有7.27 ppm的质子信号.与苯相比,α和γ质子的化学位移较大,这是由于α和γ位置的电子密度较低,这可以从共振结构得出.吡啶和苯的13C NMR谱图的情况非常相似:吡啶在δ(α-C)= 150 ppm,δ(β-C)= 124 ppm和δ(γ-C)= 136 ppm时显示三重态,而苯在129 ppm处有一条单线.所有班次均引用了无溶剂物质.吡啶通常通过气相色谱法和质谱法检测.'

In [14]:
# Step 1.get full contexts for every qa
train_contexts_v2_zh_full = []
for i in range(n_questions):
    train_contexts_v2_zh_full.append(train_contexts_v2_zh[train_answers_v2[i]['context_idx']])

# Step 2.tokenize the chinese sentence
train_encodings_zh = tokenizer(train_contexts_v2_zh_full, train_questions_v2_zh, truncation=True, padding=True)

# Step 3.get contexts_token_zh_int
contexts_token_zh_int = []
for i in range(n_questions):
    context_idx = train_answers_v2[i]['context_idx']
    if context_idx != len(contexts_token_zh_int):
        continue
    context_token = []
    for tok in train_encodings_zh['input_ids'][i][1:]:
        if tok != 102:
            context_token.append(tok)
        else:
            break
    contexts_token_zh_int.append(context_token)
    
print(len(contexts_token_zh_int[0]))
print(len(contexts_token_en_int[0]))

354
289


In [15]:
! git clone https://github.com/clab/fast_align.git
! sudo apt - get install libgoogle - perftools - dev libsparsehash - dev
% cd fast_align
! mkdir build
% cd build
! cmake ..
! make

Cloning into 'fast_align'...
remote: Enumerating objects: 9, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 213 (delta 2), reused 4 (delta 2), pack-reused 204[K
Receiving objects: 100% (213/213), 70.68 KiB | 7.85 MiB/s, done.
Resolving deltas: 100% (110/110), done.
E: Invalid operation get
/content/fast_align
/content/fast_align/build
-- The C compiler identification is GNU 7.5.0
-- The CXX compiler identification is GNU 7.5.0
-- Check for working C compiler: /usr/bin/cc
-- Check for working C compiler: /usr/bin/cc -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Check for working CXX compiler: /usr/bin/c++
-- Check for working CXX compiler: /usr/bin/c++ -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - 

In [16]:
# %cd fast_align/build
def combine_token_lists(en_token, zh_token):
    n = len(en_token)
    result = []
    for i in range(n):
        en_i = en_token[i]
        zh_i = zh_token[i]
        one_combination = [str(tok) for tok in en_i] + ['|||'] + [str(tok) for tok in zh_i]
        result.append(' '.join(one_combination) + '\n')
    return result


en_token = contexts_token_en_int
zh_token = contexts_token_zh_int    
input_of_fastalign = combine_token_lists(en_token, zh_token)
with open('to_align.txt', 'w') as f:
    f.writelines(input_of_fastalign)

In [17]:
%pwd

'/content/fast_align/build'

In [18]:
! ./fast_align -i to_align.txt -d -o -v > forward.align

ARG=i
ARG=d
ARG=o
ARG=v
INITIAL PASS 
.........
expected target length = source length * 1.1595
ITERATION 1
.........
  log_e likelihood: -4.30224e+07
  log_2 likelihood: -6.20682e+07
     cross entropy: 29.8974
        perplexity: 1e+09
      posterior p0: 0.08
 posterior al-feat: -0.162334
       size counts: 8301
ITERATION 2
.........
  log_e likelihood: -1.48968e+07
  log_2 likelihood: -2.14915e+07
     cross entropy: 10.3521
        perplexity: 1307.08
      posterior p0: 0.128264
 posterior al-feat: -0.142384
       size counts: 8301
  1  model al-feat: -0.166956 (tension=4)
  2  model al-feat: -0.155996 (tension=4.49144)
  3  model al-feat: -0.150384 (tension=4.76368)
  4  model al-feat: -0.147231 (tension=4.92368)
  5  model al-feat: -0.145371 (tension=5.02062)
  6  model al-feat: -0.144243 (tension=5.08036)
  7  model al-feat: -0.143549 (tension=5.11756)
  8  model al-feat: -0.143116 (tension=5.14085)
     final tension: 5.1555
ITERATION 3
.........
  log_e likelihood: -1.2533

In [19]:
# Step 1: Convert chinese answer to token
tokenizer = BertTokenizerFast.from_pretrained('bert-base-multilingual-cased')
answer_token_zh_int_mid = tokenizer(train_answers_v2_plain_zh, truncation=True, padding=True)
answer_token_zh_int = []
for answer in answer_token_zh_int_mid['input_ids']:
    l = answer
    res = l[l.index(101) + 1: l.index(102)]
    answer_token_zh_int.append(res)

# Step 2: Find index of tokens for english answers
# range is [a,b]
answer_source_start_token = []
answer_source_end_token = []
for i in range(n_questions):
    s = train_encodings['start_positions'][i] - 1
    t = train_encodings['end_positions'][i] - 1
    answer_source_start_token.append(s)
    answer_source_end_token.append(t)

# Step 3: Build context mapping dict from aligned file
context_mappings = []
with open('forward.align', 'r') as f:
    for line in f:
        context_mapping = {}
        maps = line.strip().split()
        for m in maps:
            src, tgt = m.split('-')
            src = int(src)
            tgt = int(tgt)
            if src in context_mapping.keys():
                context_mapping[src].append(tgt)
            else:
                context_mapping[src] = [tgt]
        context_mappings.append(context_mapping)

print(len(context_mappings))
print(len(train_contexts_v2))

9916
9916


In [20]:
answer_target_start_token_possible = []
answer_target_end_token_possible = []

star_cnt = 0
for i in range(n_questions):
    s_start = answer_source_start_token[i]
    s_end = answer_source_end_token[i]
    t_start = 10000
    t_end = -1
    context_idx = train_answers_v2[i]['context_idx']
    context_mapping = context_mappings[context_idx]
    for j in range(s_start, s_end + 1):
        if j in context_mapping:
            for tgt in context_mapping[j]:
                t_start = min(t_start, tgt)
                t_end = max(t_end, tgt)
    if t_start == 10000 or t_end == -1:
        star_cnt += 1
        t_start = 0
        t_end = -1
    answer_target_start_token_possible.append(t_start)
    answer_target_end_token_possible.append(t_end)
    
print(star_cnt)

1740


In [21]:
def find_sublist(l, sub):
    l1 = len(l)
    l2 = len(sub)
    for i in range(0, l1-l2+1):
        suc = True
        for j in range(l2):
            if l[i+j] != sub[j]:
                suc = False
                break
        if suc:
            return i
    return -1

answer_target_start_token = []
answer_target_end_token = []
small_index = []
for i in range(n_questions):
    context_idx = train_answers_v2[i]['context_idx']
#     print(i)
    answer_token = answer_token_zh_int[i]
    start_possible = answer_target_start_token_possible[i]
    end_possible = answer_target_end_token_possible[i]
    
    context_token = contexts_token_zh_int[context_idx]
    context_token_ranged = context_token[start_possible:]
    
    retrived = find_sublist(context_token_ranged, answer_token)
    if retrived == -1:
        answer_target_start_token.append(start_possible)
        answer_target_end_token.append(end_possible)
    else:
        answer_target_start_token.append(retrived + start_possible)
        answer_target_end_token.append(retrived + start_possible + len(answer_token) - 1)
        small_index.append(i)

print(len(small_index))
print(len(answer_target_start_token))

2394
11590


In [22]:
answer_target_start_token_to_add = [p + 1 for p in answer_target_start_token]
answer_target_end_token_to_add = [p + 1 for p in answer_target_end_token]

train_encodings_zh.update({'start_positions': answer_target_start_token_to_add, 'end_positions': answer_target_end_token_to_add})

In [23]:
def merge_encoding(e1, e2):
    em = {}
    keys = e1.keys()
    for k in keys:
        em[k] = e1[k] + e2[k]
    return em

def select_encodings(e, s):
    keys = e.keys()
    res = {}
    for k in keys:
        new_value = []
        for selected in s:
            new_value.append(e[k][selected])
        res[k] = new_value
    return res

train_small_encodings_zh = select_encodings(train_encodings_zh, small_index)
train_small = merge_encoding(train_encodings, train_small_encodings_zh)
train_large = merge_encoding(train_encodings, train_encodings_zh)
print(train_large.keys())
print(len(train_large['end_positions']))
print(len(train_small['end_positions']))
print(len(train_encodings['end_positions']))

dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'])
23180
13984
11590


In [24]:
import torch

class MLQADataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

    def __len__(self):
        return len(self.encodings['input_ids'])

train_dataset = MLQADataset(train_encodings)
val_dataset = MLQADataset(val_encodings)
zh_dataset = MLQADataset(zh_encodings)
train_zh_dataset = MLQADataset(train_encodings_zh)
train_large_dataset = MLQADataset(train_large)
train_small_dataset = MLQADataset(train_small)


In [25]:
%pwd
%cd ../..

/content


In [26]:
import pickle

with open("train_large_dataset", "wb") as f:
    pickle.dump(train_large_dataset, f)

with open("train_dataset", "wb") as f:
    pickle.dump(train_dataset, f)

with open("val_dataset", "wb") as f:
    pickle.dump(val_dataset, f)
    
with open("zh_dataset", "wb") as f:
    pickle.dump(zh_dataset, f)

with open("train_zh_dataset", "wb") as f:
    pickle.dump(train_zh_dataset, f)
    
with open("train_small_dataset", "wb") as f:
    pickle.dump(train_small_dataset, f)

In [27]:
def compute_f1(predicted, true):
    c = len(set(predicted) & set(true))
    l1 = len(predicted)
    l2 = len(true)
    if(l1 + l2 == 0):
        return 1
    f1 = 2*c/(l1+l2)
    return f1
    
def compute_em(predicted, true):
    return int(predicted == true)

In [28]:
from torch.utils.data import DataLoader
from transformers import AdamW, BertForQuestionAnswering
# input_ids = None
# attention_mask = None
# start_positions = None
# end_positions = None
# model = None
# torch.cuda.empty_cache()

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

model = BertForQuestionAnswering.from_pretrained('bert-base-multilingual-cased')
model.to(device)
model.train()

train_loader = DataLoader(train_large_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=True)
zh_loader = DataLoader(zh_dataset, batch_size=8, shuffle=True)
print(len(train_loader))
print(len(val_loader))
optim = AdamW(model.parameters(), lr=5e-5)


val_batch = 300
max_epoch = 3
train_batch = len(train_loader)
for epoch in range(max_epoch):
    for batch_idx, batch in enumerate(train_loader):
        model.train()
        optim.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        start_positions = batch['start_positions'].to(device)
        end_positions = batch['end_positions'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
        loss = outputs[0]
        loss.backward()
        optim.step()
        
        if batch_idx % val_batch == 0 or batch_idx == train_batch - 1:
            print("Epoch: {}/{}, batch: {}/{}, {:%}".format(epoch, max_epoch, batch_idx, train_batch, batch_idx/train_batch))
            model.eval()
            eval_cnt = 0
            F1 = 0.0
            EM = 0.0
            for batch_idx, batch in enumerate(val_loader):
                optim.zero_grad()
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                start_positions = batch['start_positions'].to(device)
                end_positions = batch['end_positions'].to(device)
                outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
                samples_in_batch = len(input_ids)
                for i in range(samples_in_batch):
                    predict_start = int(outputs[1][i].argmax().cpu())
                    predict_end = int(outputs[2][i].argmax().cpu())
                    true_start = int(start_positions[i].cpu())
                    true_end = int(end_positions[i].cpu())
                    F1 += compute_f1(range(predict_start, predict_end), range(true_start, true_end))
                    EM += compute_em(range(predict_start, predict_end), range(true_start, true_end))
                eval_cnt += samples_in_batch
            F1 /= eval_cnt
            EM /= eval_cnt
            print("English eval score: F1:{}, EM:{}".format(F1, EM))
            
            eval_cnt = 0
            F1 = 0.0
            EM = 0.0
            for batch_idx, batch in enumerate(zh_loader):
                optim.zero_grad()
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                start_positions = batch['start_positions'].to(device)
                end_positions = batch['end_positions'].to(device)
                outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
                samples_in_batch = len(input_ids)
                for i in range(samples_in_batch):
                    predict_start = int(outputs[1][i].argmax().cpu())
                    predict_end = int(outputs[2][i].argmax().cpu())
                    true_start = int(start_positions[i].cpu())
                    true_end = int(end_positions[i].cpu())
                    F1 += compute_f1(range(predict_start, predict_end), range(true_start, true_end))
                    EM += compute_em(range(predict_start, predict_end), range(true_start, true_end))
                eval_cnt += samples_in_batch
            F1 /= eval_cnt
            EM /= eval_cnt
            print("Chinese eval score: F1:{}, EM:{}".format(F1, EM))
            
        
        


cuda


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=625.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=714314041.0, style=ProgressStyle(descri…




KeyboardInterrupt: ignored

In [None]:
# FOR TESTING ONLY IGNORE
import json
with open("MLQA_V1/dev/dev-context-en-question-en.json",'r') as load_f:
    load_dict = json.load(load_f)
    print(load_dict.keys())
    print(len(load_dict['data']))
    print(load_dict['data'][0].keys())
    print(load_dict['data'][0]['title'])
    print(load_dict['data'][0]['paragraphs'])
    print(load_dict['data'][0]['paragraphs'][0].keys())
#     print(load_dict['data'][0]['paragraphs'][0]['context'])
    print(load_dict['data'][0]['paragraphs'][0]['qas'])
    print(load_dict['data'][0]['paragraphs'][0]['qas'][0])
    
#TODO:(about code)
# 1. baseline Evaluation metric, Train faster (finished)
# 1.5 need to compare eng and chinese
# 2. word-level translate and generate new dataset of another language
# 3. sentence-level translate and alignment + retrive(min, max as tar) 

# 可能 
# 输入：英文句子+中文句子（乱序） 
# 输出：单词的对应关系

# English eval score: F1:0.6419080228993544, EM:0.5287456445993032 -- MLQA. M-BERT
# Chinese eval score: F1:0.42465514513374364, EM:0.2996031746031746
true    predicted
5 - 10  6- 11
6-10 intersection
6-10/5-10 recall
6-10/6-11 precision