# バイナリから塩基配列に変換

処理としては、ECCを付与してから塩基配列に変換する。\
分割する場合は、合成すべき配列のリストを返す。

## 論点


*   巨大なデータの場合、分割するのか
*   分割する場合、アドレスのサイズ



In [1]:
# Install the third-party packages imported by the next cell.
!pip install bitarray
!pip install numpy
!pip install hamming-codec

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [255]:
from bitarray import bitarray
import numpy as np
import hamming_codec
import itertools
import matplotlib.pyplot as plt
import random
import pandas as pd
import json
import time, datetime
import os

In [228]:
def split_data(data : bytes,split_length : int):
  """Cut ``data`` into consecutive slices of ``split_length`` items.

  The final slice is shorter when ``len(data)`` is not a multiple of
  ``split_length``.  Returns the slices as a list, in order.
  """
  chunks = []
  for start in range(0, len(data), split_length):
    stop = start + split_length
    chunks.append(data[start:stop])
  return chunks

def to_trits(n):
    """Return the base-3 digits of ``n``, most significant digit first.

    Zero is represented as ``[0]``.
    """
    if n == 0:
        return [0]
    reversed_digits = []
    while n:
        # Peel off the least significant trit on every pass.
        n, remainder = divmod(n, 3)
        reversed_digits.append(int(remainder))
    reversed_digits.reverse()
    return reversed_digits

def add_ecc(split_bytes,ecc_interval):
  """Hamming-encode every chunk in ``split_bytes``.

  Each chunk is read as a big-endian unsigned integer and handed to
  ``hamming_codec.encode`` together with a message width of
  ``ecc_interval * 8`` bits.  The encoder results are returned as a list,
  in input order.
  """
  return [
    hamming_codec.encode(int.from_bytes(chunk, 'big'), ecc_interval * 8)
    for chunk in split_bytes
  ]

def binaries_to_trits(binaries,max) : # binary strings -> fixed-width trit lists
  """Convert each binary string into a list of trits of width ``max``.

  Every element of ``binaries`` is parsed with ``int(..., base=2)``, turned
  into base-3 digits by ``to_trits`` and left-padded with zeros up to ``max``
  digits.  Digit lists already longer than ``max`` are left unchanged.
  """
  padded = []
  for bits in binaries:
    digits = to_trits(int(bits, base=2))
    missing = max - len(digits)
    padded.append([0] * missing + digits)
  return padded


def trits_to_binaries(trits,trit_max,bit_max) :
  """Turn a flat trit sequence back into ``'0b...'`` binary strings.

  ``trits`` is cut into groups of ``trit_max`` digits (the last group may be
  shorter).  Each group is read as a base-3 number and rendered as a binary
  string prefixed with ``'0b'`` and zero-padded on the left to at least
  ``bit_max`` binary digits.
  """
  binaries = []
  for start in range(0, len(trits), trit_max):
    group = trits[start:start + trit_max]
    value = int("".join(str(digit) for digit in group), 3)
    binaries.append('0b' + format(value, 'b').zfill(bit_max))
  return binaries


def binaries_to_identified_bytes(binaries,address_size):
  """Decode the ECC blocks of one oligo into an ``(address, data)`` pair.

  Each element of ``binaries`` is a binary string; it is parsed with
  ``int(b, 2)``, passed to ``hamming_codec.decode`` and the decoded value is
  converted to bytes.  Decoded bytes are appended to the address until
  ``address_size`` bytes have been counted, and to the data afterwards.

  Returns ``(address, data)`` as ``bytes``, where the ``'?'`` (0x3f) padding
  bytes have been removed from the address.  When the input contains no data
  block at all (for example an empty list) the data part is ``b''``; the
  previous implementation raised ``UnboundLocalError`` in that case.
  """
  address = bytes()
  data = bytes()
  address_count = 0
  for b in binaries :
    h = hex(int(hamming_codec.decode(int(b,2),len(b)),2))[2:]
    if h == '0' :
      # A block that decodes to zero contributes no bytes; it is skipped but
      # still counted as one address byte.
      address_count += 1
      continue
    if len(h) % 2 != 0 :
      h = '0'+ h  # bytes.fromhex needs an even number of hex digits
    chunk = bytes.fromhex(h)
    if address_count < address_size :
      address += chunk
      address_count += len(chunk)
    else :
      data += chunk
  # Strip the '?' (0x3f) padding once, after all blocks have been consumed,
  # so the result is defined even when no data block was seen.
  trimmed_address = bytes(a for a in address if a != 0x3f)
  return (trimmed_address,data)



def trits_to_dna_bases(trits,origin_base = 'A') :
  """Encode a trit sequence as a list of DNA bases.

  The result starts with ``origin_base``; every trit then selects the next
  base from a table keyed by the previous base.  The table never maps a base
  to itself, so two adjacent bases in the output always differ.
  """
  transitions = {
      'A' : {0:'G',1:'C',2:'T'},
      'C' : {0:'T',1:'G',2:'A'},
      'G' : {0:'A',1:'T',2:'C'},
      'T' : {0:'C',1:'A',2:'G'}
  }
  sequence = [origin_base]
  current = origin_base
  for trit in trits:
    current = transitions[current][trit]
    sequence.append(current)
  return sequence

def dna_bases_to_trits(bases) :
  """Decode a base sequence back into trits.

  Each pair of neighbouring bases yields one trit, looked up from the
  (previous base, next base) transition.  Pairs of identical bases are
  skipped and produce no trit.
  """
  transition_to_trit = {
      'A' : {'G':0,'C':1,'T':2},
      'C' : {'T':0,'G':1,'A':2},
      'G' : {'A':0,'T':1,'C':2},
      'T' : {'C':0,'A':1,'G':2}
  }
  return [
    transition_to_trit[previous][following]
    for previous, following in zip(bases, bases[1:])
    if following != previous
  ]

def encode_binary_to_dna_bases(file_path,bytes_per_oligo,ecc_interval = 4,address_size = 4) :
  """Encode a file into one target DNA base sequence per oligo.

  The file is read as bytes and cut into payloads of ``bytes_per_oligo``
  bytes.  Each payload gets an address: its 1-based index written as hex
  digits (ASCII) and left-padded with ``'?'`` up to ``address_size``
  characters.  Address and payload are then cut into ``ecc_interval``-byte
  pieces, ECC-encoded with ``add_ecc``, converted to fixed-width trits and
  finally mapped to bases with ``trits_to_dna_bases``.

  Returns a list with one list of bases per oligo.
  """
  trit_max = {1:8,2:14,3:19,4:24} # trit digits per ECC block, keyed by ecc_interval
  with open(file_path,'rb') as f:
    raw = f.read()

  # One payload per oligo.
  payloads = split_data(raw, bytes_per_oligo)

  # Attach the padded hex address to every payload.
  addressed = []
  for index, payload in enumerate(payloads, start=1):
    hex_digits = bytes(hex(index)[2:], 'utf-8')
    padding = ('?' * (address_size - len(hex_digits))).encode('utf-8')
    addressed.append((padding + hex_digits, payload))

  # Cut address and payload into the pieces that each get their own ECC.
  chunked = [
    (split_data(addr, ecc_interval), split_data(payload, ecc_interval))
    for addr, payload in addressed
  ]

  # ECC-encoded pieces (binary strings).
  protected = [
    (add_ecc(addr_chunks, ecc_interval), add_ecc(data_chunks, ecc_interval))
    for addr_chunks, data_chunks in chunked
  ]

  # One flat trit stream per oligo: address blocks first, then data blocks.
  trit_streams = []
  for addr_codes, data_codes in protected:
    blocks = (binaries_to_trits(addr_codes, trit_max[ecc_interval])
              + binaries_to_trits(data_codes, trit_max[ecc_interval]))
    trit_streams.append(list(itertools.chain.from_iterable(blocks)))

  # Target base sequences to synthesize.
  return [trits_to_dna_bases(stream) for stream in trit_streams]


def is_hex(val):
    """Report whether ``int(val, 16)`` accepts ``val``.

    Returns ``False`` when the conversion raises ``ValueError`` and ``True``
    otherwise.
    """
    try:
        int(val, 16)
    except ValueError:
        return False
    return True

def decode_dna_bases_to_binary(bases,ecc_interval = 4,address_size = 4) :
  """Decode oligo base sequences into ``(address, data)`` pairs.

  Every sequence in ``bases`` is converted to trits, regrouped into binary
  ECC blocks and decoded by ``binaries_to_identified_bytes``.  Pairs whose
  address is not accepted by ``is_hex`` are counted as address errors and
  dropped; the count is printed.  The remaining pairs are returned sorted by
  the numeric value of their hex address.
  """
  trit_max = {1:8,2:14,3:19,4:24} # trit digits per ECC block, keyed by ecc_interval
  bit_max = {1:12,2:21,3:29,4:38} # bits per ECC-encoded block, keyed by ecc_interval
  decoded = []
  rejected = []
  for oligo in bases:
    trits = dna_bases_to_trits(oligo)
    blocks = trits_to_binaries(trits, trit_max[ecc_interval], bit_max[ecc_interval])
    pair = binaries_to_identified_bytes(blocks, address_size)
    bucket = decoded if is_hex(pair[0]) else rejected
    bucket.append(pair)

  decoded.sort(key = lambda pair: int(pair[0],16))
  print('address error counts : ' + str(len(rejected)))
  return decoded

def decode_dna_bases_to_binary_with_details(bases,ecc_interval = 4,address_size = 4) :
  """Decode oligo base sequences and also report the address error count.

  Works like ``decode_dna_bases_to_binary`` but prints nothing.  Returns
  ``(pairs, address_error_count)`` where ``pairs`` holds the
  ``(address, data)`` tuples with a hex-parsable address, sorted by the
  numeric value of that address, and ``address_error_count`` is the number
  of sequences whose address was rejected by ``is_hex``.
  """
  trit_max = {1:8,2:14,3:19,4:24} # trit digits per ECC block, keyed by ecc_interval
  bit_max = {1:12,2:21,3:29,4:38} # bits per ECC-encoded block, keyed by ecc_interval
  decoded = []
  rejected = []
  for oligo in bases:
    trits = dna_bases_to_trits(oligo)
    blocks = trits_to_binaries(trits, trit_max[ecc_interval], bit_max[ecc_interval])
    pair = binaries_to_identified_bytes(blocks, address_size)
    if not is_hex(pair[0]) :
      rejected.append(pair)
      continue
    decoded.append(pair)

  decoded.sort(key = lambda pair: int(pair[0],16))
  return decoded,len(rejected)

def weighted_random(denominator:int,numerator:int):
  """Return ``True`` with probability ``numerator / denominator``.

  A value is drawn uniformly from ``0 .. denominator - 1`` and the result is
  ``True`` when it is below ``numerator``.  The previous implementation drew
  from ``random.randint(0, denominator)``, which includes ``denominator``
  itself and therefore gave a probability of ``numerator / (denominator + 1)``.

  A ``numerator`` of 0 or less always yields ``False``; a ``numerator`` of
  ``denominator`` or more always yields ``True``.

  Raises:
    ValueError: if ``denominator`` is less than 1 (from ``random.randrange``).
  """
  return random.randrange(denominator) < numerator

def weighted_random_exclusive(denominator:int,numerators:list):
  """Pick at most one of several mutually exclusive outcomes.

  Outcome ``i`` is chosen with probability ``numerators[i] / denominator``.
  A value is drawn uniformly from ``0 .. denominator - 1`` and compared
  against the running totals of ``numerators``; the index of the first
  outcome whose cumulative range contains the value is returned.

  Two defects of the previous implementation are fixed here:
  * the draw used ``random.randint(0, denominator)``, which is inclusive at
    both ends, so every probability had ``denominator + 1`` as divisor;
  * a zero weight reset the running offset, so outcomes listed after a zero
    weight overlapped earlier ones and could be shadowed by them.

  Returns:
    The index of the selected outcome, or ``-1`` when none is selected.

  Raises:
    ValueError: if ``denominator`` is less than 1 (from ``random.randrange``).
  """
  r = random.randrange(denominator)
  upper = 0  # exclusive upper bound of the ranges assigned so far
  for i, n in enumerate(numerators):
    if n <= 0:
      # Outcomes without weight can never be selected.
      continue
    upper += n
    if r < upper:
      return i
  return -1
  
def extension(base : str, cycle : int,miss_extension_denominator : int,miss_extension_numerator : int,deletion_and_over_extension_denominator : int,deletion_numerator : int,over_extension_numerator : int) :
  """Simulate ``cycle`` extension reactions for one target base.

  In each reaction the incorporated base is ``base``, unless
  ``weighted_random`` reports a mis-extension, in which case one of the
  other bases is picked at random.  ``weighted_random_exclusive`` then
  decides the outcome: normal extension (base added once), over-extension
  (base added twice) or deletion (nothing added).

  Returns the list of bases produced by all reactions.
  """
  nucleotides = ['A','G','C','T']
  strand = []
  for _ in range(cycle):
    incorporated = base
    if weighted_random(miss_extension_denominator,miss_extension_numerator):
      # Wrong base incorporated: choose among the remaining bases.
      alternatives = [s for s in nucleotides if base not in s]
      incorporated = alternatives[random.randint(0,2)]

    # Index 0 of the weights is over-extension, index 1 is deletion.
    outcome = weighted_random_exclusive(
      deletion_and_over_extension_denominator,
      [over_extension_numerator,deletion_numerator])
    if outcome == -1 :
      # Normal extension.
      strand.append(incorporated)
    elif outcome == 0 :
      # Over-extension: the base is added twice.
      strand += [incorporated, incorporated]
    # outcome == 1 is a deletion: nothing is added.

  return strand

def synthesis_dna(bases : list,
                  reaction_cycle : int,
                  miss_extension_denominator : int,
                  miss_extension_numerator : int,
                  deletion_and_over_extension_denominator : int,
                  deletion_numerator : int,
                  over_extension_numerator : int) :
  """Simulate the synthesis of one oligo.

  ``extension`` is run for every target base in ``bases`` with
  ``reaction_cycle`` reactions each, and the produced bases are concatenated
  in order into a single list, which is returned.
  """
  strand = []
  for target in bases :
    strand.extend(extension(
      target,
      reaction_cycle,
      miss_extension_denominator,
      miss_extension_numerator,
      deletion_and_over_extension_denominator,
      deletion_numerator,
      over_extension_numerator))
  return strand

def extension_with_details(base : str, cycle : int,miss_extension_denominator : int,miss_extension_numerator : int,deletion_and_over_extension_denominator : int,deletion_numerator : int,over_extension_numerator : int) :
  """Simulate ``cycle`` extension reactions and count the injected errors.

  Follows the same procedure as ``extension`` and additionally counts how
  many reactions were mis-extensions, over-extensions and deletions.

  Returns ``(bases, miss_extension_count, over_extension_count,
  deletion_count)``.
  """
  nucleotides = ['A','G','C','T']
  strand = []
  substitutions = duplications = skips = 0
  for _ in range(cycle):
    incorporated = base
    if weighted_random(miss_extension_denominator,miss_extension_numerator):
      # Wrong base incorporated: choose among the remaining bases.
      alternatives = [s for s in nucleotides if base not in s]
      incorporated = alternatives[random.randint(0,2)]
      substitutions += 1

    # Index 0 of the weights is over-extension, index 1 is deletion.
    outcome = weighted_random_exclusive(
      deletion_and_over_extension_denominator,
      [over_extension_numerator,deletion_numerator])
    if outcome == -1 :
      # Normal extension.
      strand.append(incorporated)
    elif outcome == 0 :
      # Over-extension: the base is added twice.
      strand += [incorporated, incorporated]
      duplications += 1
    elif outcome == 1 :
      # Deletion: nothing is added.
      skips += 1

  return strand,substitutions,duplications,skips

def synthesis_dna_with_details(bases : list,
                  reaction_cycle : int,
                  miss_extension_denominator : int,
                  miss_extension_numerator : int,
                  deletion_and_over_extension_denominator : int,
                  deletion_numerator : int,
                  over_extension_numerator : int) :
  """Simulate the synthesis of one oligo and total the injected errors.

  ``extension_with_details`` is run for every target base in ``bases``; the
  produced bases are concatenated and the three error counters are summed.

  Returns ``(bases, miss_extension_count, over_extension_count,
  deletion_count)``.
  """
  strand = []
  total_miss = 0
  total_over = 0
  total_deleted = 0
  for target in bases :
    grown, missed, overs, deleted = extension_with_details(
      target,
      reaction_cycle,
      miss_extension_denominator,
      miss_extension_numerator,
      deletion_and_over_extension_denominator,
      deletion_numerator,
      over_extension_numerator)
    strand.extend(grown)
    total_miss += missed
    total_over += overs
    total_deleted += deleted
  return strand,total_miss,total_over,total_deleted

def tuple_list_to_dict(tups):
  """Group ``(key, value)`` pairs into a dict of value lists.

  Values that share a key are collected in a list, in the order they appear
  in ``tups``.
  """
  grouped = {}
  for key, value in tups:
    if key not in grouped:
      grouped[key] = []
    grouped[key].append(value)
  return grouped

def extract_list_of_tuple(tups):
    """Transpose a sequence of tuples into a list of lists.

    Element ``i`` of the result holds the ``i``-th item of every tuple.
    """
    columns = zip(*tups)
    return list(map(list, columns))

def count_diff_byte(ref : bytes,sample :bytes):
  """Count the bytes in which ``sample`` differs from ``ref``.

  The result is the length difference of the two inputs plus the number of
  positions, within their common length, whose bytes are not equal.
  """
  length_gap = abs(len(ref) - len(sample))
  mismatches = sum(1 for expected, actual in zip(ref, sample) if expected != actual)
  return length_gap + mismatches

def get_file_size(file_path :str):
  """Return the size of the file at ``file_path`` in bytes.

  The size is taken from the file system via ``os.path.getsize`` instead of
  reading the whole file into memory just to measure its length.

  Raises:
    OSError: if the file does not exist or is inaccessible.
  """
  return os.path.getsize(file_path)

def flat(l :list):
  """Flatten one nesting level of ``l`` into a new list."""
  return [item for inner in l for item in inner]


# 通しで実行

In [None]:
# End-to-end run: encode a file, simulate synthesis once, decode both the
# error-free targets and the synthesized sequences, and print the results.

#########################
# Conversion parameters #
#########################

bytes_per_oligo = 16 # amount of data per oligo (bytes)
ecc_interval = 4 # number of bytes covered by each ECC block, max = 4
address_size = 4 # size of the address (bytes)

########################
# Synthesis parameters #
########################
deletion_and_over_extension_denominator = 1000 # denominator of the deletion / over-extension probability
deletion_numerator = 100 # numerator of the deletion probability
over_extension_numerator = 0 # numerator of the over-extension probability
miss_extension_denominator = 1000 # denominator of the wrong-base probability
miss_extension_numerator = 0# numerator of the wrong-base probability
reaction_cycle = 3 # number of extension reactions per base => target extension count


# Generate the target sequences from the file.
# The result is a list with one target sequence per oligo.
targets = encode_binary_to_dna_bases('1k_data',bytes_per_oligo,ecc_interval,address_size)
print(list(itertools.chain.from_iterable(targets)))
#print('目標の配列')
#print(targets)

# Synthesize each oligo separately.
synthesized_bases = []
for t in targets :
  synthesized_bases.append(synthesis_dna(t,
                            reaction_cycle,
                            miss_extension_denominator,
                            miss_extension_numerator,
                            deletion_and_over_extension_denominator,
                            deletion_numerator,
                            over_extension_numerator))

#print('合成された配列')
#print(synthesized_bases)

# Decode the error-free targets and the simulated synthesis results.
decoded_targets = decode_dna_bases_to_binary(targets,ecc_interval,address_size)
decoded_synthesized_bases = decode_dna_bases_to_binary(synthesized_bases,ecc_interval,address_size)

print('デコードした目標配列')
#print(decoded_targets)
# Only the addresses of the decoded targets are printed.
print(extract_list_of_tuple(decoded_targets)[0])
print('デコードした合成配列')
print(decoded_synthesized_bases)

# シミュレーション

In [256]:
# Repeated synthesis simulation: encode the input file once, simulate the
# synthesis of every oligo `number_of_simulations` times, decode the results
# and accumulate error statistics, optionally dumping them as JSON.

#########################
# Simulation parameters #
#########################
# NOTE(review): `cycle` is not used anywhere in this cell.
from itertools import cycle


number_of_simulations = 100 # number of simulation runs
input_file_path = '1k_data' # path of the data file to encode
output_file_prefix = 'output'
output_file_directory = './dump_datas/'
will_dump = False # when true, the results are written to a JSON file at the end of the cell



#########################
# Conversion parameters #
#########################

bytes_per_oligo = 16 # amount of data per oligo (bytes)
ecc_interval = 4 # number of bytes covered by each ECC block, max = 4
address_size = 4 # size of the address (bytes)



########################
# Synthesis parameters #
########################
deletion_and_over_extension_denominator = 1000 # denominator of the deletion / over-extension probability
deletion_numerator = 10 # numerator of the deletion probability
over_extension_numerator = 100 # numerator of the over-extension probability
miss_extension_denominator = 10000 # denominator of the wrong-base probability
miss_extension_numerator = 1# numerator of the wrong-base probability
reaction_cycle = 3 # number of extension reactions per base => target extension count


# Totals accumulated over all simulation runs.
miss_extension_count = over_extension_count = deletion_count = address_error_count = address_error_catfish_count = error_bytes_in_correct_address_data = 0


# Per-run records; each list receives one entry per simulation run.
errors_in_each_simulation = {'errors_in_each_target':[],'address_error_count':[],'error_bytes_in_correct_address_data':[],'address_error_catfish_count':[]}

# Encode once. Decoding the error-free targets yields the reference
# (address, data) pairs that the simulated results are compared against.
targets = encode_binary_to_dna_bases(input_file_path,bytes_per_oligo,ecc_interval,address_size)
target_base_count = len(flat(targets))
decoded_targets = decode_dna_bases_to_binary(targets,ecc_interval,address_size)
decoded_targets_dict = tuple_list_to_dict(decoded_targets)
available_address = extract_list_of_tuple(decoded_targets)[0]


for i in range(number_of_simulations):
    synthesized_bases = []
    # Error counts of this run, one entry per target oligo.
    errors_in_each_target = {'miss_extension_count' : [],'over_extension_count' : [],'deletion_count':[]}
    for t in targets :
        r,_miss_extension_count,_over_extension_count,_deletion_count = synthesis_dna_with_details(t,reaction_cycle,miss_extension_denominator,miss_extension_numerator,deletion_and_over_extension_denominator,deletion_numerator,over_extension_numerator)
        synthesized_bases.append(r)
        miss_extension_count += _miss_extension_count
        over_extension_count += _over_extension_count
        deletion_count += _deletion_count
        errors_in_each_target['miss_extension_count'].append(_miss_extension_count)
        errors_in_each_target['over_extension_count'].append(_over_extension_count)
        errors_in_each_target['deletion_count'].append(_deletion_count)

    decoded_synthesized_bases,_address_error_count = decode_dna_bases_to_binary_with_details(synthesized_bases,ecc_interval,address_size)
    address_error_count += _address_error_count
    _error_bytes_in_correct_address_data = 0
    _address_error_catfish_count = 0
    for d in decoded_synthesized_bases :
        if d[0] in available_address :
            # Known address: count the bytes that differ from the reference data.
            _error_bytes_in_correct_address_data += count_diff_byte(decoded_targets_dict[d[0]][0],d[1])
        else :
            # "Catfish": the address parsed as hex but matches no target address.
            _address_error_catfish_count += 1
    error_bytes_in_correct_address_data += _error_bytes_in_correct_address_data
    address_error_catfish_count += _address_error_catfish_count
    errors_in_each_simulation['errors_in_each_target'].append(errors_in_each_target)
    errors_in_each_simulation['error_bytes_in_correct_address_data'].append(_error_bytes_in_correct_address_data)
    errors_in_each_simulation['address_error_count'].append(_address_error_count)
    errors_in_each_simulation['address_error_catfish_count'].append(_address_error_catfish_count)

# Summary: each line prints an error total followed by the figure it is reported against.
print('error address : ' + str(address_error_count + address_error_catfish_count) + '/' + str((len(targets) * number_of_simulations)) + ' (catfish : ' + str(address_error_catfish_count) + ')' +'\n')
print('error bytes in correct address : ' + str(error_bytes_in_correct_address_data) + '/' + str((get_file_size(input_file_path) * number_of_simulations))  +'\n')
print('miss extension : ' + str(miss_extension_count) + '/' + str(number_of_simulations * target_base_count * reaction_cycle)  +'\n')
print('over extension : ' + str(over_extension_count) + '/' + str(number_of_simulations * target_base_count * reaction_cycle)  +'\n')
print('deletion : ' + str(deletion_count) + '/' + str(number_of_simulations * target_base_count * reaction_cycle)  +'\n')
# print(errors_in_each_simulation)

if will_dump :
    # Bundle the parameters with the per-run results so the dump is self-describing.
    sim_results_with_meta_data = {
    'number_of_simulations':number_of_simulations,
    'input_file_size':get_file_size(input_file_path),
    'bytes_per_oligo':bytes_per_oligo,
    'ecc_interval':ecc_interval,
    'address_size':address_size,
    'deletion_and_over_extension_denominator':deletion_and_over_extension_denominator,
    'deletion_numerator':deletion_numerator,
    'over_extension_numerator':over_extension_numerator,
    'miss_extension_denominator':miss_extension_denominator,
    'miss_extension_numerator':miss_extension_numerator,
    'reaction_cycle':reaction_cycle,
    'results' : errors_in_each_simulation,
    }
    json_str = json.dumps(sim_results_with_meta_data)
    # The file name carries the current local time, e.g. output_2022-09-23-19-19-16.json.
    output_file_name = output_file_prefix + '_' + datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H-%M-%S') + '.json'
    output_file_path = output_file_directory + output_file_name
    print('start dump results.')
    # Create the output directory on first use.
    if not os.path.exists(output_file_directory):
        os.makedirs(output_file_directory)
    with open(output_file_path, mode='w') as f:
        f.write(json_str)
    print('complete dump results. filename : ' + output_file_name)

address error counts : 0
error address : 29/6400 (catfish : 0)

error bytes in correct address : 1090/102400

miss extension : 228/2323200

over extension : 232565/2323200

deletion : 23359/2323200

start dump results.
complete dump results. filename : output_2022-09-23-19-19-16.json


In [None]:
# Quick check of count_diff_byte with inputs of different lengths:
# first a sample shorter than the reference, then a sample longer than it.
ref = bytes('abcdefg'.encode('utf-8'))
sample = bytes('abcdef'.encode('utf-8'))
print(count_diff_byte(ref,sample))
ref = bytes('abcdef'.encode('utf-8'))
sample = bytes('abcdefgh'.encode('utf-8'))
print(count_diff_byte(ref,sample))

1
2


# 以下メモ

In [223]:
# Empirical check of weighted_random: run `ts` batches of `ts` draws each and
# print the average number of True results per batch.
deno = 1000
num = 10

ts = 10000

counts = 0
for j in range(ts):
    _counts = 0  # hits within the current batch
    for i in range(ts):
        _counts = _counts + 1 if weighted_random(deno,num) else _counts
    counts += _counts
print(str(counts / ts))



99.9465


In [253]:
# Print the current local time formatted as year-month-day-hour-minute-second.
print(datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H-%M-%S'))

2022-09-23-19-02-44


In [206]:
# Print the size in bytes of the sample input file.
print(get_file_size('1k_data'))

1024


In [234]:
# Check count_diff_byte for three cases: sample shorter than the reference,
# sample longer than the reference, and identical inputs.
ref = bytes('abcdefg'.encode('utf-8'))
sample = bytes('abcdef'.encode('utf-8'))
print(count_diff_byte(ref,sample))
ref = bytes('abcdef'.encode('utf-8'))
sample = bytes('abcdefgh'.encode('utf-8'))
print(count_diff_byte(ref,sample))
ref = bytes('abcdef'.encode('utf-8'))
sample = bytes('abcdef'.encode('utf-8'))
print(count_diff_byte(ref,sample))

1
2
0


In [None]:
# Empirical check of weighted_random_exclusive: with weights [0, 20, 0] and a
# denominator of 100, count how often outcome 1 is selected in 1000 draws.
nums = [0,20,0]
counts_of_one = 0
for i in range(1000) :  
  if weighted_random_exclusive(100,nums) == 1 :
    counts_of_one += 1

print(counts_of_one)

196


In [None]:

# Scratch cell: the encoding pipeline written out step by step with a print
# after every stage, followed by a decode of the first oligo.

bytes_per_oligo = 16 # amount of data per oligo (bytes)
ecc_interval = 3 # number of bytes covered by each ECC block, max = 4
address_size = 4 # size of the address (bytes)

# Number of trit digits per ECC block, keyed by ecc_interval.
trit_max = {1:8,
            2:14,
            3:19,
            4:24}
# Number of bits per ECC-encoded block, keyed by ecc_interval.
bit_max = {1:12,
           2:21,
           3:29,
           4:38}
print(trit_max)
print(trit_max[ecc_interval])

with open('long_data','rb') as f:
  data_bytes = f.read()# raw data
  sd1 = split_data(data_bytes,bytes_per_oligo)# split the data per oligo
  identified_datas = [] # per-oligo data with its address (hex string as bytes) attached
  for i,d in enumerate(sd1):
    address = bytes(hex(i + 1)[2:],'utf-8')
    #zero = 0
    #address = zero.to_bytes(address_size - len(address),'big') + address # pad to the address size
    address = bytes(('?'*(address_size - len(address))).encode('utf-8')) + address # pad to the address size with '?'
    identified_datas.append((address,d))
  print(identified_datas)

  identified_and_spilt_datas = [] # address and data of each oligo, split per ECC interval so ECC can be added
  for t in identified_datas :
    identified_and_spilt_datas.append((split_data(t[0],ecc_interval),split_data(t[1],ecc_interval)))
  print(identified_and_spilt_datas)

  encoded_datas = [] # data with ECC added (binary)
  for t in identified_and_spilt_datas:
    encoded_datas.append((add_ecc(t[0],ecc_interval),add_ecc(t[1],ecc_interval)))
  print(encoded_datas)

  encoded_datas_trits = [] # data with ECC added, as trits (base 3)
  for t in encoded_datas :
    encoded_datas_trits.append(
        list(itertools.chain(*(binaries_to_trits(t[0],trit_max[ecc_interval]) + binaries_to_trits(t[1],trit_max[ecc_interval]))))
        )
    #encoded_datas_trits.append((binaries_to_trits(t[0],trit_max[ecc_interval]),binaries_to_trits(t[1],trit_max[ecc_interval])))

  print(encoded_datas_trits)

  bases = [] # target sequences to synthesize
  for t in encoded_datas_trits :
    bases.append(trits_to_dna_bases(t))

  print(bases)



  # Round trip for the first oligo: bases -> trits -> binary blocks -> (address, data).
  bt = encoded_datas_trits[0]
  bs = bases[0]
  at = dna_bases_to_trits(bs)
  print(at)
  bs = trits_to_binaries(at,trit_max[ecc_interval],bit_max[ecc_interval])
  #decoded = []
  print(bs)
  ib = binaries_to_identified_bytes(bs,address_size)
  print('address:' + ib[0].decode('utf-8') + '\ndata:' + ib[1].decode('utf-8'))
  # print(bs)
  # for b in bs :
  #   h = hex(int(hamming_codec.decode(int(b,2),len(b)),2))[2:]
  #   if h == '0' : continue
  #   decoded.append(bytes.fromhex(h).decode('utf-8'))
  # print("".join(decoded))
  # print(bt)
  # print(bs)
  # print(at)
  # print(bt == at)



In [None]:
# Scratch cell: the body of the extension loop written inline for a single
# base, printing the outcome of every reaction.
# NOTE(review): the recorded output of this cell is a NameError; presumably a
# helper it calls was not defined when it was run - confirm.
deletion_and_over_extension_denominator = 1000
deletion_numerator = 100

#over_extension_denominator = 1000
over_extension_numerator = 300

miss_extension_denominator = 1000
miss_extension_numerator = 1

# NOTE(review): this rebinds the global name `cycle`, which the simulation
# cell imports from itertools.
cycle = 2

bases = ['A','G','C','T']
base = 'A'

output = []

for i in range(cycle):
  ext_base = base
  if weighted_random(miss_extension_denominator,miss_extension_numerator):# wrong base incorporated
    other_bases = [s for s in bases if base not in s]
    ext_base = other_bases[random.randint(0,2)]

  ns = [over_extension_numerator,deletion_numerator]
  deletion_or_over = weighted_random_exclusive(deletion_and_over_extension_denominator,ns)# deletion or over-extension
  print(deletion_or_over)
  if deletion_or_over == -1 :# normal extension
    output.append(ext_base)
    continue
  if deletion_or_over == 0 : # over-extension: the base is added twice
    output.append(ext_base)
    output.append(ext_base)
    continue
  if deletion_or_over == 1 : # deletion: nothing is added
    continue
print(output)



NameError: ignored