This notebook is identifying PFAM domains for all BRENDA sequences.<br/><br/>Copyright (C) 2020  Martin Engqvist Lab<br/>This program is free software: you can redistribute it and/or modify<br/>it under the terms of the GNU General Public License as published by<br/>the Free Software Foundation, either version 3 of the License, or<br/>(at your option) any later version.<br/>This program is distributed in the hope that it will be useful,<br/>but WITHOUT ANY WARRANTY; without even the implied warranty of<br/>MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the<br/>GNU General Public License for more details.<br/>You should have received a copy of the GNU General Public License<br/>along with this program.  If not, see <http://www.gnu.org/licenses/>.

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from os.path import join, dirname, basename, exists, isdir

### Load environmental variables from the project root directory ###
# find .env automagically by walking up directories until it's found
dotenv_path = find_dotenv()

# load up the entries as environment variables
load_dotenv(dotenv_path)

# now you can get the variables using their names

# Check whether a network drive has been specified
DATABASE = os.environ.get("NETWORK_URL")
if DATABASE == 'None':
    pass
else:
    pass
    #mount network drive here

# set up directory paths
CURRENT_DIR = os.getcwd()
PROJ = dirname(dotenv_path) # project root directory

DATA = join(PROJ, 'data') #data directory
RAW_EXTERNAL = join(DATA, 'raw_external') # external data raw directory
RAW_INTERNAL = join(DATA, 'raw_internal') # internal data raw directory
INTERMEDIATE = join(DATA, 'intermediate') # intermediate data directory
FINAL = join(DATA, 'final') # final data directory

RESULTS = join(PROJ, 'results') # output directory
FIGURES = join(RESULTS, 'figures') # figure output directory
PICTURES = join(RESULTS, 'pictures') # picture output directory


folder_name = 'brenda_domains'
if folder_name != '':
    #make folders if they don't exist
    if not exists(join(INTERMEDIATE, 'BRENDA_for_paper', folder_name)):
        os.makedirs(join(INTERMEDIATE, 'BRENDA_for_paper', folder_name))

print('Standard variables loaded, you are good to go!')

Standard variables loaded, you are good to go!


In [2]:
import os
import subprocess
from os.path import join, exists, getsize
import multiprocessing
import pandas as pd
import numpy as np

from urllib.request import urlopen
from urllib.error import URLError, HTTPError
import time
import re


from tqdm import tqdm

### Download the Pfam hmm file
From database version 33.1

In [4]:
url = 'ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam33.1/Pfam-A.hmm.gz'
outfile = join(INTERMEDIATE, 'BRENDA_for_paper', folder_name, 'Pfam-A.hmm.gz')


if not exists(outfile.replace('.gz', '')):
    # download the hmm file
    my_cmd = 'wget -O {} {}'.format(outfile, url)
    os.system(my_cmd)

    # unzip file
    my_cmd = 'gunzip {}'.format(outfile)
    os.system(my_cmd)

### Go through fasta files and discover all Pfam domains

In [12]:
def worker(package):
    '''
    A worker to carry out the hmmsearch.
    '''
    fi, inpath, outpath, pfam_hmms = package

    fasta_file = join(inpath, fi)
    hmm_outfile = join(outpath, fi+'.out')

    if not exists(hmm_outfile):
        mycmd = 'hmmsearch -E 1e-15 --domtblout %s %s %s > tmp.out' % (hmm_outfile, pfam_hmms, fasta_file)
        os.system(mycmd)


        
def search_all(ecnum, reverse=False):
    '''
    Function for distributing the files on a number of workers.
    '''
    num_cores = multiprocessing.cpu_count()

    # define folders
    outpath = join(INTERMEDIATE, 'BRENDA_for_paper', folder_name)
    inpath = join(INTERMEDIATE, 'BRENDA_for_paper', 'ec_identity_clustering')
    pfam_hmms = join(outpath, 'Pfam-A.hmm')

    # run assemble a list of the file to run
    packages = []
    for fi in sorted(os.listdir(inpath), reverse=reverse):

        if not fi.startswith(ecnum):
            continue

        if not fi.endswith('90_augmented.fasta'):
            continue

        packages.append((fi, inpath, outpath, pfam_hmms))

    # distribute work on the cores
    with multiprocessing.Pool(processes=num_cores) as pool:
        results = pool.map(worker, packages)



def parse_hmm_output(filepath, ec):
    '''
    Parse an hmm output file and return as pandas data frame.
    '''
    data = {'ec':[], 'uid':[], 'hmm_model':[], 'pfam':[], 'hmm_model_len':[], 'hmm_match_from':[],
            'hmm_match_to':[], 'hmm_match_coverage':[], 'match_evalue':[], 'gene_match_from':[],
            'gene_match_to':[]}

    with open(filepath, 'r') as f:
        for line in f:
            if line.startswith('#'):
                continue

            parts = line.strip().split()
            if len(parts) < 10:
                print(line)

            uid = parts[0].split(';')[0]

            dom_name = parts[3]
            pfam = parts[4]

            evalue = parts[6]

            qlen = parts[5]
            hmm_from = parts[15]
            hmm_to = parts[16]
            coverage = (int(hmm_to)-int(hmm_from))/int(qlen)

            ali_from = parts[17]
            ali_to = parts[18]

            if coverage >= 0.35:
                data['ec'].append(ec)
                data['uid'].append(uid)
                data['hmm_model'].append(dom_name)
                data['pfam'].append(pfam)
                data['hmm_model_len'].append(qlen)
                data['hmm_match_from'].append(hmm_from)
                data['hmm_match_to'].append(hmm_to)
                data['hmm_match_coverage'].append('%.2f' % float(coverage))
                data['match_evalue'].append(evalue)
                data['gene_match_from'].append(ali_from)
                data['gene_match_to'].append(ali_to)

    return pd.DataFrame(data)



def get_pfam_domains():
    '''
    Go through all the augmented fasta files and predict Pfam domains.
    '''

    # align all the matches
    for ec_start in tqdm(range(1, 8)):
        ec_start = str(ec_start)
        search_all(ecnum=ec_start, reverse=False)

    # parse the outfiles
    filepath = join(INTERMEDIATE, 'BRENDA_for_paper', folder_name)
    outfile = join(INTERMEDIATE, 'BRENDA_for_paper', 'pfam_hmm_results.tsv')
    
    all_domain_data = None
    for fi in tqdm(sorted(os.listdir(filepath))):

        if not fi.endswith('.fasta.out'):
            continue

        if getsize(join(filepath, fi)) == 0:
            continue

        print(fi)

        # parse outputs
        ec = fi.split('_')[0]
        hmm_data = parse_hmm_output(join(filepath, fi), ec)

        # add to the main data
        if all_domain_data is None:
            all_domain_data = hmm_data
        else:
            all_domain_data = all_domain_data.append(hmm_data)

    all_domain_data.to_csv(outfile, sep='\t', index=False)
    
    
# run analysis
outfile = join(INTERMEDIATE, 'BRENDA_for_paper', 'pfam_hmm_results.tsv')
if not exists(outfile):
    get_pfam_domains()

### There are sometimes several predictions for the same region of a protein, but we only want to keep one

In [8]:
df = pd.read_csv(outfile, sep='\t')

display(df.head())
display(df.tail())
display(df.describe())

Unnamed: 0,ec,uid,hmm_model,pfam,hmm_model_len,hmm_match_from,hmm_match_to,hmm_match_coverage,match_evalue,gene_match_from,gene_match_to
0,1.1.1.100,A0A137SKR6,3Beta_HSD,PF01073.20,280,1,158,0.56,2.2e-16,4,166
1,1.1.1.100,A0A0F4VT46,ABC_tran,PF00005.28,137,2,137,0.99,8.3e-29,23,177
2,1.1.1.100,F8GQY5,Abhydrolase_1,PF00561.21,257,2,111,0.42,2.9e-27,78,187
3,1.1.1.100,A0A0N1NPA0,Abhydrolase_1,PF00561.21,257,1,252,0.98,1.1e-20,29,280
4,1.1.1.100,F8GQY5,Abhydrolase_6,PF12697.8,220,1,216,0.98,3.2000000000000003e-22,79,310


Unnamed: 0,ec,uid,hmm_model,pfam,hmm_model_len,hmm_match_from,hmm_match_to,hmm_match_coverage,match_evalue,gene_match_from,gene_match_to
8553087,7.6.2.9,D4YFX4,OpuAC,PF04069.13,257,3,257,0.99,7.499999999999999e-48,39,304
8553088,7.6.2.9,E0QLH2,OpuAC,PF04069.13,257,3,257,0.99,3.2e-46,10,275
8553089,7.6.2.9,S2K9H2,OpuAC,PF04069.13,257,160,257,0.38,1.6999999999999998e-30,4,102
8553090,7.6.2.9,G0QK58,Pkinase,PF00069.26,264,1,240,0.91,5.1e-55,84,325
8553091,7.6.2.9,G0QK58,PK_Tyr_Ser-Thr,PF07714.18,259,2,215,0.82,7.4999999999999996e-34,85,295


Unnamed: 0,hmm_model_len,hmm_match_from,hmm_match_to,hmm_match_coverage,match_evalue,gene_match_from,gene_match_to
count,8553092.0,8553092.0,8553092.0,8553092.0,8553092.0,8553092.0,8553092.0
mean,190.2124,6.33366,183.175,0.929905,1.3016620000000002e-17,189.8545,371.6371
std,112.8297,21.47707,109.6854,0.1244718,7.983808000000001e-17,487.4608,483.5598
min,10.0,1.0,6.0,0.35,0.0,1.0,10.0
25%,108.0,1.0,104.0,0.94,7.3e-65,12.0,191.0
50%,166.0,2.0,159.0,0.98,8.6e-40,60.0,299.0
75%,255.0,3.0,244.0,0.99,4.1000000000000003e-25,254.0,443.0
max,2048.0,912.0,2048.0,1.0,1e-15,35150.0,35210.0


### First figure out what the cutoff should be in terms of what is an overlap

In [11]:
def test_overlap(pos1, pos2, cutoff):
    '''
    Find out whether two defined regions overlap or not.
    Overlap is defined True when at least one of the domains 
    overlap with the other above a certain fraction of the whole.
    '''
    pos1 = [int(s) for s in pos1]
    pos2 = [int(s) for s in pos2]
    both_pairs = sorted([pos1, pos2])
    
    # simplest case, they do not overlap at all
    if both_pairs[0][1] < both_pairs[1][0]:
        return False
    
    else:
        # get overlap part
        overlap_region = both_pairs[0][1] - both_pairs[1][0]
        
        # fraction of first domain
        frac_first = overlap_region / (both_pairs[0][1] - both_pairs[0][0])
                          
        # fraction of second domain
        frac_second = overlap_region / (both_pairs[1][1] - both_pairs[1][0])
        
        if max(frac_first, frac_second) > cutoff:
            #print(both_pairs, True, max(frac_first, frac_second))
            return True
        else:
            #print(both_pairs, False, max(frac_first, frac_second))
            return False
    

    
    
def remove_overlaps(data, cutoff):
    '''
    When domains overlap (as defined by the cutoff level), 
    keep only the domain with the best e-value score.
    '''
    overlaps = [True]
    while any(overlaps):
        
        start_end_pairs = list(zip(data.gene_match_from.values, data.gene_match_to.values))
        e_vals = data.match_evalue.values
        e_vals = e_vals.astype(np.float) 

        
        for i in tqdm(range(len(start_end_pairs))):
            overlaps = []
            for j in range(len(start_end_pairs[i+1:])):

                overlaps.append(test_overlap(start_end_pairs[i], start_end_pairs[i+1+j], cutoff))

            # prepend true or false for the first sequence, depending on whether there was an overlap
            if any(overlaps):
                overlaps = [False for s in range(i)] + [True] + overlaps

                # from the ones with an overlap, what is the position of the one with the lowest e-value
                overlap_e_vals = e_vals[overlaps]

                idx = overlap_e_vals.argmin()
                #print(overlaps)
                #print(overlap_e_vals)
                #print(i+idx)


                # flip the one that said true but had the smallest e-value
                overlaps[i+idx] = False

                # invert the list to flip True vs False
                overlaps = [not i for i in overlaps]

                # filter the data to keep all the ones that originally said False, but now say true
                data = data[overlaps]
                #print(data)
                break
                
    return data


    
# test a range of cutoff values to figure out which one is best
cutoff_vals = [s/100 for s in range(5, 105, 5)]
motifs = []
for cutoff in tqdm(cutoff_vals):
    
    temp_data_filepath = join(INTERMEDIATE, 'BRENDA_for_paper', 'motifs_result_{}.tsv'.format(cutoff))
    if not exists(temp_data_filepath):
        non_redundant_data = df.groupby('uid').apply(lambda x: remove_overlaps(x, cutoff=cutoff))
    else:
        non_redundant_data = pd.read_csv(temp_data_filepath, sep='\t')
        
    motifs.append(non_redundant_data.uid.count())


print('Unfiltered domain number: %s' % df.uid.count())



# plot the data
plt.scatter([s * 100 for s in cutoff_vals], motifs)
plt.xlabel('Pairwise domain overlap required for removal (%)')
plt.ylabel('Domain number')
plt.xticks(np.arange(0, 100+10, 10))
plt.title('Domain overlap sensitivity analysis')


plt.savefig(join(FIGURES, 'domain_sensitivity_analysis.png'), dpi=300, facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format=None,
        transparent=False, bbox_inches=None, pad_inches=0.1,
        frameon=None, metadata=None)

plt.savefig(join(FIGURES, 'domain_sensitivity_analysis.png'), facecolor='w', edgecolor='w',
        orientation='portrait', papertype=None, format=None,
        transparent=False, bbox_inches=None, pad_inches=0.1,
        frameon=None, metadata=None)

  0%|          | 0/20 [00:00<?, ?it/s]
  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 3819.95it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8719.97it/s]

100%|██████████| 4/4 [00:00<00:00, 43577.18it/s]

100%|██████████| 1/1 [00:00<00:00, 15650.39it/s]

  0%|          | 0/5 [00:00<?, ?it/s][A

100%|██████████| 3/3 [00:00<00:00, 18131.00it/s]

100%|██████████| 2/2 [00:00<00:00, 15141.89it/s]

100%|██████████| 3/3 [00:00<00:00, 19152.07it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 11522.81it/s]

100%|██████████| 1/1 [00:00<00:00, 7752.87it/s]

100%|██████████| 1/1 [00:00<00:00, 9776.93it/s]

100%|██████████| 1/1 [00:00<00:00, 1717.57it/s]

100%|██████████| 2/2 [00:00<00:00, 16844.59it/s]

  0%|          | 0/5 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 13617.87it/s]

100%|██████████| 1/1 [00:00<00:00, 14364.05it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎     

100%|██████████| 1/1 [00:00<00:00, 10433.59it/s]

100%|██████████| 1/1 [00:00<00:00, 10180.35it/s]

100%|██████████| 2/2 [00:00<00:00, 15709.00it/s]

  0%|          | 0/8 [00:00<?, ?it/s][A

 14%|█▍        | 1/7 [00:00<00:00, 457.59it/s]

 33%|███▎      | 2/6 [00:00<00:00, 1169.63it/s]

 60%|██████    | 3/5 [00:00<00:00, 1015.41it/s]

100%|██████████| 4/4 [00:00<00:00, 22104.37it/s]

100%|██████████| 1/1 [00:00<00:00, 5343.06it/s]

100%|██████████| 1/1 [00:00<00:00, 8630.26it/s]

100%|██████████| 1/1 [00:00<00:00, 3953.16it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 218.51it/s]

100%|██████████| 2/2 [00:00<00:00, 13508.23it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 299.14it/s]

100%|██████████| 2/2 [00:00<00:00, 15006.45it/s]

100%|██████████| 1/1 [00:00<00:00, 7710.12it/s]

100%|██████████| 1/1 [00:00<00:00, 1224.61it/s]

100%|██████████| 1/1 [00:00<00:00, 9709.04it/s]

100%|██████████| 1/1 [00:00<00:00, 132

100%|██████████| 4/4 [00:00<00:00, 42690.12it/s]

100%|██████████| 3/3 [00:00<00:00, 24576.00it/s]

 14%|█▍        | 1/7 [00:00<00:00, 868.75it/s]

100%|██████████| 6/6 [00:00<00:00, 53092.46it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 10782.27it/s]

100%|██████████| 3/3 [00:00<00:00, 18724.57it/s]

100%|██████████| 1/1 [00:00<00:00, 8388.61it/s]

100%|██████████| 1/1 [00:00<00:00, 9686.61it/s]

100%|██████████| 2/2 [00:00<00:00, 14051.27it/s]

100%|██████████| 1/1 [00:00<00:00, 6700.17it/s]

100%|██████████| 1/1 [00:00<00:00, 10618.49it/s]

100%|██████████| 2/2 [00:00<00:00, 1008.37it/s]

100%|██████████| 2/2 [00:00<00:00, 18236.10it/s]

100%|██████████| 1/1 [00:00<00:00, 10082.46it/s]

100%|██████████| 1/1 [00:00<00:00, 10330.80it/s]

100%|██████████| 1/1 [00:00<00:00, 10727.12it/s]

100%|██████████| 2/2 [00:00<00:00, 18001.30it/s]

100%|██████████| 1/1 [00:00<00:00, 10866.07it/s]

100%|██████████| 1/1 [00:00<00:00, 8683.86it/s]

100%|██████████

100%|██████████| 2/2 [00:00<00:00, 16912.52it/s]

100%|██████████| 2/2 [00:00<00:00, 17225.07it/s]

100%|██████████| 2/2 [00:00<00:00, 15169.27it/s]

100%|██████████| 2/2 [00:00<00:00, 15335.66it/s]

  0%|          | 0/3 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 7612.17it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8962.19it/s]

100%|██████████| 1/1 [00:00<00:00, 7269.16it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9300.01it/s]

100%|██████████| 1/1 [00:00<00:00, 8738.13it/s]

100%|██████████| 1/1 [00:00<00:00, 10645.44it/s]

100%|██████████| 2/2 [00:00<00:00, 8397.01it/s]

100%|██████████| 1/1 [00:00<00:00, 8962.19it/s]

100%|██████████| 1/1 [00:00<00:00, 1615.06it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 15534.46it/s]

100%|██████████| 1/1 [00:00<00:00, 16070.13it/s]

 50%|█████     | 2/4 [00:00<00:00, 931.96it/s]

100%|██████████| 3/3 [00:00<00:00, 26214.4

100%|██████████| 2/2 [00:00<00:00, 21902.37it/s]

100%|██████████| 1/1 [00:00<00:00, 12372.58it/s]

100%|██████████| 1/1 [00:00<00:00, 12595.51it/s]

100%|██████████| 1/1 [00:00<00:00, 16513.01it/s]

100%|██████████| 1/1 [00:00<00:00, 10255.02it/s]

100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]

100%|██████████| 1/1 [00:00<00:00, 1800.90it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 7639.90it/s]

100%|██████████| 1/1 [00:00<00:00, 9300.01it/s]

100%|██████████| 1/1 [00:00<00:00, 1508.74it/s]

100%|██████████| 2/2 [00:00<00:00, 3350.08it/s]

100%|██████████| 3/3 [00:00<00:00, 22795.13it/s]

100%|██████████| 1/1 [00:00<00:00, 8648.05it/s]

100%|██████████| 2/2 [00:00<00:00, 8674.88it/s]

  7%|▋         | 1/14 [00:00<00:00, 567.10it/s]

 18%|█▊        | 2/11 [00:00<00:00, 1399.50it/s]

 38%|███▊      | 3/8 [00:00<00:00, 3193.63it/s]

100%|██████████| 5/5 [00:00<00:00, 30261.93it/s]

100%|██████████| 1/1 [00:00<00:00, 7371.36it/s]

100%|██████████| 1

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 13934.56it/s]

100%|██████████| 1/1 [00:00<00:00, 11983.73it/s]

100%|██████████| 1/1 [00:00<00:00, 8719.97it/s]

100%|██████████| 3/3 [00:00<00:00, 22114.08it/s]

100%|██████████| 1/1 [00:00<00:00, 5899.16it/s]

100%|██████████| 1/1 [00:00<00:00, 7570.95it/s]

100%|██████████| 1/1 [00:00<00:00, 10591.68it/s]

100%|██████████| 2/2 [00:00<00:00, 18157.16it/s]

100%|██████████| 1/1 [00:00<00:00, 14122.24it/s]

100%|██████████| 2/2 [00:00<00:00, 12748.64it/s]

100%|██████████| 1/1 [00:00<00:00, 5419.00it/s]

100%|██████████| 1/1 [00:00<00:00, 4854.52it/s]

100%|██████████| 1/1 [00:00<00:00, 5940.94it/s]

100%|██████████| 1/1 [00:00<00:00, 8577.31it/s]

100%|██████████| 1/1 [00:00<00:00, 6786.90it/s]

100%|██████████| 1/1 [00:00<00:00, 7049.25it/s]

100%|██████████| 1/1 [00:00<00:00, 5991.86it/s]

100%|██████████| 1/1 [00:00<00:00, 8683.86it/s]

  0%|          | 0/2 [00:00

100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]

100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]

 33%|███▎      | 1/3 [00:00<00:00, 314.84it/s]

100%|██████████| 2/2 [00:00<00:00, 19373.23it/s]

100%|██████████| 1/1 [00:00<00:00, 11214.72it/s]

100%|██████████| 1/1 [00:00<00:00, 10979.85it/s]

  0%|          | 0/3 [00:00<?, ?it/s][A

100%|██████████| 2/2 [00:00<00:00, 247.56it/s]

100%|██████████| 2/2 [00:00<00:00, 15252.01it/s]

100%|██████████| 2/2 [00:00<00:00, 12501.65it/s]

100%|██████████| 1/1 [00:00<00:00, 6335.81it/s]

100%|██████████| 2/2 [00:00<00:00, 15307.68it/s]

100%|██████████| 1/1 [00:00<00:00, 7570.95it/s]

100%|██████████| 5/5 [00:00<00:00, 25025.68it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8097.11it/s]

100%|██████████| 1/1 [00:00<00:00, 10356.31it/s]

100%|██████████| 2/2 [00:00<00:00, 17697.49it/s]

  0%|          | 0/6 [00:00<?, ?it/s][A

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 2/2 [00:00<00:00, 16039.40it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 6432.98it/s]

100%|██████████| 1/1 [00:00<00:00, 7219.11it/s]

100%|██████████| 1/1 [00:00<00:00, 9118.05it/s]

100%|██████████| 1/1 [00:00<00:00, 9118.05it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 11155.06it/s]

100%|██████████| 1/1 [00:00<00:00, 10082.46it/s]

100%|██████████| 1/1 [00:00<00:00, 13706.88it/s]

100%|██████████| 1/1 [00:00<00:00, 15141.89it/s]

100%|██████████| 1/1 [00:00<00:00, 15592.21it/s]

100%|██████████| 1/1 [00:00<00:00, 16256.99it/s]

100%|██████████| 1/1 [00:00<00:00, 8594.89it/s]

100%|██████████| 1/1 [00:00<00:00, 4675.92it/s]

100%|██████████| 1/1 [00:00<00:00, 8542.37it/s]

100%|██████████| 1/1 [00:00<00:00, 15420.24it/s]

 25%|██▌       | 2/8 [00:00<00:00, 2246.55it/s]

 33%|███▎      | 2/6 [00:00<00:00, 2407.75it/s]

100%|██████████| 4/4 [00:00<00:00, 43919.41it/s]

  0%|          | 0/8 [00:

100%|██████████| 3/3 [00:00<00:00, 21006.53it/s]

100%|██████████| 2/2 [00:00<00:00, 12104.77it/s]

100%|██████████| 1/1 [00:00<00:00, 1972.86it/s]

100%|██████████| 1/1 [00:00<00:00, 4048.56it/s]

100%|██████████| 1/1 [00:00<00:00, 9404.27it/s]

100%|██████████| 1/1 [00:00<00:00, 9510.89it/s]

100%|██████████| 2/2 [00:00<00:00, 15857.48it/s]

100%|██████████| 1/1 [00:00<00:00, 8144.28it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9619.96it/s]

100%|██████████| 1/1 [00:00<00:00, 8272.79it/s]

100%|██████████| 1/1 [00:00<00:00, 8665.92it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8289.14it/s]

100%|██████████| 2/2 [00:00<00:00, 15917.66it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9177.91it/s]

100%|██████████| 2/2 [00:00<00:00, 15033.35it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8648.05it/s]

100%|██████████| 1/1 [00:00<00:00, 482.83it

100%|██████████| 2/2 [00:00<00:00, 19065.02it/s]

100%|██████████| 2/2 [00:00<00:00, 18275.83it/s]

100%|██████████| 3/3 [00:00<00:00, 23519.46it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 3975.64it/s]

100%|██████████| 2/2 [00:00<00:00, 15650.39it/s]

100%|██████████| 1/1 [00:00<00:00, 7219.11it/s]

100%|██████████| 1/1 [00:00<00:00, 7738.57it/s]

100%|██████████| 1/1 [00:00<00:00, 4169.29it/s]

100%|██████████| 4/4 [00:00<00:00, 25497.29it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 6195.43it/s]

100%|██████████| 1/1 [00:00<00:00, 5940.94it/s]

100%|██████████| 2/2 [00:00<00:00, 15947.92it/s]

100%|██████████| 1/1 [00:00<00:00, 9341.43it/s]

100%|██████████| 1/1 [00:00<00:00, 8981.38it/s]

100%|██████████| 1/1 [00:00<00:00, 7710.12it/s]

100%|██████████| 3/3 [00:00<00:00, 20100.50it/s]

 14%|█▍        | 1/7 [00:00<00:00, 473.77it/s]

 25%|██▌       | 1/4 [00:00<

  0%|          | 0/3 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9532.51it/s]

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 11586.48it/s]

100%|██████████| 2/2 [00:00<00:00, 20311.40it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 11214.72it/s]

100%|██████████| 2/2 [00:00<00:00, 23109.11it/s]

100%|██████████| 1/1 [00:00<00:00, 7244.05it/s]

100%|██████████| 1/1 [00:00<00:00, 14979.66it/s]

100%|██████████| 1/1 [00:00<00:00, 14217.98it/s]

100%|██████████| 1/1 [00:00<00:00, 13148.29it/s]

100%|██████████| 1/1 [00:00<00:00, 13025.79it/s]

100%|██████████| 2/2 [00:00<00:00, 23366.60it/s]

100%|██████████| 1/1 [00:00<00:00, 13315.25it/s]

100%|██████████| 1/1 [00:00<00:00, 12671.61it/s]

100%|██████████| 1/1 [00:00<00:00, 13107.20it/s]

100%|██████████| 2/2 [00:00<00:00, 17189.77it/s]

100%|██████████| 3/3 [00:00<00:00, 17722.41it/s]

100%|██████████| 1/1 [00:00

100%|██████████| 1/1 [00:00<00:00, 8542.37it/s]

100%|██████████| 2/2 [00:00<00:00, 17886.16it/s]

100%|██████████| 1/1 [00:00<00:00, 1608.86it/s]

100%|██████████| 1/1 [00:00<00:00, 9118.05it/s]

100%|██████████| 5/5 [00:00<00:00, 30705.01it/s]

100%|██████████| 1/1 [00:00<00:00, 582.54it/s]

100%|██████████| 1/1 [00:00<00:00, 9341.43it/s]

100%|██████████| 1/1 [00:00<00:00, 8577.31it/s]

 25%|██▌       | 2/8 [00:00<00:00, 795.96it/s]

 33%|███▎      | 2/6 [00:00<00:00, 1458.63it/s]

100%|██████████| 4/4 [00:00<00:00, 28777.39it/s]

100%|██████████| 1/1 [00:00<00:00, 9058.97it/s]

100%|██████████| 1/1 [00:00<00:00, 8081.51it/s]

 33%|███▎      | 1/3 [00:00<00:00, 405.21it/s]

100%|██████████| 2/2 [00:00<00:00, 17331.83it/s]

100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]

100%|██████████| 1/1 [00:00<00:00, 1749.08it/s]

100%|██████████| 1/1 [00:00<00:00, 8289.14it/s]

  0%|          | 0/5 [00:00<?, ?it/s][A

 25%|██▌       | 1/4 [00:00<00:00, 656.90it/s]

100%|██████████| 3/3 [00:0

100%|██████████| 1/1 [00:00<00:00, 9341.43it/s]

100%|██████████| 1/1 [00:00<00:00, 9709.04it/s]

100%|██████████| 1/1 [00:00<00:00, 7598.38it/s]

100%|██████████| 1/1 [00:00<00:00, 9341.43it/s]

100%|██████████| 1/1 [00:00<00:00, 7307.15it/s]

100%|██████████| 1/1 [00:00<00:00, 8272.79it/s]

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 1563.29it/s]

100%|██████████| 1/1 [00:00<00:00, 4350.94it/s]

100%|██████████| 1/1 [00:00<00:00, 9642.08it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8756.38it/s]

100%|██████████| 1/1 [00:00<00:00, 9177.91it/s]

100%|██████████| 1/1 [00:00<00:00, 7584.64it/s]

100%|██████████| 1/1 [00:00<00:00, 7796.10it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 653.01it/s]

100%|██████████| 1/1 [00:00<00:00, 9425.40it/s]

100%|██████████| 1/1 [00:00<00:00, 4076.10it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 7943.76

 67%|██████▋   | 4/6 [00:00<00:00, 1944.28it/s]

100%|██████████| 5/5 [00:00<00:00, 23198.58it/s]

100%|██████████| 1/1 [00:00<00:00, 4369.07it/s]

100%|██████████| 2/2 [00:00<00:00, 15797.76it/s]

100%|██████████| 2/2 [00:00<00:00, 13706.88it/s]

100%|██████████| 1/1 [00:00<00:00, 8577.31it/s]

100%|██████████| 1/1 [00:00<00:00, 8719.97it/s]

100%|██████████| 1/1 [00:00<00:00, 8701.88it/s]

100%|██████████| 2/2 [00:00<00:00, 15169.27it/s]

100%|██████████| 2/2 [00:00<00:00, 20360.70it/s]

100%|██████████| 2/2 [00:00<00:00, 3225.15it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 869.11it/s]

100%|██████████| 2/2 [00:00<00:00, 23431.87it/s]

100%|██████████| 2/2 [00:00<00:00, 24528.09it/s]

100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 12787.51it/s]

100%|██████████| 1/1 [00:00<00:00, 14169.95it/s]

100%|██████████| 2/2 [00:00<00:00, 23563.51it/s]

 22%|██▏       | 

100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]

100%|██████████| 1/1 [00:00<00:00, 7612.17it/s]

100%|██████████| 1/1 [00:00<00:00, 5825.42it/s]

100%|██████████| 1/1 [00:00<00:00, 10010.27it/s]

100%|██████████| 2/2 [00:00<00:00, 17260.51it/s]

100%|██████████| 1/1 [00:00<00:00, 14217.98it/s]

100%|██████████| 1/1 [00:00<00:00, 13357.66it/s]

100%|██████████| 1/1 [00:00<00:00, 12945.38it/s]

100%|██████████| 1/1 [00:00<00:00, 10155.70it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 605.50it/s]

100%|██████████| 2/2 [00:00<00:00, 17549.39it/s]

100%|██████████| 1/1 [00:00<00:00, 10810.06it/s]

100%|██████████| 1/1 [00:00<00:00, 10305.42it/s]

100%|██████████| 1/1 [00:00<00:00, 3328.81it/s]

100%|██████████| 1/1 [00:00<00:00, 3279.36it/s]

100%|██████████| 1/1 [00:00<00:00, 10565.00it/s]

100%|██████████| 1/1 [00:00<00:00, 8612.53it/s]

100%|██████████| 1/1 [00:00<00:00, 10255.02it/s]

100%|██████████| 1/1 [00:00<00:00, 10205.12it/s]

100%|██████████

100%|██████████| 3/3 [00:00<00:00, 23003.50it/s]

100%|██████████| 1/1 [00:00<00:00, 9845.78it/s]

100%|██████████| 3/3 [00:00<00:00, 21998.10it/s]

100%|██████████| 2/2 [00:00<00:00, 15279.80it/s]

100%|██████████| 1/1 [00:00<00:00, 10082.46it/s]

100%|██████████| 1/1 [00:00<00:00, 8612.53it/s]

100%|██████████| 1/1 [00:00<00:00, 7256.58it/s]

100%|██████████| 2/2 [00:00<00:00, 17512.75it/s]

100%|██████████| 2/2 [00:00<00:00, 2207.53it/s]

100%|██████████| 1/1 [00:00<00:00, 9868.95it/s]

100%|██████████| 1/1 [00:00<00:00, 1040.25it/s]

100%|██████████| 2/2 [00:00<00:00, 13252.15it/s]

100%|██████████| 3/3 [00:00<00:00, 24291.34it/s]

100%|██████████| 1/1 [00:00<00:00, 10131.17it/s]

100%|██████████| 1/1 [00:00<00:00, 2989.53it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 7423.55it/s]

100%|██████████| 1/1 [00:00<00:00, 9731.56it/s]

100%|██████████| 1/1 [00:00<00:00, 8439.24it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:0

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9446.63it/s]

100%|██████████| 1/1 [00:00<00:00, 1626.33it/s]

100%|██████████| 2/2 [00:00<00:00, 9157.87it/s]

100%|██████████| 1/1 [00:00<00:00, 7516.67it/s]

100%|██████████| 2/2 [00:00<00:00, 17512.75it/s]

100%|██████████| 1/1 [00:00<00:00, 8559.80it/s]

100%|██████████| 2/2 [00:00<00:00, 17734.90it/s]

100%|██████████| 1/1 [00:00<00:00, 9619.96it/s]

100%|██████████| 1/1 [00:00<00:00, 6502.80it/s]

100%|██████████| 1/1 [00:00<00:00, 7096.96it/s]

100%|██████████| 1/1 [00:00<00:00, 6061.13it/s]

100%|██████████| 1/1 [00:00<00:00, 398.77it/s]

100%|██████████| 2/2 [00:00<00:00, 17296.10it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 512.94it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 10407.70it/s]

100%|██████████| 1/1 [00:00<00:00, 9218.25it/s]

100%|██████████| 2/2 [00:00<00:00, 4478.70it/s

100%|██████████| 4/4 [00:00<00:00, 23831.27it/s]

100%|██████████| 3/3 [00:00<00:00, 21807.47it/s]

100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]

100%|██████████| 1/1 [00:00<00:00, 13706.88it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 561.49it/s]

100%|██████████| 2/2 [00:00<00:00, 25040.62it/s]

100%|██████████| 1/1 [00:00<00:00, 12372.58it/s]

100%|██████████| 4/4 [00:00<00:00, 41425.22it/s]

100%|██████████| 2/2 [00:00<00:00, 19195.90it/s]

100%|██████████| 1/1 [00:00<00:00, 10810.06it/s]

100%|██████████| 1/1 [00:00<00:00, 13357.66it/s]

100%|██████████| 1/1 [00:00<00:00, 12520.31it/s]

100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]

100%|██████████| 1/1 [00:00<00:00, 14027.77it/s]

100%|██████████| 1/1 [00:00<00:00, 909.83it/s]

100%|██████████| 4/4 [00:00<00:00, 30671.33it/s]

100%|██████████| 2/2 [00:00<00:00, 11715.93it/s]

100%|██████████| 2/2 [00:00<00:00, 17660.23it/s]

100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]

100%|████████

100%|██████████| 1/1 [00:00<00:00, 8943.08it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8962.19it/s]

100%|██████████| 2/2 [00:00<00:00, 14820.86it/s]

100%|██████████| 2/2 [00:00<00:00, 13066.37it/s]

  0%|          | 0/8 [00:00<?, ?it/s][A

  0%|          | 0/5 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 496.90it/s]

100%|██████████| 2/2 [00:00<00:00, 13294.15it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 403.80it/s]

100%|██████████| 2/2 [00:00<00:00, 16980.99it/s]

100%|██████████| 2/2 [00:00<00:00, 7025.63it/s]

 33%|███▎      | 1/3 [00:00<00:00, 709.94it/s]

100%|██████████| 2/2 [00:00<00:00, 17549.39it/s]

100%|██████████| 1/1 [00:00<00:00, 7724.32it/s]

  0%|          | 0/3 [00:00<?, ?it/s][A

100%|██████████| 2/2 [00:00<00:00, 8136.38it/s]

100%|██████████| 1/1 [00:00<00:00, 7503.23it/s]

100%|██████████| 1/1 [00:00<00:00, 5159.05it/s]

  0%|          | 0/14 [00:00<?, ?it/s][A

  0%|     

100%|██████████| 1/1 [00:00<00:00, 10010.27it/s]

100%|██████████| 1/1 [00:00<00:00, 516.03it/s]

100%|██████████| 1/1 [00:00<00:00, 5065.58it/s]

100%|██████████| 1/1 [00:00<00:00, 4100.00it/s]

100%|██████████| 2/2 [00:00<00:00, 13911.46it/s]

100%|██████████| 1/1 [00:00<00:00, 8648.05it/s]

100%|██████████| 1/1 [00:00<00:00, 7096.96it/s]

100%|██████████| 2/2 [00:00<00:00, 1824.01it/s]

100%|██████████| 1/1 [00:00<00:00, 9731.56it/s]

100%|██████████| 4/4 [00:00<00:00, 23269.37it/s]

100%|██████████| 1/1 [00:00<00:00, 6636.56it/s]

100%|██████████| 2/2 [00:00<00:00, 16844.59it/s]

100%|██████████| 1/1 [00:00<00:00, 9039.45it/s]

100%|██████████| 1/1 [00:00<00:00, 5146.39it/s]

100%|██████████| 1/1 [00:00<00:00, 9892.23it/s]

  0%|          | 0/8 [00:00<?, ?it/s][A

  0%|          | 0/6 [00:00<?, ?it/s][A

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9962.72it/s]

100%|██████████| 1/1 [00:00<00:00, 5322.72it/

100%|██████████| 2/2 [00:00<00:00, 11037.64it/s]

100%|██████████| 1/1 [00:00<00:00, 10618.49it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 11008.67it/s]

100%|██████████| 1/1 [00:00<00:00, 13751.82it/s]

100%|██████████| 1/1 [00:00<00:00, 13231.24it/s]

 33%|███▎      | 1/3 [00:00<00:00, 1099.14it/s]

100%|██████████| 2/2 [00:00<00:00, 22671.91it/s]

100%|██████████| 2/2 [00:00<00:00, 24818.37it/s]

100%|██████████| 1/1 [00:00<00:00, 13751.82it/s]

100%|██████████| 2/2 [00:00<00:00, 23237.14it/s]

100%|██████████| 1/1 [00:00<00:00, 9489.38it/s]

100%|██████████| 1/1 [00:00<00:00, 6955.73it/s]

100%|██████████| 1/1 [00:00<00:00, 12985.46it/s]

100%|██████████| 1/1 [00:00<00:00, 10538.45it/s]

100%|██████████| 1/1 [00:00<00:00, 13231.24it/s]

100%|██████████| 2/2 [00:00<00:00, 20116.57it/s]

100%|██████████| 1/1 [00:00<00:00, 13706.88it/s]

100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 16448.25it/s]

100%|██████████| 1/1 [00:00<00:00, 10837.99it/s]

100%|██████████| 1/1 [00:00<00:00, 8774.69it/s]

100%|██████████| 2/2 [00:00<00:00, 23763.76it/s]

100%|██████████| 1/1 [00:00<00:00, 4655.17it/s]

100%|██████████| 1/1 [00:00<00:00, 13662.23it/s]

100%|██████████| 1/1 [00:00<00:00, 9915.61it/s]

100%|██████████| 1/1 [00:00<00:00, 13025.79it/s]

100%|██████████| 2/2 [00:00<00:00, 22369.62it/s]

100%|██████████| 2/2 [00:00<00:00, 23967.45it/s]

100%|██████████| 2/2 [00:00<00:00, 767.70it/s]

100%|██████████| 1/1 [00:00<00:00, 13148.29it/s]

  0%|          | 0/8 [00:00<?, ?it/s][A

  0%|          | 0/5 [00:00<?, ?it/s][A

100%|██████████| 3/3 [00:00<00:00, 24244.53it/s]

100%|██████████| 1/1 [00:00<00:00, 2259.86it/s]

100%|██████████| 2/2 [00:00<00:00, 23629.88it/s]

100%|██████████| 1/1 [00:00<00:00, 13231.24it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 13617.87it/s]

100%|██████████| 1/1 [00:00<00:00, 14716.86it/s]

100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]

100%|██████████| 1/1 [00:00<00:00, 12336.19it/s]

100%|██████████| 1/1 [00:00<00:00, 12087.33it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 14665.40it/s]

100%|██████████| 2/2 [00:00<00:00, 24105.20it/s]

100%|██████████| 1/1 [00:00<00:00, 4917.12it/s]

100%|██████████| 1/1 [00:00<00:00, 13400.33it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]

100%|██████████| 3/3 [00:00<00:00, 18641.35it/s]

100%|██████████| 2/2 [00:00<00:00, 25343.23it/s]

100%|██████████| 1/1 [00:00<00:00, 14074.85it/s]

  0%|          | 0/3 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 539.18it/s]

100%|██████████| 1/1 [00:00<00:00, 13888.42it/s]

100%|██████████| 1/1 [00:00<00:00, 14665.40it/s]

100%|██████████| 1/1 [00:00<00:00, 8

100%|██████████| 2/2 [00:00<00:00, 18001.30it/s]

100%|██████████| 2/2 [00:00<00:00, 18315.74it/s]

100%|██████████| 1/1 [00:00<00:00, 11491.24it/s]

100%|██████████| 2/2 [00:00<00:00, 19553.86it/s]

100%|██████████| 1/1 [00:00<00:00, 9597.95it/s]

100%|██████████| 1/1 [00:00<00:00, 10782.27it/s]

100%|██████████| 1/1 [00:00<00:00, 10255.02it/s]

100%|██████████| 1/1 [00:00<00:00, 9986.44it/s]

 25%|██▌       | 1/4 [00:00<00:00, 99.19it/s]

100%|██████████| 2/2 [00:00<00:00, 17512.75it/s]

100%|██████████| 1/1 [00:00<00:00, 8035.07it/s]

100%|██████████| 3/3 [00:00<00:00, 17672.63it/s]

100%|██████████| 4/4 [00:00<00:00, 15279.80it/s]

100%|██████████| 1/1 [00:00<00:00, 6232.25it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 493.33it/s]

100%|██████████| 2/2 [00:00<00:00, 12282.00it/s]

100%|██████████| 2/2 [00:00<00:00, 16256.99it/s]

100%|██████████| 3/3 [00:00<00:00, 19328.59it/s]

100%|██████████| 1/1 [00:00<00:00, 9039.45it/s]

100%|██████████| 1

100%|██████████| 1/1 [00:00<00:00, 9177.91it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9218.25it/s]

100%|██████████| 1/1 [00:00<00:00, 8050.49it/s]

100%|██████████| 1/1 [00:00<00:00, 10280.16it/s]

 33%|███▎      | 1/3 [00:00<00:00, 114.95it/s]

100%|██████████| 2/2 [00:00<00:00, 16384.00it/s]

100%|██████████| 2/2 [00:00<00:00, 14899.84it/s]

100%|██████████| 1/1 [00:00<00:00, 4021.38it/s]

  0%|          | 0/20 [00:00<?, ?it/s][A

  0%|          | 0/16 [00:00<?, ?it/s][A

  0%|          | 0/12 [00:00<?, ?it/s][A

  0%|          | 0/9 [00:00<?, ?it/s][A

  0%|          | 0/6 [00:00<?, ?it/s][A

100%|██████████| 3/3 [00:00<00:00, 20627.72it/s]

 33%|███▎      | 1/3 [00:00<00:00, 351.22it/s]

100%|██████████| 2/2 [00:00<00:00, 14169.95it/s]

100%|██████████| 5/5 [00:00<00:00, 26479.19it/s]

100%|██████████| 3/3 [00:00<00:00, 21290.88it/s]

100%|██████████| 1/1 [00:00<00:00, 9446.63it/s]

  0%|       

  0%|          | 0/42 [00:00<?, ?it/s][A

  0%|          | 0/38 [00:00<?, ?it/s][A

  0%|          | 0/34 [00:00<?, ?it/s][A

  0%|          | 0/30 [00:00<?, ?it/s][A

  0%|          | 0/25 [00:00<?, ?it/s][A

  0%|          | 0/17 [00:00<?, ?it/s][A

  0%|          | 0/15 [00:00<?, ?it/s][A

  0%|          | 0/11 [00:00<?, ?it/s][A

  0%|          | 0/8 [00:00<?, ?it/s][A

100%|██████████| 4/4 [00:00<00:00, 23967.45it/s]

100%|██████████| 2/2 [00:00<00:00, 17189.77it/s]

100%|██████████| 1/1 [00:00<00:00, 8473.34it/s]

100%|██████████| 1/1 [00:00<00:00, 7598.38it/s]

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 8665.92it/s]

100%|██████████| 2/2 [00:00<00:00, 8073.73it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 6743.25it/s]

  0%|          | 0/56 [00:00<?, ?it/s][A

  0%|          | 0/50 [00:00<?, ?it/s][A

  0%|          | 0/45 [00:00<?, ?it/s][A

  0%|          | 0/40 [00:00<?, ?it/s][A

  0%| 

100%|██████████| 2/2 [00:00<00:00, 17296.10it/s]

100%|██████████| 1/1 [00:00<00:00, 9279.43it/s]

100%|██████████| 1/1 [00:00<00:00, 9039.45it/s]

100%|██████████| 2/2 [00:00<00:00, 16194.22it/s]

100%|██████████| 1/1 [00:00<00:00, 7397.36it/s]

100%|██████████| 2/2 [00:00<00:00, 16225.55it/s]

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 8224.13it/s]

100%|██████████| 2/2 [00:00<00:00, 15738.48it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A

 14%|█▍        | 1/7 [00:00<00:00, 503.88it/s]

 33%|███▎      | 2/6 [00:00<00:00, 1291.75it/s]

 60%|██████    | 3/5 [00:00<00:00, 1322.01it/s]

100%|██████████| 4/4 [00:00<00:00, 2212.19it/s]

100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]

100%|██████████| 3/3 [00:00<00:00, 6168.09it/s]

100%|██████████| 1/1 [00:00<00:00, 9425.40it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9258.95it/s]

100%|██████████| 1/1 [00:00<00:00, 7667.83it/s]

100%|██████████| 2/2 [00:00<00

100%|██████████| 1/1 [00:00<00:00, 7049.25it/s]

100%|██████████| 1/1 [00:00<00:00, 1974.72it/s]

100%|██████████| 1/1 [00:00<00:00, 9177.91it/s]

100%|██████████| 1/1 [00:00<00:00, 9137.92it/s]

100%|██████████| 1/1 [00:00<00:00, 1724.63it/s]

100%|██████████| 1/1 [00:00<00:00, 2223.92it/s]

100%|██████████| 1/1 [00:00<00:00, 9532.51it/s]

100%|██████████| 3/3 [00:00<00:00, 23652.09it/s]

100%|██████████| 1/1 [00:00<00:00, 8490.49it/s]

100%|██████████| 1/1 [00:00<00:00, 8594.89it/s]

100%|██████████| 1/1 [00:00<00:00, 7898.88it/s]

100%|██████████| 1/1 [00:00<00:00, 9731.56it/s]

100%|██████████| 2/2 [00:00<00:00, 14873.42it/s]

100%|██████████| 1/1 [00:00<00:00, 10155.70it/s]

100%|██████████| 1/1 [00:00<00:00, 9532.51it/s]

100%|██████████| 1/1 [00:00<00:00, 357.14it/s]

100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]

100%|██████████| 1/1 [00:00<00:00, 8981.38it/s]

100%|██████████| 2/2 [00:00<00:00, 16743.73it/s]

100%|██████████| 1/1 [00:00<00:00, 9986.44it/s]

100%|██████████|

100%|██████████| 2/2 [00:00<00:00, 21732.15it/s]

100%|██████████| 1/1 [00:00<00:00, 5084.00it/s]

100%|██████████| 3/3 [00:00<00:00, 2552.83it/s]

100%|██████████| 1/1 [00:00<00:00, 11881.88it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 4136.39it/s]

100%|██████████| 1/1 [00:00<00:00, 13662.23it/s]

100%|██████████| 1/1 [00:00<00:00, 13148.29it/s]

100%|██████████| 1/1 [00:00<00:00, 8886.24it/s]

100%|██████████| 1/1 [00:00<00:00, 1314.01it/s]

100%|██████████| 1/1 [00:00<00:00, 9300.01it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A

  0%|          | 0/8 [00:00<?, ?it/s][A

  0%|          | 0/6 [00:00<?, ?it/s][A

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 12446.01it/s]

100%|██████████| 2/2 [00:00<00:00, 24244.53it/s]

100%|██████████| 1/1 [00:00<00:00, 14217.98it/s]

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 10538.45it/s]

100%

100%|██████████| 2/2 [00:00<00:00, 13210.41it/s]

100%|██████████| 1/1 [00:00<00:00, 8405.42it/s]

100%|██████████| 1/1 [00:00<00:00, 10034.22it/s]

100%|██████████| 1/1 [00:00<00:00, 9238.56it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 683.00it/s]

100%|██████████| 2/2 [00:00<00:00, 15448.63it/s]

100%|██████████| 2/2 [00:00<00:00, 11949.58it/s]

100%|██████████| 2/2 [00:00<00:00, 17260.51it/s]

100%|██████████| 1/1 [00:00<00:00, 8867.45it/s]

100%|██████████| 1/1 [00:00<00:00, 7724.32it/s]

100%|██████████| 2/2 [00:00<00:00, 1228.74it/s]

100%|██████████| 2/2 [00:00<00:00, 14146.05it/s]

100%|██████████| 1/1 [00:00<00:00, 10131.17it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 10330.80it/s]

100%|██████████| 1/1 [00:00<00:00, 4457.28it/s]

 33%|███▎      | 1/3 [00:00<00:00, 491.42it/s]

100%|██████████| 2/2 [00:00<00:00, 16743.73it/s]

100%|██████████| 1/1 [00:00<00:00, 2832.08it/s]

100%|██████████| 1/1 [00:00

100%|██████████| 2/2 [00:00<00:00, 9974.56it/s]

100%|██████████| 1/1 [00:00<00:00, 5356.71it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 12905.55it/s]

100%|██████████| 1/1 [00:00<00:00, 13662.23it/s]

  0%|          | 0/6 [00:00<?, ?it/s][A

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9098.27it/s]

100%|██████████| 2/2 [00:00<00:00, 22857.24it/s]

100%|██████████| 1/1 [00:00<00:00, 12122.27it/s]

100%|██████████| 1/1 [00:00<00:00, 13530.01it/s]

100%|██████████| 1/1 [00:00<00:00, 11915.64it/s]

  0%|          | 0/3 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 13751.82it/s]

100%|██████████| 2/2 [00:00<00:00, 12104.77it/s]

100%|██████████| 1/1 [00:00<00:00, 2576.35it/s]

100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]

100%|██████████| 1/1 [00:00<00:00, 816.33it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 11554.56it/s]

  0%

100%|██████████| 1/1 [00:00<00:00, 1731.75it/s]

100%|██████████| 2/2 [00:00<00:00, 2718.28it/s]

100%|██████████| 2/2 [00:00<00:00, 14364.05it/s]

100%|██████████| 1/1 [00:00<00:00, 8559.80it/s]

100%|██████████| 2/2 [00:00<00:00, 16448.25it/s]

100%|██████████| 1/1 [00:00<00:00, 10280.16it/s]

100%|██████████| 2/2 [00:00<00:00, 11081.38it/s]

100%|██████████| 1/1 [00:00<00:00, 7145.32it/s]

100%|██████████| 2/2 [00:00<00:00, 12139.81it/s]

100%|██████████| 1/1 [00:00<00:00, 7884.03it/s]

100%|██████████| 1/1 [00:00<00:00, 8886.24it/s]

100%|██████████| 1/1 [00:00<00:00, 988.06it/s]

100%|██████████| 2/2 [00:00<00:00, 14847.09it/s]

100%|██████████| 2/2 [00:00<00:00, 15477.14it/s]

100%|██████████| 1/1 [00:00<00:00, 6105.25it/s]

100%|██████████| 1/1 [00:00<00:00, 9058.97it/s]

100%|██████████| 1/1 [00:00<00:00, 2734.23it/s]

100%|██████████| 1/1 [00:00<00:00, 12633.45it/s]

100%|██████████| 1/1 [00:00<00:00, 10230.01it/s]

100%|██████████| 1/1 [00:00<00:00, 7639.90it/s]

100%|███████

100%|██████████| 1/1 [00:00<00:00, 8473.34it/s]

100%|██████████| 1/1 [00:00<00:00, 1116.40it/s]

100%|██████████| 1/1 [00:00<00:00, 4080.06it/s]

100%|██████████| 2/2 [00:00<00:00, 15060.34it/s]

100%|██████████| 1/1 [00:00<00:00, 9098.27it/s]

100%|██████████| 1/1 [00:00<00:00, 3663.15it/s]

100%|██████████| 1/1 [00:00<00:00, 8224.13it/s]

100%|██████████| 1/1 [00:00<00:00, 3968.12it/s]

100%|██████████| 1/1 [00:00<00:00, 5108.77it/s]

100%|██████████| 1/1 [00:00<00:00, 8867.45it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

100%|██████████| 3/3 [00:00<00:00, 21254.92it/s]

100%|██████████| 2/2 [00:00<00:00, 16677.15it/s]

100%|██████████| 1/1 [00:00<00:00, 8612.53it/s]

100%|██████████| 2/2 [00:00<00:00, 17660.23it/s]

100%|██████████| 1/1 [00:00<00:00, 8208.03it/s]

100%|██████████| 2/2 [00:00<00:00, 17260.51it/s]

100%|██████████| 1/1 [00:00<00:00, 9137.92it/s]

100%|██████████| 1/1 [00:00<00:00, 10180.35it/s]

100%|██████████| 1/1 [00:00<00:00, 10180.35it/s]

100%|██████████| 1/1

100%|██████████| 1/1 [00:00<00:00, 2371.00it/s]

100%|██████████| 1/1 [00:00<00:00, 1748.36it/s]

100%|██████████| 1/1 [00:00<00:00, 8256.50it/s]

100%|██████████| 1/1 [00:00<00:00, 9078.58it/s]

100%|██████████| 1/1 [00:00<00:00, 12865.96it/s]

100%|██████████| 2/2 [00:00<00:00, 5065.58it/s]

100%|██████████| 1/1 [00:00<00:00, 13934.56it/s]

100%|██████████| 1/1 [00:00<00:00, 12557.80it/s]

100%|██████████| 2/2 [00:00<00:00, 22610.80it/s]

100%|██████████| 1/1 [00:00<00:00, 14563.56it/s]

100%|██████████| 2/2 [00:00<00:00, 23831.27it/s]

100%|██████████| 1/1 [00:00<00:00, 13357.66it/s]

100%|██████████| 1/1 [00:00<00:00, 13400.33it/s]

100%|██████████| 1/1 [00:00<00:00, 13443.28it/s]

100%|██████████| 1/1 [00:00<00:00, 11491.24it/s]

100%|██████████| 1/1 [00:00<00:00, 11814.94it/s]

100%|██████████| 1/1 [00:00<00:00, 14563.56it/s]

100%|██████████| 2/2 [00:00<00:00, 11444.21it/s]

100%|██████████| 3/3 [00:00<00:00, 29127.11it/s]

100%|██████████| 1/1 [00:00<00:00, 10979.85it/s]

100%|

100%|██████████| 1/1 [00:00<00:00, 9039.45it/s]

100%|██████████| 1/1 [00:00<00:00, 8176.03it/s]

100%|██████████| 1/1 [00:00<00:00, 6213.78it/s]

100%|██████████| 1/1 [00:00<00:00, 5907.47it/s]

100%|██████████| 2/2 [00:00<00:00, 11732.32it/s]

100%|██████████| 2/2 [00:00<00:00, 15917.66it/s]

100%|██████████| 2/2 [00:00<00:00, 11699.59it/s]

100%|██████████| 1/1 [00:00<00:00, 10230.01it/s]

100%|██████████| 1/1 [00:00<00:00, 9642.08it/s]

100%|██████████| 2/2 [00:00<00:00, 14488.10it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8924.05it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9404.27it/s]

100%|██████████| 2/2 [00:00<00:00, 18157.16it/s]

100%|██████████| 1/1 [00:00<00:00, 7358.43it/s]

100%|██████████| 1/1 [00:00<00:00, 11715.93it/s]

100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]

100%|██████████| 1/1 [00:00<00:00, 7943.76it/s]

100%|██████████| 1/1 [00:00<00:00, 10866.07it/s]

100%|██████████| 1/1 [00:0

100%|██████████| 1/1 [00:00<00:00, 7332.70it/s]

100%|██████████| 2/2 [00:00<00:00, 23629.88it/s]

100%|██████████| 1/1 [00:00<00:00, 5974.79it/s]

100%|██████████| 1/1 [00:00<00:00, 13573.80it/s]

100%|██████████| 1/1 [00:00<00:00, 3795.75it/s]

100%|██████████| 1/1 [00:00<00:00, 9962.72it/s]

100%|██████████| 2/2 [00:00<00:00, 22250.95it/s]

100%|██████████| 1/1 [00:00<00:00, 12748.64it/s]

100%|██████████| 3/3 [00:00<00:00, 6393.76it/s]

100%|██████████| 1/1 [00:00<00:00, 12300.01it/s]

100%|██████████| 1/1 [00:00<00:00, 3840.94it/s]

100%|██████████| 2/2 [00:00<00:00, 13273.11it/s]

100%|██████████| 1/1 [00:00<00:00, 8886.24it/s]

  0%|          | 0/8 [00:00<?, ?it/s][A

  0%|          | 0/6 [00:00<?, ?it/s][A

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 7854.50it/s]

100%|██████████| 1/1 [00:00<00:00, 8811.56it/s]

100%|██████████| 1/1 [00:00<00:00, 10591.68it/s]

100%|██████████| 2/2 [00:00<00:00, 17154.

100%|██████████| 1/1 [00:00<00:00, 8507.72it/s]

100%|██████████| 2/2 [00:00<00:00, 5207.08it/s]

100%|██████████| 2/2 [00:00<00:00, 11244.78it/s]

100%|██████████| 1/1 [00:00<00:00, 1895.30it/s]

100%|██████████| 1/1 [00:00<00:00, 9238.56it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

 33%|███▎      | 1/3 [00:00<00:00, 316.72it/s]

100%|██████████| 2/2 [00:00<00:00, 16480.57it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]

100%|██████████| 2/2 [00:00<00:00, 3746.59it/s]

100%|██████████| 3/3 [00:00<00:00, 22795.13it/s]

100%|██████████| 1/1 [00:00<00:00, 2460.00it/s]

100%|██████████| 1/1 [00:00<00:00, 3862.16it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

100%|██████████| 2/2 [00:00<00:00, 18040.02it/s]

100%|██████████| 1/1 [00:00<00:00, 6594.82it/s]

100%|██████████| 2/2 [00:00<00:00, 14538.32it/s]

100%|██████████| 1/1 [00:00<00:00, 3509.88it/s]

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 9

100%|██████████| 1/1 [00:00<00:00, 9576.04it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 4017.53it/s]

100%|██████████| 1/1 [00:00<00:00, 8665.92it/s]

100%|██████████| 2/2 [00:00<00:00, 14027.77it/s]

100%|██████████| 1/1 [00:00<00:00, 9078.58it/s]

100%|██████████| 1/1 [00:00<00:00, 3521.67it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 10951.19it/s]

100%|██████████| 2/2 [00:00<00:00, 15006.45it/s]

100%|██████████| 1/1 [00:00<00:00, 9597.95it/s]

100%|██████████| 2/2 [00:00<00:00, 7025.63it/s]

100%|██████████| 1/1 [00:00<00:00, 4476.31it/s]

100%|██████████| 2/2 [00:00<00:00, 12390.85it/s]

100%|██████████| 2/2 [00:00<00:00, 9177.91it/s]

100%|██████████| 1/1 [00:00<00:00, 12905.55it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 13706.88it/s]

100%|██████████| 2/2 [00:00<00:00, 9068.77it/s]

100%|██████████| 1/1 [00:00<00:00, 13662.23it/s]

100%|██████████| 1/1 [00:00<00:00,

100%|██████████| 1/1 [00:00<00:00, 4691.62it/s]

100%|██████████| 1/1 [00:00<00:00, 11814.94it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 14074.85it/s]

100%|██████████| 2/2 [00:00<00:00, 13865.47it/s]

100%|██████████| 2/2 [00:00<00:00, 12264.05it/s]

100%|██████████| 1/1 [00:00<00:00, 5841.65it/s]

100%|██████████| 1/1 [00:00<00:00, 4510.00it/s]

100%|██████████| 2/2 [00:00<00:00, 24966.10it/s]

100%|██████████| 2/2 [00:00<00:00, 6177.18it/s]

100%|██████████| 1/1 [00:00<00:00, 13797.05it/s]

100%|██████████| 1/1 [00:00<00:00, 13934.56it/s]

100%|██████████| 1/1 [00:00<00:00, 13934.56it/s]

100%|██████████| 2/2 [00:00<00:00, 24036.13it/s]

  0%|          | 0/3 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 2884.67it/s]

100%|██████████| 2/2 [00:00<00:00, 16545.58it/s]

  0%|          | 0/12 [00:00<?, ?it/s][A

  9%|▉         | 1/11 [00:00<00:00, 355.18it/s]

 20%|██        | 2/10 [00:00<00:00, 682.67it/s]

 33%|███▎      | 3/9 [00:00<00

100%|██████████| 1/1 [00:00<00:00, 9000.65it/s]

100%|██████████| 1/1 [00:00<00:00, 11184.81it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

100%|██████████| 2/2 [00:00<00:00, 20311.40it/s]

100%|██████████| 1/1 [00:00<00:00, 9664.29it/s]

100%|██████████| 1/1 [00:00<00:00, 7710.12it/s]

100%|██████████| 1/1 [00:00<00:00, 4288.65it/s]

100%|██████████| 1/1 [00:00<00:00, 13357.66it/s]

100%|██████████| 1/1 [00:00<00:00, 5801.25it/s]

100%|██████████| 3/3 [00:00<00:00, 8267.35it/s]

100%|██████████| 1/1 [00:00<00:00, 8439.24it/s]

100%|██████████| 1/1 [00:00<00:00, 2970.47it/s]

100%|██████████| 1/1 [00:00<00:00, 13617.87it/s]

100%|██████████| 2/2 [00:00<00:00, 15709.00it/s]

100%|██████████| 4/4 [00:00<00:00, 40524.68it/s]

100%|██████████| 1/1 [00:00<00:00, 12671.61it/s]

100%|██████████| 2/2 [00:00<00:00, 23431.87it/s]

  0%|          | 0/4 [00:00<?, ?it/s][A

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 13066.37it/s]

100%|██████████| 2/2 [00:00<00:0

100%|██████████| 1/1 [00:00<00:00, 6955.73it/s]

100%|██████████| 2/2 [00:00<00:00, 15224.33it/s]

100%|██████████| 1/1 [00:00<00:00, 5683.34it/s]

100%|██████████| 1/1 [00:00<00:00, 9754.20it/s]

100%|██████████| 1/1 [00:00<00:00, 3572.66it/s]

100%|██████████| 2/2 [00:00<00:00, 15947.92it/s]

100%|██████████| 1/1 [00:00<00:00, 3606.45it/s]

100%|██████████| 1/1 [00:00<00:00, 9754.20it/s]

100%|██████████| 2/2 [00:00<00:00, 15252.01it/s]

100%|██████████| 1/1 [00:00<00:00, 7002.18it/s]

100%|██████████| 1/1 [00:00<00:00, 6297.75it/s]

100%|██████████| 1/1 [00:00<00:00, 4021.38it/s]

100%|██████████| 1/1 [00:00<00:00, 8774.69it/s]

100%|██████████| 1/1 [00:00<00:00, 9962.72it/s]

100%|██████████| 2/2 [00:00<00:00, 5522.45it/s]

100%|██████████| 1/1 [00:00<00:00, 8224.13it/s]

100%|██████████| 1/1 [00:00<00:00, 9300.01it/s]

100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]

100%|██████████| 1/1 [00:00<00:00, 9425.40it/s]

100%|██████████| 1/1 [00:00<00:00, 5405.03it/s]

100%|██████████|

100%|██████████| 1/1 [00:00<00:00, 12300.01it/s]

100%|██████████| 1/1 [00:00<00:00, 13486.51it/s]

100%|██████████| 3/3 [00:00<00:00, 10459.61it/s]

100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]

100%|██████████| 1/1 [00:00<00:00, 3628.29it/s]

100%|██████████| 1/1 [00:00<00:00, 7530.17it/s]

100%|██████████| 1/1 [00:00<00:00, 8793.09it/s]

100%|██████████| 8/8 [00:00<00:00, 56679.78it/s]

100%|██████████| 1/1 [00:00<00:00, 13934.56it/s]

100%|██████████| 1/1 [00:00<00:00, 13443.28it/s]

100%|██████████| 1/1 [00:00<00:00, 8719.97it/s]

100%|██████████| 1/1 [00:00<00:00, 13273.11it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 9446.63it/s]

100%|██████████| 2/2 [00:00<00:00, 18600.02it/s]

100%|██████████| 1/1 [00:00<00:00, 13189.64it/s]

100%|██████████| 2/2 [00:00<00:00, 9167.88it/s]

100%|██████████| 3/3 [00:00<00:00, 31855.47it/s]

100%|██████████| 3/3 [00:00<00:00, 14380.47it/s]

100%|██████████| 1/1 [00:00<00:00, 13888.42it/s]

100%|█████████

100%|██████████| 1/1 [00:00<00:00, 2513.06it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 5599.87it/s]

100%|██████████| 2/2 [00:00<00:00, 19239.93it/s]

100%|██████████| 3/3 [00:00<00:00, 20526.77it/s]

100%|██████████| 1/1 [00:00<00:00, 9986.44it/s]

100%|██████████| 2/2 [00:00<00:00, 17119.61it/s]

100%|██████████| 1/1 [00:00<00:00, 3545.48it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 8830.11it/s]

100%|██████████| 1/1 [00:00<00:00, 4588.95it/s]

100%|██████████| 1/1 [00:00<00:00, 10082.46it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 4481.09it/s]

100%|██████████| 1/1 [00:00<00:00, 9467.95it/s]

100%|██████████| 2/2 [00:00<00:00, 3343.41it/s]

100%|██████████| 2/2 [00:00<00:00, 17697.49it/s]

100%|██████████| 3/3 [00:00<00:00, 24672.38it/s]

100%|██████████| 2/2 [00:00<00:00, 7688.92it/s]

100%|██████████| 2/2 [00:00<00:00, 15363.75it/s]

100%|██████████| 1/1 [00:00<00:00,

  0%|          | 0/4 [00:00<?, ?it/s][A

100%|██████████| 2/2 [00:00<00:00, 5127.51it/s]

100%|██████████| 1/1 [00:00<00:00, 14266.34it/s]

100%|██████████| 1/1 [00:00<00:00, 13662.23it/s]

100%|██████████| 1/1 [00:00<00:00, 780.63it/s]

100%|██████████| 2/2 [00:00<00:00, 13640.01it/s]

100%|██████████| 1/1 [00:00<00:00, 4096.00it/s]

100%|██████████| 1/1 [00:00<00:00, 12905.55it/s]

100%|██████████| 2/2 [00:00<00:00, 15650.39it/s]

100%|██████████| 2/2 [00:00<00:00, 24745.16it/s]

100%|██████████| 1/1 [00:00<00:00, 3104.59it/s]

100%|██████████| 1/1 [00:00<00:00, 12865.96it/s]

100%|██████████| 1/1 [00:00<00:00, 11781.75it/s]

100%|██████████| 1/1 [00:00<00:00, 3986.98it/s]

100%|██████████| 1/1 [00:00<00:00, 13315.25it/s]

  0%|          | 0/2 [00:00<?, ?it/s][A

100%|██████████| 1/1 [00:00<00:00, 12018.06it/s]

100%|██████████| 1/1 [00:00<00:00, 9137.92it/s]

100%|██████████| 2/2 [00:00<00:00, 22075.28it/s]

100%|██████████| 1/1 [00:00<00:00, 9510.89it/s]

100%|██████████| 2/2 [00

KeyboardInterrupt: 

### Now remove redundant domains using the most appropriate cutoff

In [None]:
# filter out overlapping domains using pre-determined cutoff value
non_redundant_data = all_genome_data.groupby('uid').apply(lambda x: remove_overlaps(x, cutoff=0.2))

# write file to disk
non_redundant_data.to_csv(join(INTERMEDIATE, 'BRENDA_for_paper', 'pfam_hmm_results_non-redundant.tsv'), sep='\t', index=False)

### x

In [10]:
df.groupby('uid')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f12edf2d390>

# Other approach

### Download Pfam pre-calculated domain data

In [None]:
url = 'ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam33.1/Pfam-A.full.uniprot.gz'
outfile = join(RAW_EXTERNAL, 'BRENDA_for_paper', 'Pfam-A.full.uniprot.gz')

if not exists(outfile.replace('.gz', '')):
    # download the hmm file
    my_cmd = 'wget -O {} {}'.format(outfile, url)
    os.system(my_cmd)

    # unzip file
    my_cmd = 'gunzip {}'.format(outfile)
    os.system(my_cmd)

### Get all uniprot identifiers from fasta files

In [3]:
def get_identifiers():
    '''
    Function for distributing the files on a number of workers.
    '''
    # define folder
    inpath = join(INTERMEDIATE, 'BRENDA_for_paper', 'ec_identity_clustering')

    # run assemble a list of the file to run
    data = {}
    for fi in sorted(os.listdir(inpath)):

        if not fi.endswith('90_augmented.fasta'):
            continue

        # print(fi)

        with open(join(inpath, fi), 'r') as f:
            for line in f:
                # only look at header lines
                if line.startswith('>'):
                    line_data = line.lstrip('>').rstrip().split(';')
                    uid = line_data[0]

                    data[uid] = []

    print('{} identifiers'.format(len(data.keys())))

    return data


data = get_identifiers()

  1%|          | 245/37060 [00:00<00:15, 2448.96it/s]

1.1.1.100_clustered_sequences_90_augmented.fasta
1.1.1.101_clustered_sequences_90_augmented.fasta
1.1.1.102_clustered_sequences_90_augmented.fasta
1.1.1.103_clustered_sequences_90_augmented.fasta
1.1.1.104_clustered_sequences_90_augmented.fasta
1.1.1.105_clustered_sequences_90_augmented.fasta
1.1.1.107_clustered_sequences_90_augmented.fasta
1.1.1.108_clustered_sequences_90_augmented.fasta
1.1.1.109_clustered_sequences_90_augmented.fasta
1.1.1.10_clustered_sequences_90_augmented.fasta
1.1.1.110_clustered_sequences_90_augmented.fasta
1.1.1.112_clustered_sequences_90_augmented.fasta
1.1.1.113_clustered_sequences_90_augmented.fasta
1.1.1.114_clustered_sequences_90_augmented.fasta
1.1.1.115_clustered_sequences_90_augmented.fasta
1.1.1.116_clustered_sequences_90_augmented.fasta
1.1.1.117_clustered_sequences_90_augmented.fasta
1.1.1.118_clustered_sequences_90_augmented.fasta
1.1.1.119_clustered_sequences_90_augmented.fasta
1.1.1.11_clustered_sequences_90_augmented.fasta
1.1.1.120_clustered_se

  2%|▏         | 810/37060 [00:00<00:16, 2216.51it/s]

1.1.1.232_clustered_sequences_90_augmented.fasta
1.1.1.233_clustered_sequences_90_augmented.fasta
1.1.1.234_clustered_sequences_90_augmented.fasta
1.1.1.235_clustered_sequences_90_augmented.fasta
1.1.1.236_clustered_sequences_90_augmented.fasta
1.1.1.237_clustered_sequences_90_augmented.fasta
1.1.1.238_clustered_sequences_90_augmented.fasta
1.1.1.239_clustered_sequences_90_augmented.fasta
1.1.1.23_clustered_sequences_90_augmented.fasta
1.1.1.241_clustered_sequences_90_augmented.fasta
1.1.1.242_clustered_sequences_90_augmented.fasta
1.1.1.243_clustered_sequences_90_augmented.fasta
1.1.1.244_clustered_sequences_90_augmented.fasta
1.1.1.245_clustered_sequences_90_augmented.fasta
1.1.1.246_clustered_sequences_90_augmented.fasta
1.1.1.247_clustered_sequences_90_augmented.fasta
1.1.1.248_clustered_sequences_90_augmented.fasta
1.1.1.249_clustered_sequences_90_augmented.fasta
1.1.1.24_clustered_sequences_90_augmented.fasta
1.1.1.250_clustered_sequences_90_augmented.fasta
1.1.1.251_clustered_se

  4%|▍         | 1660/37060 [00:00<00:13, 2567.62it/s]

1.1.1.360_clustered_sequences_90_augmented.fasta
1.1.1.361_clustered_sequences_90_augmented.fasta
1.1.1.362_clustered_sequences_90_augmented.fasta
1.1.1.363_clustered_sequences_90_augmented.fasta
1.1.1.364_clustered_sequences_90_augmented.fasta
1.1.1.365_clustered_sequences_90_augmented.fasta
1.1.1.366_clustered_sequences_90_augmented.fasta
1.1.1.367_clustered_sequences_90_augmented.fasta
1.1.1.368_clustered_sequences_90_augmented.fasta
1.1.1.369_clustered_sequences_90_augmented.fasta
1.1.1.36_clustered_sequences_90_augmented.fasta
1.1.1.370_clustered_sequences_90_augmented.fasta
1.1.1.371_clustered_sequences_90_augmented.fasta
1.1.1.372_clustered_sequences_90_augmented.fasta
1.1.1.373_clustered_sequences_90_augmented.fasta
1.1.1.374_clustered_sequences_90_augmented.fasta
1.1.1.375_clustered_sequences_90_augmented.fasta
1.1.1.376_clustered_sequences_90_augmented.fasta
1.1.1.377_clustered_sequences_90_augmented.fasta
1.1.1.378_clustered_sequences_90_augmented.fasta
1.1.1.37_clustered_se

  7%|▋         | 2750/37060 [00:00<00:10, 3280.84it/s]

1.1.1.92_clustered_sequences_90_augmented.fasta
1.1.1.93_clustered_sequences_90_augmented.fasta
1.1.1.94_clustered_sequences_90_augmented.fasta
1.1.1.95_clustered_sequences_90_augmented.fasta
1.1.1.96_clustered_sequences_90_augmented.fasta
1.1.1.99_clustered_sequences_90_augmented.fasta
1.1.1.9_clustered_sequences_90_augmented.fasta
1.1.1.B18_clustered_sequences_90_augmented.fasta
1.1.1.B19_clustered_sequences_90_augmented.fasta
1.1.1.B20_clustered_sequences_90_augmented.fasta
1.1.1.B25_clustered_sequences_90_augmented.fasta
1.1.1.B28_clustered_sequences_90_augmented.fasta
1.1.1.B35_clustered_sequences_90_augmented.fasta
1.1.1.B38_clustered_sequences_90_augmented.fasta
1.1.1.B3_clustered_sequences_90_augmented.fasta
1.1.1.B40_clustered_sequences_90_augmented.fasta
1.1.1.B47_clustered_sequences_90_augmented.fasta
1.1.1.B4_clustered_sequences_90_augmented.fasta
1.1.1.B51_clustered_sequences_90_augmented.fasta
1.1.1.B52_clustered_sequences_90_augmented.fasta
1.1.1.B57_clustered_sequences_

 12%|█▏        | 4265/37060 [00:01<00:08, 4004.94it/s]

1.11.1.7_clustered_sequences_90_augmented.fasta
1.11.1.8_clustered_sequences_90_augmented.fasta
1.11.1.9_clustered_sequences_90_augmented.fasta
1.11.1.B10_clustered_sequences_90_augmented.fasta
1.11.1.B2_clustered_sequences_90_augmented.fasta
1.11.1.B6_clustered_sequences_90_augmented.fasta
1.11.1.B7_clustered_sequences_90_augmented.fasta
1.11.1.B8_clustered_sequences_90_augmented.fasta
1.11.2.1_clustered_sequences_90_augmented.fasta
1.11.2.2_clustered_sequences_90_augmented.fasta
1.11.2.3_clustered_sequences_90_augmented.fasta
1.11.2.4_clustered_sequences_90_augmented.fasta
1.11.2.5_clustered_sequences_90_augmented.fasta
1.12.1.1_clustered_sequences_90_augmented.fasta
1.12.1.2_clustered_sequences_90_augmented.fasta
1.12.1.3_clustered_sequences_90_augmented.fasta
1.12.1.4_clustered_sequences_90_augmented.fasta
1.12.1.5_clustered_sequences_90_augmented.fasta
1.12.2.1_clustered_sequences_90_augmented.fasta
1.12.5.1_clustered_sequences_90_augmented.fasta
1.12.7.1_clustered_sequences_90_au

 17%|█▋        | 6400/37060 [00:01<00:05, 5840.21it/s]

1.14.13.61_clustered_sequences_90_augmented.fasta
1.14.13.62_clustered_sequences_90_augmented.fasta
1.14.13.63_clustered_sequences_90_augmented.fasta
1.14.13.65_clustered_sequences_90_augmented.fasta
1.14.13.67_clustered_sequences_90_augmented.fasta
1.14.13.68_clustered_sequences_90_augmented.fasta
1.14.13.69_clustered_sequences_90_augmented.fasta
1.14.13.6_clustered_sequences_90_augmented.fasta
1.14.13.70_clustered_sequences_90_augmented.fasta
1.14.13.71_clustered_sequences_90_augmented.fasta
1.14.13.72_clustered_sequences_90_augmented.fasta
1.14.13.73_clustered_sequences_90_augmented.fasta
1.14.13.74_clustered_sequences_90_augmented.fasta
1.14.13.75_clustered_sequences_90_augmented.fasta
1.14.13.76_clustered_sequences_90_augmented.fasta
1.14.13.77_clustered_sequences_90_augmented.fasta
1.14.13.78_clustered_sequences_90_augmented.fasta
1.14.13.79_clustered_sequences_90_augmented.fasta
1.14.13.7_clustered_sequences_90_augmented.fasta
1.14.13.80_clustered_sequences_90_augmented.fasta
1.

 19%|█▉        | 7223/37060 [00:01<00:04, 6197.17it/s]

1.14.99.8_clustered_sequences_90_augmented.fasta
1.14.99.9_clustered_sequences_90_augmented.fasta
1.14.99.B10_clustered_sequences_90_augmented.fasta
1.14.99.B1_clustered_sequences_90_augmented.fasta
1.14.99.B4_clustered_sequences_90_augmented.fasta
1.14.99.B5_clustered_sequences_90_augmented.fasta
1.15.1.1_clustered_sequences_90_augmented.fasta
1.15.1.2_clustered_sequences_90_augmented.fasta
1.16.1.10_clustered_sequences_90_augmented.fasta
1.16.1.1_clustered_sequences_90_augmented.fasta
1.16.1.2_clustered_sequences_90_augmented.fasta
1.16.1.3_clustered_sequences_90_augmented.fasta
1.16.1.4_clustered_sequences_90_augmented.fasta
1.16.1.5_clustered_sequences_90_augmented.fasta
1.16.1.7_clustered_sequences_90_augmented.fasta
1.16.1.8_clustered_sequences_90_augmented.fasta
1.16.1.9_clustered_sequences_90_augmented.fasta
1.16.3.1_clustered_sequences_90_augmented.fasta
1.16.3.2_clustered_sequences_90_augmented.fasta
1.16.3.3_clustered_sequences_90_augmented.fasta
1.16.5.1_clustered_sequences

 22%|██▏       | 8016/37060 [00:01<00:07, 4013.70it/s]

1.2.1.8_clustered_sequences_90_augmented.fasta
1.2.1.90_clustered_sequences_90_augmented.fasta
1.2.1.91_clustered_sequences_90_augmented.fasta
1.2.1.92_clustered_sequences_90_augmented.fasta
1.2.1.93_clustered_sequences_90_augmented.fasta
1.2.1.94_clustered_sequences_90_augmented.fasta
1.2.1.95_clustered_sequences_90_augmented.fasta
1.2.1.96_clustered_sequences_90_augmented.fasta
1.2.1.97_clustered_sequences_90_augmented.fasta
1.2.1.98_clustered_sequences_90_augmented.fasta
1.2.1.99_clustered_sequences_90_augmented.fasta
1.2.1.9_clustered_sequences_90_augmented.fasta
1.2.1.B23_clustered_sequences_90_augmented.fasta
1.2.1.B25_clustered_sequences_90_augmented.fasta
1.2.1.B29_clustered_sequences_90_augmented.fasta
1.2.1.B8_clustered_sequences_90_augmented.fasta
1.2.2.1_clustered_sequences_90_augmented.fasta
1.2.2.2_clustered_sequences_90_augmented.fasta
1.2.2.3_clustered_sequences_90_augmented.fasta
1.2.2.4_clustered_sequences_90_augmented.fasta
1.2.2.B1_clustered_sequences_90_augmented.f

 25%|██▍       | 9169/37060 [00:02<00:07, 3769.42it/s]

1.3.1.27_clustered_sequences_90_augmented.fasta
1.3.1.28_clustered_sequences_90_augmented.fasta
1.3.1.29_clustered_sequences_90_augmented.fasta
1.3.1.2_clustered_sequences_90_augmented.fasta
1.3.1.30_clustered_sequences_90_augmented.fasta
1.3.1.31_clustered_sequences_90_augmented.fasta
1.3.1.32_clustered_sequences_90_augmented.fasta
1.3.1.33_clustered_sequences_90_augmented.fasta
1.3.1.34_clustered_sequences_90_augmented.fasta
1.3.1.35_clustered_sequences_90_augmented.fasta
1.3.1.36_clustered_sequences_90_augmented.fasta
1.3.1.38_clustered_sequences_90_augmented.fasta
1.3.1.39_clustered_sequences_90_augmented.fasta
1.3.1.3_clustered_sequences_90_augmented.fasta
1.3.1.40_clustered_sequences_90_augmented.fasta
1.3.1.42_clustered_sequences_90_augmented.fasta
1.3.1.43_clustered_sequences_90_augmented.fasta
1.3.1.44_clustered_sequences_90_augmented.fasta
1.3.1.45_clustered_sequences_90_augmented.fasta
1.3.1.47_clustered_sequences_90_augmented.fasta
1.3.1.48_clustered_sequences_90_augmented.

 26%|██▌       | 9657/37060 [00:02<00:07, 3904.47it/s]

1.3.99.B13_clustered_sequences_90_augmented.fasta
1.3.99.B15_clustered_sequences_90_augmented.fasta
1.3.99.B16_clustered_sequences_90_augmented.fasta
1.3.99.B2_clustered_sequences_90_augmented.fasta
1.4.1.11_clustered_sequences_90_augmented.fasta
1.4.1.12_clustered_sequences_90_augmented.fasta
1.4.1.13_clustered_sequences_90_augmented.fasta
1.4.1.14_clustered_sequences_90_augmented.fasta
1.4.1.15_clustered_sequences_90_augmented.fasta
1.4.1.16_clustered_sequences_90_augmented.fasta
1.4.1.17_clustered_sequences_90_augmented.fasta
1.4.1.18_clustered_sequences_90_augmented.fasta
1.4.1.19_clustered_sequences_90_augmented.fasta
1.4.1.1_clustered_sequences_90_augmented.fasta
1.4.1.20_clustered_sequences_90_augmented.fasta
1.4.1.21_clustered_sequences_90_augmented.fasta
1.4.1.23_clustered_sequences_90_augmented.fasta
1.4.1.24_clustered_sequences_90_augmented.fasta
1.4.1.25_clustered_sequences_90_augmented.fasta
1.4.1.26_clustered_sequences_90_augmented.fasta
1.4.1.2_clustered_sequences_90_aug

 27%|██▋       | 10126/37060 [00:02<00:08, 3137.14it/s]

1.5.1.30_clustered_sequences_90_augmented.fasta
1.5.1.32_clustered_sequences_90_augmented.fasta
1.5.1.33_clustered_sequences_90_augmented.fasta
1.5.1.34_clustered_sequences_90_augmented.fasta
1.5.1.35_clustered_sequences_90_augmented.fasta
1.5.1.36_clustered_sequences_90_augmented.fasta
1.5.1.37_clustered_sequences_90_augmented.fasta
1.5.1.38_clustered_sequences_90_augmented.fasta
1.5.1.39_clustered_sequences_90_augmented.fasta
1.5.1.3_clustered_sequences_90_augmented.fasta
1.5.1.40_clustered_sequences_90_augmented.fasta
1.5.1.41_clustered_sequences_90_augmented.fasta
1.5.1.42_clustered_sequences_90_augmented.fasta
1.5.1.43_clustered_sequences_90_augmented.fasta
1.5.1.44_clustered_sequences_90_augmented.fasta
1.5.1.45_clustered_sequences_90_augmented.fasta
1.5.1.46_clustered_sequences_90_augmented.fasta
1.5.1.47_clustered_sequences_90_augmented.fasta
1.5.1.48_clustered_sequences_90_augmented.fasta
1.5.1.49_clustered_sequences_90_augmented.fasta
1.5.1.4_clustered_sequences_90_augmented.

 30%|██▉       | 10955/37060 [00:02<00:08, 3118.90it/s]

1.6.5.12_clustered_sequences_90_augmented.fasta
1.6.5.1_clustered_sequences_90_augmented.fasta
1.6.5.2_clustered_sequences_90_augmented.fasta
1.6.5.3_clustered_sequences_90_augmented.fasta
1.6.5.4_clustered_sequences_90_augmented.fasta
1.6.5.5_clustered_sequences_90_augmented.fasta
1.6.5.6_clustered_sequences_90_augmented.fasta
1.6.5.7_clustered_sequences_90_augmented.fasta
1.6.5.9_clustered_sequences_90_augmented.fasta
1.6.6.10_clustered_sequences_90_augmented.fasta
1.6.6.11_clustered_sequences_90_augmented.fasta
1.6.6.12_clustered_sequences_90_augmented.fasta
1.6.6.13_clustered_sequences_90_augmented.fasta
1.6.6.1_clustered_sequences_90_augmented.fasta
1.6.6.2_clustered_sequences_90_augmented.fasta
1.6.6.3_clustered_sequences_90_augmented.fasta
1.6.6.4_clustered_sequences_90_augmented.fasta
1.6.6.5_clustered_sequences_90_augmented.fasta
1.6.6.6_clustered_sequences_90_augmented.fasta
1.6.6.7_clustered_sequences_90_augmented.fasta
1.6.6.8_clustered_sequences_90_augmented.fasta
1.6.6.9_

 31%|███       | 11324/37060 [00:02<00:10, 2401.14it/s]

1.8.98.1_clustered_sequences_90_augmented.fasta
1.8.98.2_clustered_sequences_90_augmented.fasta
1.8.98.3_clustered_sequences_90_augmented.fasta
1.8.98.4_clustered_sequences_90_augmented.fasta
1.8.98.5_clustered_sequences_90_augmented.fasta
1.8.98.6_clustered_sequences_90_augmented.fasta
1.8.99.1_clustered_sequences_90_augmented.fasta
1.8.99.2_clustered_sequences_90_augmented.fasta
1.8.99.3_clustered_sequences_90_augmented.fasta
1.8.99.4_clustered_sequences_90_augmented.fasta
1.8.99.5_clustered_sequences_90_augmented.fasta
1.8.99.B1_clustered_sequences_90_augmented.fasta
1.8.99.B2_clustered_sequences_90_augmented.fasta
1.8.99.B3_clustered_sequences_90_augmented.fasta
1.9.3.1_clustered_sequences_90_augmented.fasta
1.9.3.2_clustered_sequences_90_augmented.fasta
1.9.6.1_clustered_sequences_90_augmented.fasta
1.9.98.1_clustered_sequences_90_augmented.fasta
1.9.99.1_clustered_sequences_90_augmented.fasta
1.97.1.10_clustered_sequences_90_augmented.fasta
1.97.1.11_clustered_sequences_90_augmen

 31%|███▏      | 11670/37060 [00:02<00:09, 2641.07it/s]

2.1.1.140_clustered_sequences_90_augmented.fasta
2.1.1.141_clustered_sequences_90_augmented.fasta
2.1.1.142_clustered_sequences_90_augmented.fasta
2.1.1.143_clustered_sequences_90_augmented.fasta
2.1.1.144_clustered_sequences_90_augmented.fasta
2.1.1.145_clustered_sequences_90_augmented.fasta
2.1.1.146_clustered_sequences_90_augmented.fasta
2.1.1.148_clustered_sequences_90_augmented.fasta
2.1.1.149_clustered_sequences_90_augmented.fasta
2.1.1.14_clustered_sequences_90_augmented.fasta
2.1.1.150_clustered_sequences_90_augmented.fasta
2.1.1.151_clustered_sequences_90_augmented.fasta
2.1.1.152_clustered_sequences_90_augmented.fasta
2.1.1.154_clustered_sequences_90_augmented.fasta
2.1.1.155_clustered_sequences_90_augmented.fasta
2.1.1.156_clustered_sequences_90_augmented.fasta
2.1.1.157_clustered_sequences_90_augmented.fasta
2.1.1.158_clustered_sequences_90_augmented.fasta
2.1.1.159_clustered_sequences_90_augmented.fasta
2.1.1.15_clustered_sequences_90_augmented.fasta
2.1.1.160_clustered_se

 34%|███▍      | 12520/37060 [00:03<00:09, 2672.68it/s]

2.1.1.201_clustered_sequences_90_augmented.fasta
2.1.1.202_clustered_sequences_90_augmented.fasta
2.1.1.203_clustered_sequences_90_augmented.fasta
2.1.1.204_clustered_sequences_90_augmented.fasta
2.1.1.205_clustered_sequences_90_augmented.fasta
2.1.1.206_clustered_sequences_90_augmented.fasta
2.1.1.207_clustered_sequences_90_augmented.fasta
2.1.1.208_clustered_sequences_90_augmented.fasta
2.1.1.209_clustered_sequences_90_augmented.fasta
2.1.1.20_clustered_sequences_90_augmented.fasta
2.1.1.210_clustered_sequences_90_augmented.fasta
2.1.1.211_clustered_sequences_90_augmented.fasta
2.1.1.212_clustered_sequences_90_augmented.fasta
2.1.1.213_clustered_sequences_90_augmented.fasta
2.1.1.214_clustered_sequences_90_augmented.fasta
2.1.1.215_clustered_sequences_90_augmented.fasta
2.1.1.216_clustered_sequences_90_augmented.fasta
2.1.1.217_clustered_sequences_90_augmented.fasta
2.1.1.218_clustered_sequences_90_augmented.fasta
2.1.1.219_clustered_sequences_90_augmented.fasta
2.1.1.21_clustered_se

 35%|███▍      | 12869/37060 [00:03<00:10, 2262.20it/s]

2.1.1.46_clustered_sequences_90_augmented.fasta
2.1.1.47_clustered_sequences_90_augmented.fasta
2.1.1.48_clustered_sequences_90_augmented.fasta
2.1.1.49_clustered_sequences_90_augmented.fasta
2.1.1.4_clustered_sequences_90_augmented.fasta
2.1.1.50_clustered_sequences_90_augmented.fasta
2.1.1.51_clustered_sequences_90_augmented.fasta
2.1.1.52_clustered_sequences_90_augmented.fasta
2.1.1.53_clustered_sequences_90_augmented.fasta
2.1.1.54_clustered_sequences_90_augmented.fasta
2.1.1.55_clustered_sequences_90_augmented.fasta
2.1.1.56_clustered_sequences_90_augmented.fasta
2.1.1.57_clustered_sequences_90_augmented.fasta
2.1.1.58_clustered_sequences_90_augmented.fasta
2.1.1.59_clustered_sequences_90_augmented.fasta
2.1.1.5_clustered_sequences_90_augmented.fasta
2.1.1.60_clustered_sequences_90_augmented.fasta
2.1.1.61_clustered_sequences_90_augmented.fasta
2.1.1.62_clustered_sequences_90_augmented.fasta
2.1.1.63_clustered_sequences_90_augmented.fasta
2.1.1.64_clustered_sequences_90_augmented.

 36%|███▌      | 13163/37060 [00:03<00:11, 2022.66it/s]

2.1.1.B117_clustered_sequences_90_augmented.fasta
2.1.1.B3_clustered_sequences_90_augmented.fasta
2.1.1.B43_clustered_sequences_90_augmented.fasta
2.1.1.B74_clustered_sequences_90_augmented.fasta
2.1.1.B75_clustered_sequences_90_augmented.fasta
2.1.1.B76_clustered_sequences_90_augmented.fasta
2.1.1.B83_clustered_sequences_90_augmented.fasta
2.1.1.B84_clustered_sequences_90_augmented.fasta
2.1.1.B85_clustered_sequences_90_augmented.fasta
2.1.1.B99_clustered_sequences_90_augmented.fasta
2.1.2.10_clustered_sequences_90_augmented.fasta
2.1.2.11_clustered_sequences_90_augmented.fasta
2.1.2.13_clustered_sequences_90_augmented.fasta
2.1.2.1_clustered_sequences_90_augmented.fasta
2.1.2.2_clustered_sequences_90_augmented.fasta
2.1.2.3_clustered_sequences_90_augmented.fasta
2.1.2.5_clustered_sequences_90_augmented.fasta
2.1.2.6_clustered_sequences_90_augmented.fasta
2.1.2.7_clustered_sequences_90_augmented.fasta
2.1.2.8_clustered_sequences_90_augmented.fasta
2.1.2.9_clustered_sequences_90_augmen

 37%|███▋      | 13660/37060 [00:03<00:11, 1975.21it/s]

2.2.1.7_clustered_sequences_90_augmented.fasta
2.2.1.8_clustered_sequences_90_augmented.fasta
2.2.1.9_clustered_sequences_90_augmented.fasta
2.3.1.101_clustered_sequences_90_augmented.fasta
2.3.1.102_clustered_sequences_90_augmented.fasta
2.3.1.103_clustered_sequences_90_augmented.fasta
2.3.1.104_clustered_sequences_90_augmented.fasta
2.3.1.105_clustered_sequences_90_augmented.fasta
2.3.1.106_clustered_sequences_90_augmented.fasta
2.3.1.107_clustered_sequences_90_augmented.fasta
2.3.1.108_clustered_sequences_90_augmented.fasta
2.3.1.109_clustered_sequences_90_augmented.fasta
2.3.1.110_clustered_sequences_90_augmented.fasta
2.3.1.111_clustered_sequences_90_augmented.fasta
2.3.1.112_clustered_sequences_90_augmented.fasta
2.3.1.115_clustered_sequences_90_augmented.fasta
2.3.1.116_clustered_sequences_90_augmented.fasta
2.3.1.117_clustered_sequences_90_augmented.fasta
2.3.1.118_clustered_sequences_90_augmented.fasta
2.3.1.119_clustered_sequences_90_augmented.fasta
2.3.1.11_clustered_sequenc

 38%|███▊      | 13950/37060 [00:04<00:10, 2109.84it/s]

2.3.1.1_clustered_sequences_90_augmented.fasta
2.3.1.200_clustered_sequences_90_augmented.fasta
2.3.1.201_clustered_sequences_90_augmented.fasta
2.3.1.202_clustered_sequences_90_augmented.fasta
2.3.1.203_clustered_sequences_90_augmented.fasta
2.3.1.204_clustered_sequences_90_augmented.fasta
2.3.1.205_clustered_sequences_90_augmented.fasta
2.3.1.206_clustered_sequences_90_augmented.fasta
2.3.1.207_clustered_sequences_90_augmented.fasta
2.3.1.208_clustered_sequences_90_augmented.fasta
2.3.1.209_clustered_sequences_90_augmented.fasta
2.3.1.20_clustered_sequences_90_augmented.fasta
2.3.1.210_clustered_sequences_90_augmented.fasta
2.3.1.211_clustered_sequences_90_augmented.fasta
2.3.1.212_clustered_sequences_90_augmented.fasta
2.3.1.213_clustered_sequences_90_augmented.fasta
2.3.1.214_clustered_sequences_90_augmented.fasta
2.3.1.215_clustered_sequences_90_augmented.fasta
2.3.1.216_clustered_sequences_90_augmented.fasta
2.3.1.217_clustered_sequences_90_augmented.fasta
2.3.1.218_clustered_seq

 39%|███▉      | 14411/37060 [00:04<00:12, 1838.42it/s]

2.3.1.31_clustered_sequences_90_augmented.fasta
2.3.1.32_clustered_sequences_90_augmented.fasta
2.3.1.33_clustered_sequences_90_augmented.fasta
2.3.1.35_clustered_sequences_90_augmented.fasta
2.3.1.36_clustered_sequences_90_augmented.fasta
2.3.1.37_clustered_sequences_90_augmented.fasta
2.3.1.38_clustered_sequences_90_augmented.fasta
2.3.1.39_clustered_sequences_90_augmented.fasta
2.3.1.3_clustered_sequences_90_augmented.fasta
2.3.1.40_clustered_sequences_90_augmented.fasta
2.3.1.41_clustered_sequences_90_augmented.fasta
2.3.1.42_clustered_sequences_90_augmented.fasta
2.3.1.43_clustered_sequences_90_augmented.fasta
2.3.1.45_clustered_sequences_90_augmented.fasta
2.3.1.46_clustered_sequences_90_augmented.fasta
2.3.1.47_clustered_sequences_90_augmented.fasta
2.3.1.48_clustered_sequences_90_augmented.fasta
2.3.1.4_clustered_sequences_90_augmented.fasta
2.3.1.50_clustered_sequences_90_augmented.fasta
2.3.1.51_clustered_sequences_90_augmented.fasta
2.3.1.53_clustered_sequences_90_augmented.

 40%|████      | 14972/37060 [00:04<00:12, 1834.94it/s]

2.3.2.2_clustered_sequences_90_augmented.fasta
2.3.2.31_clustered_sequences_90_augmented.fasta
2.3.2.32_clustered_sequences_90_augmented.fasta
2.3.2.3_clustered_sequences_90_augmented.fasta
2.3.2.4_clustered_sequences_90_augmented.fasta
2.3.2.5_clustered_sequences_90_augmented.fasta
2.3.2.6_clustered_sequences_90_augmented.fasta
2.3.2.8_clustered_sequences_90_augmented.fasta
2.3.2.B11_clustered_sequences_90_augmented.fasta
2.3.2.B12_clustered_sequences_90_augmented.fasta
2.3.2.B13_clustered_sequences_90_augmented.fasta
2.3.2.B14_clustered_sequences_90_augmented.fasta
2.3.2.B1_clustered_sequences_90_augmented.fasta
2.3.2.B2_clustered_sequences_90_augmented.fasta
2.3.2.B3_clustered_sequences_90_augmented.fasta
2.3.2.B8_clustered_sequences_90_augmented.fasta
2.3.3.10_clustered_sequences_90_augmented.fasta
2.3.3.11_clustered_sequences_90_augmented.fasta
2.3.3.13_clustered_sequences_90_augmented.fasta
2.3.3.14_clustered_sequences_90_augmented.fasta
2.3.3.15_clustered_sequences_90_augmented.

 42%|████▏     | 15715/37060 [00:04<00:08, 2436.93it/s]

2.4.1.12_clustered_sequences_90_augmented.fasta
2.4.1.130_clustered_sequences_90_augmented.fasta
2.4.1.131_clustered_sequences_90_augmented.fasta
2.4.1.132_clustered_sequences_90_augmented.fasta
2.4.1.133_clustered_sequences_90_augmented.fasta
2.4.1.134_clustered_sequences_90_augmented.fasta
2.4.1.135_clustered_sequences_90_augmented.fasta
2.4.1.136_clustered_sequences_90_augmented.fasta
2.4.1.137_clustered_sequences_90_augmented.fasta
2.4.1.138_clustered_sequences_90_augmented.fasta
2.4.1.139_clustered_sequences_90_augmented.fasta
2.4.1.13_clustered_sequences_90_augmented.fasta
2.4.1.140_clustered_sequences_90_augmented.fasta
2.4.1.141_clustered_sequences_90_augmented.fasta
2.4.1.142_clustered_sequences_90_augmented.fasta
2.4.1.143_clustered_sequences_90_augmented.fasta
2.4.1.144_clustered_sequences_90_augmented.fasta
2.4.1.145_clustered_sequences_90_augmented.fasta
2.4.1.146_clustered_sequences_90_augmented.fasta
2.4.1.147_clustered_sequences_90_augmented.fasta
2.4.1.148_clustered_se

 45%|████▍     | 16650/37060 [00:04<00:06, 3130.56it/s]

2.4.1.306_clustered_sequences_90_augmented.fasta
2.4.1.307_clustered_sequences_90_augmented.fasta
2.4.1.308_clustered_sequences_90_augmented.fasta
2.4.1.309_clustered_sequences_90_augmented.fasta
2.4.1.30_clustered_sequences_90_augmented.fasta
2.4.1.310_clustered_sequences_90_augmented.fasta
2.4.1.311_clustered_sequences_90_augmented.fasta
2.4.1.312_clustered_sequences_90_augmented.fasta
2.4.1.313_clustered_sequences_90_augmented.fasta
2.4.1.314_clustered_sequences_90_augmented.fasta
2.4.1.315_clustered_sequences_90_augmented.fasta
2.4.1.316_clustered_sequences_90_augmented.fasta
2.4.1.317_clustered_sequences_90_augmented.fasta
2.4.1.318_clustered_sequences_90_augmented.fasta
2.4.1.319_clustered_sequences_90_augmented.fasta
2.4.1.31_clustered_sequences_90_augmented.fasta
2.4.1.320_clustered_sequences_90_augmented.fasta
2.4.1.321_clustered_sequences_90_augmented.fasta
2.4.1.322_clustered_sequences_90_augmented.fasta
2.4.1.323_clustered_sequences_90_augmented.fasta
2.4.1.324_clustered_se

 46%|████▌     | 17129/37060 [00:05<00:07, 2630.41it/s]

2.4.2.31_clustered_sequences_90_augmented.fasta
2.4.2.32_clustered_sequences_90_augmented.fasta
2.4.2.34_clustered_sequences_90_augmented.fasta
2.4.2.36_clustered_sequences_90_augmented.fasta
2.4.2.37_clustered_sequences_90_augmented.fasta
2.4.2.38_clustered_sequences_90_augmented.fasta
2.4.2.39_clustered_sequences_90_augmented.fasta
2.4.2.3_clustered_sequences_90_augmented.fasta
2.4.2.40_clustered_sequences_90_augmented.fasta
2.4.2.41_clustered_sequences_90_augmented.fasta
2.4.2.43_clustered_sequences_90_augmented.fasta
2.4.2.44_clustered_sequences_90_augmented.fasta
2.4.2.45_clustered_sequences_90_augmented.fasta
2.4.2.46_clustered_sequences_90_augmented.fasta
2.4.2.47_clustered_sequences_90_augmented.fasta
2.4.2.48_clustered_sequences_90_augmented.fasta
2.4.2.49_clustered_sequences_90_augmented.fasta
2.4.2.4_clustered_sequences_90_augmented.fasta
2.4.2.50_clustered_sequences_90_augmented.fasta
2.4.2.51_clustered_sequences_90_augmented.fasta
2.4.2.52_clustered_sequences_90_augmented.

 47%|████▋     | 17522/37060 [00:05<00:09, 1980.46it/s]

2.5.1.17_clustered_sequences_90_augmented.fasta
2.5.1.18_clustered_sequences_90_augmented.fasta
2.5.1.19_clustered_sequences_90_augmented.fasta
2.5.1.1_clustered_sequences_90_augmented.fasta
2.5.1.20_clustered_sequences_90_augmented.fasta
2.5.1.21_clustered_sequences_90_augmented.fasta
2.5.1.22_clustered_sequences_90_augmented.fasta
2.5.1.23_clustered_sequences_90_augmented.fasta
2.5.1.26_clustered_sequences_90_augmented.fasta
2.5.1.27_clustered_sequences_90_augmented.fasta
2.5.1.28_clustered_sequences_90_augmented.fasta
2.5.1.29_clustered_sequences_90_augmented.fasta
2.5.1.2_clustered_sequences_90_augmented.fasta
2.5.1.30_clustered_sequences_90_augmented.fasta
2.5.1.31_clustered_sequences_90_augmented.fasta
2.5.1.32_clustered_sequences_90_augmented.fasta
2.5.1.33_clustered_sequences_90_augmented.fasta
2.5.1.34_clustered_sequences_90_augmented.fasta
2.5.1.35_clustered_sequences_90_augmented.fasta
2.5.1.36_clustered_sequences_90_augmented.fasta
2.5.1.37_clustered_sequences_90_augmented.

 49%|████▉     | 18120/37060 [00:05<00:08, 2136.91it/s]

2.5.1.76_clustered_sequences_90_augmented.fasta
2.5.1.77_clustered_sequences_90_augmented.fasta
2.5.1.78_clustered_sequences_90_augmented.fasta
2.5.1.79_clustered_sequences_90_augmented.fasta
2.5.1.7_clustered_sequences_90_augmented.fasta
2.5.1.80_clustered_sequences_90_augmented.fasta
2.5.1.81_clustered_sequences_90_augmented.fasta
2.5.1.82_clustered_sequences_90_augmented.fasta
2.5.1.83_clustered_sequences_90_augmented.fasta
2.5.1.84_clustered_sequences_90_augmented.fasta
2.5.1.85_clustered_sequences_90_augmented.fasta
2.5.1.86_clustered_sequences_90_augmented.fasta
2.5.1.87_clustered_sequences_90_augmented.fasta
2.5.1.88_clustered_sequences_90_augmented.fasta
2.5.1.89_clustered_sequences_90_augmented.fasta
2.5.1.8_clustered_sequences_90_augmented.fasta
2.5.1.90_clustered_sequences_90_augmented.fasta
2.5.1.91_clustered_sequences_90_augmented.fasta
2.5.1.92_clustered_sequences_90_augmented.fasta
2.5.1.93_clustered_sequences_90_augmented.fasta
2.5.1.94_clustered_sequences_90_augmented.

 51%|█████     | 18730/37060 [00:05<00:07, 2497.28it/s]


2.6.1.9_clustered_sequences_90_augmented.fasta
2.6.1.B16_clustered_sequences_90_augmented.fasta
2.6.1.B17_clustered_sequences_90_augmented.fasta
2.6.1.B18_clustered_sequences_90_augmented.fasta
2.6.1.B19_clustered_sequences_90_augmented.fasta
2.6.1.B3_clustered_sequences_90_augmented.fasta
2.6.1.B6_clustered_sequences_90_augmented.fasta
2.6.2.1_clustered_sequences_90_augmented.fasta
2.6.99.1_clustered_sequences_90_augmented.fasta
2.6.99.2_clustered_sequences_90_augmented.fasta
2.6.99.3_clustered_sequences_90_augmented.fasta
2.6.99.4_clustered_sequences_90_augmented.fasta
2.7.1.100_clustered_sequences_90_augmented.fasta
2.7.1.101_clustered_sequences_90_augmented.fasta
2.7.1.103_clustered_sequences_90_augmented.fasta
2.7.1.104_clustered_sequences_90_augmented.fasta
2.7.1.105_clustered_sequences_90_augmented.fasta
2.7.1.106_clustered_sequences_90_augmented.fasta
2.7.1.107_clustered_sequences_90_augmented.fasta
2.7.1.108_clustered_sequences_90_augmented.fasta
2.7.1.109_clustered_sequences

 51%|█████▏    | 19085/37060 [00:06<00:06, 2682.10it/s]

2.7.1.181_clustered_sequences_90_augmented.fasta
2.7.1.182_clustered_sequences_90_augmented.fasta
2.7.1.183_clustered_sequences_90_augmented.fasta
2.7.1.184_clustered_sequences_90_augmented.fasta
2.7.1.185_clustered_sequences_90_augmented.fasta
2.7.1.186_clustered_sequences_90_augmented.fasta
2.7.1.187_clustered_sequences_90_augmented.fasta
2.7.1.188_clustered_sequences_90_augmented.fasta
2.7.1.189_clustered_sequences_90_augmented.fasta
2.7.1.18_clustered_sequences_90_augmented.fasta
2.7.1.190_clustered_sequences_90_augmented.fasta
2.7.1.191_clustered_sequences_90_augmented.fasta
2.7.1.192_clustered_sequences_90_augmented.fasta
2.7.1.193_clustered_sequences_90_augmented.fasta
2.7.1.194_clustered_sequences_90_augmented.fasta
2.7.1.195_clustered_sequences_90_augmented.fasta
2.7.1.196_clustered_sequences_90_augmented.fasta
2.7.1.197_clustered_sequences_90_augmented.fasta
2.7.1.198_clustered_sequences_90_augmented.fasta
2.7.1.199_clustered_sequences_90_augmented.fasta
2.7.1.19_clustered_se

 52%|█████▏    | 19383/37060 [00:06<00:08, 2105.68it/s]

2.7.1.50_clustered_sequences_90_augmented.fasta
2.7.1.51_clustered_sequences_90_augmented.fasta
2.7.1.52_clustered_sequences_90_augmented.fasta
2.7.1.53_clustered_sequences_90_augmented.fasta
2.7.1.54_clustered_sequences_90_augmented.fasta
2.7.1.55_clustered_sequences_90_augmented.fasta
2.7.1.56_clustered_sequences_90_augmented.fasta
2.7.1.57_clustered_sequences_90_augmented.fasta
2.7.1.58_clustered_sequences_90_augmented.fasta
2.7.1.59_clustered_sequences_90_augmented.fasta
2.7.1.5_clustered_sequences_90_augmented.fasta
2.7.1.60_clustered_sequences_90_augmented.fasta
2.7.1.63_clustered_sequences_90_augmented.fasta
2.7.1.64_clustered_sequences_90_augmented.fasta
2.7.1.65_clustered_sequences_90_augmented.fasta
2.7.1.66_clustered_sequences_90_augmented.fasta
2.7.1.67_clustered_sequences_90_augmented.fasta
2.7.1.68_clustered_sequences_90_augmented.fasta
2.7.1.69_clustered_sequences_90_augmented.fasta
2.7.1.6_clustered_sequences_90_augmented.fasta
2.7.1.70_clustered_sequences_90_augmented.

 53%|█████▎    | 19634/37060 [00:06<00:12, 1397.43it/s]

2.7.11.20_clustered_sequences_90_augmented.fasta
2.7.11.21_clustered_sequences_90_augmented.fasta
2.7.11.22_clustered_sequences_90_augmented.fasta
2.7.11.23_clustered_sequences_90_augmented.fasta
2.7.11.24_clustered_sequences_90_augmented.fasta
2.7.11.25_clustered_sequences_90_augmented.fasta
2.7.11.26_clustered_sequences_90_augmented.fasta
2.7.11.27_clustered_sequences_90_augmented.fasta
2.7.11.28_clustered_sequences_90_augmented.fasta
2.7.11.2_clustered_sequences_90_augmented.fasta
2.7.11.30_clustered_sequences_90_augmented.fasta
2.7.11.31_clustered_sequences_90_augmented.fasta
2.7.11.32_clustered_sequences_90_augmented.fasta
2.7.11.33_clustered_sequences_90_augmented.fasta
2.7.11.4_clustered_sequences_90_augmented.fasta
2.7.11.5_clustered_sequences_90_augmented.fasta
2.7.11.7_clustered_sequences_90_augmented.fasta
2.7.11.8_clustered_sequences_90_augmented.fasta
2.7.11.9_clustered_sequences_90_augmented.fasta
2.7.11.B1_clustered_sequences_90_augmented.fasta
2.7.12.1_clustered_sequenc

 54%|█████▎    | 19833/37060 [00:06<00:14, 1150.69it/s]

2.7.2.2_clustered_sequences_90_augmented.fasta
2.7.2.3_clustered_sequences_90_augmented.fasta
2.7.2.4_clustered_sequences_90_augmented.fasta
2.7.2.5_clustered_sequences_90_augmented.fasta
2.7.2.6_clustered_sequences_90_augmented.fasta
2.7.2.7_clustered_sequences_90_augmented.fasta
2.7.2.8_clustered_sequences_90_augmented.fasta
2.7.2.9_clustered_sequences_90_augmented.fasta
2.7.3.10_clustered_sequences_90_augmented.fasta
2.7.3.11_clustered_sequences_90_augmented.fasta
2.7.3.12_clustered_sequences_90_augmented.fasta
2.7.3.1_clustered_sequences_90_augmented.fasta
2.7.3.2_clustered_sequences_90_augmented.fasta
2.7.3.3_clustered_sequences_90_augmented.fasta
2.7.3.4_clustered_sequences_90_augmented.fasta
2.7.3.5_clustered_sequences_90_augmented.fasta
2.7.3.8_clustered_sequences_90_augmented.fasta
2.7.3.9_clustered_sequences_90_augmented.fasta
2.7.4.10_clustered_sequences_90_augmented.fasta
2.7.4.11_clustered_sequences_90_augmented.fasta
2.7.4.13_clustered_sequences_90_augmented.fasta
2.7.4.1

 54%|█████▍    | 20144/37060 [00:07<00:14, 1191.41it/s]

2.7.4.B1_clustered_sequences_90_augmented.fasta
2.7.4.B2_clustered_sequences_90_augmented.fasta
2.7.4.B4_clustered_sequences_90_augmented.fasta
2.7.5.1_clustered_sequences_90_augmented.fasta
2.7.5.2_clustered_sequences_90_augmented.fasta
2.7.5.3_clustered_sequences_90_augmented.fasta
2.7.5.4_clustered_sequences_90_augmented.fasta
2.7.5.5_clustered_sequences_90_augmented.fasta
2.7.5.6_clustered_sequences_90_augmented.fasta
2.7.5.7_clustered_sequences_90_augmented.fasta
2.7.6.1_clustered_sequences_90_augmented.fasta
2.7.6.2_clustered_sequences_90_augmented.fasta
2.7.6.3_clustered_sequences_90_augmented.fasta
2.7.6.5_clustered_sequences_90_augmented.fasta
2.7.7.100_clustered_sequences_90_augmented.fasta
2.7.7.101_clustered_sequences_90_augmented.fasta
2.7.7.102_clustered_sequences_90_augmented.fasta
2.7.7.103_clustered_sequences_90_augmented.fasta
2.7.7.10_clustered_sequences_90_augmented.fasta
2.7.7.12_clustered_sequences_90_augmented.fasta
2.7.7.13_clustered_sequences_90_augmented.fasta

 55%|█████▍    | 20289/37060 [00:07<00:14, 1178.68it/s]

2.7.7.50_clustered_sequences_90_augmented.fasta
2.7.7.51_clustered_sequences_90_augmented.fasta
2.7.7.52_clustered_sequences_90_augmented.fasta
2.7.7.53_clustered_sequences_90_augmented.fasta
2.7.7.54_clustered_sequences_90_augmented.fasta
2.7.7.55_clustered_sequences_90_augmented.fasta
2.7.7.56_clustered_sequences_90_augmented.fasta
2.7.7.58_clustered_sequences_90_augmented.fasta
2.7.7.59_clustered_sequences_90_augmented.fasta
2.7.7.5_clustered_sequences_90_augmented.fasta
2.7.7.60_clustered_sequences_90_augmented.fasta
2.7.7.61_clustered_sequences_90_augmented.fasta
2.7.7.62_clustered_sequences_90_augmented.fasta
2.7.7.63_clustered_sequences_90_augmented.fasta
2.7.7.64_clustered_sequences_90_augmented.fasta
2.7.7.65_clustered_sequences_90_augmented.fasta
2.7.7.66_clustered_sequences_90_augmented.fasta
2.7.7.67_clustered_sequences_90_augmented.fasta
2.7.7.68_clustered_sequences_90_augmented.fasta
2.7.7.69_clustered_sequences_90_augmented.fasta
2.7.7.6_clustered_sequences_90_augmented.

 56%|█████▌    | 20670/37060 [00:07<00:16, 1023.95it/s]

2.7.7.80_clustered_sequences_90_augmented.fasta
2.7.7.81_clustered_sequences_90_augmented.fasta
2.7.7.82_clustered_sequences_90_augmented.fasta
2.7.7.83_clustered_sequences_90_augmented.fasta
2.7.7.84_clustered_sequences_90_augmented.fasta
2.7.7.85_clustered_sequences_90_augmented.fasta
2.7.7.86_clustered_sequences_90_augmented.fasta
2.7.7.87_clustered_sequences_90_augmented.fasta
2.7.7.88_clustered_sequences_90_augmented.fasta
2.7.7.89_clustered_sequences_90_augmented.fasta
2.7.7.8_clustered_sequences_90_augmented.fasta
2.7.7.90_clustered_sequences_90_augmented.fasta
2.7.7.91_clustered_sequences_90_augmented.fasta
2.7.7.92_clustered_sequences_90_augmented.fasta
2.7.7.93_clustered_sequences_90_augmented.fasta
2.7.7.94_clustered_sequences_90_augmented.fasta
2.7.7.95_clustered_sequences_90_augmented.fasta
2.7.7.96_clustered_sequences_90_augmented.fasta
2.7.7.98_clustered_sequences_90_augmented.fasta
2.7.7.99_clustered_sequences_90_augmented.fasta
2.7.7.9_clustered_sequences_90_augmented.

 56%|█████▋    | 20900/37060 [00:07<00:13, 1224.70it/s]

2.8.1.5_clustered_sequences_90_augmented.fasta
2.8.1.6_clustered_sequences_90_augmented.fasta
2.8.1.7_clustered_sequences_90_augmented.fasta


 58%|█████▊    | 21400/37060 [00:08<00:12, 1227.36it/s]

2.8.1.8_clustered_sequences_90_augmented.fasta
2.8.1.9_clustered_sequences_90_augmented.fasta
2.8.1.B2_clustered_sequences_90_augmented.fasta
2.8.1.B3_clustered_sequences_90_augmented.fasta
2.8.2.10_clustered_sequences_90_augmented.fasta
2.8.2.11_clustered_sequences_90_augmented.fasta
2.8.2.12_clustered_sequences_90_augmented.fasta
2.8.2.13_clustered_sequences_90_augmented.fasta
2.8.2.14_clustered_sequences_90_augmented.fasta
2.8.2.15_clustered_sequences_90_augmented.fasta
2.8.2.16_clustered_sequences_90_augmented.fasta
2.8.2.17_clustered_sequences_90_augmented.fasta
2.8.2.18_clustered_sequences_90_augmented.fasta
2.8.2.1_clustered_sequences_90_augmented.fasta
2.8.2.20_clustered_sequences_90_augmented.fasta
2.8.2.21_clustered_sequences_90_augmented.fasta
2.8.2.22_clustered_sequences_90_augmented.fasta
2.8.2.23_clustered_sequences_90_augmented.fasta
2.8.2.24_clustered_sequences_90_augmented.fasta
2.8.2.25_clustered_sequences_90_augmented.fasta
2.8.2.26_clustered_sequences_90_augmented.f

 59%|█████▉    | 22008/37060 [00:08<00:09, 1647.52it/s]

3.1.1.77_clustered_sequences_90_augmented.fasta
3.1.1.78_clustered_sequences_90_augmented.fasta
3.1.1.79_clustered_sequences_90_augmented.fasta
3.1.1.7_clustered_sequences_90_augmented.fasta
3.1.1.80_clustered_sequences_90_augmented.fasta
3.1.1.81_clustered_sequences_90_augmented.fasta
3.1.1.82_clustered_sequences_90_augmented.fasta
3.1.1.83_clustered_sequences_90_augmented.fasta
3.1.1.84_clustered_sequences_90_augmented.fasta
3.1.1.85_clustered_sequences_90_augmented.fasta
3.1.1.86_clustered_sequences_90_augmented.fasta
3.1.1.87_clustered_sequences_90_augmented.fasta
3.1.1.88_clustered_sequences_90_augmented.fasta
3.1.1.89_clustered_sequences_90_augmented.fasta
3.1.1.8_clustered_sequences_90_augmented.fasta
3.1.1.90_clustered_sequences_90_augmented.fasta
3.1.1.91_clustered_sequences_90_augmented.fasta
3.1.1.92_clustered_sequences_90_augmented.fasta
3.1.1.93_clustered_sequences_90_augmented.fasta
3.1.1.94_clustered_sequences_90_augmented.fasta
3.1.1.95_clustered_sequences_90_augmented.

 60%|██████    | 22415/37060 [00:08<00:11, 1269.37it/s]

3.1.26.5_clustered_sequences_90_augmented.fasta
3.1.26.8_clustered_sequences_90_augmented.fasta
3.1.27.10_clustered_sequences_90_augmented.fasta
3.1.27.1_clustered_sequences_90_augmented.fasta
3.1.27.2_clustered_sequences_90_augmented.fasta
3.1.27.3_clustered_sequences_90_augmented.fasta
3.1.27.4_clustered_sequences_90_augmented.fasta
3.1.27.5_clustered_sequences_90_augmented.fasta
3.1.27.6_clustered_sequences_90_augmented.fasta
3.1.27.8_clustered_sequences_90_augmented.fasta
3.1.27.9_clustered_sequences_90_augmented.fasta
3.1.27.B1_clustered_sequences_90_augmented.fasta
3.1.3.100_clustered_sequences_90_augmented.fasta
3.1.3.101_clustered_sequences_90_augmented.fasta
3.1.3.102_clustered_sequences_90_augmented.fasta
3.1.3.103_clustered_sequences_90_augmented.fasta
3.1.3.104_clustered_sequences_90_augmented.fasta
3.1.3.105_clustered_sequences_90_augmented.fasta
3.1.3.10_clustered_sequences_90_augmented.fasta
3.1.3.11_clustered_sequences_90_augmented.fasta
3.1.3.12_clustered_sequences_90_

 64%|██████▎   | 23590/37060 [00:08<00:06, 2101.80it/s]

3.1.3.80_clustered_sequences_90_augmented.fasta
3.1.3.81_clustered_sequences_90_augmented.fasta
3.1.3.82_clustered_sequences_90_augmented.fasta
3.1.3.83_clustered_sequences_90_augmented.fasta
3.1.3.84_clustered_sequences_90_augmented.fasta
3.1.3.85_clustered_sequences_90_augmented.fasta
3.1.3.86_clustered_sequences_90_augmented.fasta
3.1.3.87_clustered_sequences_90_augmented.fasta
3.1.3.88_clustered_sequences_90_augmented.fasta
3.1.3.89_clustered_sequences_90_augmented.fasta
3.1.3.8_clustered_sequences_90_augmented.fasta
3.1.3.90_clustered_sequences_90_augmented.fasta
3.1.3.91_clustered_sequences_90_augmented.fasta
3.1.3.92_clustered_sequences_90_augmented.fasta
3.1.3.93_clustered_sequences_90_augmented.fasta
3.1.3.94_clustered_sequences_90_augmented.fasta
3.1.3.95_clustered_sequences_90_augmented.fasta
3.1.3.96_clustered_sequences_90_augmented.fasta
3.1.3.97_clustered_sequences_90_augmented.fasta
3.1.3.98_clustered_sequences_90_augmented.fasta
3.1.3.99_clustered_sequences_90_augmented

 65%|██████▍   | 23975/37060 [00:09<00:06, 2114.88it/s]

3.2.1.199_clustered_sequences_90_augmented.fasta
3.2.1.19_clustered_sequences_90_augmented.fasta
3.2.1.1_clustered_sequences_90_augmented.fasta
3.2.1.200_clustered_sequences_90_augmented.fasta
3.2.1.201_clustered_sequences_90_augmented.fasta
3.2.1.202_clustered_sequences_90_augmented.fasta
3.2.1.203_clustered_sequences_90_augmented.fasta
3.2.1.204_clustered_sequences_90_augmented.fasta
3.2.1.205_clustered_sequences_90_augmented.fasta
3.2.1.206_clustered_sequences_90_augmented.fasta
3.2.1.207_clustered_sequences_90_augmented.fasta
3.2.1.208_clustered_sequences_90_augmented.fasta
3.2.1.20_clustered_sequences_90_augmented.fasta
3.2.1.21_clustered_sequences_90_augmented.fasta
3.2.1.22_clustered_sequences_90_augmented.fasta
3.2.1.23_clustered_sequences_90_augmented.fasta
3.2.1.24_clustered_sequences_90_augmented.fasta
3.2.1.25_clustered_sequences_90_augmented.fasta
3.2.1.26_clustered_sequences_90_augmented.fasta
3.2.1.27_clustered_sequences_90_augmented.fasta
3.2.1.28_clustered_sequences_90

 67%|██████▋   | 24730/37060 [00:09<00:05, 2434.69it/s]

3.2.1.90_clustered_sequences_90_augmented.fasta
3.2.1.91_clustered_sequences_90_augmented.fasta
3.2.1.92_clustered_sequences_90_augmented.fasta
3.2.1.93_clustered_sequences_90_augmented.fasta
3.2.1.94_clustered_sequences_90_augmented.fasta
3.2.1.95_clustered_sequences_90_augmented.fasta
3.2.1.96_clustered_sequences_90_augmented.fasta
3.2.1.97_clustered_sequences_90_augmented.fasta
3.2.1.98_clustered_sequences_90_augmented.fasta
3.2.1.99_clustered_sequences_90_augmented.fasta
3.2.1.9_clustered_sequences_90_augmented.fasta
3.2.1.B16_clustered_sequences_90_augmented.fasta
3.2.1.B1_clustered_sequences_90_augmented.fasta
3.2.1.B23_clustered_sequences_90_augmented.fasta
3.2.1.B26_clustered_sequences_90_augmented.fasta
3.2.1.B28_clustered_sequences_90_augmented.fasta
3.2.1.B31_clustered_sequences_90_augmented.fasta
3.2.1.B32_clustered_sequences_90_augmented.fasta
3.2.1.B33_clustered_sequences_90_augmented.fasta
3.2.1.B34_clustered_sequences_90_augmented.fasta
3.2.1.B35_clustered_sequences_90_

 69%|██████▉   | 25710/37060 [00:09<00:03, 3218.98it/s]

3.4.13.6_clustered_sequences_90_augmented.fasta
3.4.13.7_clustered_sequences_90_augmented.fasta
3.4.13.8_clustered_sequences_90_augmented.fasta
3.4.13.9_clustered_sequences_90_augmented.fasta
3.4.14.10_clustered_sequences_90_augmented.fasta
3.4.14.11_clustered_sequences_90_augmented.fasta
3.4.14.12_clustered_sequences_90_augmented.fasta
3.4.14.13_clustered_sequences_90_augmented.fasta
3.4.14.1_clustered_sequences_90_augmented.fasta
3.4.14.2_clustered_sequences_90_augmented.fasta
3.4.14.3_clustered_sequences_90_augmented.fasta
3.4.14.4_clustered_sequences_90_augmented.fasta
3.4.14.5_clustered_sequences_90_augmented.fasta
3.4.14.6_clustered_sequences_90_augmented.fasta
3.4.14.7_clustered_sequences_90_augmented.fasta
3.4.14.8_clustered_sequences_90_augmented.fasta
3.4.14.9_clustered_sequences_90_augmented.fasta
3.4.15.1_clustered_sequences_90_augmented.fasta
3.4.15.2_clustered_sequences_90_augmented.fasta
3.4.15.3_clustered_sequences_90_augmented.fasta
3.4.15.5_clustered_sequences_90_augm

 74%|███████▎  | 27260/37060 [00:09<00:02, 4585.94it/s]

3.4.21.8_clustered_sequences_90_augmented.fasta
3.4.21.90_clustered_sequences_90_augmented.fasta
3.4.21.91_clustered_sequences_90_augmented.fasta
3.4.21.92_clustered_sequences_90_augmented.fasta
3.4.21.93_clustered_sequences_90_augmented.fasta
3.4.21.94_clustered_sequences_90_augmented.fasta
3.4.21.95_clustered_sequences_90_augmented.fasta
3.4.21.96_clustered_sequences_90_augmented.fasta
3.4.21.97_clustered_sequences_90_augmented.fasta
3.4.21.98_clustered_sequences_90_augmented.fasta
3.4.21.99_clustered_sequences_90_augmented.fasta
3.4.21.9_clustered_sequences_90_augmented.fasta
3.4.21.B10_clustered_sequences_90_augmented.fasta
3.4.21.B12_clustered_sequences_90_augmented.fasta
3.4.21.B1_clustered_sequences_90_augmented.fasta
3.4.21.B21_clustered_sequences_90_augmented.fasta
3.4.21.B24_clustered_sequences_90_augmented.fasta
3.4.21.B25_clustered_sequences_90_augmented.fasta
3.4.21.B26_clustered_sequences_90_augmented.fasta
3.4.21.B27_clustered_sequences_90_augmented.fasta
3.4.21.B28_clus

 76%|███████▌  | 28045/37060 [00:09<00:01, 5171.49it/s]

3.4.4.17_clustered_sequences_90_augmented.fasta
3.4.4.18_clustered_sequences_90_augmented.fasta
3.4.4.19_clustered_sequences_90_augmented.fasta
3.4.4.1_clustered_sequences_90_augmented.fasta
3.4.4.20_clustered_sequences_90_augmented.fasta
3.4.4.21_clustered_sequences_90_augmented.fasta
3.4.4.22_clustered_sequences_90_augmented.fasta
3.4.4.23_clustered_sequences_90_augmented.fasta
3.4.4.24_clustered_sequences_90_augmented.fasta
3.4.4.25_clustered_sequences_90_augmented.fasta
3.4.4.2_clustered_sequences_90_augmented.fasta
3.4.4.3_clustered_sequences_90_augmented.fasta
3.4.4.4_clustered_sequences_90_augmented.fasta
3.4.4.5_clustered_sequences_90_augmented.fasta
3.4.4.6_clustered_sequences_90_augmented.fasta
3.4.4.7_clustered_sequences_90_augmented.fasta
3.4.4.8_clustered_sequences_90_augmented.fasta
3.4.4.9_clustered_sequences_90_augmented.fasta
3.4.99.10_clustered_sequences_90_augmented.fasta
3.4.99.11_clustered_sequences_90_augmented.fasta
3.4.99.12_clustered_sequences_90_augmented.fast

 77%|███████▋  | 28687/37060 [00:10<00:03, 2709.93it/s]

3.5.3.7_clustered_sequences_90_augmented.fasta
3.5.3.8_clustered_sequences_90_augmented.fasta
3.5.3.9_clustered_sequences_90_augmented.fasta
3.5.4.10_clustered_sequences_90_augmented.fasta
3.5.4.11_clustered_sequences_90_augmented.fasta
3.5.4.12_clustered_sequences_90_augmented.fasta
3.5.4.13_clustered_sequences_90_augmented.fasta
3.5.4.14_clustered_sequences_90_augmented.fasta
3.5.4.15_clustered_sequences_90_augmented.fasta
3.5.4.16_clustered_sequences_90_augmented.fasta
3.5.4.17_clustered_sequences_90_augmented.fasta
3.5.4.18_clustered_sequences_90_augmented.fasta
3.5.4.19_clustered_sequences_90_augmented.fasta
3.5.4.1_clustered_sequences_90_augmented.fasta
3.5.4.20_clustered_sequences_90_augmented.fasta
3.5.4.21_clustered_sequences_90_augmented.fasta
3.5.4.22_clustered_sequences_90_augmented.fasta
3.5.4.23_clustered_sequences_90_augmented.fasta
3.5.4.24_clustered_sequences_90_augmented.fasta
3.5.4.25_clustered_sequences_90_augmented.fasta
3.5.4.26_clustered_sequences_90_augmented.fa

 79%|███████▊  | 29175/37060 [00:10<00:03, 2279.41it/s]

3.6.1.16_clustered_sequences_90_augmented.fasta
3.6.1.17_clustered_sequences_90_augmented.fasta
3.6.1.18_clustered_sequences_90_augmented.fasta
3.6.1.19_clustered_sequences_90_augmented.fasta
3.6.1.1_clustered_sequences_90_augmented.fasta
3.6.1.20_clustered_sequences_90_augmented.fasta
3.6.1.21_clustered_sequences_90_augmented.fasta
3.6.1.22_clustered_sequences_90_augmented.fasta
3.6.1.23_clustered_sequences_90_augmented.fasta
3.6.1.24_clustered_sequences_90_augmented.fasta
3.6.1.25_clustered_sequences_90_augmented.fasta
3.6.1.26_clustered_sequences_90_augmented.fasta
3.6.1.27_clustered_sequences_90_augmented.fasta
3.6.1.28_clustered_sequences_90_augmented.fasta
3.6.1.29_clustered_sequences_90_augmented.fasta
3.6.1.2_clustered_sequences_90_augmented.fasta
3.6.1.30_clustered_sequences_90_augmented.fasta
3.6.1.31_clustered_sequences_90_augmented.fasta
3.6.1.32_clustered_sequences_90_augmented.fasta
3.6.1.33_clustered_sequences_90_augmented.fasta
3.6.1.34_clustered_sequences_90_augmented.

 80%|███████▉  | 29567/37060 [00:10<00:03, 2325.52it/s]

3.6.3.32_clustered_sequences_90_augmented.fasta
3.6.3.33_clustered_sequences_90_augmented.fasta
3.6.3.34_clustered_sequences_90_augmented.fasta
3.6.3.35_clustered_sequences_90_augmented.fasta
3.6.3.36_clustered_sequences_90_augmented.fasta
3.6.3.38_clustered_sequences_90_augmented.fasta
3.6.3.39_clustered_sequences_90_augmented.fasta
3.6.3.3_clustered_sequences_90_augmented.fasta
3.6.3.40_clustered_sequences_90_augmented.fasta
3.6.3.41_clustered_sequences_90_augmented.fasta
3.6.3.42_clustered_sequences_90_augmented.fasta
3.6.3.43_clustered_sequences_90_augmented.fasta
3.6.3.44_clustered_sequences_90_augmented.fasta
3.6.3.45_clustered_sequences_90_augmented.fasta
3.6.3.47_clustered_sequences_90_augmented.fasta
3.6.3.48_clustered_sequences_90_augmented.fasta
3.6.3.49_clustered_sequences_90_augmented.fasta
3.6.3.4_clustered_sequences_90_augmented.fasta
3.6.3.50_clustered_sequences_90_augmented.fasta
3.6.3.51_clustered_sequences_90_augmented.fasta
3.6.3.52_clustered_sequences_90_augmented.

 81%|████████  | 29915/37060 [00:11<00:03, 1908.33it/s]

3.6.4.13_clustered_sequences_90_augmented.fasta
3.6.4.1_clustered_sequences_90_augmented.fasta
3.6.4.2_clustered_sequences_90_augmented.fasta
3.6.4.3_clustered_sequences_90_augmented.fasta
3.6.4.4_clustered_sequences_90_augmented.fasta
3.6.4.5_clustered_sequences_90_augmented.fasta
3.6.4.6_clustered_sequences_90_augmented.fasta
3.6.4.7_clustered_sequences_90_augmented.fasta
3.6.4.8_clustered_sequences_90_augmented.fasta
3.6.4.9_clustered_sequences_90_augmented.fasta
3.6.4.B10_clustered_sequences_90_augmented.fasta
3.6.4.B11_clustered_sequences_90_augmented.fasta
3.6.4.B1_clustered_sequences_90_augmented.fasta
3.6.4.B4_clustered_sequences_90_augmented.fasta
3.6.4.B6_clustered_sequences_90_augmented.fasta
3.6.4.B7_clustered_sequences_90_augmented.fasta
3.6.4.B8_clustered_sequences_90_augmented.fasta
3.6.4.B9_clustered_sequences_90_augmented.fasta
3.6.5.1_clustered_sequences_90_augmented.fasta
3.6.5.2_clustered_sequences_90_augmented.fasta
3.6.5.3_clustered_sequences_90_augmented.fasta
3.

 82%|████████▏ | 30550/37060 [00:11<00:03, 2164.92it/s]

4.1.1.46_clustered_sequences_90_augmented.fasta
4.1.1.47_clustered_sequences_90_augmented.fasta
4.1.1.48_clustered_sequences_90_augmented.fasta
4.1.1.49_clustered_sequences_90_augmented.fasta
4.1.1.4_clustered_sequences_90_augmented.fasta
4.1.1.50_clustered_sequences_90_augmented.fasta
4.1.1.51_clustered_sequences_90_augmented.fasta
4.1.1.52_clustered_sequences_90_augmented.fasta
4.1.1.53_clustered_sequences_90_augmented.fasta
4.1.1.55_clustered_sequences_90_augmented.fasta
4.1.1.56_clustered_sequences_90_augmented.fasta
4.1.1.57_clustered_sequences_90_augmented.fasta
4.1.1.58_clustered_sequences_90_augmented.fasta
4.1.1.59_clustered_sequences_90_augmented.fasta
4.1.1.5_clustered_sequences_90_augmented.fasta
4.1.1.61_clustered_sequences_90_augmented.fasta
4.1.1.62_clustered_sequences_90_augmented.fasta
4.1.1.63_clustered_sequences_90_augmented.fasta
4.1.1.64_clustered_sequences_90_augmented.fasta
4.1.1.65_clustered_sequences_90_augmented.fasta
4.1.1.66_clustered_sequences_90_augmented.

 84%|████████▍ | 31269/37060 [00:11<00:02, 2478.17it/s]

4.1.3.29_clustered_sequences_90_augmented.fasta
4.1.3.2_clustered_sequences_90_augmented.fasta
4.1.3.30_clustered_sequences_90_augmented.fasta
4.1.3.31_clustered_sequences_90_augmented.fasta
4.1.3.32_clustered_sequences_90_augmented.fasta
4.1.3.33_clustered_sequences_90_augmented.fasta
4.1.3.34_clustered_sequences_90_augmented.fasta
4.1.3.36_clustered_sequences_90_augmented.fasta
4.1.3.37_clustered_sequences_90_augmented.fasta
4.1.3.38_clustered_sequences_90_augmented.fasta
4.1.3.39_clustered_sequences_90_augmented.fasta
4.1.3.3_clustered_sequences_90_augmented.fasta
4.1.3.40_clustered_sequences_90_augmented.fasta
4.1.3.41_clustered_sequences_90_augmented.fasta
4.1.3.42_clustered_sequences_90_augmented.fasta
4.1.3.43_clustered_sequences_90_augmented.fasta
4.1.3.44_clustered_sequences_90_augmented.fasta
4.1.3.45_clustered_sequences_90_augmented.fasta
4.1.3.46_clustered_sequences_90_augmented.fasta
4.1.3.4_clustered_sequences_90_augmented.fasta
4.1.3.5_clustered_sequences_90_augmented.fa

 85%|████████▌ | 31562/37060 [00:11<00:02, 2290.46it/s]

4.2.1.164_clustered_sequences_90_augmented.fasta
4.2.1.165_clustered_sequences_90_augmented.fasta
4.2.1.166_clustered_sequences_90_augmented.fasta
4.2.1.167_clustered_sequences_90_augmented.fasta
4.2.1.168_clustered_sequences_90_augmented.fasta
4.2.1.169_clustered_sequences_90_augmented.fasta
4.2.1.16_clustered_sequences_90_augmented.fasta
4.2.1.170_clustered_sequences_90_augmented.fasta
4.2.1.171_clustered_sequences_90_augmented.fasta
4.2.1.172_clustered_sequences_90_augmented.fasta
4.2.1.174_clustered_sequences_90_augmented.fasta
4.2.1.17_clustered_sequences_90_augmented.fasta
4.2.1.18_clustered_sequences_90_augmented.fasta
4.2.1.19_clustered_sequences_90_augmented.fasta
4.2.1.1_clustered_sequences_90_augmented.fasta
4.2.1.20_clustered_sequences_90_augmented.fasta
4.2.1.21_clustered_sequences_90_augmented.fasta
4.2.1.22_clustered_sequences_90_augmented.fasta
4.2.1.23_clustered_sequences_90_augmented.fasta
4.2.1.24_clustered_sequences_90_augmented.fasta
4.2.1.25_clustered_sequences_90

 88%|████████▊ | 32525/37060 [00:12<00:01, 2527.24it/s]

4.2.1.48_clustered_sequences_90_augmented.fasta
4.2.1.49_clustered_sequences_90_augmented.fasta
4.2.1.4_clustered_sequences_90_augmented.fasta
4.2.1.50_clustered_sequences_90_augmented.fasta
4.2.1.51_clustered_sequences_90_augmented.fasta
4.2.1.52_clustered_sequences_90_augmented.fasta
4.2.1.53_clustered_sequences_90_augmented.fasta
4.2.1.54_clustered_sequences_90_augmented.fasta
4.2.1.55_clustered_sequences_90_augmented.fasta
4.2.1.56_clustered_sequences_90_augmented.fasta
4.2.1.57_clustered_sequences_90_augmented.fasta
4.2.1.58_clustered_sequences_90_augmented.fasta
4.2.1.59_clustered_sequences_90_augmented.fasta
4.2.1.5_clustered_sequences_90_augmented.fasta
4.2.1.60_clustered_sequences_90_augmented.fasta
4.2.1.61_clustered_sequences_90_augmented.fasta
4.2.1.63_clustered_sequences_90_augmented.fasta
4.2.1.64_clustered_sequences_90_augmented.fasta
4.2.1.65_clustered_sequences_90_augmented.fasta
4.2.1.66_clustered_sequences_90_augmented.fasta
4.2.1.68_clustered_sequences_90_augmented.

 90%|████████▉ | 33255/37060 [00:12<00:01, 3037.44it/s]

4.2.3.41_clustered_sequences_90_augmented.fasta
4.2.3.42_clustered_sequences_90_augmented.fasta
4.2.3.43_clustered_sequences_90_augmented.fasta
4.2.3.44_clustered_sequences_90_augmented.fasta
4.2.3.45_clustered_sequences_90_augmented.fasta
4.2.3.46_clustered_sequences_90_augmented.fasta
4.2.3.47_clustered_sequences_90_augmented.fasta
4.2.3.48_clustered_sequences_90_augmented.fasta
4.2.3.49_clustered_sequences_90_augmented.fasta
4.2.3.4_clustered_sequences_90_augmented.fasta
4.2.3.50_clustered_sequences_90_augmented.fasta
4.2.3.51_clustered_sequences_90_augmented.fasta
4.2.3.52_clustered_sequences_90_augmented.fasta
4.2.3.53_clustered_sequences_90_augmented.fasta
4.2.3.54_clustered_sequences_90_augmented.fasta
4.2.3.55_clustered_sequences_90_augmented.fasta
4.2.3.56_clustered_sequences_90_augmented.fasta
4.2.3.57_clustered_sequences_90_augmented.fasta
4.2.3.58_clustered_sequences_90_augmented.fasta
4.2.3.59_clustered_sequences_90_augmented.fasta
4.2.3.5_clustered_sequences_90_augmented.

 91%|█████████ | 33687/37060 [00:12<00:02, 1684.63it/s]

4.3.2.3_clustered_sequences_90_augmented.fasta
4.3.2.4_clustered_sequences_90_augmented.fasta
4.3.2.5_clustered_sequences_90_augmented.fasta
4.3.2.6_clustered_sequences_90_augmented.fasta
4.3.2.7_clustered_sequences_90_augmented.fasta
4.3.2.8_clustered_sequences_90_augmented.fasta
4.3.2.9_clustered_sequences_90_augmented.fasta
4.3.3.1_clustered_sequences_90_augmented.fasta
4.3.3.2_clustered_sequences_90_augmented.fasta
4.3.3.3_clustered_sequences_90_augmented.fasta
4.3.3.4_clustered_sequences_90_augmented.fasta
4.3.3.5_clustered_sequences_90_augmented.fasta
4.3.3.6_clustered_sequences_90_augmented.fasta
4.3.3.7_clustered_sequences_90_augmented.fasta
4.3.99.1_clustered_sequences_90_augmented.fasta
4.3.99.2_clustered_sequences_90_augmented.fasta
4.3.99.3_clustered_sequences_90_augmented.fasta
4.3.99.4_clustered_sequences_90_augmented.fasta
4.4.1.11_clustered_sequences_90_augmented.fasta
4.4.1.12_clustered_sequences_90_augmented.fasta
4.4.1.13_clustered_sequences_90_augmented.fasta
4.4.1.

 93%|█████████▎| 34316/37060 [00:12<00:01, 1829.22it/s]

5.1.1.20_clustered_sequences_90_augmented.fasta
5.1.1.21_clustered_sequences_90_augmented.fasta
5.1.1.22_clustered_sequences_90_augmented.fasta
5.1.1.23_clustered_sequences_90_augmented.fasta
5.1.1.3_clustered_sequences_90_augmented.fasta
5.1.1.4_clustered_sequences_90_augmented.fasta
5.1.1.5_clustered_sequences_90_augmented.fasta
5.1.1.6_clustered_sequences_90_augmented.fasta
5.1.1.7_clustered_sequences_90_augmented.fasta
5.1.1.8_clustered_sequences_90_augmented.fasta
5.1.1.9_clustered_sequences_90_augmented.fasta
5.1.2.1_clustered_sequences_90_augmented.fasta
5.1.2.2_clustered_sequences_90_augmented.fasta
5.1.2.3_clustered_sequences_90_augmented.fasta
5.1.2.4_clustered_sequences_90_augmented.fasta
5.1.2.7_clustered_sequences_90_augmented.fasta
5.1.3.10_clustered_sequences_90_augmented.fasta
5.1.3.11_clustered_sequences_90_augmented.fasta
5.1.3.12_clustered_sequences_90_augmented.fasta
5.1.3.13_clustered_sequences_90_augmented.fasta
5.1.3.14_clustered_sequences_90_augmented.fasta
5.1.

 93%|█████████▎| 34581/37060 [00:13<00:01, 1370.58it/s]

5.2.1.B4_clustered_sequences_90_augmented.fasta
5.3.1.10_clustered_sequences_90_augmented.fasta
5.3.1.11_clustered_sequences_90_augmented.fasta
5.3.1.12_clustered_sequences_90_augmented.fasta
5.3.1.13_clustered_sequences_90_augmented.fasta
5.3.1.14_clustered_sequences_90_augmented.fasta
5.3.1.15_clustered_sequences_90_augmented.fasta
5.3.1.16_clustered_sequences_90_augmented.fasta
5.3.1.17_clustered_sequences_90_augmented.fasta
5.3.1.18_clustered_sequences_90_augmented.fasta
5.3.1.19_clustered_sequences_90_augmented.fasta
5.3.1.1_clustered_sequences_90_augmented.fasta
5.3.1.21_clustered_sequences_90_augmented.fasta
5.3.1.22_clustered_sequences_90_augmented.fasta
5.3.1.23_clustered_sequences_90_augmented.fasta
5.3.1.24_clustered_sequences_90_augmented.fasta
5.3.1.25_clustered_sequences_90_augmented.fasta
5.3.1.26_clustered_sequences_90_augmented.fasta
5.3.1.27_clustered_sequences_90_augmented.fasta
5.3.1.28_clustered_sequences_90_augmented.fasta
5.3.1.29_clustered_sequences_90_augmented

 95%|█████████▍| 35027/37060 [00:13<00:01, 1659.99it/s]

5.4.2.12_clustered_sequences_90_augmented.fasta
5.4.2.13_clustered_sequences_90_augmented.fasta
5.4.2.1_clustered_sequences_90_augmented.fasta
5.4.2.2_clustered_sequences_90_augmented.fasta
5.4.2.3_clustered_sequences_90_augmented.fasta
5.4.2.4_clustered_sequences_90_augmented.fasta
5.4.2.5_clustered_sequences_90_augmented.fasta
5.4.2.6_clustered_sequences_90_augmented.fasta
5.4.2.7_clustered_sequences_90_augmented.fasta
5.4.2.8_clustered_sequences_90_augmented.fasta
5.4.2.9_clustered_sequences_90_augmented.fasta
5.4.3.10_clustered_sequences_90_augmented.fasta
5.4.3.11_clustered_sequences_90_augmented.fasta
5.4.3.1_clustered_sequences_90_augmented.fasta
5.4.3.2_clustered_sequences_90_augmented.fasta
5.4.3.3_clustered_sequences_90_augmented.fasta
5.4.3.4_clustered_sequences_90_augmented.fasta
5.4.3.5_clustered_sequences_90_augmented.fasta
5.4.3.6_clustered_sequences_90_augmented.fasta
5.4.3.7_clustered_sequences_90_augmented.fasta
5.4.3.8_clustered_sequences_90_augmented.fasta
5.4.3.9_c

 96%|█████████▌| 35525/37060 [00:13<00:00, 2004.96it/s]

5.4.99.B2_clustered_sequences_90_augmented.fasta
5.4.99.B38_clustered_sequences_90_augmented.fasta
5.4.99.B39_clustered_sequences_90_augmented.fasta
5.4.99.B40_clustered_sequences_90_augmented.fasta
5.4.99.B41_clustered_sequences_90_augmented.fasta
5.4.99.B4_clustered_sequences_90_augmented.fasta
5.5.1.10_clustered_sequences_90_augmented.fasta
5.5.1.11_clustered_sequences_90_augmented.fasta
5.5.1.12_clustered_sequences_90_augmented.fasta
5.5.1.13_clustered_sequences_90_augmented.fasta
5.5.1.14_clustered_sequences_90_augmented.fasta
5.5.1.15_clustered_sequences_90_augmented.fasta
5.5.1.16_clustered_sequences_90_augmented.fasta
5.5.1.17_clustered_sequences_90_augmented.fasta
5.5.1.18_clustered_sequences_90_augmented.fasta
5.5.1.19_clustered_sequences_90_augmented.fasta
5.5.1.1_clustered_sequences_90_augmented.fasta
5.5.1.20_clustered_sequences_90_augmented.fasta
5.5.1.21_clustered_sequences_90_augmented.fasta
5.5.1.23_clustered_sequences_90_augmented.fasta
5.5.1.24_clustered_sequences_90

 97%|█████████▋| 36090/37060 [00:14<00:00, 1170.05it/s]

6.2.1.18_clustered_sequences_90_augmented.fasta
6.2.1.19_clustered_sequences_90_augmented.fasta
6.2.1.1_clustered_sequences_90_augmented.fasta
6.2.1.20_clustered_sequences_90_augmented.fasta
6.2.1.21_clustered_sequences_90_augmented.fasta
6.2.1.22_clustered_sequences_90_augmented.fasta
6.2.1.24_clustered_sequences_90_augmented.fasta
6.2.1.25_clustered_sequences_90_augmented.fasta
6.2.1.26_clustered_sequences_90_augmented.fasta
6.2.1.27_clustered_sequences_90_augmented.fasta
6.2.1.28_clustered_sequences_90_augmented.fasta
6.2.1.29_clustered_sequences_90_augmented.fasta
6.2.1.2_clustered_sequences_90_augmented.fasta
6.2.1.30_clustered_sequences_90_augmented.fasta
6.2.1.31_clustered_sequences_90_augmented.fasta
6.2.1.32_clustered_sequences_90_augmented.fasta
6.2.1.33_clustered_sequences_90_augmented.fasta
6.2.1.34_clustered_sequences_90_augmented.fasta
6.2.1.35_clustered_sequences_90_augmented.fasta
6.2.1.36_clustered_sequences_90_augmented.fasta
6.2.1.37_clustered_sequences_90_augmented.

 98%|█████████▊| 36319/37060 [00:14<00:00, 1241.66it/s]

6.3.2.20_clustered_sequences_90_augmented.fasta
6.3.2.21_clustered_sequences_90_augmented.fasta
6.3.2.22_clustered_sequences_90_augmented.fasta
6.3.2.23_clustered_sequences_90_augmented.fasta
6.3.2.25_clustered_sequences_90_augmented.fasta
6.3.2.26_clustered_sequences_90_augmented.fasta
6.3.2.27_clustered_sequences_90_augmented.fasta
6.3.2.28_clustered_sequences_90_augmented.fasta
6.3.2.29_clustered_sequences_90_augmented.fasta
6.3.2.2_clustered_sequences_90_augmented.fasta
6.3.2.30_clustered_sequences_90_augmented.fasta
6.3.2.31_clustered_sequences_90_augmented.fasta
6.3.2.32_clustered_sequences_90_augmented.fasta
6.3.2.33_clustered_sequences_90_augmented.fasta
6.3.2.34_clustered_sequences_90_augmented.fasta
6.3.2.35_clustered_sequences_90_augmented.fasta
6.3.2.36_clustered_sequences_90_augmented.fasta
6.3.2.37_clustered_sequences_90_augmented.fasta
6.3.2.38_clustered_sequences_90_augmented.fasta
6.3.2.39_clustered_sequences_90_augmented.fasta
6.3.2.3_clustered_sequences_90_augmented.

 99%|█████████▊| 36523/37060 [00:14<00:00, 1056.12it/s]

6.3.3.5_clustered_sequences_90_augmented.fasta
6.3.3.6_clustered_sequences_90_augmented.fasta
6.3.3.7_clustered_sequences_90_augmented.fasta
6.3.4.10_clustered_sequences_90_augmented.fasta
6.3.4.11_clustered_sequences_90_augmented.fasta
6.3.4.12_clustered_sequences_90_augmented.fasta
6.3.4.13_clustered_sequences_90_augmented.fasta
6.3.4.14_clustered_sequences_90_augmented.fasta
6.3.4.15_clustered_sequences_90_augmented.fasta
6.3.4.16_clustered_sequences_90_augmented.fasta
6.3.4.18_clustered_sequences_90_augmented.fasta
6.3.4.19_clustered_sequences_90_augmented.fasta
6.3.4.1_clustered_sequences_90_augmented.fasta
6.3.4.20_clustered_sequences_90_augmented.fasta
6.3.4.21_clustered_sequences_90_augmented.fasta
6.3.4.22_clustered_sequences_90_augmented.fasta
6.3.4.23_clustered_sequences_90_augmented.fasta
6.3.4.24_clustered_sequences_90_augmented.fasta
6.3.4.2_clustered_sequences_90_augmented.fasta
6.3.4.3_clustered_sequences_90_augmented.fasta
6.3.4.4_clustered_sequences_90_augmented.fasta

 99%|█████████▉| 36689/37060 [00:15<00:00, 899.68it/s] 

6.3.5.4_clustered_sequences_90_augmented.fasta
6.3.5.5_clustered_sequences_90_augmented.fasta
6.3.5.6_clustered_sequences_90_augmented.fasta
6.3.5.7_clustered_sequences_90_augmented.fasta
6.3.5.8_clustered_sequences_90_augmented.fasta
6.3.5.9_clustered_sequences_90_augmented.fasta
6.4.1.1_clustered_sequences_90_augmented.fasta
6.4.1.2_clustered_sequences_90_augmented.fasta
6.4.1.3_clustered_sequences_90_augmented.fasta
6.4.1.4_clustered_sequences_90_augmented.fasta
6.4.1.5_clustered_sequences_90_augmented.fasta
6.4.1.6_clustered_sequences_90_augmented.fasta
6.4.1.7_clustered_sequences_90_augmented.fasta
6.4.1.8_clustered_sequences_90_augmented.fasta
6.4.1.9_clustered_sequences_90_augmented.fasta
6.5.1.1_clustered_sequences_90_augmented.fasta
6.5.1.2_clustered_sequences_90_augmented.fasta
6.5.1.3_clustered_sequences_90_augmented.fasta
6.5.1.4_clustered_sequences_90_augmented.fasta
6.5.1.5_clustered_sequences_90_augmented.fasta
6.5.1.6_clustered_sequences_90_augmented.fasta
6.5.1.7_clust

100%|█████████▉| 37010/37060 [00:15<00:00, 620.97it/s]

7.1.1.3_clustered_sequences_90_augmented.fasta
7.1.1.4_clustered_sequences_90_augmented.fasta
7.1.1.5_clustered_sequences_90_augmented.fasta
7.1.1.6_clustered_sequences_90_augmented.fasta
7.1.1.7_clustered_sequences_90_augmented.fasta
7.1.1.8_clustered_sequences_90_augmented.fasta
7.1.2.1_clustered_sequences_90_augmented.fasta
7.1.2.2_clustered_sequences_90_augmented.fasta
7.1.3.1_clustered_sequences_90_augmented.fasta
7.2.1.1_clustered_sequences_90_augmented.fasta
7.2.1.2_clustered_sequences_90_augmented.fasta
7.2.1.3_clustered_sequences_90_augmented.fasta
7.2.2.10_clustered_sequences_90_augmented.fasta
7.2.2.11_clustered_sequences_90_augmented.fasta
7.2.2.12_clustered_sequences_90_augmented.fasta
7.2.2.13_clustered_sequences_90_augmented.fasta
7.2.2.14_clustered_sequences_90_augmented.fasta
7.2.2.15_clustered_sequences_90_augmented.fasta
7.2.2.16_clustered_sequences_90_augmented.fasta
7.2.2.17_clustered_sequences_90_augmented.fasta
7.2.2.18_clustered_sequences_90_augmented.fasta
7.2.

100%|██████████| 37060/37060 [00:15<00:00, 2346.55it/s]

7.6.2.12_clustered_sequences_90_augmented.fasta
7.6.2.1_clustered_sequences_90_augmented.fasta
7.6.2.2_clustered_sequences_90_augmented.fasta
7.6.2.4_clustered_sequences_90_augmented.fasta
7.6.2.5_clustered_sequences_90_augmented.fasta
7.6.2.6_clustered_sequences_90_augmented.fasta
7.6.2.7_clustered_sequences_90_augmented.fasta
7.6.2.8_clustered_sequences_90_augmented.fasta
7.6.2.9_clustered_sequences_90_augmented.fasta





### Go through Pfam file to find the domains

In [3]:
def get_doms(data, filepath):
    '''
    Try to find domains for the identifiers.
    Uses pre-computed Pfam file.
    '''
    all_uid = set(data.keys())
    start_found = False
    counter = 0
    with open(filepath, 'r', encoding="latin-1") as f: #latin-1 to deal with a small number of byte codes in the file
        for line in f:
            # find beginning of a pfam domain record
            if line.startswith('#=GF AC'):
                if start_found is False: # if I find two starts in a row something went wrong
                    pfam = line.split()[2].rstrip()
                    start_found = True
                else:
                    raise ValueError

            elif line.startswith('#'): # skip other annotation fields
                continue

            elif line.startswith('//'): # if I find the end before the start something went wrong
                if start_found is True:
                    start_found = False
                else:
                    raise ValueError

            else:
                uid = line.split('.')[0]
                if uid in all_uid:
                    data[uid].append(pfam)
                    counter += 1

    print('found {} of {}'.format(counter, len(all_uid)))
    return data



def write_file(data, filepath):
    '''
    Write data to disk.
    '''
    outlines = []
    for uid in sorted(data.keys()):
        outlines.append('{}\t{}'.format(uid, ','.join(data[uid])))

    with open(filepath, 'w') as f:
        f.write('\n'.join(outlines))


        
def load_file(filepath):
    '''
    Get data from disk.
    '''
    data =  {}
    with open(filepath, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            uid = parts[0]

            if len(parts) == 1:
                data[uid] = []
            else:
                domains = parts[1]
                data[uid] = domains
    return data
        
        
# parse the fasta files and get the uniprot identifiers
outfile = join(INTERMEDIATE, 'BRENDA_for_paper', 'dom_temp_data.tsv')
if not exists(outfile):
    data = get_identifiers()

    # parse the pfam file and get domains for the identifiers
    data = get_doms(data, join('brenda_domains', 'Pfam-A.full.uniprot'))

    # save data
    write_file(data, outfile)
    
    
# how many do not have a domain?
data = load_file(outfile)
print('{} sequences missing domain prediction'.format(list(data.values()).count([])))

172863 sequences missing domain prediction


### Query UniProt for missing domains, but only for the characterized sequences

In [None]:
def get_characterized():
    # get a dictionary of characterized identifiers from BRENDA
    characterized = {}
    with open(join(INTERMEDIATE, 'BRENDA_for_paper', 'parsed_info', 'ec_data_uid_orgs.tsv'), 'r') as f:
        f.readline()

        for line in f:
            ec, org, uid = line.rstrip().split('\t')

            if characterized.get(ec) is None:
                characterized[ec] = set([])
            characterized[ec].add(uid)
            
    # get all characterized identifiers from SwissProt
    with open(join(INTERMEDIATE, 'swissprot_2020_02', 'SwissProt-2020_02-protein-evidence.tsv'), 'r') as f:
        f.readline()
        
        for line in f:
            uid, ec, org, orgid = line.strip().split('\t')
            
            if characterized.get(ec) is None:
                characterized[ec] = set([])
            characterized[ec].add(uid)
            
    return characterized


def dlfile(folder, filename, url):
    '''
    Download a web page if the corresponding file does not exist.
    '''

    # Open the url
    try:
        out_path = join(folder, filename)
        if exists(out_path):
            print('already on disk ' + url)
            return True
            
        elif not exists(out_path):
            f = urlopen(url)
            print("downloading " + url)

            # Open local file for writing
            with open(out_path, "wb") as local_file:
                local_file.write(f.read())
            time.sleep(0.5)
            return True

    #handle errors
    except HTTPError as e:
        print("HTTP Error:", e.code, url)
        return False
    
    except URLError as e:
        print("URL Error:", e.reason, url)
        return False
        

def get_pfam_from_file(filepath):
    '''
    Parse out pfam data from a file
    '''
    # open the uniprot and uniparc pages and append them
    with open(filepath, 'r') as f:
        document = f.read()

        
    # search through the combined document for identifiers
    m = re.findall('(PF[0-9]{5}|CL[0-9]{4})', document)

    # loop through the search result and keep unique identifiers
    pfam_ids = set([])
    for pid in m:
        if pid == '':
            continue
                
        pfam_ids.add(pid)
        
    return pfam_ids
        
    
    
def get_pfam_for_uids(uids, filepath):
    '''
    Query the UniProt database to get protein domains for 
    a list of protein identifiers.
    '''

    data = {'uid':[], 'pfam':[]}

    for uid in tqdm(uids):

        # download uniprot file
        uniprot_url = 'https://www.uniprot.org/uniprot/'
        result = dlfile(folder=filepath, filename='%s_uniprot.html' % uid, url=uniprot_url+uid)
        if result is False:
            continue

        # query uniprot file
        pfam_ids = get_pfam_from_file(join(filepath, '%s_uniprot.html' % uid))


        # download uniparc overview file
        uniparc_url = 'https://www.uniprot.org/uniparc/?query=%s' % uid
        overview_filename = join(filepath, '%s_uniparc_overview.html' % uid)
        result = dlfile(folder=filepath, filename='%s_uniparc_overview.html' % uid, url=uniparc_url)
        if result is False:
            continue

        # find the reference for the alternate identifier and download that file
        with open(overview_filename, 'r') as f:
            document = f.read()

            m = re.search('class="entryID"><a href="/uniparc/([a-zA-Z0-9]+)">', document)
            new_target_url = 'https://www.uniprot.org/uniparc/%s' % m.group(1)
            result = dlfile(folder=filepath, filename='%s_uniparc.html' % uid, url=new_target_url)
            if result is False:
                continue

        # query uniparc file
        pfam_ids2 = get_pfam_from_file(join(filepath, '%s_uniparc.html' % uid))


        data['uid'].append(uid)
        data['pfam'].append(','.join(sorted(list(pfam_ids.union(pfam_ids2)))))

    
    return pd.DataFrame(data)




# get characterized identfiers
characterized = get_characterized()
uids = set([])
for ec in characterized.keys():
    uids.update(characterized[ec])
len(uids)

# which identifiers are considered characterized but have not predicted domain?
missing = uids - set(data.keys())
print(len(missing))

# now download these
filepath = join(RAW_EXTERNAL, 'pfam')
get_pfam_for_uids(sorted(list(missing), reverse=True), filepath)

  0%|          | 0/337 [00:00<?, ?it/s]

337
downloading https://www.uniprot.org/uniprot/W5QJZ5
downloading https://www.uniprot.org/uniparc/?query=W5QJZ5
downloading https://www.uniprot.org/uniparc/UPI0003EC8843


  0%|          | 1/337 [00:07<43:41,  7.80s/it]

downloading https://www.uniprot.org/uniprot/V5TF65
downloading https://www.uniprot.org/uniparc/?query=V5TF65
downloading https://www.uniprot.org/uniparc/UPI0002DC68A7


  1%|          | 2/337 [00:15<42:45,  7.66s/it]

downloading https://www.uniprot.org/uniprot/V5TDZ4
downloading https://www.uniprot.org/uniparc/?query=V5TDZ4
downloading https://www.uniprot.org/uniparc/UPI0003167E2A


  1%|          | 3/337 [00:21<41:04,  7.38s/it]

downloading https://www.uniprot.org/uniprot/V5TD11
downloading https://www.uniprot.org/uniparc/?query=V5TD11
downloading https://www.uniprot.org/uniparc/UPI0003D843B9


  1%|          | 4/337 [00:29<40:35,  7.31s/it]

downloading https://www.uniprot.org/uniprot/U5NE19
downloading https://www.uniprot.org/uniparc/?query=U5NE19
downloading https://www.uniprot.org/uniparc/UPI0003BA04D8


  1%|▏         | 5/337 [00:35<39:21,  7.11s/it]

downloading https://www.uniprot.org/uniprot/Q9ZI86
downloading https://www.uniprot.org/uniparc/?query=Q9ZI86
downloading https://www.uniprot.org/uniparc/UPI000012E7A0


  2%|▏         | 6/337 [00:42<38:23,  6.96s/it]

downloading https://www.uniprot.org/uniprot/Q9Y6E7
downloading https://www.uniprot.org/uniparc/?query=Q9Y6E7
downloading https://www.uniprot.org/uniparc/UPI0000071F11


  2%|▏         | 7/337 [00:50<40:10,  7.31s/it]

downloading https://www.uniprot.org/uniprot/Q9WYW0
downloading https://www.uniprot.org/uniparc/?query=Q9WYW0
downloading https://www.uniprot.org/uniparc/UPI00000D39F3


  2%|▏         | 8/337 [00:57<39:52,  7.27s/it]

downloading https://www.uniprot.org/uniprot/Q9W5E0
downloading https://www.uniprot.org/uniparc/?query=Q9W5E0
downloading https://www.uniprot.org/uniparc/UPI000007F6B5


  3%|▎         | 9/337 [01:10<49:36,  9.08s/it]

downloading https://www.uniprot.org/uniprot/Q9VK34
downloading https://www.uniprot.org/uniparc/?query=Q9VK34
downloading https://www.uniprot.org/uniparc/UPI000007CC5D


  3%|▎         | 10/337 [01:18<46:25,  8.52s/it]

downloading https://www.uniprot.org/uniprot/Q9VFK6
downloading https://www.uniprot.org/uniparc/?query=Q9VFK6
downloading https://www.uniprot.org/uniparc/UPI00001247DE


  3%|▎         | 11/337 [01:25<44:37,  8.21s/it]

downloading https://www.uniprot.org/uniprot/Q9VAQ1
downloading https://www.uniprot.org/uniparc/?query=Q9VAQ1
downloading https://www.uniprot.org/uniparc/UPI0000079982


  4%|▎         | 12/337 [01:32<42:23,  7.83s/it]

downloading https://www.uniprot.org/uniprot/Q9UW95
downloading https://www.uniprot.org/uniparc/?query=Q9UW95
downloading https://www.uniprot.org/uniparc/UPI000006B230


  4%|▍         | 13/337 [01:39<40:31,  7.51s/it]

downloading https://www.uniprot.org/uniprot/Q9UMN6
downloading https://www.uniprot.org/uniparc/?query=Q9UMN6
downloading https://www.uniprot.org/uniparc/UPI00001376B5


  4%|▍         | 14/337 [01:48<42:41,  7.93s/it]

downloading https://www.uniprot.org/uniprot/Q9UKV8
downloading https://www.uniprot.org/uniparc/?query=Q9UKV8
downloading https://www.uniprot.org/uniparc/UPI000012D07E


  4%|▍         | 15/337 [01:56<43:47,  8.16s/it]

downloading https://www.uniprot.org/uniprot/Q9UJ83
