In [1]:
#Importing Packages
import networkx as nx
import pandas as pd
import requests
from bs4 import BeautifulSoup
import numpy as np
import time
import os
from tqdm import tqdm

In [2]:
# Load the protein interaction network from its edge-list file.
# NOTE(review): the earlier comment called this the "Signor" database, but the
# path below names a biogrid edge list -- confirm which source is intended.
edgelist_path = 'parse_data/parse.biogrid.edgelist'
P = nx.read_edgelist(edgelist_path, encoding='unicode_escape')
P.number_of_nodes()

22835

In [3]:
# One-letter amino-acid codes paired with their hydropathy score.
# `nomenclature` and `hydro_score` are parallel lists: position i in one
# corresponds to position i in the other.
# NOTE(review): the scores look like the Kyte-Doolittle hydropathy scale -- confirm.
residue_hydropathy = [
    ('G', -0.4), ('A', 1.8), ('V', 4.2), ('L', 3.8), ('I', 4.5),
    ('M', 1.9), ('P', -1.6), ('F', 2.8), ('W', -0.9), ('S', -0.8),
    ('T', -0.7), ('C', 2.5), ('Y', -1.3), ('N', -3.5), ('Q', -3.5),
    ('D', -3.5), ('E', -3.5), ('K', -3.9), ('R', -4.5), ('H', -3.2),
]
nomenclature = [code for code, score in residue_hydropathy]
hydro_score = [score for code, score in residue_hydropathy]

In [4]:
# Residue classes used for the amino-acid-composition features.
# Each list holds the one-letter codes that belong to that class; a residue
# may appear in several classes.
tiny = list('ACGST')
small = list('ACDGNPSTV')
aliphatic = list('ILV')
aromatic = list('FHWY')
nonpolar = list('ACFGILMPVWY')
poscharged = list('KRH')
negcharged = list('DE')
# Charged residues are the positively charged ones followed by the negatively charged ones.
charged = poscharged + negcharged

In [5]:
# Keyword identifiers ("KW-nnnn") used to flag protein function families.
# A protein is flagged for a family when any of its keyword ids is in that list.
GPCR = ['KW-0297']
ion = [
    'KW-1071', 'KW-0851', 'KW-0107', 'KW-0869',
    'KW-0407', 'KW-0631', 'KW-0894',
]
kinase = ['KW-0418', 'KW-0723', 'KW-0829']
protease = [
    'KW-0031', 'KW-0064', 'KW-0121', 'KW-0224', 'KW-0482',
    'KW-0645', 'KW-0720', 'KW-0788', 'KW-0888',
]
# Every keyword id of interest, in family order.
elem = [*GPCR, *ion, *kinase, *protease]

In [6]:
# Accumulators filled by the scraping loop. Every name gets its own fresh,
# empty list (the comprehensions build a new list object per name).

# Nodes whose entry could not be turned into features.
error_nodes = []

# Identity columns: node id, primary name, sequence length, mass.
indexes, gene_name, l, m = [[] for _ in range(4)]

# Fraction of the sequence falling in each residue class.
(aac_tiny, aac_small, aac_aliphatic, aac_aromatic,
 aac_nonpolar, aac_pos, aac_neg, aac_chr) = [[] for _ in range(8)]

# Per-residue frequencies, one list per one-letter code.
# Proline uses its full name rather than its one-letter code.
(G, A, V, L, I, M, Proline, F, W, S,
 T, C, Y, N, Q, D, E, K, R, H) = [[] for _ in range(20)]

# Binary family-membership flags.
gpcr_b, ionize_b, kin_b, prot_b = [[] for _ in range(4)]

# Binary signal-peptide flag.
signalpep_b = []

# Glycosylation / modified-residue site features and the combined site total.
(N_count, O_count, serine_count,
 tyrosine_count, threonine_count, tsites) = [[] for _ in range(6)]

# Normalised strand / helix / turn counts.
b_count, h_count, t_count = [[] for _ in range(3)]

In [7]:
#Web Scraping for biological data
#
# For every node of the graph P, download the node's UniProt XML entry and
# derive one row of features, which is appended to the module-level
# accumulator lists (indexes, gene_name, l, m, aac_*, G..H, *_b, *_count, tsites).
#
# Differences from the previous version of this cell:
#   * requests now carry a timeout, retries are bounded and backed off, and
#     transient HTTP statuses are retried instead of being parsed as entries;
#   * values are read from XML attributes / text instead of slicing str(tag)
#     at fixed character offsets;
#   * a row is computed completely first and only then appended, so the
#     accumulator lists can never end up with different lengths;
#   * a non-positive sequence length is treated as a bad entry, not a crash.
#
# NOTE(review): the URL scheme below is unchanged from the original cell --
# confirm that it still resolves for the UniProt release being used.

REQUEST_TIMEOUT = 30                      # seconds before requests raises Timeout
MAX_ATTEMPTS = 8                          # download attempts per node
MAX_BACKOFF = 60                          # cap (seconds) on the pause between attempts
RETRY_STATUS = {429, 500, 502, 503, 504}  # HTTP statuses treated as transient

# Per-residue frequency lists in the same order as `nomenclature`
# (position 6, 'P', is stored in `Proline` because `P` is the graph).
aa_freq_lists = [G, A, V, L, I, M, Proline, F, W, S,
                 T, C, Y, N, Q, D, E, K, R, H]

# (residue class, accumulator list) pairs for the class-composition features.
aa_class_lists = [
    (tiny, aac_tiny),
    (small, aac_small),
    (aliphatic, aac_aliphatic),
    (aromatic, aac_aromatic),
    (nonpolar, aac_nonpolar),
    (poscharged, aac_pos),
    (negcharged, aac_neg),
    (charged, aac_chr),
]


def _fetch_uniprot_xml(session, link):
    """Download `link`, retrying transient failures with exponential backoff.

    Returns the `requests.Response` (whatever its final status code), or
    None when every attempt failed or the server redirected in a loop.
    """
    for attempt in range(MAX_ATTEMPTS):
        try:
            response = session.get(link, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.Timeout:
            tqdm.write('Timeout: ' + link)
        except requests.exceptions.TooManyRedirects:
            # A redirect loop will not fix itself; retrying would spin forever.
            tqdm.write('Redirected: ' + link)
            return None
        except requests.exceptions.ConnectionError:
            tqdm.write('Retrying connection')
        else:
            if response.status_code not in RETRY_STATUS:
                return response
            tqdm.write('HTTP ' + str(response.status_code) + ': ' + link)
        if attempt + 1 < MAX_ATTEMPTS:
            time.sleep(min(2 ** attempt, MAX_BACKOFF))
    return None


def _normalised(part, total):
    """Return part/total, or 0 when total is 0."""
    return part / total if total != 0 else 0


def _feature_descriptions(sp, feature_type):
    """Return the `description` attribute of every <feature type=feature_type>."""
    return [feature.get('description', '')
            for feature in sp.find_all('feature', {'type': feature_type})]


def _extract_features(sp):
    """Compute one row of features from a parsed UniProt XML document.

    Nothing is appended to the accumulator lists here; the caller commits the
    returned dict in one step.

    Raises:
        ValueError: the document has no <sequence> element carrying a
            `length` attribute, or that length is not a positive integer.
    """
    # Primary name. Stored as '' when the entry has none (the old string
    # slicing produced a fragment of the text "None" in that case).
    name_tag = sp.find('name', {'type': 'primary'})
    primary_name = name_tag.get_text(strip=True) if name_tag is not None else ''

    # The main sequence element is the one that carries a `length` attribute.
    seq_tag = sp.find('sequence', attrs={'length': True})
    if seq_tag is None:
        raise ValueError('entry has no <sequence length=...> element')
    length = seq_tag['length']          # kept as a string, as before
    mass = seq_tag.get('mass', '')      # kept as a string, as before
    residues = int(length)              # ValueError when not numeric
    if residues <= 0:
        raise ValueError('sequence length must be positive')
    sequence = seq_tag.get_text()

    # Residue counts and frequencies, in `nomenclature` order.
    counts = [sequence.count(code) for code in nomenclature]
    density = [count / residues for count in counts]
    class_fractions = [
        sum(frac for code, frac in zip(nomenclature, density) if code in group)
        for group, _target in aa_class_lists
    ]

    # Sum of hydro_score[i] * counts[i] over all residues.
    hydro = sum(score * count for score, count in zip(hydro_score, counts))

    # Family membership: flag is 1 when any keyword id is in the family list.
    keyword_ids = {keyword.get('id') for keyword in sp.find_all('keyword')}
    gpcr = int(not keyword_ids.isdisjoint(GPCR))
    ionize = int(not keyword_ids.isdisjoint(ion))
    kin = int(not keyword_ids.isdisjoint(kinase))
    prot = int(not keyword_ids.isdisjoint(protease))

    # Signal peptide: 1 when at least one such feature exists.
    signal_pep = int(sp.find('feature', {'type': 'signal peptide'}) is not None)

    # Number of transmembrane-region features whose description is "Helical".
    alpha_helical = len(sp.find_all(
        'feature', {'type': 'transmembrane region', 'description': "Helical"}))

    # Glycosylation sites, split by linkage named in the description.
    glyco_desc = _feature_descriptions(sp, 'glycosylation site')
    glyco_l = len(glyco_desc)
    n_glyc = sum("N-linked" in text for text in glyco_desc)
    o_glyc = sum("O-linked" in text for text in glyco_desc)

    # Modified residues, split by the residue named in the description.
    # NOTE(review): as before, every "modified residue" feature is counted and
    # matched by substring, so non-phosphorylation modifications that mention
    # serine/threonine/tyrosine are included -- confirm this is intended.
    modres_desc = _feature_descriptions(sp, 'modified residue')
    phospho_l = len(modres_desc)
    serine = sum("serine" in text for text in modres_desc)
    threonine = sum("threonine" in text for text in modres_desc)
    tyrosine = sum("tyrosine" in text for text in modres_desc)

    # Secondary-structure feature counts.
    beta_count = len(sp.find_all('feature', {'type': 'strand'}))
    helix_count = len(sp.find_all('feature', {'type': 'helix'}))
    turn_count = len(sp.find_all('feature', {'type': 'turn'}))
    structure_count = beta_count + helix_count + turn_count

    return {
        'name': primary_name,
        'length': length,
        'mass': mass,
        'density': density,
        'class_fractions': class_fractions,
        # NOTE(review): `hydro` and `alpha_helical` were computed but never
        # stored by the original cell and there is no accumulator list for
        # them; they are returned here but still not committed.
        'hydro': hydro,
        'alpha_helical': alpha_helical,
        'gpcr': gpcr,
        'ionize': ionize,
        'kin': kin,
        'prot': prot,
        'signal_pep': signal_pep,
        'N': _normalised(n_glyc, glyco_l),
        'O': _normalised(o_glyc, glyco_l),
        'serine': _normalised(serine, phospho_l),
        'threonine': _normalised(threonine, phospho_l),
        'tyrosine': _normalised(tyrosine, phospho_l),
        'total_sites': glyco_l + phospho_l,
        'beta': _normalised(beta_count, structure_count),
        'helix': _normalised(helix_count, structure_count),
        'turn': _normalised(turn_count, structure_count),
    }


def _store_features(node, row):
    """Append one completed feature row to every accumulator list."""
    indexes.append(node)
    gene_name.append(row['name'])
    l.append(row['length'])
    m.append(row['mass'])

    for (_group, target), value in zip(aa_class_lists, row['class_fractions']):
        target.append(value)
    for target, value in zip(aa_freq_lists, row['density']):
        target.append(value)

    gpcr_b.append(row['gpcr'])
    ionize_b.append(row['ionize'])
    kin_b.append(row['kin'])
    prot_b.append(row['prot'])
    signalpep_b.append(row['signal_pep'])

    N_count.append(row['N'])
    O_count.append(row['O'])
    serine_count.append(row['serine'])
    threonine_count.append(row['threonine'])
    tyrosine_count.append(row['tyrosine'])
    tsites.append(row['total_sites'])

    b_count.append(row['beta'])
    h_count.append(row['helix'])
    t_count.append(row['turn'])


start_time = time.time()
traversed = 0
total_nodes = len(P.nodes)

# One session for the whole run so connections to the server are reused.
with requests.Session() as session:
    for node in tqdm(P.nodes()):
        link = 'https://www.uniprot.org/uniprot/' + str(node) + ".xml"
        traversed += 1

        response = _fetch_uniprot_xml(session, link)
        try:
            if response is None or not response.ok:
                raise ValueError('no usable response for ' + link)
            row = _extract_features(BeautifulSoup(response.text, 'xml'))
        except ValueError:
            # Nothing has been appended for this node yet, so there is
            # nothing to roll back: just record it and move on.
            tqdm.write('Error found at node: ' + str(node) + ' ... removing from biofeatures')
            error_nodes.append(node)
        else:
            _store_features(node, row)

        if traversed % 50 == 0:
            check_time = time.time() - start_time
            # tqdm.write keeps the message from splitting the progress bar.
            tqdm.write(f"{(traversed / total_nodes) * 100} % at  {check_time / 60} minutes.")

  0%|                                                         | 50/22835 [01:49<13:17:47,  2.10s/it]

0.21896211955331726 % at  1.8205041289329529 minutes.


  0%|▏                                                       | 100/22835 [03:28<11:30:39,  1.82s/it]

0.4379242391066345 % at  3.477032208442688 minutes.


  1%|▎                                                       | 150/22835 [04:55<11:03:06,  1.75s/it]

0.6568863586599518 % at  4.922714896996816 minutes.


  1%|▍                                                       | 200/22835 [06:16<10:15:57,  1.63s/it]

0.875848478213269 % at  6.273908845583597 minutes.


  1%|▌                                                        | 250/22835 [07:42<9:49:43,  1.57s/it]

1.0948105977665865 % at  7.704007311662038 minutes.


  1%|▋                                                        | 300/22835 [09:01<9:32:13,  1.52s/it]

1.3137727173199036 % at  9.028699743747712 minutes.


  2%|▊                                                        | 350/22835 [10:24<9:53:18,  1.58s/it]

1.532734836873221 % at  10.401311588287353 minutes.


  2%|▉                                                        | 400/22835 [11:44<9:59:10,  1.60s/it]

1.751696956426538 % at  11.748788658777872 minutes.


  2%|█                                                       | 450/22835 [13:16<15:45:21,  2.53s/it]

1.9706590759798552 % at  13.271709096431731 minutes.


  2%|█▏                                                      | 500/22835 [14:35<10:17:32,  1.66s/it]

2.189621195533173 % at  14.59601817925771 minutes.


  2%|█▎                                                       | 550/22835 [15:54<9:46:02,  1.58s/it]

2.40858331508649 % at  15.910526696840922 minutes.


  3%|█▍                                                       | 600/22835 [17:13<9:54:34,  1.60s/it]

2.6275454346398073 % at  17.218368164698283 minutes.


  3%|█▌                                                       | 650/22835 [18:31<9:15:53,  1.50s/it]

2.8465075541931246 % at  18.533226704597475 minutes.


  3%|█▋                                                       | 700/22835 [19:50<9:40:28,  1.57s/it]

3.065469673746442 % at  19.84845193227132 minutes.


  3%|█▊                                                       | 750/22835 [21:09<8:52:26,  1.45s/it]

3.2844317932997593 % at  21.16689401070277 minutes.


  4%|█▉                                                       | 800/22835 [22:29<9:11:30,  1.50s/it]

3.503393912853076 % at  22.489236311117807 minutes.


  4%|██                                                       | 850/22835 [23:49<9:35:40,  1.57s/it]

3.722356032406394 % at  23.828079557418825 minutes.


  4%|██▏                                                      | 900/22835 [25:08<9:11:53,  1.51s/it]

3.9413181519597105 % at  25.134070920944215 minutes.


  4%|██▎                                                      | 950/22835 [26:25<9:26:05,  1.55s/it]

4.160280271513028 % at  26.431695139408113 minutes.


  4%|██▍                                                    | 1000/22835 [27:39<10:17:03,  1.70s/it]

4.379242391066346 % at  27.666849116484325 minutes.


  5%|██▌                                                     | 1050/22835 [28:55<9:24:56,  1.56s/it]

4.598204510619663 % at  28.926587839921314 minutes.


  5%|██▋                                                     | 1100/22835 [30:17<8:45:18,  1.45s/it]

4.81716663017298 % at  30.289765810966493 minutes.


  5%|██▋                                                     | 1107/22835 [30:27<8:21:53,  1.39s/it]

Error found at node: Q9Y2S0 ... removing from biofeatures


  5%|██▊                                                     | 1150/22835 [31:32<9:35:17,  1.59s/it]

5.036128749726297 % at  31.533770294984183 minutes.


  5%|██▉                                                     | 1200/22835 [32:42<8:13:49,  1.37s/it]

5.2550908692796146 % at  32.71035426060359 minutes.


  5%|███                                                     | 1250/22835 [33:52<8:46:08,  1.46s/it]

5.4740529888329315 % at  33.870437276363376 minutes.


  6%|███▏                                                    | 1300/22835 [34:58<7:43:09,  1.29s/it]

5.693015108386249 % at  34.97525047063827 minutes.


  6%|███▎                                                    | 1350/22835 [36:07<8:03:49,  1.35s/it]

5.911977227939567 % at  36.12284944057465 minutes.


  6%|███▍                                                    | 1400/22835 [37:22<9:01:22,  1.52s/it]

6.130939347492884 % at  37.38208813269933 minutes.


  6%|███▌                                                    | 1450/22835 [38:40<9:44:26,  1.64s/it]

6.349901467046201 % at  38.67569545904795 minutes.


  7%|███▋                                                    | 1500/22835 [39:57<9:41:54,  1.64s/it]

6.568863586599519 % at  39.96105231046677 minutes.


  7%|███▊                                                    | 1550/22835 [41:12<9:26:26,  1.60s/it]

6.787825706152835 % at  41.21270723342896 minutes.


  7%|███▉                                                    | 1600/22835 [42:26<8:12:57,  1.39s/it]

7.006787825706152 % at  42.43807731866836 minutes.


  7%|████                                                    | 1650/22835 [43:40<8:54:54,  1.51s/it]

7.22574994525947 % at  43.67259792884191 minutes.


  7%|████▏                                                   | 1700/22835 [44:55<8:28:01,  1.44s/it]

7.444712064812788 % at  44.92083599170049 minutes.


  8%|████▎                                                   | 1750/22835 [46:09<8:45:37,  1.50s/it]

7.663674184366105 % at  46.16155695915222 minutes.


  8%|████▍                                                   | 1800/22835 [47:24<8:43:36,  1.49s/it]

7.882636303919421 % at  47.41691209475199 minutes.


  8%|████▍                                                  | 1850/22835 [48:42<10:38:31,  1.83s/it]

8.101598423472739 % at  48.704069046179455 minutes.


  8%|████▋                                                   | 1900/22835 [49:57<9:11:54,  1.58s/it]

8.320560543026057 % at  49.955690634250644 minutes.


  9%|████▊                                                   | 1950/22835 [51:10<8:46:10,  1.51s/it]

8.539522662579374 % at  51.172726913293204 minutes.


  9%|████▉                                                   | 2000/22835 [52:27<8:57:56,  1.55s/it]

8.758484782132692 % at  52.458067095279695 minutes.


  9%|█████                                                   | 2050/22835 [53:41<8:31:58,  1.48s/it]

8.977446901686008 % at  53.688070782025655 minutes.


  9%|█████▏                                                  | 2100/22835 [54:55<7:57:38,  1.38s/it]

9.196409021239326 % at  54.918274478117624 minutes.


  9%|█████▎                                                  | 2150/22835 [56:08<8:11:27,  1.43s/it]

9.415371140792644 % at  56.14607804218928 minutes.


 10%|█████▍                                                  | 2200/22835 [57:22<8:02:43,  1.40s/it]

9.63433326034596 % at  57.37438162565231 minutes.


 10%|█████▌                                                  | 2250/22835 [58:38<8:51:15,  1.55s/it]

9.853295379899278 % at  58.63683717250824 minutes.


 10%|█████▋                                                  | 2300/22835 [59:56<8:50:11,  1.55s/it]

10.072257499452594 % at  59.93909498850505 minutes.


 10%|█████▋                                                  | 2301/22835 [59:57<7:52:58,  1.38s/it]

Error found at node: P62158 ... removing from biofeatures


 10%|█████▌                                                | 2350/22835 [1:01:09<8:16:56,  1.46s/it]

10.291219619005911 % at  61.150597616036734 minutes.


 11%|█████▋                                                | 2400/22835 [1:02:24<8:38:40,  1.52s/it]

10.510181738559229 % at  62.40106913646062 minutes.


 11%|█████▊                                                | 2450/22835 [1:03:38<8:12:53,  1.45s/it]

10.729143858112547 % at  63.64539030790329 minutes.


 11%|█████▉                                                | 2500/22835 [1:05:00<8:59:09,  1.59s/it]

10.948105977665863 % at  65.00416802565256 minutes.


 11%|██████                                                | 2550/22835 [1:06:18<8:07:35,  1.44s/it]

11.16706809721918 % at  66.30654251972834 minutes.


 11%|██████▏                                               | 2600/22835 [1:07:38<8:50:01,  1.57s/it]

11.386030216772498 % at  67.6339517712593 minutes.


 12%|██████▎                                               | 2650/22835 [1:08:54<8:47:00,  1.57s/it]

11.604992336325816 % at  68.90537449916204 minutes.


 12%|██████▎                                               | 2671/22835 [1:09:26<7:15:02,  1.29s/it]

Error found at node: Q13748 ... removing from biofeatures


 12%|██████▍                                               | 2700/22835 [1:10:11<8:54:08,  1.59s/it]

11.823954455879134 % at  70.20026522477468 minutes.


 12%|██████▎                                              | 2728/22835 [1:11:00<15:22:33,  2.75s/it]

Error found at node: - ... removing from biofeatures


 12%|██████▍                                              | 2750/22835 [1:11:36<10:20:02,  1.85s/it]

12.04291657543245 % at  71.61321270465851 minutes.


 12%|██████▌                                               | 2800/22835 [1:12:56<8:12:49,  1.48s/it]

12.261878694985768 % at  72.93635505437851 minutes.


 12%|██████▋                                               | 2850/22835 [1:14:17<8:34:43,  1.55s/it]

12.480840814539086 % at  74.28579890330633 minutes.


 13%|██████▊                                               | 2900/22835 [1:15:36<8:41:55,  1.57s/it]

12.699802934092402 % at  75.61182475090027 minutes.


 13%|██████▉                                               | 2950/22835 [1:16:57<8:42:30,  1.58s/it]

12.91876505364572 % at  76.95348482131958 minutes.


 13%|███████                                               | 3000/22835 [1:18:16<9:20:11,  1.69s/it]

13.137727173199037 % at  78.28351089557012 minutes.


 13%|███████▏                                              | 3050/22835 [1:19:33<8:08:59,  1.48s/it]

13.356689292752355 % at  79.56215069293975 minutes.


 14%|███████▎                                              | 3100/22835 [1:20:56<8:24:12,  1.53s/it]

13.57565141230567 % at  80.9465465426445 minutes.


 14%|███████▍                                              | 3150/22835 [1:22:15<8:45:57,  1.60s/it]

13.794613531858987 % at  82.25282125870386 minutes.


 14%|███████▌                                              | 3200/22835 [1:23:30<9:00:05,  1.65s/it]

14.013575651412305 % at  83.5048262000084 minutes.


 14%|███████▋                                              | 3250/22835 [1:24:43<8:14:28,  1.51s/it]

14.232537770965623 % at  84.72587937513987 minutes.


 14%|███████▊                                              | 3300/22835 [1:25:58<8:49:20,  1.63s/it]

14.45149989051894 % at  85.97190064589182 minutes.


 15%|███████▉                                              | 3350/22835 [1:27:12<8:02:29,  1.49s/it]

14.670462010072258 % at  87.20730464061101 minutes.


 15%|████████                                              | 3400/22835 [1:28:28<8:00:46,  1.48s/it]

14.889424129625576 % at  88.48329428831737 minutes.


 15%|████████▏                                             | 3450/22835 [1:29:42<7:05:17,  1.32s/it]

15.108386249178892 % at  89.71664816538492 minutes.


 15%|████████▎                                             | 3500/22835 [1:31:02<7:37:09,  1.42s/it]

15.32734836873221 % at  91.04797431230546 minutes.


 16%|████████▍                                             | 3550/22835 [1:32:16<8:17:59,  1.55s/it]

15.546310488285528 % at  92.28317829370499 minutes.


 16%|████████▌                                             | 3600/22835 [1:33:32<8:25:26,  1.58s/it]

15.765272607838842 % at  93.53708334763844 minutes.


 16%|████████▋                                             | 3650/22835 [1:34:48<9:02:52,  1.70s/it]

15.98423472739216 % at  94.81080620288849 minutes.


 16%|████████▋                                             | 3700/22835 [1:36:10<8:40:05,  1.63s/it]

16.203196846945477 % at  96.16885054111481 minutes.


 16%|████████▊                                             | 3719/22835 [1:36:38<7:04:51,  1.33s/it]

Error found at node: P08107 ... removing from biofeatures


 16%|████████▊                                             | 3750/22835 [1:37:27<8:08:15,  1.54s/it]

16.422158966498795 % at  97.45525745550792 minutes.


 17%|████████▉                                             | 3773/22835 [1:38:06<8:06:16,  1.53s/it]

Error found at node: Q08619 ... removing from biofeatures


 17%|████████▉                                             | 3774/22835 [1:38:07<7:14:15,  1.37s/it]

Error found at node: P15092 ... removing from biofeatures


 17%|████████▉                                             | 3800/22835 [1:38:49<8:23:17,  1.59s/it]

16.641121086052113 % at  98.82565250396729 minutes.


 17%|█████████                                             | 3850/22835 [1:40:10<8:35:36,  1.63s/it]

16.86008320560543 % at  100.17704646587372 minutes.


 17%|█████████▏                                            | 3900/22835 [1:41:27<7:32:40,  1.43s/it]

17.07904532515875 % at  101.4664868871371 minutes.


 17%|█████████▎                                            | 3950/22835 [1:42:50<9:02:59,  1.73s/it]

17.298007444712066 % at  102.84154886802038 minutes.


 18%|█████████▍                                            | 4000/22835 [1:44:09<7:42:55,  1.47s/it]

17.516969564265384 % at  104.1575074672699 minutes.


 18%|█████████▌                                            | 4050/22835 [1:45:25<8:10:22,  1.57s/it]

17.7359316838187 % at  105.41936297814051 minutes.


 18%|█████████▋                                            | 4100/22835 [1:46:39<7:16:25,  1.40s/it]

17.954893803372016 % at  106.66290076971055 minutes.


 18%|█████████▊                                            | 4150/22835 [1:47:55<7:36:00,  1.46s/it]

18.173855922925334 % at  107.92207278807957 minutes.


 18%|█████████▉                                            | 4200/22835 [1:49:12<8:14:57,  1.59s/it]

18.392818042478652 % at  109.21626347700754 minutes.


 19%|██████████                                            | 4250/22835 [1:50:29<7:08:12,  1.38s/it]

18.61178016203197 % at  110.49358653624853 minutes.


 19%|██████████▏                                           | 4300/22835 [1:51:45<7:30:50,  1.46s/it]

18.830742281585287 % at  111.75460866689681 minutes.


 19%|██████████▎                                           | 4350/22835 [1:53:02<9:07:16,  1.78s/it]

19.049704401138605 % at  113.03706535100937 minutes.


 19%|██████████▍                                           | 4400/22835 [1:54:19<7:22:22,  1.44s/it]

19.26866652069192 % at  114.32433897654215 minutes.


 19%|██████████▌                                           | 4450/22835 [1:55:34<7:31:31,  1.47s/it]

19.487628640245237 % at  115.57606057326 minutes.


 20%|██████████▋                                           | 4500/22835 [1:56:52<7:33:46,  1.48s/it]

19.706590759798555 % at  116.87113464673361 minutes.


 20%|██████████▊                                           | 4550/22835 [1:58:11<7:36:17,  1.50s/it]

19.92555287935187 % at  118.19987730979919 minutes.


 20%|██████████▉                                           | 4600/22835 [1:59:27<7:17:33,  1.44s/it]

20.144514998905187 % at  119.46251619656881 minutes.


 20%|██████████▉                                           | 4650/22835 [2:00:43<7:36:42,  1.51s/it]

20.363477118458505 % at  120.73282219171524 minutes.


 21%|███████████                                           | 4700/22835 [2:02:01<8:02:15,  1.60s/it]

20.582439238011823 % at  122.02331266800563 minutes.


 21%|███████████▏                                          | 4750/22835 [2:03:16<8:04:53,  1.61s/it]

20.80140135756514 % at  123.27063400745392 minutes.


 21%|███████████▎                                          | 4800/22835 [2:04:33<7:00:51,  1.40s/it]

21.020363477118458 % at  124.55322403907776 minutes.


 21%|███████████▍                                          | 4850/22835 [2:05:48<7:24:18,  1.48s/it]

21.239325596671776 % at  125.80571234226227 minutes.


 21%|███████████▌                                          | 4900/22835 [2:07:02<7:04:25,  1.42s/it]

21.458287716225094 % at  127.04301644166311 minutes.


 22%|███████████▋                                          | 4950/22835 [2:08:20<7:29:54,  1.51s/it]

21.67724983577841 % at  128.33819052378337 minutes.


 22%|███████████▊                                          | 5000/22835 [2:09:36<7:26:29,  1.50s/it]

21.896211955331726 % at  129.61193004846572 minutes.


 22%|███████████▉                                          | 5050/22835 [2:10:53<7:21:04,  1.49s/it]

22.115174074885044 % at  130.88485285043717 minutes.


 22%|████████████                                          | 5100/22835 [2:12:08<7:39:09,  1.55s/it]

22.33413619443836 % at  132.13895791769028 minutes.


 23%|████████████▏                                         | 5150/22835 [2:13:27<7:18:57,  1.49s/it]

22.55309831399168 % at  133.45114963452022 minutes.


 23%|████████████▎                                         | 5200/22835 [2:14:42<7:46:19,  1.59s/it]

22.772060433544997 % at  134.7085215528806 minutes.


 23%|████████████▍                                         | 5250/22835 [2:15:55<7:27:20,  1.53s/it]

22.991022553098315 % at  135.91735736131668 minutes.


 23%|████████████▌                                         | 5300/22835 [2:17:10<6:59:44,  1.44s/it]

23.209984672651633 % at  137.17082905371984 minutes.


 23%|████████████▋                                         | 5350/22835 [2:18:24<6:46:31,  1.40s/it]

23.42894679220495 % at  138.40544967254002 minutes.


 24%|████████████▊                                         | 5400/22835 [2:19:38<7:09:05,  1.48s/it]

23.647908911758268 % at  139.6433038075765 minutes.


 24%|████████████▉                                         | 5450/22835 [2:20:50<7:10:38,  1.49s/it]

23.866871031311582 % at  140.84195569753646 minutes.


 24%|█████████████                                         | 5500/22835 [2:22:02<6:52:32,  1.43s/it]

24.0858331508649 % at  142.04617457389833 minutes.


 24%|█████████████                                         | 5550/22835 [2:23:15<8:38:23,  1.80s/it]

24.304795270418218 % at  143.25182687044145 minutes.


 25%|█████████████▏                                        | 5600/22835 [2:24:25<6:55:47,  1.45s/it]

24.523757389971536 % at  144.41889362335206 minutes.


 25%|█████████████▎                                        | 5614/22835 [2:24:45<6:29:28,  1.36s/it]

Error found at node: P0CZ20 ... removing from biofeatures


 25%|█████████████▎                                        | 5650/22835 [2:25:38<6:20:46,  1.33s/it]

24.742719509524854 % at  145.64029681285223 minutes.


 25%|█████████████▍                                        | 5700/22835 [2:26:50<6:20:02,  1.33s/it]

24.96168162907817 % at  146.84919929504395 minutes.


 25%|█████████████▌                                        | 5750/22835 [2:28:03<7:33:03,  1.59s/it]

25.180643748631486 % at  148.06095193624498 minutes.


 25%|█████████████▋                                        | 5800/22835 [2:29:19<7:48:04,  1.65s/it]

25.399605868184803 % at  149.32394084135692 minutes.


 26%|█████████████▊                                        | 5850/22835 [2:30:31<6:30:11,  1.38s/it]

25.61856798773812 % at  150.52574291626613 minutes.


 26%|█████████████▉                                        | 5900/22835 [2:31:44<6:25:17,  1.37s/it]

25.83753010729144 % at  151.7470294356346 minutes.


 26%|██████████████                                        | 5950/22835 [2:32:58<7:15:01,  1.55s/it]

26.056492226844757 % at  152.97538302342096 minutes.


 26%|██████████████▏                                       | 6000/22835 [2:34:13<6:32:25,  1.40s/it]

26.275454346398075 % at  154.2286213715871 minutes.


 26%|██████████████▎                                       | 6050/22835 [2:35:26<6:33:26,  1.41s/it]

26.494416465951392 % at  155.44560765028 minutes.


 27%|██████████████▍                                       | 6100/22835 [2:36:40<6:53:41,  1.48s/it]

26.71337858550471 % at  156.67962822914123 minutes.


 27%|██████████████▌                                       | 6150/22835 [2:37:55<6:54:34,  1.49s/it]

26.932340705058028 % at  157.9253161430359 minutes.


 27%|██████████████▋                                       | 6200/22835 [2:39:11<6:57:20,  1.51s/it]

27.15130282461134 % at  159.19012182156246 minutes.


 27%|██████████████▊                                       | 6250/22835 [2:40:22<6:36:12,  1.43s/it]

27.370264944164656 % at  160.37200608650843 minutes.


 28%|██████████████▉                                       | 6300/22835 [2:41:37<6:48:19,  1.48s/it]

27.589227063717974 % at  161.6304280638695 minutes.


 28%|███████████████                                       | 6350/22835 [2:42:55<7:26:25,  1.62s/it]

27.808189183271292 % at  162.92923568487168 minutes.


 28%|███████████████▏                                      | 6400/22835 [2:44:15<6:31:32,  1.43s/it]

28.02715130282461 % at  164.26602881352107 minutes.


 28%|███████████████▎                                      | 6450/22835 [2:45:31<6:46:21,  1.49s/it]

28.246113422377928 % at  165.51803375879925 minutes.


 28%|███████████████▎                                      | 6500/22835 [2:46:46<6:48:17,  1.50s/it]

28.465075541931245 % at  166.7708220799764 minutes.


 29%|███████████████▍                                      | 6550/22835 [2:48:01<6:22:42,  1.41s/it]

28.684037661484563 % at  168.01911014318466 minutes.


 29%|███████████████▌                                      | 6600/22835 [2:49:16<7:23:47,  1.64s/it]

28.90299978103788 % at  169.2829657673836 minutes.


 29%|███████████████▋                                      | 6650/22835 [2:50:27<6:06:21,  1.36s/it]

29.1219619005912 % at  170.46671680609384 minutes.


 29%|███████████████▊                                      | 6700/22835 [2:51:41<6:52:37,  1.53s/it]

29.340924020144517 % at  171.68388642072676 minutes.


 30%|███████████████▉                                      | 6750/22835 [2:52:57<6:14:35,  1.40s/it]

29.559886139697834 % at  172.963942972819 minutes.


 30%|████████████████                                      | 6800/22835 [2:54:11<6:43:32,  1.51s/it]

29.778848259251152 % at  174.18676291306812 minutes.


 30%|████████████████▏                                     | 6850/22835 [2:55:30<7:01:17,  1.58s/it]

29.997810378804466 % at  175.5117053627968 minutes.


 30%|████████████████▎                                     | 6900/22835 [2:56:38<6:05:23,  1.38s/it]

30.216772498357784 % at  176.6379697839419 minutes.


 30%|████████████████▍                                     | 6950/22835 [2:57:50<6:43:09,  1.52s/it]

30.435734617911102 % at  177.83802175124487 minutes.


 31%|████████████████▌                                     | 7000/22835 [2:59:02<6:19:37,  1.44s/it]

30.65469673746442 % at  179.03769037326177 minutes.


 31%|████████████████▋                                     | 7050/22835 [3:00:15<6:30:29,  1.48s/it]

30.873658857017737 % at  180.26117701530455 minutes.


 31%|████████████████▊                                     | 7100/22835 [3:01:28<6:28:52,  1.48s/it]

31.092620976571055 % at  181.47626318534216 minutes.


 31%|████████████████▉                                     | 7150/22835 [3:02:44<7:59:25,  1.83s/it]

31.31158309612437 % at  182.73478516340256 minutes.


 32%|█████████████████                                     | 7200/22835 [3:03:56<6:16:36,  1.45s/it]

31.530545215677684 % at  183.94305427471798 minutes.


 32%|█████████████████▏                                    | 7250/22835 [3:05:06<6:01:20,  1.39s/it]

31.749507335231 % at  185.10197055737177 minutes.


 32%|█████████████████▎                                    | 7300/22835 [3:06:18<6:22:33,  1.48s/it]

31.96846945478432 % at  186.31258980433145 minutes.


 32%|█████████████████▍                                    | 7350/22835 [3:07:28<5:39:37,  1.32s/it]

32.18743157433764 % at  187.4780231277148 minutes.


 32%|█████████████████▍                                    | 7400/22835 [3:08:40<6:14:24,  1.46s/it]

32.406393693890955 % at  188.6779417594274 minutes.


 33%|█████████████████▌                                    | 7450/22835 [3:09:52<6:30:05,  1.52s/it]

32.62535581344427 % at  189.8683431784312 minutes.


 33%|█████████████████▋                                    | 7500/22835 [3:11:05<6:29:16,  1.52s/it]

32.84431793299759 % at  191.09097977876664 minutes.


 33%|█████████████████▊                                    | 7550/22835 [3:12:17<5:42:35,  1.34s/it]

33.06328005255091 % at  192.28866494894027 minutes.


 33%|█████████████████▉                                    | 7600/22835 [3:13:26<5:58:36,  1.41s/it]

33.282242172104226 % at  193.44006414016087 minutes.


 34%|██████████████████                                    | 7650/22835 [3:14:38<6:16:46,  1.49s/it]

33.501204291657544 % at  194.648166569074 minutes.


 34%|██████████████████▏                                   | 7700/22835 [3:15:50<6:11:12,  1.47s/it]

33.72016641121086 % at  195.84868524074554 minutes.


 34%|██████████████████▎                                   | 7750/22835 [3:17:00<5:18:11,  1.27s/it]

33.93912853076418 % at  197.0093516230583 minutes.


 34%|██████████████████▍                                   | 7800/22835 [3:18:10<5:46:24,  1.38s/it]

34.1580906503175 % at  198.1749516248703 minutes.


 34%|██████████████████▌                                   | 7850/22835 [3:19:20<5:32:30,  1.33s/it]

34.377052769870815 % at  199.33750145037968 minutes.


 35%|██████████████████▋                                   | 7900/22835 [3:20:30<5:50:40,  1.41s/it]

34.59601488942413 % at  200.50415151119233 minutes.


 35%|██████████████████▊                                   | 7950/22835 [3:21:42<5:46:56,  1.40s/it]

34.81497700897745 % at  201.70095330079397 minutes.


 35%|██████████████████▉                                   | 8000/22835 [3:22:53<5:55:02,  1.44s/it]

35.03393912853077 % at  202.88530437548954 minutes.


 35%|███████████████████                                   | 8050/22835 [3:24:06<6:29:46,  1.58s/it]

35.252901248084086 % at  204.11127450068793 minutes.


 35%|███████████████████▏                                  | 8092/22835 [3:25:09<5:39:51,  1.38s/it]

Error found at node: P01233 ... removing from biofeatures


 35%|███████████████████▏                                  | 8097/22835 [3:25:16<5:22:19,  1.31s/it]

Error found at node: Q3TI53 ... removing from biofeatures


 35%|███████████████████▏                                  | 8098/22835 [3:25:17<4:57:10,  1.21s/it]

Error found at node: Q9P0W5 ... removing from biofeatures


 35%|███████████████████▏                                  | 8100/22835 [3:25:20<5:36:38,  1.37s/it]

35.4718633676374 % at  205.34114484389622 minutes.


 36%|███████████████████▎                                  | 8150/22835 [3:26:40<6:18:39,  1.55s/it]

35.690825487190715 % at  206.67583785057067 minutes.


 36%|███████████████████▍                                  | 8200/22835 [3:27:58<6:33:13,  1.61s/it]

35.90978760674403 % at  207.98287927309673 minutes.


 36%|███████████████████▌                                  | 8250/22835 [3:29:17<6:08:04,  1.51s/it]

36.12874972629735 % at  209.29540434678395 minutes.


 36%|███████████████████▋                                  | 8300/22835 [3:30:35<6:22:26,  1.58s/it]

36.34771184585067 % at  210.59222851594288 minutes.


 37%|███████████████████▋                                  | 8350/22835 [3:31:53<5:45:44,  1.43s/it]

36.566673965403986 % at  211.90018666585286 minutes.


 37%|███████████████████▊                                  | 8400/22835 [3:33:17<7:30:00,  1.87s/it]

36.785636084957304 % at  213.2854325612386 minutes.


 37%|███████████████████▉                                  | 8450/22835 [3:34:32<5:49:37,  1.46s/it]

37.00459820451062 % at  214.53702081839245 minutes.


 37%|████████████████████                                  | 8500/22835 [3:35:50<6:42:45,  1.69s/it]

37.22356032406394 % at  215.84284550348917 minutes.


 37%|████████████████████▏                                 | 8550/22835 [3:37:06<5:18:34,  1.34s/it]

37.44252244361726 % at  217.10121748050054 minutes.


 38%|████████████████████▎                                 | 8600/22835 [3:38:19<5:20:56,  1.35s/it]

37.661484563170575 % at  218.3282209912936 minutes.


 38%|████████████████████▍                                 | 8650/22835 [3:39:31<5:33:01,  1.41s/it]

37.88044668272389 % at  219.52153924703597 minutes.


 38%|████████████████████▌                                 | 8700/22835 [3:40:43<5:26:02,  1.38s/it]

38.09940880227721 % at  220.72730820973715 minutes.


 38%|████████████████████▋                                 | 8750/22835 [3:41:54<5:43:51,  1.46s/it]

38.31837092183052 % at  221.91582619349163 minutes.


 38%|████████████████████▊                                 | 8784/22835 [3:42:42<5:17:35,  1.36s/it]

Error found at node: Q5VU13 ... removing from biofeatures


 39%|████████████████████▊                                 | 8800/22835 [3:43:06<5:49:17,  1.49s/it]

38.53733304138384 % at  223.10232739051182 minutes.


 39%|████████████████████▉                                 | 8850/22835 [3:44:21<5:29:35,  1.41s/it]

38.75629516093716 % at  224.35336561600369 minutes.


 39%|█████████████████████                                 | 8900/22835 [3:45:34<5:30:33,  1.42s/it]

38.975257280490474 % at  225.58255258401235 minutes.


 39%|█████████████████████▏                                | 8950/22835 [3:46:48<5:22:31,  1.39s/it]

39.19421940004379 % at  226.8155397772789 minutes.


 39%|█████████████████████▎                                | 9000/22835 [3:47:57<5:07:14,  1.33s/it]

39.41318151959711 % at  227.96077194611232 minutes.


 40%|█████████████████████▍                                | 9050/22835 [3:49:06<5:12:49,  1.36s/it]

39.63214363915043 % at  229.10987100203832 minutes.


 40%|█████████████████████▌                                | 9100/22835 [3:50:14<5:32:55,  1.45s/it]

39.85110575870374 % at  230.24118570884067 minutes.


 40%|█████████████████████▋                                | 9150/22835 [3:51:26<5:05:52,  1.34s/it]

40.070067878257056 % at  231.4420710603396 minutes.


 40%|█████████████████████▊                                | 9200/22835 [3:52:38<5:28:19,  1.44s/it]

40.289029997810374 % at  232.64202303091685 minutes.


 41%|█████████████████████▊                                | 9250/22835 [3:53:53<6:08:38,  1.63s/it]

40.50799211736369 % at  233.89386129776636 minutes.


 41%|█████████████████████▉                                | 9300/22835 [3:55:07<5:34:54,  1.48s/it]

40.72695423691701 % at  235.12688182195026 minutes.


 41%|██████████████████████                                | 9350/22835 [3:56:23<4:50:25,  1.29s/it]

40.94591635647033 % at  236.38630385398864 minutes.


 41%|██████████████████████▏                               | 9372/22835 [3:56:53<4:24:13,  1.18s/it]

Error found at node: P62204 ... removing from biofeatures


 41%|██████████████████████▏                               | 9400/22835 [3:57:32<4:48:58,  1.29s/it]

41.164878476023645 % at  237.53690299987792 minutes.


 41%|██████████████████████▎                               | 9450/22835 [3:58:41<5:08:53,  1.38s/it]

41.38384059557696 % at  238.69815275271733 minutes.


 42%|██████████████████████▍                               | 9500/22835 [3:59:53<5:31:11,  1.49s/it]

41.60280271513028 % at  239.89440451065698 minutes.


 42%|██████████████████████▌                               | 9550/22835 [4:01:11<5:17:05,  1.43s/it]

41.8217648346836 % at  241.19162870645522 minutes.


 42%|██████████████████████▋                               | 9600/22835 [4:02:22<5:32:25,  1.51s/it]

42.040726954236916 % at  242.37704650561014 minutes.


 42%|██████████████████████▊                               | 9650/22835 [4:03:34<5:17:47,  1.45s/it]

42.259689073790234 % at  243.56979806423186 minutes.


 42%|██████████████████████▉                               | 9700/22835 [4:04:43<4:55:33,  1.35s/it]

42.47865119334355 % at  244.7190137942632 minutes.


 43%|███████████████████████                               | 9750/22835 [4:05:50<5:03:15,  1.39s/it]

42.69761331289687 % at  245.83891117970148 minutes.


 43%|███████████████████████▏                              | 9800/22835 [4:07:03<5:13:32,  1.44s/it]

42.91657543245019 % at  247.05573077996573 minutes.


 43%|███████████████████████▎                              | 9850/22835 [4:08:13<4:49:20,  1.34s/it]

43.135537552003505 % at  248.2285145243009 minutes.


 43%|███████████████████████▍                              | 9900/22835 [4:09:24<5:24:42,  1.51s/it]

43.35449967155682 % at  249.4104154586792 minutes.


 44%|███████████████████████▌                              | 9950/22835 [4:10:36<5:16:14,  1.47s/it]

43.573461791110134 % at  250.6143343170484 minutes.


 44%|███████████████████████▏                             | 10000/22835 [4:11:50<5:07:15,  1.44s/it]

43.79242391066345 % at  251.844538017114 minutes.


 44%|███████████████████████▎                             | 10050/22835 [4:13:05<5:23:59,  1.52s/it]

44.01138603021677 % at  253.09074262777963 minutes.


 44%|███████████████████████▍                             | 10100/22835 [4:14:16<5:11:02,  1.47s/it]

44.23034814977009 % at  254.2739603082339 minutes.


 44%|███████████████████████▌                             | 10150/22835 [4:15:28<5:06:50,  1.45s/it]

44.449310269323405 % at  255.47301222085952 minutes.


 45%|███████████████████████▋                             | 10200/22835 [4:16:40<6:06:30,  1.74s/it]

44.66827238887672 % at  256.6731641968091 minutes.


 45%|███████████████████████▊                             | 10250/22835 [4:17:51<4:31:55,  1.30s/it]

44.88723450843004 % at  257.860615448157 minutes.


 45%|███████████████████████▉                             | 10300/22835 [4:19:02<4:30:56,  1.30s/it]

45.10619662798336 % at  259.0490167538325 minutes.


 45%|████████████████████████                             | 10350/22835 [4:20:10<4:29:48,  1.30s/it]

45.325158747536676 % at  260.1764312386513 minutes.


 46%|████████████████████████▏                            | 10400/22835 [4:21:20<4:54:56,  1.42s/it]

45.544120867089994 % at  261.3498483578364 minutes.


 46%|████████████████████████▎                            | 10450/22835 [4:22:33<4:55:19,  1.43s/it]

45.76308298664331 % at  262.55461726586026 minutes.


 46%|████████████████████████▎                            | 10500/22835 [4:23:44<4:37:28,  1.35s/it]

45.98204510619663 % at  263.74568539063137 minutes.


 46%|████████████████████████▍                            | 10550/22835 [4:24:56<5:22:10,  1.57s/it]

46.20100722574995 % at  264.9413371125857 minutes.


 46%|████████████████████████▌                            | 10600/22835 [4:26:07<4:40:07,  1.37s/it]

46.419969345303265 % at  266.1309384862582 minutes.


 47%|████████████████████████▋                            | 10650/22835 [4:27:21<4:36:49,  1.36s/it]

46.63893146485658 % at  267.36265893379846 minutes.


 47%|████████████████████████▊                            | 10700/22835 [4:28:33<5:55:33,  1.76s/it]

46.8578935844099 % at  268.55667723019917 minutes.


 47%|████████████████████████▊                            | 10708/22835 [4:28:43<4:14:47,  1.26s/it]

Error found at node: P33765 ... removing from biofeatures


 47%|████████████████████████▉                            | 10750/22835 [4:29:41<4:50:19,  1.44s/it]

47.07685570396322 % at  269.69399228096006 minutes.


 47%|█████████████████████████                            | 10800/22835 [4:30:47<4:30:12,  1.35s/it]

47.295817823516536 % at  270.7952552715937 minutes.


 48%|█████████████████████████▏                           | 10850/22835 [4:31:57<4:34:26,  1.37s/it]

47.514779943069854 % at  271.95757174889246 minutes.


 48%|█████████████████████████▎                           | 10900/22835 [4:33:05<4:16:42,  1.29s/it]

47.733742062623165 % at  273.09347005287805 minutes.


 48%|█████████████████████████▍                           | 10950/22835 [4:34:14<4:12:46,  1.28s/it]

47.95270418217648 % at  274.2443858822187 minutes.


 48%|█████████████████████████▌                           | 11000/22835 [4:35:24<4:55:50,  1.50s/it]

48.1716663017298 % at  275.41508617798485 minutes.


 48%|█████████████████████████▋                           | 11050/22835 [4:36:35<4:39:13,  1.42s/it]

48.39062842128312 % at  276.5969537734985 minutes.


 49%|█████████████████████████▊                           | 11100/22835 [4:37:45<4:38:44,  1.43s/it]

48.609590540836436 % at  277.7582035263379 minutes.


 49%|█████████████████████████▉                           | 11150/22835 [4:38:56<4:26:46,  1.37s/it]

48.828552660389754 % at  278.9338541030884 minutes.


 49%|█████████████████████████▉                           | 11200/22835 [4:40:06<4:24:37,  1.36s/it]

49.04751477994307 % at  280.11038806438444 minutes.


 49%|██████████████████████████                           | 11250/22835 [4:41:21<4:50:55,  1.51s/it]

49.26647689949639 % at  281.3581927657127 minutes.


 49%|██████████████████████████▏                          | 11300/22835 [4:42:31<4:20:50,  1.36s/it]

49.48543901904971 % at  282.5210926175117 minutes.


 50%|██████████████████████████▎                          | 11350/22835 [4:43:40<4:22:25,  1.37s/it]

49.704401138603025 % at  283.67855881849925 minutes.


 50%|██████████████████████████▍                          | 11400/22835 [4:44:50<4:10:40,  1.32s/it]

49.92336325815634 % at  284.83909186522163 minutes.


 50%|██████████████████████████▌                          | 11450/22835 [4:45:58<4:25:12,  1.40s/it]

50.14232537770965 % at  285.97450680732726 minutes.


 50%|██████████████████████████▌                          | 11451/22835 [4:45:59<4:02:06,  1.28s/it]

Error found at node: P62161 ... removing from biofeatures


 50%|██████████████████████████▋                          | 11500/22835 [4:47:07<4:04:07,  1.29s/it]

50.36128749726297 % at  287.1176055192947 minutes.


 51%|██████████████████████████▊                          | 11550/22835 [4:48:14<4:06:36,  1.31s/it]

50.58024961681629 % at  288.2463534156481 minutes.


 51%|██████████████████████████▉                          | 11600/22835 [4:49:22<4:18:26,  1.38s/it]

50.79921173636961 % at  289.3777681271235 minutes.


 51%|███████████████████████████                          | 11650/22835 [4:50:33<4:19:49,  1.39s/it]

51.018173855922925 % at  290.55338536898296 minutes.


 51%|███████████████████████████▏                         | 11700/22835 [4:51:43<4:10:09,  1.35s/it]

51.23713597547624 % at  291.73331952492396 minutes.


 51%|███████████████████████████▎                         | 11750/22835 [4:52:52<4:10:02,  1.35s/it]

51.45609809502956 % at  292.8827352643013 minutes.


 52%|███████████████████████████▍                         | 11800/22835 [4:54:00<4:34:50,  1.49s/it]

51.67506021458288 % at  294.01339993874234 minutes.


 52%|███████████████████████████▌                         | 11850/22835 [4:55:09<4:25:14,  1.45s/it]

51.89402233413619 % at  295.161982301871 minutes.


 52%|███████████████████████████▌                         | 11900/22835 [4:56:17<4:05:30,  1.35s/it]

52.11298445368951 % at  296.2875633438428 minutes.


 52%|███████████████████████████▋                         | 11942/22835 [4:57:20<8:12:07,  2.71s/it]

Error found at node: O60930 ... removing from biofeatures


 52%|███████████████████████████▋                         | 11950/22835 [4:57:31<4:18:45,  1.43s/it]

52.331946573242824 % at  297.51725034713746 minutes.


 53%|███████████████████████████▊                         | 12000/22835 [4:58:42<4:19:20,  1.44s/it]

52.55090869279615 % at  298.7033681909243 minutes.


 53%|███████████████████████████▉                         | 12050/22835 [4:59:50<3:59:26,  1.33s/it]

52.76987081234946 % at  299.8469502647718 minutes.


 53%|████████████████████████████                         | 12100/22835 [5:00:59<4:02:40,  1.36s/it]

52.988832931902785 % at  300.99393253326417 minutes.


 53%|████████████████████████████▏                        | 12150/22835 [5:02:08<4:05:44,  1.38s/it]

53.207795051456095 % at  302.148881928126 minutes.


 53%|████████████████████████████▎                        | 12200/22835 [5:03:16<3:50:50,  1.30s/it]

53.42675717100942 % at  303.2787132183711 minutes.


 54%|████████████████████████████▍                        | 12250/22835 [5:04:25<3:50:23,  1.31s/it]

53.64571929056273 % at  304.428712328275 minutes.


 54%|████████████████████████████▌                        | 12300/22835 [5:05:35<4:17:33,  1.47s/it]

53.864681410116056 % at  305.5886953433355 minutes.


 54%|████████████████████████████▋                        | 12350/22835 [5:06:50<4:49:38,  1.66s/it]

54.08364352966937 % at  306.8365500489871 minutes.


 54%|████████████████████████████▊                        | 12400/22835 [5:07:56<4:04:20,  1.40s/it]

54.30260564922268 % at  307.94011316696805 minutes.


 55%|████████████████████████████▉                        | 12450/22835 [5:09:02<3:34:27,  1.24s/it]

54.521567768776 % at  309.0426262259483 minutes.


 55%|█████████████████████████████                        | 12500/22835 [5:10:11<3:55:58,  1.37s/it]

54.74052988832931 % at  310.19802564779917 minutes.


 55%|█████████████████████████████▏                       | 12550/22835 [5:11:21<3:57:32,  1.39s/it]

54.95949200788264 % at  311.3629422744115 minutes.


 55%|█████████████████████████████▏                       | 12600/22835 [5:12:30<3:59:22,  1.40s/it]

55.17845412743595 % at  312.50717438459395 minutes.


 55%|█████████████████████████████▎                       | 12650/22835 [5:13:38<3:50:23,  1.36s/it]

55.39741624698927 % at  313.6420726339022 minutes.


 56%|█████████████████████████████▍                       | 12700/22835 [5:14:49<4:15:57,  1.52s/it]

55.616378366542584 % at  314.82437359491985 minutes.


 56%|█████████████████████████████▌                       | 12750/22835 [5:16:16<4:19:35,  1.54s/it]

55.83534048609591 % at  316.2741231799126 minutes.


 56%|█████████████████████████████▋                       | 12800/22835 [5:17:44<4:40:45,  1.68s/it]

56.05430260564922 % at  317.73675683736803 minutes.


 56%|█████████████████████████████▊                       | 12850/22835 [5:19:02<4:41:47,  1.69s/it]

56.273264725202544 % at  319.03874797026316 minutes.


 56%|█████████████████████████████▉                       | 12900/22835 [5:20:25<4:25:14,  1.60s/it]

56.492226844755855 % at  320.42829411824545 minutes.


 56%|█████████████████████████████▉                       | 12901/22835 [5:20:27<4:27:03,  1.61s/it]

Error found at node: A8MYJ7 ... removing from biofeatures


 57%|██████████████████████████████                       | 12950/22835 [5:21:45<4:28:13,  1.63s/it]

Error found at node: Q6P549 ... removing from biofeatures
56.71118896430918 % at  321.76543726523715 minutes.


 57%|██████████████████████████████▏                      | 13000/22835 [5:23:09<5:02:06,  1.84s/it]

56.93015108386249 % at  323.16621738672256 minutes.


 57%|██████████████████████████████▎                      | 13050/22835 [5:24:33<4:53:23,  1.80s/it]

57.149113203415816 % at  324.565547422568 minutes.


 57%|██████████████████████████████▍                      | 13100/22835 [5:25:54<4:23:16,  1.62s/it]

57.368075322969126 % at  325.9044239997864 minutes.


 58%|██████████████████████████████▌                      | 13150/22835 [5:27:13<3:38:16,  1.35s/it]

57.58703744252245 % at  327.2273663361867 minutes.


 58%|██████████████████████████████▋                      | 13200/22835 [5:28:35<3:50:52,  1.44s/it]

57.80599956207576 % at  328.5845772981644 minutes.


 58%|██████████████████████████████▋                      | 13217/22835 [5:29:01<3:36:22,  1.35s/it]

Error found at node: Q2TAL8 ... removing from biofeatures


 58%|██████████████████████████████▊                      | 13250/22835 [5:29:55<4:32:15,  1.70s/it]

58.02496168162909 % at  329.92458727757133 minutes.


 58%|██████████████████████████████▊                      | 13300/22835 [5:31:17<4:12:38,  1.59s/it]

58.2439238011824 % at  331.29981593290967 minutes.


 58%|██████████████████████████████▉                      | 13350/22835 [5:32:42<3:59:38,  1.52s/it]

58.46288592073571 % at  332.7103966116905 minutes.


 59%|███████████████████████████████                      | 13400/22835 [5:34:09<3:58:54,  1.52s/it]

58.68184804028903 % at  334.1656465133031 minutes.


 59%|███████████████████████████████▏                     | 13450/22835 [5:35:39<4:06:36,  1.58s/it]

58.900810159842344 % at  335.6649156014125 minutes.


 59%|███████████████████████████████▎                     | 13500/22835 [5:37:01<4:38:31,  1.79s/it]

59.11977227939567 % at  337.02689350446065 minutes.


 59%|███████████████████████████████▍                     | 13550/22835 [5:38:21<4:19:42,  1.68s/it]

59.33873439894898 % at  338.36535338958106 minutes.


 60%|███████████████████████████████▌                     | 13600/22835 [5:39:47<3:36:42,  1.41s/it]

59.557696518502304 % at  339.7942517836889 minutes.


 60%|███████████████████████████████▋                     | 13650/22835 [5:41:13<4:07:31,  1.62s/it]

59.776658638055615 % at  341.22430024941764 minutes.


 60%|███████████████████████████████▊                     | 13688/22835 [5:42:13<4:05:24,  1.61s/it]

Error found at node: Q9Y2B4 ... removing from biofeatures


 60%|███████████████████████████████▊                     | 13697/22835 [5:42:28<3:56:13,  1.55s/it]

Error found at node: P30042 ... removing from biofeatures


 60%|███████████████████████████████▊                     | 13700/22835 [5:42:34<4:14:29,  1.67s/it]

59.99562075760893 % at  342.5678437590599 minutes.


 60%|███████████████████████████████▉                     | 13750/22835 [5:43:55<3:36:31,  1.43s/it]

60.21458287716225 % at  343.9325884898504 minutes.


 60%|████████████████████████████████                     | 13800/22835 [5:45:20<4:20:28,  1.73s/it]

60.43354499671557 % at  345.3375688433647 minutes.


 61%|████████████████████████████████▏                    | 13850/22835 [5:46:45<4:14:26,  1.70s/it]

60.652507116268886 % at  346.7653171737989 minutes.


 61%|████████████████████████████████▎                    | 13900/22835 [5:48:11<3:47:41,  1.53s/it]

60.871469235822204 % at  348.196915725867 minutes.


 61%|████████████████████████████████▍                    | 13950/22835 [5:49:38<4:29:40,  1.82s/it]

61.09043135537552 % at  349.64316511154175 minutes.


 61%|████████████████████████████████▍                    | 14000/22835 [5:51:13<5:01:19,  2.05s/it]

61.30939347492884 % at  351.226455672582 minutes.


 62%|████████████████████████████████▌                    | 14050/22835 [5:52:45<4:01:32,  1.65s/it]

61.52835559448215 % at  352.7609101017316 minutes.


 62%|████████████████████████████████▋                    | 14100/22835 [5:54:18<3:47:54,  1.57s/it]

61.747317714035475 % at  354.30081484715146 minutes.


 62%|████████████████████████████████▊                    | 14150/22835 [5:55:43<3:51:59,  1.60s/it]

61.966279833588786 % at  355.7189292907715 minutes.


 62%|████████████████████████████████▉                    | 14175/22835 [5:56:24<4:18:12,  1.79s/it]

Error found at node: Q61937 ... removing from biofeatures


 62%|████████████████████████████████▉                    | 14189/22835 [5:56:50<4:17:56,  1.79s/it]

Error found at node: Q6TDU7 ... removing from biofeatures


 62%|████████████████████████████████▉                    | 14200/22835 [5:57:11<4:06:06,  1.71s/it]

62.18524195314211 % at  357.18487980763115 minutes.


 62%|█████████████████████████████████                    | 14250/22835 [5:58:33<3:27:59,  1.45s/it]

62.40420407269542 % at  358.5623752593994 minutes.


 63%|█████████████████████████████████▏                   | 14287/22835 [5:59:29<3:18:51,  1.40s/it]

Error found at node: O60344 ... removing from biofeatures


 63%|█████████████████████████████████▏                   | 14300/22835 [5:59:48<3:22:41,  1.42s/it]

62.62316619224874 % at  359.81274678309757 minutes.


 63%|█████████████████████████████████▏                   | 14303/22835 [5:59:53<3:34:48,  1.51s/it]

Error found at node: Q3BBV1 ... removing from biofeatures


 63%|█████████████████████████████████▎                   | 14350/22835 [6:01:10<3:55:15,  1.66s/it]

62.84212831180206 % at  361.17185784975686 minutes.


 63%|█████████████████████████████████▍                   | 14400/22835 [6:02:33<4:31:25,  1.93s/it]

63.06109043135537 % at  362.56293741464617 minutes.


 63%|█████████████████████████████████▍                   | 14428/22835 [6:03:24<3:27:42,  1.48s/it]

Error found at node: Q580R0 ... removing from biofeatures


 63%|█████████████████████████████████▌                   | 14450/22835 [6:04:00<4:17:03,  1.84s/it]

63.28005255090869 % at  364.0125036597252 minutes.


 63%|█████████████████████████████████▋                   | 14500/22835 [6:05:20<4:00:53,  1.73s/it]

63.499014670462 % at  365.34667996962867 minutes.


 64%|█████████████████████████████████▊                   | 14550/22835 [6:06:37<3:31:37,  1.53s/it]

63.71797679001533 % at  366.62156955401105 minutes.


 64%|█████████████████████████████████▉                   | 14600/22835 [6:08:00<3:43:47,  1.63s/it]

63.93693890956864 % at  368.00844887892407 minutes.


 64%|██████████████████████████████████                   | 14650/22835 [6:09:14<3:17:22,  1.45s/it]

64.15590102912196 % at  369.24465291897457 minutes.


 64%|██████████████████████████████████                   | 14700/22835 [6:10:37<3:54:18,  1.73s/it]

64.37486314867527 % at  370.6202816009521 minutes.


 65%|██████████████████████████████████▏                  | 14750/22835 [6:11:56<3:36:03,  1.60s/it]

64.5938252682286 % at  371.94247389237086 minutes.


 65%|██████████████████████████████████▎                  | 14800/22835 [6:13:15<3:14:50,  1.45s/it]

64.81278738778191 % at  373.259332549572 minutes.


 65%|██████████████████████████████████▍                  | 14850/22835 [6:14:37<3:21:46,  1.52s/it]

65.03174950733523 % at  374.6305443048477 minutes.


 65%|██████████████████████████████████▌                  | 14900/22835 [6:16:02<3:21:13,  1.52s/it]

65.25071162688855 % at  376.03847483793896 minutes.


 65%|██████████████████████████████████▋                  | 14950/22835 [6:17:26<3:19:25,  1.52s/it]

65.46967374644187 % at  377.4449386159579 minutes.


 66%|██████████████████████████████████▊                  | 15000/22835 [6:18:52<3:45:54,  1.73s/it]

65.68863586599518 % at  378.88330422242484 minutes.


 66%|██████████████████████████████████▉                  | 15050/22835 [6:20:19<3:16:48,  1.52s/it]

65.9075979855485 % at  380.3321704228719 minutes.


 66%|███████████████████████████████████                  | 15100/22835 [6:21:46<3:51:59,  1.80s/it]

66.12656010510182 % at  381.7718861023585 minutes.


 66%|███████████████████████████████████▏                 | 15150/22835 [6:23:10<3:35:35,  1.68s/it]

66.34552222465514 % at  383.1722828666369 minutes.


 67%|███████████████████████████████████▎                 | 15200/22835 [6:24:28<3:06:32,  1.47s/it]

66.56448434420845 % at  384.4690403699875 minutes.


 67%|███████████████████████████████████▍                 | 15250/22835 [6:25:49<3:15:10,  1.54s/it]

66.78344646376176 % at  385.81860089699427 minutes.


 67%|███████████████████████████████████▌                 | 15300/22835 [6:27:11<3:42:44,  1.77s/it]

67.00240858331509 % at  387.18774587313334 minutes.


 67%|███████████████████████████████████▋                 | 15350/22835 [6:28:37<3:31:36,  1.70s/it]

67.2213707028684 % at  388.61801100969313 minutes.


 67%|███████████████████████████████████▋                 | 15400/22835 [6:29:58<3:12:51,  1.56s/it]

67.44033282242172 % at  389.97273850043615 minutes.


 68%|███████████████████████████████████▊                 | 15450/22835 [6:31:15<3:19:58,  1.62s/it]

67.65929494197503 % at  391.25162831147514 minutes.


 68%|███████████████████████████████████▉                 | 15500/22835 [6:32:35<3:31:05,  1.73s/it]

67.87825706152836 % at  392.59505515495937 minutes.


 68%|████████████████████████████████████                 | 15550/22835 [6:34:01<3:07:52,  1.55s/it]

68.09721918108167 % at  394.0203366716703 minutes.


 68%|████████████████████████████████████▏                | 15600/22835 [6:35:22<3:33:31,  1.77s/it]

68.316181300635 % at  395.3671970407168 minutes.


 69%|████████████████████████████████████▎                | 15650/22835 [6:36:45<3:19:42,  1.67s/it]

68.5351434201883 % at  396.7530429760615 minutes.


 69%|████████████████████████████████████▎                | 15652/22835 [6:36:47<2:57:07,  1.48s/it]

Error found at node: Q9UBA6 ... removing from biofeatures


 69%|████████████████████████████████████▍                | 15700/22835 [6:38:02<3:26:44,  1.74s/it]

68.75410553974163 % at  398.0468336423238 minutes.


 69%|████████████████████████████████████▌                | 15750/22835 [6:39:24<3:00:41,  1.53s/it]

68.97306765929494 % at  399.4163286407789 minutes.


 69%|████████████████████████████████████▋                | 15784/22835 [6:40:21<2:44:14,  1.40s/it]

Error found at node: Q9BZG9 ... removing from biofeatures


 69%|████████████████████████████████████▋                | 15800/22835 [6:40:48<3:36:42,  1.85s/it]

69.19202977884827 % at  400.80899162689843 minutes.


 69%|████████████████████████████████████▊                | 15850/22835 [6:42:09<3:14:50,  1.67s/it]

69.41099189840158 % at  402.1658359050751 minutes.


 70%|████████████████████████████████████▉                | 15900/22835 [6:43:31<2:52:32,  1.49s/it]

69.6299540179549 % at  403.51731320222217 minutes.


 70%|█████████████████████████████████████                | 15950/22835 [6:44:49<2:46:33,  1.45s/it]

69.84891613750821 % at  404.82973826726277 minutes.


 70%|█████████████████████████████████████▏               | 16000/22835 [6:46:08<3:01:53,  1.60s/it]

70.06787825706154 % at  406.1423800150553 minutes.


 70%|█████████████████████████████████████▎               | 16050/22835 [6:47:29<2:39:12,  1.41s/it]

70.28684037661485 % at  407.4959074338277 minutes.


 71%|█████████████████████████████████████▎               | 16100/22835 [6:48:39<2:30:39,  1.34s/it]

70.50580249616817 % at  408.6648409605026 minutes.


 71%|█████████████████████████████████████▍               | 16150/22835 [6:49:51<2:47:53,  1.51s/it]

70.72476461572148 % at  409.8526588956515 minutes.


 71%|█████████████████████████████████████▌               | 16200/22835 [6:51:14<2:55:35,  1.59s/it]

70.9437267352748 % at  411.23762144645053 minutes.


 71%|█████████████████████████████████████▋               | 16250/22835 [6:52:33<2:48:51,  1.54s/it]

71.16268885482812 % at  412.5600804209709 minutes.


 71%|█████████████████████████████████████▊               | 16300/22835 [6:53:56<2:42:32,  1.49s/it]

71.38165097438143 % at  413.93372565110525 minutes.


 72%|█████████████████████████████████████▉               | 16350/22835 [6:55:17<2:40:30,  1.49s/it]

71.60061309393475 % at  415.29312007427217 minutes.


 72%|██████████████████████████████████████               | 16400/22835 [6:56:39<2:51:19,  1.60s/it]

71.81957521348806 % at  416.6579814751943 minutes.


 72%|██████████████████████████████████████▏              | 16450/22835 [6:57:58<3:00:42,  1.70s/it]

72.03853733304139 % at  417.9679063955943 minutes.


 72%|██████████████████████████████████████▏              | 16466/22835 [6:58:22<2:26:39,  1.38s/it]

Error found at node: A1A4T8 ... removing from biofeatures


 72%|██████████████████████████████████████▎              | 16500/22835 [6:59:13<2:57:48,  1.68s/it]

72.2574994525947 % at  419.22277816931404 minutes.


 72%|██████████████████████████████████████▍              | 16550/22835 [7:00:29<2:41:21,  1.54s/it]

72.47646157214803 % at  420.49261747201285 minutes.


 73%|██████████████████████████████████████▌              | 16600/22835 [7:01:47<2:40:23,  1.54s/it]

72.69542369170134 % at  421.79502529700596 minutes.


 73%|██████████████████████████████████████▋              | 16650/22835 [7:03:07<2:38:15,  1.54s/it]

72.91438581125466 % at  423.1315017382304 minutes.


 73%|██████████████████████████████████████▊              | 16700/22835 [7:04:25<2:33:42,  1.50s/it]

73.13334793080797 % at  424.4187586983045 minutes.


 73%|██████████████████████████████████████▉              | 16750/22835 [7:05:43<2:37:42,  1.56s/it]

73.3523100503613 % at  425.72711686293286 minutes.


 74%|██████████████████████████████████████▉              | 16800/22835 [7:07:00<2:21:42,  1.41s/it]

73.57127216991461 % at  427.0153738816579 minutes.


 74%|███████████████████████████████████████              | 16850/22835 [7:08:19<2:34:38,  1.55s/it]

73.79023428946793 % at  428.31739835341773 minutes.


 74%|███████████████████████████████████████▏             | 16900/22835 [7:09:34<2:31:46,  1.53s/it]

74.00919640902124 % at  429.5802872538567 minutes.


 74%|███████████████████████████████████████▎             | 16950/22835 [7:10:55<3:02:45,  1.86s/it]

74.22815852857457 % at  430.92819768190384 minutes.


 74%|███████████████████████████████████████▍             | 17000/22835 [7:12:12<2:40:57,  1.66s/it]

74.44712064812788 % at  432.21663804451623 minutes.


 75%|███████████████████████████████████████▌             | 17050/22835 [7:13:30<2:26:01,  1.51s/it]

74.6660827676812 % at  433.51279551585515 minutes.


 75%|███████████████████████████████████████▋             | 17100/22835 [7:14:49<2:35:02,  1.62s/it]

74.88504488723451 % at  434.83218764861425 minutes.


 75%|███████████████████████████████████████▊             | 17150/22835 [7:16:06<2:22:44,  1.51s/it]

75.10400700678782 % at  436.1013269027074 minutes.


 75%|███████████████████████████████████████▉             | 17200/22835 [7:17:26<2:40:22,  1.71s/it]

75.32296912634115 % at  437.44483708143235 minutes.


 76%|████████████████████████████████████████             | 17250/22835 [7:18:42<2:28:42,  1.60s/it]

75.54193124589446 % at  438.71012611786523 minutes.


 76%|████████████████████████████████████████▏            | 17300/22835 [7:20:04<2:15:28,  1.47s/it]

75.76089336544779 % at  440.06825379927955 minutes.


 76%|████████████████████████████████████████▎            | 17350/22835 [7:21:21<2:15:03,  1.48s/it]

75.9798554850011 % at  441.3557441075643 minutes.


 76%|████████████████████████████████████████▍            | 17400/22835 [7:22:41<2:23:47,  1.59s/it]

76.19881760455442 % at  442.6947040200233 minutes.


 76%|████████████████████████████████████████▌            | 17450/22835 [7:23:56<2:14:15,  1.50s/it]

76.41777972410773 % at  443.93572500546776 minutes.


 77%|████████████████████████████████████████▌            | 17500/22835 [7:25:18<2:23:25,  1.61s/it]

76.63674184366104 % at  445.31420384645463 minutes.


 77%|████████████████████████████████████████▋            | 17550/22835 [7:26:35<2:20:07,  1.59s/it]

76.85570396321437 % at  446.5928436477979 minutes.


 77%|████████████████████████████████████████▊            | 17600/22835 [7:27:54<2:38:12,  1.81s/it]

77.07466608276768 % at  447.90761884848274 minutes.


 77%|████████████████████████████████████████▉            | 17650/22835 [7:29:11<2:14:05,  1.55s/it]

77.293628202321 % at  449.19830934206647 minutes.


 78%|█████████████████████████████████████████            | 17700/22835 [7:30:33<2:12:16,  1.55s/it]

77.51259032187431 % at  450.56073726812997 minutes.


 78%|█████████████████████████████████████████▏           | 17750/22835 [7:31:53<2:00:26,  1.42s/it]

77.73155244142764 % at  451.88587972720467 minutes.


 78%|█████████████████████████████████████████▎           | 17800/22835 [7:33:11<2:07:50,  1.52s/it]

77.95051456098095 % at  453.1917210817337 minutes.


 78%|█████████████████████████████████████████▍           | 17850/22835 [7:34:33<2:31:14,  1.82s/it]

78.16947668053427 % at  454.5590159575144 minutes.


 78%|█████████████████████████████████████████▌           | 17900/22835 [7:35:49<2:01:40,  1.48s/it]

78.38843880008758 % at  455.8200547496478 minutes.


 79%|█████████████████████████████████████████▋           | 17950/22835 [7:37:03<1:50:17,  1.35s/it]

78.6074009196409 % at  457.0648759484291 minutes.


 79%|█████████████████████████████████████████▊           | 18000/22835 [7:38:20<1:50:42,  1.37s/it]

78.82636303919422 % at  458.3484326998393 minutes.


 79%|█████████████████████████████████████████▉           | 18050/22835 [7:39:40<2:04:56,  1.57s/it]

79.04532515874753 % at  459.6816256205241 minutes.


 79%|██████████████████████████████████████████           | 18100/22835 [7:40:59<2:05:32,  1.59s/it]

79.26428727830086 % at  460.9852168440819 minutes.


 79%|██████████████████████████████████████████▏          | 18150/22835 [7:42:15<1:52:09,  1.44s/it]

79.48324939785417 % at  462.2536727309227 minutes.


 80%|██████████████████████████████████████████▏          | 18200/22835 [7:43:30<1:52:57,  1.46s/it]

79.70221151740748 % at  463.5066610654195 minutes.


 80%|██████████████████████████████████████████▎          | 18250/22835 [7:44:46<1:54:28,  1.50s/it]

79.9211736369608 % at  464.76818321943284 minutes.


 80%|██████████████████████████████████████████▍          | 18300/22835 [7:45:57<1:53:35,  1.50s/it]

80.14013575651411 % at  465.9527676423391 minutes.


 80%|██████████████████████████████████████████▌          | 18350/22835 [7:47:11<1:57:05,  1.57s/it]

80.35909787606744 % at  467.1974388321241 minutes.


 81%|██████████████████████████████████████████▋          | 18400/22835 [7:48:33<2:12:03,  1.79s/it]

80.57805999562075 % at  468.5662004550298 minutes.


 81%|██████████████████████████████████████████▊          | 18450/22835 [7:49:52<2:02:57,  1.68s/it]

80.79702211517407 % at  469.86804158290226 minutes.


 81%|██████████████████████████████████████████▉          | 18500/22835 [7:51:11<1:57:53,  1.63s/it]

81.01598423472738 % at  471.1860336343447 minutes.


 81%|███████████████████████████████████████████          | 18550/22835 [7:52:22<1:41:28,  1.42s/it]

81.23494635428071 % at  472.3679845730464 minutes.


 81%|███████████████████████████████████████████▏         | 18600/22835 [7:53:46<2:05:39,  1.78s/it]

81.45390847383402 % at  473.77033144632975 minutes.


 81%|███████████████████████████████████████████▏         | 18601/22835 [7:53:47<1:50:21,  1.56s/it]

Error found at node: Q5JXX5 ... removing from biofeatures


 82%|███████████████████████████████████████████▎         | 18650/22835 [7:55:05<1:43:00,  1.48s/it]

81.67287059338734 % at  475.0922570586205 minutes.


 82%|███████████████████████████████████████████▍         | 18700/22835 [7:56:22<1:41:18,  1.47s/it]

81.89183271294065 % at  476.37954735358556 minutes.


 82%|███████████████████████████████████████████▌         | 18750/22835 [7:57:39<1:39:57,  1.47s/it]

82.11079483249398 % at  477.66170401970544 minutes.


 82%|███████████████████████████████████████████▋         | 18800/22835 [7:59:00<2:05:37,  1.87s/it]

82.32975695204729 % at  479.0036141037941 minutes.


 83%|███████████████████████████████████████████▊         | 18850/22835 [8:00:17<1:45:20,  1.59s/it]

82.54871907160062 % at  480.29480462471645 minutes.


 83%|███████████████████████████████████████████▊         | 18900/22835 [8:01:35<1:38:43,  1.51s/it]

82.76768119115393 % at  481.5874785621961 minutes.


 83%|███████████████████████████████████████████▉         | 18950/22835 [8:02:54<1:46:57,  1.65s/it]

82.98664331070725 % at  482.91337106227877 minutes.


 83%|████████████████████████████████████████████         | 19000/22835 [8:04:08<1:30:11,  1.41s/it]

83.20560543026056 % at  484.15009179910027 minutes.


 83%|████████████████████████████████████████████▏        | 19050/22835 [8:05:22<1:37:33,  1.55s/it]

83.42456754981387 % at  485.3766286253929 minutes.


 84%|████████████████████████████████████████████▎        | 19100/22835 [8:06:38<1:30:29,  1.45s/it]

83.6435296693672 % at  486.64768465360004 minutes.


 84%|████████████████████████████████████████████▍        | 19150/22835 [8:08:03<1:45:14,  1.71s/it]

83.86249178892051 % at  488.0584820191065 minutes.


 84%|████████████████████████████████████████████▌        | 19200/22835 [8:09:21<1:30:08,  1.49s/it]

84.08145390847383 % at  489.3608898440997 minutes.


 84%|████████████████████████████████████████████▋        | 19250/22835 [8:10:40<1:27:50,  1.47s/it]

84.30041602802714 % at  490.6691480000814 minutes.


 85%|████████████████████████████████████████████▊        | 19300/22835 [8:11:52<1:22:28,  1.40s/it]

84.51937814758047 % at  491.882450735569 minutes.


 85%|████████████████████████████████████████████▉        | 19350/22835 [8:13:13<1:32:06,  1.59s/it]

84.73834026713378 % at  493.2257775704066 minutes.


 85%|█████████████████████████████████████████████        | 19400/22835 [8:14:31<1:33:55,  1.64s/it]

84.9573023866871 % at  494.52505188385646 minutes.


 85%|█████████████████████████████████████████████▏       | 19450/22835 [8:15:49<1:27:58,  1.56s/it]

85.17626450624041 % at  495.8205593109131 minutes.


 85%|█████████████████████████████████████████████▎       | 19500/22835 [8:17:06<1:22:52,  1.49s/it]

85.39522662579374 % at  497.101865931352 minutes.


 86%|█████████████████████████████████████████████▍       | 19550/22835 [8:18:21<1:21:33,  1.49s/it]

85.61418874534705 % at  498.3566877047221 minutes.


 86%|█████████████████████████████████████████████▍       | 19556/22835 [8:18:30<1:16:24,  1.40s/it]

Error found at node: A2BFH1 ... removing from biofeatures


 86%|█████████████████████████████████████████████▍       | 19600/22835 [8:19:35<1:16:05,  1.41s/it]

85.83315086490038 % at  499.5865580479304 minutes.


 86%|█████████████████████████████████████████████▌       | 19650/22835 [8:20:51<1:17:36,  1.46s/it]

86.05211298445369 % at  500.86668126980464 minutes.


 86%|█████████████████████████████████████████████▋       | 19700/22835 [8:22:07<1:12:24,  1.39s/it]

86.27107510400701 % at  502.1300035278002 minutes.


 86%|█████████████████████████████████████████████▊       | 19750/22835 [8:23:25<1:20:46,  1.57s/it]

86.49003722356032 % at  503.4266610264778 minutes.


 87%|█████████████████████████████████████████████▉       | 19800/22835 [8:24:43<1:10:20,  1.39s/it]

86.70899934311365 % at  504.7239018877347 minutes.


 87%|██████████████████████████████████████████████       | 19850/22835 [8:25:59<1:09:52,  1.40s/it]

86.92796146266696 % at  505.9953746120135 minutes.


 87%|██████████████████████████████████████████████▏      | 19900/22835 [8:27:17<1:09:29,  1.42s/it]

87.14692358222027 % at  507.29468226035436 minutes.


 87%|██████████████████████████████████████████████▎      | 19950/22835 [8:28:33<1:05:01,  1.35s/it]

87.36588570177359 % at  508.55440431435903 minutes.


 88%|██████████████████████████████████████████████▍      | 20000/22835 [8:29:47<1:11:51,  1.52s/it]

87.5848478213269 % at  509.7877915263176 minutes.


 88%|██████████████████████████████████████████████▌      | 20050/22835 [8:31:01<1:06:43,  1.44s/it]

87.80380994088023 % at  511.02609568834305 minutes.


 88%|██████████████████████████████████████████████▋      | 20100/22835 [8:32:17<1:05:51,  1.44s/it]

88.02277206043354 % at  512.2958683133126 minutes.


 88%|██████████████████████████████████████████████▊      | 20150/22835 [8:33:35<1:11:43,  1.60s/it]

88.24173417998686 % at  513.5877255360285 minutes.


 88%|██████████████████████████████████████████████▉      | 20200/22835 [8:34:50<1:04:43,  1.47s/it]

88.46069629954017 % at  514.8357635895411 minutes.


 89%|███████████████████████████████████████████████      | 20250/22835 [8:36:09<1:06:07,  1.53s/it]

88.6796584190935 % at  516.1624394694965 minutes.


 89%|████████████████████████████████████████████████▉      | 20300/22835 [8:37:26<58:00,  1.37s/it]

88.89862053864681 % at  517.4479796648026 minutes.


 89%|███████████████████████████████████████████████▏     | 20350/22835 [8:38:43<1:03:41,  1.54s/it]

89.11758265820013 % at  518.7200690905253 minutes.


 89%|█████████████████████████████████████████████████      | 20388/22835 [8:39:38<48:24,  1.19s/it]

Error found at node: Q30KQ2 ... removing from biofeatures


 89%|█████████████████████████████████████████████████▏     | 20400/22835 [8:39:53<48:36,  1.20s/it]

89.33654477775345 % at  519.8876858750979 minutes.


 90%|███████████████████████████████████████████████▍     | 20450/22835 [8:40:54<1:06:16,  1.67s/it]

89.55550689730677 % at  520.916178035736 minutes.


 90%|█████████████████████████████████████████████████▍     | 20500/22835 [8:42:08<56:36,  1.45s/it]

89.77446901686008 % at  522.1430315375328 minutes.


 90%|█████████████████████████████████████████████████▍     | 20550/22835 [8:43:19<53:16,  1.40s/it]

89.9934311364134 % at  523.3204488833745 minutes.


 90%|█████████████████████████████████████████████████▌     | 20600/22835 [8:44:29<49:17,  1.32s/it]

90.21239325596672 % at  524.4939160029094 minutes.


 90%|█████████████████████████████████████████████████▋     | 20650/22835 [8:45:39<48:50,  1.34s/it]

90.43135537552004 % at  525.6623661677042 minutes.


 91%|█████████████████████████████████████████████████▊     | 20700/22835 [8:46:49<50:29,  1.42s/it]

90.65031749507335 % at  526.8258493820827 minutes.


 91%|█████████████████████████████████████████████████▉     | 20750/22835 [8:48:01<49:33,  1.43s/it]

90.86927961462668 % at  528.0189842939377 minutes.


 91%|██████████████████████████████████████████████████     | 20800/22835 [8:49:11<44:24,  1.31s/it]

91.08824173417999 % at  529.1840009291967 minutes.


 91%|██████████████████████████████████████████████████▏    | 20850/22835 [8:50:21<44:30,  1.35s/it]

91.3072038537333 % at  530.3610182483991 minutes.


 92%|██████████████████████████████████████████████████▎    | 20900/22835 [8:51:33<46:32,  1.44s/it]

91.52616597328662 % at  531.5536364634831 minutes.


 92%|██████████████████████████████████████████████████▍    | 20950/22835 [8:52:54<49:38,  1.58s/it]

91.74512809283993 % at  532.9026802897454 minutes.


 92%|██████████████████████████████████████████████████▌    | 21000/22835 [8:54:09<46:09,  1.51s/it]

91.96409021239326 % at  534.1589854796728 minutes.


 92%|██████████████████████████████████████████████████▋    | 21050/22835 [8:55:22<47:24,  1.59s/it]

92.18305233194657 % at  535.3759217540423 minutes.


 92%|██████████████████████████████████████████████████▊    | 21100/22835 [8:56:41<45:54,  1.59s/it]

92.4020144514999 % at  536.6901635885239 minutes.


 93%|██████████████████████████████████████████████████▉    | 21150/22835 [8:57:53<40:12,  1.43s/it]

92.6209765710532 % at  537.8971992929777 minutes.


 93%|███████████████████████████████████████████████████    | 21200/22835 [8:59:10<37:09,  1.36s/it]

92.83993869060653 % at  539.1798393209775 minutes.


 93%|███████████████████████████████████████████████████▏   | 21245/22835 [9:00:17<34:58,  1.32s/it]

Error found at node: Q6NSH3 ... removing from biofeatures


 93%|███████████████████████████████████████████████████▏   | 21250/22835 [9:00:25<39:40,  1.50s/it]

93.05890081015984 % at  540.4172767678897 minutes.


 93%|███████████████████████████████████████████████████▎   | 21300/22835 [9:01:35<40:26,  1.58s/it]

93.27786292971317 % at  541.6001944263776 minutes.


 93%|███████████████████████████████████████████████████▍   | 21350/22835 [9:02:54<37:29,  1.51s/it]

93.49682504926648 % at  542.9042190114657 minutes.


 94%|███████████████████████████████████████████████████▍   | 21361/22835 [9:03:09<32:04,  1.31s/it]

Error found at node: Q9D365 ... removing from biofeatures


 94%|███████████████████████████████████████████████████▌   | 21400/22835 [9:04:09<37:39,  1.57s/it]

93.7157871688198 % at  544.1503569523494 minutes.


 94%|███████████████████████████████████████████████████▋   | 21450/22835 [9:05:22<31:57,  1.38s/it]

93.93474928837311 % at  545.3732769012452 minutes.


 94%|███████████████████████████████████████████████████▊   | 21500/22835 [9:06:27<28:10,  1.27s/it]

94.15371140792644 % at  546.4552554527919 minutes.


 94%|███████████████████████████████████████████████████▉   | 21550/22835 [9:07:35<27:55,  1.30s/it]

94.37267352747975 % at  547.5840033491453 minutes.


 95%|████████████████████████████████████████████████████   | 21600/22835 [9:08:41<28:19,  1.38s/it]

94.59163564703307 % at  548.6885831912358 minutes.


 95%|████████████████████████████████████████████████████   | 21603/22835 [9:08:44<25:00,  1.22s/it]

Error found at node: Q10157 ... removing from biofeatures


 95%|████████████████████████████████████████████████████   | 21612/22835 [9:08:54<23:26,  1.15s/it]

Error found at node: P05732 ... removing from biofeatures


 95%|████████████████████████████████████████████████████   | 21615/22835 [9:08:58<23:14,  1.14s/it]

Error found at node: O42867 ... removing from biofeatures


 95%|████████████████████████████████████████████████████   | 21634/22835 [9:09:22<24:48,  1.24s/it]

Error found at node: P08093 ... removing from biofeatures


 95%|████████████████████████████████████████████████████▏  | 21643/22835 [9:09:33<23:59,  1.21s/it]

Error found at node: P79013 ... removing from biofeatures


 95%|████████████████████████████████████████████████████▏  | 21645/22835 [9:09:36<23:42,  1.20s/it]

Error found at node: O14150 ... removing from biofeatures


 95%|████████████████████████████████████████████████████▏  | 21648/22835 [9:09:39<23:50,  1.20s/it]

Error found at node: O60144 ... removing from biofeatures


 95%|████████████████████████████████████████████████████▏  | 21650/22835 [9:09:42<22:57,  1.16s/it]

Error found at node: O94754 ... removing from biofeatures
94.81059776658638 % at  549.7035079081853 minutes.


 95%|████████████████████████████████████████████████████▏  | 21654/22835 [9:09:47<24:17,  1.23s/it]

Error found at node: O14469 ... removing from biofeatures


 95%|████████████████████████████████████████████████████▏  | 21655/22835 [9:09:48<23:25,  1.19s/it]

Error found at node: P79057 ... removing from biofeatures


 95%|████████████████████████████████████████████████████▎  | 21700/22835 [9:10:48<24:22,  1.29s/it]

95.02955988613971 % at  550.8027207811673 minutes.


 95%|████████████████████████████████████████████████████▎  | 21701/22835 [9:10:49<23:20,  1.23s/it]

Error found at node: Q10119 ... removing from biofeatures


 95%|████████████████████████████████████████████████████▍  | 21750/22835 [9:11:55<24:29,  1.35s/it]

95.24852200569302 % at  551.9317853609721 minutes.


 95%|████████████████████████████████████████████████████▌  | 21800/22835 [9:13:01<22:21,  1.30s/it]

95.46748412524633 % at  553.0176641345024 minutes.


 96%|████████████████████████████████████████████████████▋  | 21850/22835 [9:14:06<21:36,  1.32s/it]

95.68644624479965 % at  554.1145102024078 minutes.


 96%|████████████████████████████████████████████████████▋  | 21900/22835 [9:15:11<19:23,  1.24s/it]

95.90540836435297 % at  555.1995555957159 minutes.


 96%|████████████████████████████████████████████████████▊  | 21950/22835 [9:16:17<19:58,  1.35s/it]

96.12437048390629 % at  556.2925847848256 minutes.


 96%|████████████████████████████████████████████████████▉  | 21959/22835 [9:16:29<18:33,  1.27s/it]

Error found at node: O75000 ... removing from biofeatures


 96%|████████████████████████████████████████████████████▉  | 22000/22835 [9:17:24<21:37,  1.55s/it]

96.3433326034596 % at  557.4149323145549 minutes.


 97%|█████████████████████████████████████████████████████  | 22050/22835 [9:18:37<17:39,  1.35s/it]

96.56229472301293 % at  558.6174177567165 minutes.


 97%|█████████████████████████████████████████████████████▏ | 22100/22835 [9:19:51<16:42,  1.36s/it]

96.78125684256624 % at  559.8656558156014 minutes.


 97%|█████████████████████████████████████████████████████▎ | 22150/22835 [9:21:04<17:00,  1.49s/it]

97.00021896211956 % at  561.079125225544 minutes.


 97%|█████████████████████████████████████████████████████▎ | 22154/22835 [9:21:10<15:05,  1.33s/it]

Error found at node: P22752 ... removing from biofeatures


 97%|█████████████████████████████████████████████████████▍ | 22200/22835 [9:22:24<15:44,  1.49s/it]

97.21918108167287 % at  562.4007174809774 minutes.


 97%|█████████████████████████████████████████████████████▌ | 22250/22835 [9:23:36<13:34,  1.39s/it]

97.4381432012262 % at  563.6151202718416 minutes.


 98%|█████████████████████████████████████████████████████▋ | 22300/22835 [9:24:49<13:07,  1.47s/it]

97.65710532077951 % at  564.8202225367228 minutes.


 98%|█████████████████████████████████████████████████████▊ | 22350/22835 [9:26:00<12:39,  1.57s/it]

97.87606744033283 % at  566.0169743180275 minutes.


 98%|█████████████████████████████████████████████████████▉ | 22400/22835 [9:27:13<11:08,  1.54s/it]

98.09502955988614 % at  567.2275935610136 minutes.


 98%|██████████████████████████████████████████████████████ | 22450/22835 [9:28:29<09:47,  1.53s/it]

98.31399167943945 % at  568.4897990902265 minutes.


 99%|██████████████████████████████████████████████████████▏| 22500/22835 [9:29:45<07:49,  1.40s/it]

98.53295379899278 % at  569.7603050907453 minutes.


 99%|██████████████████████████████████████████████████████▎| 22550/22835 [9:31:01<06:52,  1.45s/it]

98.75191591854609 % at  571.0235773483912 minutes.


 99%|██████████████████████████████████████████████████████▍| 22600/22835 [9:32:12<05:18,  1.36s/it]

98.97087803809941 % at  572.2156788627307 minutes.


 99%|██████████████████████████████████████████████████████▌| 22650/22835 [9:33:29<04:44,  1.54s/it]

99.18984015765272 % at  573.4838680664699 minutes.


 99%|██████████████████████████████████████████████████████▋| 22700/22835 [9:34:43<03:45,  1.67s/it]

99.40880227720605 % at  574.7272891879081 minutes.


100%|██████████████████████████████████████████████████████▊| 22750/22835 [9:36:00<02:15,  1.59s/it]

99.62776439675936 % at  576.0017287453015 minutes.


100%|██████████████████████████████████████████████████████▉| 22800/22835 [9:37:17<01:02,  1.78s/it]

99.84672651631269 % at  577.2935526331265 minutes.


100%|███████████████████████████████████████████████████████| 22835/22835 [9:38:15<00:00,  1.52s/it]


In [8]:
# Assemble the per-protein feature table from the parallel lists filled by the
# scraping loop, then export it as a tab-separated file.
#
# Column labels (including the historical spellings 'Postive Charge Frequency'
# and 'Kinease') and their order are kept exactly as before, since files
# written by this cell may already be consumed by name or by position.
feature_columns = {
    # Identity and basic sequence properties
    'UniprotID': indexes,
    'Gene': gene_name,
    'Length': l,
    'Mass': m,

    # Amino-acid class frequencies
    'Tiny Frequency': aac_tiny,
    'Small Frequency': aac_small,
    'Aliphatic Frequency': aac_aliphatic,
    'Aromatic Frequency': aac_aromatic,
    'Nonpolar Frequency': aac_nonpolar,
    'Postive Charge Frequency': aac_pos,
    'Negative Charge Frequency': aac_neg,
    'Charged Frequency': aac_chr,

    # Per-residue columns (one per amino-acid letter)
    'G': G,
    'A': A,
    'V': V,
    'L': L,
    'I': I,
    'M': M,
    'P': Proline,
    'F': F,
    'W': W,
    'S': S,
    'T': T,
    'C': C,
    'Y': Y,
    'N': N,
    'Q': Q,
    'D': D,
    'E': E,
    'K': K,
    'R': R,
    'H': H,

    # Protein-function flags
    'GPCR': gpcr_b,
    'Ionize': ionize_b,
    'Kinease': kin_b,
    'Protease': prot_b,

    # Signal peptide and site counts
    'SignalPeptide': signalpep_b,
    'N-links': N_count,
    'O-links': O_count,
    'serine-links': serine_count,
    'tyrosine-links': tyrosine_count,
    'threonine-links': threonine_count,
    'Binding Links': tsites,

    # Secondary-structure counts.
    # Previously 'Helix' was fed from b_count and 'Beta' from h_count, i.e. the
    # two labels were crossed relative to the list names (t_count -> 'Turn'
    # already matched).
    # NOTE(review): assumes h_count holds helix counts and b_count holds
    # beta-strand counts, as their names suggest - confirm against the
    # scraping loop that fills them.
    'Helix': h_count,
    'Beta': b_count,
    'Turn': t_count,
}

# Every list must have one entry per node in `indexes`. Check up front so a
# misaligned list is reported by name instead of as a generic pandas error.
expected_rows = len(indexes)
misaligned = {
    label: len(values)
    for label, values in feature_columns.items()
    if len(values) != expected_rows
}
if misaligned:
    raise ValueError(
        'Feature lists are not aligned with indexes (%d rows expected): %s'
        % (expected_rows, misaligned)
    )

biodata = pd.DataFrame(feature_columns)

biodata.to_csv('parse_data/parse.biogrid.bio', sep='\t', encoding='unicode_escape')

In [9]:
# Write the feature table as a plain tab-separated text file: one header row,
# then one row per protein, with no pandas index column.
#
# Each entry pairs an output header with the list that supplies its values, so
# the header/data alignment is visible in one place. Header names and their
# order are unchanged from the previous version of this cell.
output_columns = [
    ('UniprotID', indexes),
    ('Gene', gene_name),
    ('Length', l),
    ('Mass', m),
    ('Tiny', aac_tiny),
    ('Small', aac_small),
    ('Aliphatic', aac_aliphatic),
    ('Aromatic', aac_aromatic),
    ('Nonpolar', aac_nonpolar),
    ('Positive', aac_pos),
    ('Negative', aac_neg),
    ('Charged', aac_chr),

    ('G', G),
    ('A', A),
    ('V', V),
    ('L', L),
    ('I', I),
    ('M', M),
    ('P', Proline),
    ('F', F),
    ('W', W),
    ('S', S),
    ('T', T),
    ('C', C),
    ('Y', Y),
    ('N', N),
    ('Q', Q),
    ('D', D),
    ('E', E),
    ('K', K),
    ('R', R),
    ('H', H),

    ('GPCR', gpcr_b),
    ('Ionize', ionize_b),
    ('Kinease', kin_b),
    ('Protease', prot_b),
    ('SignalPeptide', signalpep_b),
    ('Nlink', N_count),
    ('Olink', O_count),
    ('Slink', serine_count),
    ('Tylink', tyrosine_count),
    ('Thlink', threonine_count),
    ('BindingLink', tsites),

    # Previously the 'Helix' column was written from b_count and 'Beta' from
    # h_count, i.e. crossed relative to the list names.
    # NOTE(review): assumes h_count holds helix counts and b_count holds
    # beta-strand counts, as their names suggest - confirm against the
    # scraping loop that fills them.
    ('Helix', h_count),
    ('Beta', b_count),
    ('Turn', t_count),
]

with open('parse_data/parse.biogridv2.bio', 'w') as filehandle:
    # write() is the call for a single string; writelines() expects an
    # iterable of lines and only worked before by iterating the characters.
    filehandle.write('\t'.join(header for header, _ in output_columns) + '\n')

    # The row count comes from the DataFrame built in the previous cell, while
    # the values come from the underlying lists (same as the original cell).
    for i in range(len(biodata['Gene'])):
        # str() renders each value exactly as the former "%s" formatting did.
        filehandle.write('\t'.join(str(values[i]) for _, values in output_columns) + '\n')