In [2]:
import pandas as pd
from functools import partial
from pathlib import Path
from sklearn.preprocessing import MultiLabelBinarizer
from collections import defaultdict, Counter
import os
import zipfile
import shutil
import seaborn as sns
import matplotlib.pyplot as plt

**Parse NARDINI output zip files and rename**

In [3]:
rename_dict = { "µ": "POLAR", "h": "HYDROPHOBIC", "+": "POSITIVE", "-": "NEGATIVE", "π": "AROMATIC", "A": "ALANINE", "P": "PROLINE", "G": "GLYCINE" }

def parse_zipped_nardini_files(DATA_DIR):
    all_rows = []

    for fname in os.listdir(DATA_DIR):
        print(f"Processing {fname}")
        if fname.endswith(".zip") and fname.startswith("nardini-data-"):
            zip_path = os.path.join(DATA_DIR, fname)

            tmp_extract_dir = os.path.join(DATA_DIR, "temp_extract")
            if not os.path.exists(tmp_extract_dir):
                os.makedirs(tmp_extract_dir)
                print(f"Created temp dir: {tmp_extract_dir}")

            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(tmp_extract_dir)
                print(f"Extracted {zip_path} to {tmp_extract_dir}")
            
            seq_file = os.path.join(tmp_extract_dir, "sequences.tsv")
            zscore_original_file = os.path.join(tmp_extract_dir, "zscore-original-sequence-seq1.tsv")

            sequences_df = pd.read_csv(seq_file, sep=r"\s+")

            zscore_original_df = pd.read_csv(zscore_original_file, sep=r"\s+")
            columns_list = zscore_original_df.columns.tolist()

            matrix_values = {}
            for row_label in range(len(zscore_original_df)):
                for col_label in columns_list:

                    colname = f"zsc_{rename_dict[columns_list[row_label]]}_{rename_dict[col_label]}"
                    matrix_values[colname] = zscore_original_df.loc[row_label, col_label]
            
            for _, seq_row in sequences_df.iterrows():
                row_dict = {}

                row_dict["original_seq"] = seq_row["original_seq"]
                
                row_dict.update(matrix_values)
                
                all_rows.append(row_dict)
            
            shutil.rmtree(tmp_extract_dir)

    final_df = pd.DataFrame(all_rows)

    return final_df

DATA_DIR = "analysis/data_curation_for_modeling/nardini/zip_files"
compartments = parse_zipped_nardini_files(DATA_DIR)
compartments.drop_duplicates(subset=["original_seq"], inplace=True)
compartments

Processing nardini-data-J3PYUYSDSX.zip
Created temp dir: /ceph/hpc/home/novljanj/data_storage/projects/nucleolus_enriched_proteins/analysis/curated/data_curation_for_modeling/nardini/zip_files/temp_extract
Extracted /ceph/hpc/home/novljanj/data_storage/projects/nucleolus_enriched_proteins/analysis/curated/data_curation_for_modeling/nardini/zip_files/nardini-data-J3PYUYSDSX.zip to /ceph/hpc/home/novljanj/data_storage/projects/nucleolus_enriched_proteins/analysis/curated/data_curation_for_modeling/nardini/zip_files/temp_extract
Processing nardini-data-2008PFIVGO.zip
Created temp dir: /ceph/hpc/home/novljanj/data_storage/projects/nucleolus_enriched_proteins/analysis/curated/data_curation_for_modeling/nardini/zip_files/temp_extract
Extracted /ceph/hpc/home/novljanj/data_storage/projects/nucleolus_enriched_proteins/analysis/curated/data_curation_for_modeling/nardini/zip_files/nardini-data-2008PFIVGO.zip to /ceph/hpc/home/novljanj/data_storage/projects/nucleolus_enriched_proteins/analysis/cu

Unnamed: 0,original_seq,zsc_POLAR_POLAR,zsc_POLAR_HYDROPHOBIC,zsc_POLAR_POSITIVE,zsc_POLAR_NEGATIVE,zsc_POLAR_AROMATIC,zsc_POLAR_ALANINE,zsc_POLAR_PROLINE,zsc_POLAR_GLYCINE,zsc_HYDROPHOBIC_POLAR,...,zsc_PROLINE_PROLINE,zsc_PROLINE_GLYCINE,zsc_GLYCINE_POLAR,zsc_GLYCINE_HYDROPHOBIC,zsc_GLYCINE_POSITIVE,zsc_GLYCINE_NEGATIVE,zsc_GLYCINE_AROMATIC,zsc_GLYCINE_ALANINE,zsc_GLYCINE_PROLINE,zsc_GLYCINE_GLYCINE
0,MAPKVTSELLRQLRQAMRNSEYVAEPIQAYIIPSGDAHQSEYIAPC...,-2.300450,-0.747840,-2.011040,-1.284370,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
1,MIASHMIACLFTELNQNQVQKVDQYLYHMRLSDETLLEISRRFRKE...,-0.221958,-0.350609,1.587110,0.401881,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
2,MAAQGEPQVQFKLVLVGDGGTGKTTFVKRHLTGEFEKKYVATLGVE...,0.664441,0.189203,-0.004798,1.803270,-1.018720,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
3,MAAGAAAALAFLNQESRARAGGVGGLRVPAPVTMDSFFFGCELSGH...,-0.318727,-0.688580,0.000000,0.531681,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
4,MAGEKAPDTKEKKPAAKKAGSDAAASRPRAAKVAKKVHPKGKKPKK...,-0.776902,-0.410184,0.989622,0.000000,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,MANSERTFIAIKPDGVQRGLVGEIIKRFEQKGFRLVGLKFLQASED...,-0.226995,0.717701,-0.592705,-0.881279,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
244,MPMILGYWNVRGLTHPIRMLLEYTDSSYDEKRYTMGDAPDFDRSQW...,-0.296399,-0.625747,-0.380932,-0.083412,-0.788685,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
245,MPTSVPRGAPFLLLPPLLMLSAVLAVPVDRAAPPQEDSQATETPDT...,-0.349458,-0.230156,0.238397,-0.481198,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000
246,MQPASAKWYDRRDYVFIEFCVEDSKDVNVNFEKSKLTFSCLGGSDN...,-0.070728,-1.064000,-0.479305,0.460238,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000


**Load nucleolar protein data with localCIDER scores**

In [5]:
representations_45S_47S_df_with_cider = pd.read_csv("data/curated_data_for_modeling/45S_47S_localcider_features.tsv", sep="\t")
representations_45S_47S_df_with_cider["Sequence"] = representations_45S_47S_df_with_cider["Sequence"].apply(lambda x: x.replace("U", "C"))
representations_45S_47S_df_with_cider

Unnamed: 0,Majority.protein.IDs,Reviewed,Protein ID,Sequence,Label,Length,length,FCR,NCPR,isoelectric_point,...,frac_M,frac_N,frac_P,frac_Q,frac_R,frac_S,frac_T,frac_V,frac_W,frac_Y
0,A0A2I3BPG9;P83882,reviewed,P83882,MVNVPKTRRTFCKKCGKHQPHKVTQYKKGKDSLYAQGKRRYDRKQS...,S47,106,106,0.367925,0.254717,10.71875,...,0.018868,0.018868,0.037736,0.066038,0.103774,0.028302,0.056604,0.056604,0.000000,0.037736
1,B2RY56,reviewed,B2RY56,MSFPPHLNRPPMGIPALPPGIPPPQFPGFPPPVPPGTPMIPVPMSI...,S45,838,838,0.478520,-0.010740,7.00000,...,0.019093,0.023866,0.071599,0.028640,0.130072,0.051313,0.028640,0.038186,0.007160,0.015513
2,E9Q8D0,reviewed,E9Q8D0,MKCHYEALGVRRDASEEELKKAYRKLALRWHPDKNLDNAAEAAEQF...,S47,531,531,0.386064,-0.024482,5.25000,...,0.020716,0.048964,0.028249,0.054614,0.062147,0.060264,0.037665,0.043315,0.011299,0.032015
3,O08528,reviewed,O08528,MIASHMIACLFTELNQNQVQKVDQYLYHMRLSDETLLEISRRFRKE...,S47,917,917,0.272628,-0.019629,6.12500,...,0.041439,0.028353,0.022901,0.033806,0.068702,0.047983,0.056707,0.079607,0.005453,0.017448
4,O08810,reviewed,O08810,MDTDLYDEFGNYIGPELDSDEDDDELGRETKDLDEDEDEDEDDVGE...,S45,971,971,0.263646,-0.057673,4.59375,...,0.021627,0.027806,0.059732,0.030896,0.042225,0.050463,0.069001,0.083419,0.008239,0.031926
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,Q9WVA4,reviewed,Q9WVA4,MANRGPSYGLSREVQQKIEKQYDADLEQILIQWITTQCREDVGQPQ...,S45,199,199,0.221106,0.010050,8.75000,...,0.040201,0.060302,0.045226,0.110553,0.050251,0.050251,0.045226,0.040201,0.020101,0.025126
198,Q9Z0U1,reviewed,Q9Z0U1,MEEVIWEQYTVTLQKDSKRGFGIAVSGGRDNPHFENGETSIVISDV...,S47,1167,1167,0.309340,-0.007712,7.00000,...,0.017138,0.029135,0.065124,0.047986,0.096829,0.095973,0.044559,0.052271,0.005998,0.028278
199,Q9Z199;P63271,reviewed,Q9Z199,MALETVPKDLRHLRACLLCSLVKTIDQFEYDGCDNCDAYLQMKGNR...,S47,117,117,0.256410,0.017094,8.31250,...,0.042735,0.034188,0.034188,0.034188,0.068376,0.094017,0.051282,0.085470,0.017094,0.042735
200,Q9Z1N5,reviewed,Q9Z1N5,MAENDVDNELLDYEDDEVETAAGADGTEAPAKKDVKGSYVSIHSSG...,S45,428,428,0.282710,-0.030374,5.25000,...,0.035047,0.037383,0.035047,0.053738,0.058411,0.046729,0.037383,0.084112,0.000000,0.023364


**Merge and save**

In [6]:
representations_45S_47S_df_w_nardini = representations_45S_47S_df_with_cider.merge(compartments, left_on="Sequence", right_on="original_seq", how="inner")
representations_45S_47S_df_w_nardini.drop(columns=["original_seq"], inplace=True)
pd.set_option('display.max_columns', None)
print(f"Added {len(representations_45S_47S_df_w_nardini.columns.difference(representations_45S_47S_df_with_cider.columns))} columns to the dataframe")
representations_45S_47S_df_w_nardini.to_csv("data/curated_data_for_modeling/45S_47S_localcider_features_w_nardini.tsv", sep="\t", index=False)
representations_45S_47S_df_w_nardini

Added 64 columns to the dataframe


Unnamed: 0,Majority.protein.IDs,Reviewed,Protein ID,Sequence,Label,Length,length,FCR,NCPR,isoelectric_point,molecular_weight,count_negative,count_positive,count_neutral,fraction_negative,fraction_positive,fraction_expanding,fraction_disorder_promoting,kappa,Omega,mean_net_charge,mean_hydropathy,uversky_hydropathy,PPII_propensity,delta,delta_max,frac_A,frac_C,frac_D,frac_E,frac_F,frac_G,frac_H,frac_I,frac_K,frac_L,frac_M,frac_N,frac_P,frac_Q,frac_R,frac_S,frac_T,frac_V,frac_W,frac_Y,zsc_POLAR_POLAR,zsc_POLAR_HYDROPHOBIC,zsc_POLAR_POSITIVE,zsc_POLAR_NEGATIVE,zsc_POLAR_AROMATIC,zsc_POLAR_ALANINE,zsc_POLAR_PROLINE,zsc_POLAR_GLYCINE,zsc_HYDROPHOBIC_POLAR,zsc_HYDROPHOBIC_HYDROPHOBIC,zsc_HYDROPHOBIC_POSITIVE,zsc_HYDROPHOBIC_NEGATIVE,zsc_HYDROPHOBIC_AROMATIC,zsc_HYDROPHOBIC_ALANINE,zsc_HYDROPHOBIC_PROLINE,zsc_HYDROPHOBIC_GLYCINE,zsc_POSITIVE_POLAR,zsc_POSITIVE_HYDROPHOBIC,zsc_POSITIVE_POSITIVE,zsc_POSITIVE_NEGATIVE,zsc_POSITIVE_AROMATIC,zsc_POSITIVE_ALANINE,zsc_POSITIVE_PROLINE,zsc_POSITIVE_GLYCINE,zsc_NEGATIVE_POLAR,zsc_NEGATIVE_HYDROPHOBIC,zsc_NEGATIVE_POSITIVE,zsc_NEGATIVE_NEGATIVE,zsc_NEGATIVE_AROMATIC,zsc_NEGATIVE_ALANINE,zsc_NEGATIVE_PROLINE,zsc_NEGATIVE_GLYCINE,zsc_AROMATIC_POLAR,zsc_AROMATIC_HYDROPHOBIC,zsc_AROMATIC_POSITIVE,zsc_AROMATIC_NEGATIVE,zsc_AROMATIC_AROMATIC,zsc_AROMATIC_ALANINE,zsc_AROMATIC_PROLINE,zsc_AROMATIC_GLYCINE,zsc_ALANINE_POLAR,zsc_ALANINE_HYDROPHOBIC,zsc_ALANINE_POSITIVE,zsc_ALANINE_NEGATIVE,zsc_ALANINE_AROMATIC,zsc_ALANINE_ALANINE,zsc_ALANINE_PROLINE,zsc_ALANINE_GLYCINE,zsc_PROLINE_POLAR,zsc_PROLINE_HYDROPHOBIC,zsc_PROLINE_POSITIVE,zsc_PROLINE_NEGATIVE,zsc_PROLINE_AROMATIC,zsc_PROLINE_ALANINE,zsc_PROLINE_PROLINE,zsc_PROLINE_GLYCINE,zsc_GLYCINE_POLAR,zsc_GLYCINE_HYDROPHOBIC,zsc_GLYCINE_POSITIVE,zsc_GLYCINE_NEGATIVE,zsc_GLYCINE_AROMATIC,zsc_GLYCINE_ALANINE,zsc_GLYCINE_PROLINE,zsc_GLYCINE_GLYCINE
0,A0A2I3BPG9;P83882,reviewed,P83882,MVNVPKTRRTFCKKCGKHQPHKVTQYKKGKDSLYAQGKRRYDRKQS...,S47,106,106,0.367925,0.254717,10.71875,12443.3,6,33,67,0.056604,0.311321,0.405660,0.698113,0.204268,0.097174,0.254717,3.329245,0.369916,0.388491,0.050159,0.245557,0.028302,0.047170,0.028302,0.028302,0.037736,0.084906,0.028302,0.037736,0.207547,0.047170,0.018868,0.018868,0.037736,0.066038,0.103774,0.028302,0.056604,0.056604,0.000000,0.037736,0.384755,1.106210,-0.039945,0.000000,0.0,0.0,0.0,0.0,0.0,1.793180,1.767160,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.474723,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,B2RY56,reviewed,B2RY56,MSFPPHLNRPPMGIPALPPGIPPPQFPGFPPPVPPGTPMIPVPMSI...,S45,838,838,0.478520,-0.010740,7.00000,99562.1,205,196,437,0.244630,0.233890,0.550119,0.756563,0.144026,0.245905,0.010740,3.035322,0.337258,0.409296,0.068646,0.476619,0.050119,0.007160,0.069212,0.175418,0.017900,0.034606,0.013126,0.047733,0.103819,0.066826,0.019093,0.023866,0.071599,0.028640,0.130072,0.051313,0.028640,0.038186,0.007160,0.015513,2.276070,1.071790,6.755620,6.685310,0.0,0.0,0.0,0.0,0.0,4.185990,5.902830,7.751220,0.0,0.0,0.0,0.0,0.0,0.0,5.861610,-0.071817,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.203210,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,E9Q8D0,reviewed,E9Q8D0,MKCHYEALGVRRDASEEELKKAYRKLALRWHPDKNLDNAAEAAEQF...,S47,531,531,0.386064,-0.024482,5.25000,61743.1,109,96,326,0.205273,0.180791,0.414313,0.713748,0.214982,0.127963,0.024482,3.245574,0.360619,0.373277,0.082000,0.381425,0.088512,0.018832,0.067797,0.137476,0.032015,0.035782,0.022599,0.013183,0.118644,0.065913,0.020716,0.048964,0.028249,0.054614,0.062147,0.060264,0.037665,0.043315,0.011299,0.032015,-0.044095,0.579716,1.771940,0.655098,0.0,0.0,0.0,0.0,0.0,1.109910,2.121380,0.497183,0.0,0.0,0.0,0.0,0.0,0.0,2.600840,3.062290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.083950,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,O08528,reviewed,O08528,MIASHMIACLFTELNQNQVQKVDQYLYHMRLSDETLLEISRRFRKE...,S47,917,917,0.272628,-0.019629,6.12500,102551.9,134,116,667,0.146129,0.126499,0.295529,0.605234,0.179976,0.175217,0.019629,4.308833,0.478759,0.338615,0.048470,0.269316,0.061069,0.028353,0.067612,0.078517,0.041439,0.086150,0.023991,0.051254,0.057797,0.101418,0.041439,0.028353,0.022901,0.033806,0.068702,0.047983,0.056707,0.079607,0.005453,0.017448,-0.221958,-0.350609,1.587110,0.401881,0.0,0.0,0.0,0.0,0.0,-0.425071,-1.872150,-1.188650,0.0,0.0,0.0,0.0,0.0,0.0,0.764731,0.999563,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.942663,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,O08810,reviewed,O08810,MDTDLYDEFGNYIGPELDSDEDDDELGRETKDLDEDEDEDEDDVGE...,S45,971,971,0.263646,-0.057673,4.59375,109375.8,156,100,715,0.160659,0.102987,0.323378,0.608651,0.253074,0.163445,0.057673,4.252317,0.472480,0.366282,0.064453,0.254682,0.048404,0.017508,0.079300,0.081359,0.043254,0.060762,0.025747,0.063852,0.060762,0.093718,0.021627,0.027806,0.059732,0.030896,0.042225,0.050463,0.069001,0.083419,0.008239,0.031926,-1.616590,-1.760150,0.737694,2.313390,0.0,0.0,0.0,0.0,0.0,-1.579780,-1.076140,-0.631602,0.0,0.0,0.0,0.0,0.0,0.0,1.351090,6.031710,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.229370,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,Q9WVA4,reviewed,Q9WVA4,MANRGPSYGLSREVQQKIEKQYDADLEQILIQWITTQCREDVGQPQ...,S45,199,199,0.221106,0.010050,8.75000,22399.8,21,23,155,0.105528,0.115578,0.266332,0.628141,0.136078,0.253163,0.010050,3.862814,0.429202,0.367387,0.028275,0.207788,0.060302,0.015075,0.050251,0.055276,0.035176,0.095477,0.000000,0.055276,0.065327,0.080402,0.040201,0.060302,0.045226,0.110553,0.050251,0.050251,0.045226,0.040201,0.020101,0.025126,-1.677620,-2.216770,-1.115520,-1.076530,0.0,0.0,0.0,0.0,0.0,-0.742727,-0.463737,-0.388699,0.0,0.0,0.0,0.0,0.0,0.0,-1.384870,-1.254200,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.348401,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
198,Q9Z0U1,reviewed,Q9Z0U1,MEEVIWEQYTVTLQKDSKRGFGIAVSGGRDNPHFENGETSIVISDV...,S47,1167,1167,0.309340,-0.007712,7.00000,131299.3,185,176,806,0.158526,0.150814,0.374464,0.722365,0.171822,0.120323,0.007712,3.590231,0.398915,0.373188,0.052813,0.307369,0.064267,0.003428,0.066838,0.091688,0.029991,0.073693,0.021422,0.042845,0.053985,0.068552,0.017138,0.029135,0.065124,0.047986,0.096829,0.095973,0.044559,0.052271,0.005998,0.028278,0.892245,1.402760,-2.064670,0.143907,0.0,0.0,0.0,0.0,0.0,2.258340,0.470184,1.759030,0.0,0.0,0.0,0.0,0.0,0.0,-0.606575,0.767168,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.049540,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
199,Q9Z199;P63271,reviewed,Q9Z199,MALETVPKDLRHLRACLLCSLVKTIDQFEYDGCDNCDAYLQMKGNR...,S47,117,117,0.256410,0.017094,8.31250,13196.1,14,16,87,0.119658,0.136752,0.290598,0.598291,0.191856,0.154823,0.017094,4.152137,0.461349,0.347949,0.044999,0.234545,0.059829,0.042735,0.076923,0.042735,0.025641,0.059829,0.008547,0.034188,0.068376,0.076923,0.042735,0.034188,0.034188,0.034188,0.068376,0.094017,0.051282,0.085470,0.017094,0.042735,-1.744770,-0.588727,-0.434231,-0.978409,0.0,0.0,0.0,0.0,0.0,-0.409237,-1.325100,-0.168758,0.0,0.0,0.0,0.0,0.0,0.0,-0.177725,0.261571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.256864,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200,Q9Z1N5,reviewed,Q9Z1N5,MAENDVDNELLDYEDDEVETAAGADGTEAPAKKDVKGSYVSIHSSG...,S45,428,428,0.282710,-0.030374,5.25000,49043.0,67,54,307,0.156542,0.126168,0.317757,0.588785,0.197201,0.192259,0.030374,4.225467,0.469496,0.359556,0.054327,0.275490,0.060748,0.018692,0.074766,0.081776,0.056075,0.046729,0.025701,0.060748,0.067757,0.095794,0.035047,0.037383,0.035047,0.053738,0.058411,0.046729,0.037383,0.084112,0.000000,0.023364,-1.546780,-2.127610,-1.146430,0.589316,0.0,0.0,0.0,0.0,0.0,-1.368530,-0.843831,-0.077345,0.0,0.0,0.0,0.0,0.0,0.0,0.180992,1.348030,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.057870,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
