In [6]:
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
import requests
import numpy as np
import time
import os
import pandas as pd
import time
from requests import get
# Project root directory; data file paths in this notebook are built by
# appending to it.
# NOTE(review): hard-coded, machine-specific absolute path - adjust before
# running on another machine.
root = "/Users/bencekover/Library/CloudStorage/OneDrive-Personal/MSci Bahler lab/S.-Pombe-MLPs - Github/"


def get_amino_acid_sequence(uniprot_id, timeout=30):
    """
    Fetch the amino acid sequence of a UniProt entry as one string.

    The entry is requested in FASTA format; the header line is dropped and the
    remaining lines are joined without separators.

    :param uniprot_id: UniProt accession, inserted into the request URL.
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled connection cannot block the notebook indefinitely.
    :return: The sequence string, or None if the request failed (network
        error, timeout or bad HTTP status).
    """
    base_url = "https://www.uniprot.org/uniprot/"
    response_format = ".fasta"

    # Combine the URL to get the FASTA format data for the given UniProt ID
    url = f"{base_url}{uniprot_id}{response_format}"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes (e.g., 404, 500)
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching the data: {e}")
        return None

    # splitlines() copes with both "\n" and "\r\n" line endings, so no stray
    # carriage returns end up inside the sequence.
    fasta_lines = response.text.strip().splitlines()
    return "".join(line.strip() for line in fasta_lines[1:])  # Skipping the first line (header)
    


In [202]:


def _top_hit_for_organism(hits, organism):
    """Return (evalue, score, accession) of the highest-scoring hit whose description contains `organism`."""
    matching = [hit for hit in hits if organism in hit["description"]]
    if not matching:
        return np.nan, np.nan, "N/A"
    # max() keeps the first of several equally scored hits, like a stable
    # descending sort followed by taking element 0.
    best = max(matching, key=lambda hit: hit["score"])
    return best["evalue"], best["score"], best["accession"]


def get_top_e_val(uniprot_id, max_retries=10):
    """
    Performs a BLAST (blastp) search against the NCBI nr database for the given
    UniProt ID, restricted by an Entrez query to four taxon IDs, and reports the
    highest-scoring HSP among hits described as "Saccharomyces cerevisiae" and
    among hits described as "Candida albicans".

    :param uniprot_id: The UniProt ID to search for.
    :param max_retries: The maximum number of times to retry the BLAST search if it fails
        (60 seconds are waited between attempts).
    :return: Tuple (cerevisiae_e_val, cerevisiae_score, cerevisiae_symbol,
        albicans_e_val, albicans_score, albicans_symbol). The numeric values are
        np.nan and the symbol is "N/A" for an organism without a hit, when the
        query sequence could not be fetched, or when every attempt failed.
    """
    no_hit = (np.nan, np.nan, "N/A")

    query_sequence = get_amino_acid_sequence(uniprot_id)
    if not query_sequence:
        # Without a sequence every BLAST attempt would fail; skip the retry
        # loop (and its 60 s sleeps) altogether.
        print(f"No sequence available for {uniprot_id}, returning default values.")
        return no_hit + no_hit

    for retry in range(max_retries + 1):
        try:
            # Perform the BLAST search
            print("BLAST starts")

            result_handle = NCBIWWW.qblast("blastp", "nr", query_sequence, expect=1000, entrez_query="txid237561[ORGN] OR txid5476[ORGN] OR txid559292[ORGN] OR txid4932[ORGN]",hitlist_size=1000)

            try:
                # One entry per HSP; the parser is consumed completely before
                # the handle is closed.
                hits = [
                    {
                        "accession": alignment.accession,
                        "description": alignment.title,
                        "evalue": hsp.expect,
                        "score": hsp.score,
                    }
                    for blast_record in NCBIXML.parse(result_handle)
                    for alignment in blast_record.alignments
                    for hsp in alignment.hsps
                ]
            finally:
                result_handle.close()

            # Best (highest score) hit per organism, matched on the hit description.
            return (_top_hit_for_organism(hits, "Saccharomyces cerevisiae")
                    + _top_hit_for_organism(hits, "Candida albicans"))

        except Exception as e:
            print(f"Error during attempt {retry + 1}: {e}")
            if retry < max_retries:
                print("Retrying...")
                time.sleep(60)  # Add a delay before retrying
            else:
                print("Max retries reached, returning default values.")
                return no_hit + no_hit

    # Only reached when max_retries is negative and no attempt was made.
    return no_hit + no_hit


In [206]:
#import pombase gene products file
import pandas as pd
import numpy as np
root = "/Users/bencekover/Library/CloudStorage/OneDrive-Personal/MSci Bahler lab/S.-Pombe-MLPs - Github/"


def _genes_with_uniprot(gene_ids, gene_table):
    """
    Pair each gene ID with the UniProt ID of its first matching row in `gene_table`.

    Gene IDs without any row in `gene_table` are dropped, so the two returned
    arrays stay aligned index by index.

    :param gene_ids: Iterable of gene IDs to look up in the "ID" column.
    :param gene_table: DataFrame with "ID" and "uniprot" columns.
    :return: (kept_gene_ids, uniprot_ids) as two NumPy arrays of equal length.
    """
    # First occurrence per ID, i.e. the same row `.values[0]` would select.
    first_uniprot = gene_table.drop_duplicates(subset="ID").set_index("ID")["uniprot"]
    kept_gene_ids = []
    uniprot_ids = []
    for gene_id in gene_ids:
        if gene_id in first_uniprot.index:
            kept_gene_ids.append(gene_id)
            # NOTE(review): a missing (NaN) UniProt value is kept as-is here,
            # as before; such entries will fail at the sequence fetch later.
            uniprot_ids.append(first_uniprot[gene_id])
    return np.array(kept_gene_ids, dtype=str), np.array(uniprot_ids)


def _pombe_ids_for_orthopattern(orthology_table, pattern):
    """Return the pombe gene IDs of all rows whose 'orthopattern' equals `pattern`."""
    raw_names = orthology_table[orthology_table['orthopattern'] == pattern]["pombe_name(s)"].values
    # Each value is stored with two wrapping characters on either side
    # (presumably the text form of a one-element list, e.g. "['SPAC1002.01']" -
    # TODO confirm); strip them to obtain the bare ID.
    return np.array([element[2:-2] for element in raw_names])


# NOTE(review): the column labels used below look like data values (a gene ID,
# a UniProt accession and a gene name), which suggests the TSV has no header
# line and its first record is consumed as the header by header=0 - confirm.
gene_info = pd.read_csv(root + "external data/Pombase files/gene_IDs_names_products.tsv", sep='\t', header=0)
gene_info = gene_info[["SPAC1002.01", "Q9US57", "mrx11"]]
gene_info = gene_info.rename(columns={"SPAC1002.01": "ID", "Q9US57": "uniprot", "mrx11":"gene_name"})

orthology_db = pd.read_csv(root + "Bence folder/Orthologs/final_orthology_database.csv")

# Genes with orthopattern 1 are called "unique"; genes without a gene_info
# entry are removed so IDs and UniProt IDs stay in the same order.
unique_genes, unique_genes_uniprot = _genes_with_uniprot(
    _pombe_ids_for_orthopattern(orthology_db, 1), gene_info
)

# Genes with orthopattern 7 are called "conserved"; same treatment.
conserved_genes, conserved_genes_uniprot = _genes_with_uniprot(
    _pombe_ids_for_orthopattern(orthology_db, 7), gene_info
)



In [6]:
# Result buffers for the conserved genes processed in this notebook.
N_CONSERVED = 50

conserved_e_vals_cerevisiae = np.full(N_CONSERVED, np.nan)
conserved_scores_cerevisiae = np.full(N_CONSERVED, np.nan)
conserved_e_vals_albicans = np.full(N_CONSERVED, np.nan)
conserved_scores_albicans = np.full(N_CONSERVED, np.nan)
# dtype=object is required for the symbol arrays: np.full(50, "N/A") would
# create a 3-character string array and silently truncate every accession
# assigned to it later (e.g. "EEU05955" -> "EEU").
conserved_symbols_cerevisiae = np.full(N_CONSERVED, "N/A", dtype=object)
conserved_symbols_albicans = np.full(N_CONSERVED, "N/A", dtype=object)

In [208]:

def _load_checkpoint(path, n_keep, size, fill, text=False):
    """
    Return an array of length `size` whose first `n_keep` entries come from `path`.

    All remaining entries (or every entry, if the file cannot be read) are set
    to `fill`. Text arrays use dtype=object so that values assigned later are
    not truncated to the width of the fill string.

    :param path: Text file previously written with np.savetxt.
    :param n_keep: Number of leading saved entries to reuse.
    :param size: Length of the returned array.
    :param fill: Placeholder for entries that still have to be computed.
    :param text: True for string data (accessions), False for floats.
    """
    values = np.full(size, fill, dtype=object if text else float)
    try:
        saved = np.ravel(np.loadtxt(path, dtype=str if text else float))
    except (OSError, ValueError) as err:
        print(f"could not load {path} ({err}); starting from empty values")
        return values
    n_keep = max(0, min(n_keep, len(saved), size))
    values[:n_keep] = saved[:n_keep]
    print("file loaded, entries:", n_keep)
    return values


N_CONSERVED = 50  # number of conserved genes processed in this run

# Number of already-computed entries to reuse from the saved files. It is fixed
# at 0, i.e. the run starts from the first gene and no saved value is kept.
# To resume instead, derive it from the saved e-values, e.g.
# np.where(~np.isnan(saved_e_vals))[0][-1].
n_entries = 0

conserved_e_vals_cerevisiae = _load_checkpoint('conserved_e_vals_cerevisiae.txt', n_entries, N_CONSERVED, np.nan)
conserved_scores_cerevisiae = _load_checkpoint('conserved_scores_cerevisiae.txt', n_entries, N_CONSERVED, np.nan)
conserved_e_vals_albicans = _load_checkpoint('conserved_e_vals_albicans.txt', n_entries, N_CONSERVED, np.nan)
conserved_scores_albicans = _load_checkpoint('conserved_scores_albicans.txt', n_entries, N_CONSERVED, np.nan)
conserved_symbols_cerevisiae = _load_checkpoint('conserved_symbols_cerevisiae.txt', n_entries, N_CONSERVED, "N/A", text=True)
conserved_symbols_albicans = _load_checkpoint('conserved_symbols_albicans.txt', n_entries, N_CONSERVED, "N/A", text=True)


# Never index past the available genes, even if fewer than N_CONSERVED exist.
for i in range(n_entries, min(N_CONSERVED, len(conserved_genes_uniprot))):

    cerevisiae_e_val, cerevisiae_score, cerevisiae_symbol, albicans_e_val, albicans_score, albicans_symbol = get_top_e_val(conserved_genes_uniprot[i])
    print(cerevisiae_e_val, cerevisiae_score, cerevisiae_symbol, albicans_e_val, albicans_score, albicans_symbol,conserved_genes[i],conserved_genes_uniprot[i])
    print("currently at entry", i, "of", len(conserved_genes_uniprot))
    conserved_e_vals_cerevisiae[i] = cerevisiae_e_val
    conserved_scores_cerevisiae[i] = cerevisiae_score
    conserved_symbols_cerevisiae[i] = cerevisiae_symbol
    conserved_e_vals_albicans[i] = albicans_e_val
    conserved_scores_albicans[i] = albicans_score
    conserved_symbols_albicans[i] = albicans_symbol




file loaded, entries: 0
file loaded, entries: 0
file loaded, entries: 0
file loaded, entries: 0
BLAST starts
9.5634e-26 299.0 EEU05955 nan nan N/A SPBC336.07 Q9UST7
currently at entry 0 of 1070
BLAST starts
6.96575e-49 408.0 CAD6595082 2.69148e-54 445.0 KGU29437 SPCC1450.04 O74173
currently at entry 1 of 1070
BLAST starts
1.64219e-42 399.0 AJO98514 8.05207e-43 400.0 RLP63156 SPAC17H9.17c O13814
currently at entry 2 of 1070
BLAST starts
0.0 2272.0 CAI4237107 0.0 2253.0 EEQ42403 SPAC56F8.03 Q10251
currently at entry 3 of 1070
BLAST starts
2.50968e-21 209.0 AJO93589 3.72198e-23 222.0 KHC56833 SPCC4B3.11c Q9USK1
currently at entry 4 of 1070
BLAST starts
2.74025e-68 573.0 CAI4238260 6.50536e-107 833.0 KGU27560 SPAC23H3.05c O42858
currently at entry 5 of 1070
BLAST starts
1.64664e-136 1066.0 AJO97954 1.18354e-127 1007.0 KGU30207 SPBC660.13c Q92372
currently at entry 6 of 1070
BLAST starts
2.67218e-10 165.0 CAI4239675 3.96087e-05 122.0 KHC65636 SPBC1706.01 O60132
currently at entry 7 of 1070


In [209]:

# Write the conserved-gene BLAST results to text files. Numeric arrays use
# np.savetxt's default format; the symbol arrays hold text and need fmt="%s".
for _out_path, _values, _savetxt_options in (
    ('conserved_e_vals_cerevisiae_final.txt', conserved_e_vals_cerevisiae, {}),
    ('conserved_e_vals_albicans_final.txt', conserved_e_vals_albicans, {}),
    ('conserved_symbols_cerevisiae_final.txt', conserved_symbols_cerevisiae, {"fmt": "%s"}),
    ('conserved_scores_cerevisiae_final.txt', conserved_scores_cerevisiae, {}),
    ('conserved_scores_albicans_final.txt', conserved_scores_albicans, {}),
    ('conserved_symbols_albicans_final.txt', conserved_symbols_albicans, {"fmt": "%s"}),
):
    np.savetxt(_out_path, _values, **_savetxt_options)


In [213]:
# Write the unique-gene BLAST results to text files. The unique_* arrays are
# assigned by a different cell (the unique-gene BLAST loop), which therefore
# has to be executed before this one.
# Numeric arrays use np.savetxt's default format; symbols are text (fmt="%s").
for _out_path, _values, _savetxt_options in (
    ('unique_e_vals_cerevisiae_final.txt', unique_e_vals_cerevisiae, {}),
    ('unique_e_vals_albicans_final.txt', unique_e_vals_albicans, {}),
    ('unique_scores_cerevisiae_final.txt', unique_scores_cerevisiae, {}),
    ('unique_scores_albicans_final.txt', unique_scores_albicans, {}),
    ('unique_symbols_cerevisiae_final.txt', unique_symbols_cerevisiae, {"fmt": "%s"}),
    ('unique_symbols_albicans_final.txt', unique_symbols_albicans, {"fmt": "%s"}),
):
    np.savetxt(_out_path, _values, **_savetxt_options)

In [212]:

def _load_checkpoint(path, n_keep, size, fill, text=False):
    """
    Return an array of length `size` whose first `n_keep` entries come from `path`.

    All remaining entries (or every entry, if the file cannot be read) are set
    to `fill`. Text arrays use dtype=object so that values assigned later are
    not truncated to the width of the strings already stored.

    :param path: Text file previously written with np.savetxt.
    :param n_keep: Number of leading saved entries to reuse.
    :param size: Length of the returned array.
    :param fill: Placeholder for entries that still have to be computed.
    :param text: True for string data (accessions), False for floats.
    """
    values = np.full(size, fill, dtype=object if text else float)
    try:
        saved = np.ravel(np.loadtxt(path, dtype=str if text else float))
    except (OSError, ValueError) as err:
        print(f"could not load {path} ({err}); starting from empty values")
        return values
    n_keep = max(0, min(n_keep, len(saved), size))
    values[:n_keep] = saved[:n_keep]
    print("file loaded, entries:", n_keep)
    return values


n_unique = len(unique_genes_uniprot)

# Resume point: index of the last non-NaN saved cerevisiae e-value. Entries
# before that index are reused; that entry itself and everything after it are
# computed again. Without a usable file the run starts at 0.
# NOTE(review): NaN is also a regular "no hit" result, so trailing genes
# without a cerevisiae hit are recomputed on every resume.
n_entries = 0
try:
    _saved_e_vals = np.ravel(np.loadtxt('unique_e_vals_cerevisiae_final.txt'))
    _filled = np.nonzero(~np.isnan(_saved_e_vals))[0]
    if _filled.size:
        n_entries = min(int(_filled[-1]), n_unique)
except (OSError, ValueError) as err:
    print(f"no usable checkpoint ({err}); starting from entry 0")

unique_e_vals_cerevisiae = _load_checkpoint('unique_e_vals_cerevisiae_final.txt', n_entries, n_unique, np.nan)
unique_scores_cerevisiae = _load_checkpoint('unique_scores_cerevisiae_final.txt', n_entries, n_unique, np.nan)
unique_e_vals_albicans = _load_checkpoint('unique_e_vals_albicans_final.txt', n_entries, n_unique, np.nan)
unique_scores_albicans = _load_checkpoint('unique_scores_albicans_final.txt', n_entries, n_unique, np.nan)
unique_symbols_cerevisiae = _load_checkpoint('unique_symbols_cerevisiae_final.txt', n_entries, n_unique, "N/A", text=True)
unique_symbols_albicans = _load_checkpoint('unique_symbols_albicans_final.txt', n_entries, n_unique, "N/A", text=True)


for i in range(n_entries, n_unique):

    cerevisiae_e_val, cerevisiae_score, cerevisiae_symbol, albicans_e_val, albicans_score, albicans_symbol = get_top_e_val(unique_genes_uniprot[i])
    print(cerevisiae_e_val, cerevisiae_score, cerevisiae_symbol, albicans_e_val, albicans_score, albicans_symbol,unique_genes[i],unique_genes_uniprot[i])
    print("currently at entry", i, "of", len(unique_genes_uniprot))
    unique_e_vals_cerevisiae[i] = cerevisiae_e_val
    unique_scores_cerevisiae[i] = cerevisiae_score
    unique_symbols_cerevisiae[i] = cerevisiae_symbol
    unique_e_vals_albicans[i] = albicans_e_val
    unique_scores_albicans[i] = albicans_score
    unique_symbols_albicans[i] = albicans_symbol


np.savetxt('unique_e_vals_cerevisiae_final.txt', unique_e_vals_cerevisiae)
np.savetxt('unique_e_vals_albicans_final.txt', unique_e_vals_albicans)
np.savetxt('unique_scores_cerevisiae_final.txt', unique_scores_cerevisiae)
np.savetxt('unique_scores_albicans_final.txt', unique_scores_albicans)
# The symbols are saved too, so that a later resume does not pair fresh
# numbers with stale accessions.
np.savetxt('unique_symbols_cerevisiae_final.txt', unique_symbols_cerevisiae, fmt="%s")
np.savetxt('unique_symbols_albicans_final.txt', unique_symbols_albicans, fmt="%s")
    


file loaded, entries: 16
file loaded, entries: 16
file loaded, entries: 16
file loaded, entries: 16
file loaded, entries: 16
file loaded, entries: 16
BLAST starts
7.45392 73.0 CAI4823182 nan nan N/A SPBC359.04c Q9P5N1
currently at entry 16 of 25
BLAST starts
nan nan N/A 618.52 58.0 KAF6070001 SPBC646.06c O94510
currently at entry 17 of 25
BLAST starts
nan nan N/A nan nan N/A SPBC947.04 Q874R4
currently at entry 18 of 25
BLAST starts
0.00986924 109.0 CAI6684116 7.16111e-62 607.0 AAQ03243 SPBPJ4664.02 Q96WV6
currently at entry 19 of 25
BLAST starts
nan nan N/A nan nan N/A SPCC1223.13 O74412
currently at entry 20 of 25
BLAST starts
0.000244881 119.0 CAI4240790 2.27603e-05 127.0 AAN73329 SPCC1742.01 Q9P6S0
currently at entry 21 of 25
BLAST starts
nan nan N/A 0.000183926 115.0 O74623 SPCC188.09c Q7Z9I1
currently at entry 22 of 25
BLAST starts
115.489 66.0 CAD6645984 nan nan N/A SPCC736.08 O74954
currently at entry 23 of 25
BLAST starts
7.41169e-30 336.0 CAI7443233 3.17752e-33 364.0 KGT71740

In [None]:
# Write the numeric unique-gene BLAST results (e-values and scores) to disk,
# using np.savetxt's default number format.
for _out_path, _values in (
    ('unique_e_vals_cerevisiae_final.txt', unique_e_vals_cerevisiae),
    ('unique_e_vals_albicans_final.txt', unique_e_vals_albicans),
    ('unique_scores_cerevisiae_final.txt', unique_scores_cerevisiae),
    ('unique_scores_albicans_final.txt', unique_scores_albicans),
):
    np.savetxt(_out_path, _values)

In [None]:
import time
import subprocess
#import get
from requests import get

def scores_evals_from_uniprot(uniprot_id):
    """
    Returns the e-value and score of the best hit in S. cerevisiae and C. albicans for a given uniprot id
    
    Parameters
    ----------
    uniprot_id : str
        Uniprot id of the protein of interest
    """


    download_pdb_alphafold(uniprot_id)
    file = f'/Users/bencekover/Library/CloudStorage/OneDrive-Personal/MSci\ Bahler\ lab/S.-Pombe-MLPs\ -\ Github/Bence\ folder/Structure_and_Sequence_comparisons/{uniprot_id}.pdb'
    
    x=0
    
    while x < 5:
        try:
            job = !curl -X POST -F q=@{file} -F 'mode=3diaa' -F 'database[]=afdb-swissprot' https://search.foldseek.com/api/ticket
            job = job[-1].split('"')[3]
            result = get('https://search.foldseek.com/api/result/' + job + '/0').json()
            x = 5
        except:
            time.sleep(60)
            x += 1
            print("waiting")
           
    try:
        taxids = [result["results"][0]["alignments"][i]["taxId"] for i in range(len(result["results"][0]["alignments"]))]
        scores = [result["results"][0]["alignments"][i]["score"] for i in range(len(result["results"][0]["alignments"]))]
        evals = [result["results"][0]["alignments"][i]["eval"] for i in range(len(result["results"][0]["alignments"]))]

        #for top_score_cerevisiae find in taxids the entry which equals 4932 or 559292 and get the highest score and evalue from those
        cerevisiae_index = [i for i, x in enumerate(taxids) if x == 4932 or x == 559292]
        albicans_index = [i for i, x in enumerate(taxids) if x == 5476 or x== 237561]
    except:
        return 0,100,0,100
    try:
        top_cerevisiae_score = max([scores[i] for i in cerevisiae_index])
        top_cerevisiae_eval = min([evals[i] for i in cerevisiae_index])
    except:
        print("no cerevisiae")
        top_cerevisiae_score = 0
        top_cerevisiae_eval = 100
        

        #do the same for albicans
        
    try:
        top_albicans_score = max([scores[i] for i in albicans_index])
        top_albicans_eval = min([evals[i] for i in albicans_index])
    except:
        print("no albicans")
        top_albicans_score = 0
        top_albicans_eval = 100
        
    

    return top_cerevisiae_score, top_cerevisiae_eval, top_albicans_score, top_albicans_eval




In [103]:
# Write the unique-gene Foldseek results (e-values and scores) to disk, using
# np.savetxt's default number format.
for _out_path, _values in (
    ('unique_e_vals_cerevisiae_foldseek.txt', unique_e_vals_cerevisiae_foldseek),
    ('unique_scores_cerevisiae_foldseek.txt', unique_scores_cerevisiae_foldseek),
    ('unique_e_vals_albicans_foldseek.txt', unique_e_vals_albicans_foldseek),
    ('unique_scores_albicans_foldseek.txt', unique_scores_albicans_foldseek),
):
    np.savetxt(_out_path, _values)

In [52]:
# Reload the saved Foldseek e-values for S. cerevisiae.
unique_e_vals_cerevisiae_foldseek = np.loadtxt('unique_e_vals_cerevisiae_foldseek.txt')
# n_entries is the index of the last non-NaN value; entries before it are
# kept, and the array is padded with NaN up to len(unique_genes_uniprot).
n_entries = np.nonzero(~np.isnan(unique_e_vals_cerevisiae_foldseek))[0][-1]
unique_e_vals_cerevisiae_foldseek = np.concatenate((
    unique_e_vals_cerevisiae_foldseek[:n_entries],
    np.full(len(unique_genes_uniprot)-n_entries, np.nan),
))
print("file loaded, entries:", n_entries)

file loaded, entries: 18


In [53]:
def _load_checkpoint(path, n_keep, size, fill, text=False):
    """
    Return an array of length `size` whose first `n_keep` entries come from `path`.

    All remaining entries (or every entry, if the file cannot be read) are set
    to `fill`.

    :param path: Text file previously written with np.savetxt.
    :param n_keep: Number of leading saved entries to reuse.
    :param size: Length of the returned array.
    :param fill: Placeholder for entries that still have to be computed.
    :param text: True for string data, False for floats.
    """
    values = np.full(size, fill, dtype=object if text else float)
    try:
        saved = np.ravel(np.loadtxt(path, dtype=str if text else float))
    except (OSError, ValueError) as err:
        print(f"could not load {path} ({err}); starting from empty values")
        return values
    n_keep = max(0, min(n_keep, len(saved), size))
    values[:n_keep] = saved[:n_keep]
    print("file loaded, entries:", n_keep)
    return values


n_unique = len(unique_genes_uniprot)

# Resume point: index of the last non-NaN saved cerevisiae e-value. Entries
# before that index are reused; that entry itself and everything after it are
# computed again. Without a usable file the run starts at 0.
n_entries = 0
try:
    _saved_e_vals = np.ravel(np.loadtxt('unique_e_vals_cerevisiae_foldseek.txt'))
    _filled = np.nonzero(~np.isnan(_saved_e_vals))[0]
    if _filled.size:
        n_entries = min(int(_filled[-1]), n_unique)
except (OSError, ValueError) as err:
    print(f"no usable checkpoint ({err}); starting from entry 0")

unique_e_vals_cerevisiae_foldseek = _load_checkpoint('unique_e_vals_cerevisiae_foldseek.txt', n_entries, n_unique, np.nan)
unique_scores_cerevisiae_foldseek = _load_checkpoint('unique_scores_cerevisiae_foldseek.txt', n_entries, n_unique, np.nan)
unique_e_vals_albicans_foldseek = _load_checkpoint('unique_e_vals_albicans_foldseek.txt', n_entries, n_unique, np.nan)
unique_scores_albicans_foldseek = _load_checkpoint('unique_scores_albicans_foldseek.txt', n_entries, n_unique, np.nan)

try:
    # A single pass from the resume point. An error or a manual interrupt no
    # longer restarts the whole search from entry 0; it propagates after the
    # progress made so far has been written to disk.
    for i in range(n_entries, n_unique):
        score_cerevisiae, eval_cerevisiae, score_albicans, eval_albicans = scores_evals_from_uniprot(unique_genes_uniprot[i])
        print(score_cerevisiae, eval_cerevisiae, score_albicans, eval_albicans, unique_genes[i], unique_genes_uniprot[i])
        print("currently at entry", i, "of", len(unique_genes_uniprot), "entries top")
        unique_e_vals_cerevisiae_foldseek[i] = eval_cerevisiae
        unique_scores_cerevisiae_foldseek[i] = score_cerevisiae
        unique_e_vals_albicans_foldseek[i] = eval_albicans
        unique_scores_albicans_foldseek[i] = score_albicans
finally:
    np.savetxt('unique_e_vals_cerevisiae_foldseek.txt', unique_e_vals_cerevisiae_foldseek)
    np.savetxt('unique_scores_cerevisiae_foldseek.txt', unique_scores_cerevisiae_foldseek)
    np.savetxt('unique_e_vals_albicans_foldseek.txt', unique_e_vals_albicans_foldseek)
    np.savetxt('unique_scores_albicans_foldseek.txt', unique_scores_albicans_foldseek)



file loaded, entries: 18
file loaded, entries: 18
file loaded, entries: 18
file loaded, entries: 18


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  571k    0  571k    0     0  2139k      0 --:--:-- --:--:-- --:--:-- 2148k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 11.4M    0 11.4M    0     0  7291k      0 --:--:--  0:00:01 --:--:-- 7289k


0 100 0 100 SPBC947.04 Q874R4
currently at entry 18 of 25 entries top


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   127  100   127    0     0   1831      0 --:--:-- --:--:-- --:--:--  1840
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   127  100   127    0     0   2156      0 --:--:-- --:--:-- --:--:--  2189


waiting
waiting
waiting
waiting
waiting
0 100 0 100 SPBPJ4664.02 Q96WV6
currently at entry 19 of 25 entries top


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  613k    0  613k    0     0  2855k      0 --:--:-- --:--:-- --:--:-- 2865k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 11.1M    0 11.1M    0     0  7224k      0 --:--:--  0:00:01 --:--:-- 7223k


waiting
waiting
0 100 0 100 SPCC1223.13 O74412
currently at entry 20 of 25 entries top


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  888k    0  888k    0     0  3791k      0 --:--:-- --:--:-- --:--:-- 3797k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 31.5M    0 31.5M    0     0  8161k      0 --:--:--  0:00:03 --:--:-- 8161k


waiting
waiting
waiting
0 100 0 100 SPCC1742.01 Q9P6S0
currently at entry 21 of 25 entries top


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  358k    0  358k    0     0  2235k      0 --:--:-- --:--:-- --:--:-- 2239k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 4525k    0 4525k    0     0  6685k      0 --:--:-- --:--:-- --:--:-- 6694k


waiting
waiting
no cerevisiae
0 100 22 9.048 SPCC188.09c Q7Z9I1
currently at entry 22 of 25 entries top


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  380k    0  380k    0     0  2043k      0 --:--:-- --:--:-- --:--:-- 2047k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 4506k    0 4506k    0     0  6472k      0 --:--:-- --:--:-- --:--:-- 6475k


waiting
waiting
waiting
no cerevisiae
no albicans
0 100 0 100 SPCC736.08 O74954
currently at entry 23 of 25 entries top


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  937k    0  937k    0     0  3946k      0 --:--:-- --:--:-- --:--:-- 3956k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 27.2M    0 27.2M    0     0  8082k      0 --:--:--  0:00:03 --:--:-- 8082k


waiting
waiting
waiting
0 100 0 100 SPCC895.05 O94532
currently at entry 24 of 25 entries top


In [36]:
def _load_checkpoint(path, n_keep, size, fill, text=False):
    """
    Return an array of length `size` whose first `n_keep` entries come from `path`.

    All remaining entries (or every entry, if the file cannot be read) are set
    to `fill`.

    :param path: Text file previously written with np.savetxt.
    :param n_keep: Number of leading saved entries to reuse.
    :param size: Length of the returned array.
    :param fill: Placeholder for entries that still have to be computed.
    :param text: True for string data, False for floats.
    """
    values = np.full(size, fill, dtype=object if text else float)
    try:
        saved = np.ravel(np.loadtxt(path, dtype=str if text else float))
    except (OSError, ValueError) as err:
        print(f"could not load {path} ({err}); starting from empty values")
        return values
    n_keep = max(0, min(n_keep, len(saved), size))
    values[:n_keep] = saved[:n_keep]
    print("file loaded, entries:", n_keep)
    return values


N_CONSERVED = 50  # number of conserved genes processed in this run

# Number of saved entries to reuse. It is fixed at 0, so the search starts at
# the first gene and no saved value is kept.
# NOTE(review): the files read below are the ones without the "_foldseek"
# suffix; with n_entries = 0 their content is never used, but confirm the
# file names before enabling a real resume.
n_entries = 0

conserved_e_vals_cerevisiae_foldseek = _load_checkpoint('conserved_e_vals_cerevisiae.txt', n_entries, N_CONSERVED, np.nan)
conserved_scores_cerevisiae_foldseek = _load_checkpoint('conserved_scores_cerevisiae.txt', n_entries, N_CONSERVED, np.nan)
conserved_e_vals_albicans_foldseek = _load_checkpoint('conserved_e_vals_albicans.txt', n_entries, N_CONSERVED, np.nan)
conserved_scores_albicans_foldseek = _load_checkpoint('conserved_scores_albicans.txt', n_entries, N_CONSERVED, np.nan)

try:
    # A single pass from the start index. An error or a manual interrupt no
    # longer restarts the whole search from entry 0; it propagates after the
    # progress made so far has been written to disk.
    for i in range(n_entries, min(N_CONSERVED, len(conserved_genes_uniprot))):
        score_cerevisiae, eval_cerevisiae, score_albicans, eval_albicans = scores_evals_from_uniprot(conserved_genes_uniprot[i])
        print(score_cerevisiae, eval_cerevisiae, score_albicans, eval_albicans, conserved_genes[i], conserved_genes_uniprot[i])

        conserved_e_vals_cerevisiae_foldseek[i] = eval_cerevisiae
        conserved_scores_cerevisiae_foldseek[i] = score_cerevisiae
        conserved_e_vals_albicans_foldseek[i] = eval_albicans
        conserved_scores_albicans_foldseek[i] = score_albicans
finally:
    np.savetxt('conserved_e_vals_cerevisiae_foldseek.txt', conserved_e_vals_cerevisiae_foldseek)
    np.savetxt('conserved_scores_cerevisiae_foldseek.txt', conserved_scores_cerevisiae_foldseek)
    np.savetxt('conserved_e_vals_albicans_foldseek.txt', conserved_e_vals_albicans_foldseek)
    np.savetxt('conserved_scores_albicans_foldseek.txt', conserved_scores_albicans_foldseek)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  867k    0  867k    0     0  8030k      0 --:--:-- --:--:-- --:--:-- 8110k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 22.6M    0 22.6M    0     0  26.0M      0 --:--:-- --:--:-- --:--:-- 26.1M


0 100 0 100 SPBC336.07 Q9UST7


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  136k  100  136k    0     0  2152k      0 --:--:-- --:--:-- --:--:-- 2196k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  522k    0  522k    0     0  6226k      0 --:--:-- --:--:-- --:--:-- 6289k


no albicans
523 1.863e-21 0 100 SPCC1450.04 O74173


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  241k  100  241k    0     0  3405k      0 --:--:-- --:--:-- --:--:-- 3448k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1567k    0 1567k    0     0  8726k      0 --:--:-- --:--:-- --:--:-- 8756k


1208 1.398e-32 1308 2.25e-34 SPAC17H9.17c O13814


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  675k    0  675k    0     0  3401k      0 --:--:-- --:--:-- --:--:-- 3413k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 14.1M    0 14.1M    0     0  21.0M      0 --:--:-- --:--:-- --:--:-- 21.0M


waiting
no cerevisiae
no albicans
0 100 0 100 SPAC56F8.03 Q10251


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 77111    0 77111    0     0   358k      0 --:--:-- --:--:-- --:--:--  360k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  139k    0  139k    0     0   869k      0 --:--:-- --:--:-- --:--:--  873k


waiting
waiting
waiting
waiting
no albicans
341 1.825e-11 0 100 SPCC4B3.11c Q9USK1


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  258k    0  258k    0     0  1456k      0 --:--:-- --:--:-- --:--:-- 1454k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1821k    0 1821k    0     0  6322k      0 --:--:-- --:--:-- --:--:-- 6347k


waiting
waiting
1407 1.831e-37 656 1.965e-20 SPAC23H3.05c O42858


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  386k    0  386k    0     0  1912k      0 --:--:-- --:--:-- --:--:-- 1913k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 4503k    0 4503k    0     0  12.0M      0 --:--:-- --:--:-- --:--:-- 12.0M


waiting
waiting
waiting
no albicans
1789 2.035e-65 0 100 SPBC660.13c Q92372


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  512k    0  512k    0     0  3132k      0 --:--:-- --:--:-- --:--:-- 3146k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 8364k    0 8364k    0     0  14.4M      0 --:--:-- --:--:-- --:--:-- 14.4M


waiting
waiting
waiting
no cerevisiae
no albicans
0 100 0 100 SPBC1706.01 O60132


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  218k    0  218k    0     0  1020k      0 --:--:-- --:--:-- --:--:-- 1024k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1297k    0 1297k    0     0  6279k      0 --:--:-- --:--:-- --:--:-- 6270k


waiting
waiting
no albicans
635 9.113e-22 0 100 SPAC1805.09c Q9UTG6


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  240k    0  240k    0     0   947k      0 --:--:-- --:--:-- --:--:--  949k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1577k    0 1577k    0     0  6967k      0 --:--:-- --:--:-- --:--:-- 6951k


waiting
waiting
waiting
1853 3.646e-44 1954 7.921e-46 SPAC31G5.09c P27638


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  526k    0  526k    0     0  2692k      0 --:--:-- --:--:-- --:--:-- 2697k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 8330k    0 8330k    0     0  14.8M      0 --:--:-- --:--:-- --:--:-- 14.8M


waiting
waiting
no albicans
3933 0 0 100 SPBC4.04c P40377


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  364k    0  364k    0     0  2401k      0 --:--:-- --:--:-- --:--:-- 2399k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3874k    0 3874k    0     0  13.7M      0 --:--:-- --:--:-- --:--:-- 13.7M


waiting
waiting
waiting
no albicans
1738 5.362e-55 0 100 SPCC553.09c O74946


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  185k    0  185k    0     0  1380k      0 --:--:-- --:--:-- --:--:-- 1382k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  930k    0  930k    0     0  4665k      0 --:--:-- --:--:-- --:--:-- 4652k


waiting
1012 8.725e-19 100 0.03981 SPAC959.02 Q9P4X4


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  270k    0  270k    0     0  1483k      0 --:--:-- --:--:-- --:--:-- 1491k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1946k    0 1946k    0     0  10.5M      0 --:--:-- --:--:-- --:--:-- 10.5M


waiting
1201 7.443e-35 958 3.678e-33 SPAC17G8.13c Q10325


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  202k    0  202k    0     0  1221k      0 --:--:-- --:--:-- --:--:-- 1220k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1080k    0 1080k    0     0  6057k      0 --:--:-- --:--:-- --:--:-- 6071k


waiting
1167 4.069e-32 33 1.829 SPBC17A3.08 Q9UUF1


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  688k    0  688k    0     0  3623k      0 --:--:-- --:--:-- --:--:-- 3622k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 14.4M    0 14.4M    0     0  22.1M      0 --:--:-- --:--:-- --:--:-- 22.1M


waiting
no albicans
1321 2.402e-60 0 100 SPAC25G10.07c P24339


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  378k    0  378k    0     0  2215k      0 --:--:-- --:--:-- --:--:-- 2226k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 4114k    0 4114k    0     0  15.1M      0 --:--:-- --:--:-- --:--:-- 15.1M


waiting
no cerevisiae
no albicans
0 100 0 100 SPBC1703.12 Q9P7V9


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  696k    0  696k    0     0  4375k      0 --:--:-- --:--:-- --:--:-- 4356k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13.4M    0 13.4M    0     0  19.9M      0 --:--:-- --:--:-- --:--:-- 20.0M


waiting
no albicans
6704 0 0 100 SPBC8D2.06 O13651


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  834k    0  834k    0     0  3390k      0 --:--:-- --:--:-- --:--:-- 3405k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 21.5M    0 21.5M    0     0  23.6M      0 --:--:-- --:--:-- --:--:-- 23.6M


waiting
no albicans
2920 1.971e-67 0 100 SPAC890.06 Q9URX8


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  179k    0  179k    0     0  1738k      0 --:--:-- --:--:-- --:--:-- 1744k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  874k    0  874k    0     0  5914k      0 --:--:-- --:--:-- --:--:-- 5949k


waiting
402 3.858e-15 163 2.447e-07 SPCC1494.08c O60082


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 86669    0 86669    0     0   565k      0 --:--:-- --:--:-- --:--:--  568k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  154k    0  154k    0     0  1011k      0 --:--:-- --:--:-- --:--:-- 1008k


waiting
no cerevisiae
no albicans
0 100 0 100 SPBC839.09c Q8WZK1


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  481k    0  481k    0     0  2779k      0 --:--:-- --:--:-- --:--:-- 2782k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 6304k    0 6304k    0     0  15.5M      0 --:--:-- --:--:-- --:--:-- 15.5M


waiting
3843 4.684e-83 3426 1.769e-75 SPBC660.07 O42893


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  175k    0  175k    0     0  1059k      0 --:--:-- --:--:-- --:--:-- 1057k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  768k    0  768k    0     0  5448k      0 --:--:-- --:--:-- --:--:-- 5492k


waiting
no albicans
1074 5.379e-28 0 100 SPAC4D7.04c O14171


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 98900    0 98900    0     0   724k      0 --:--:-- --:--:-- --:--:--  726k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  251k    0  251k    0     0  1029k      0 --:--:-- --:--:-- --:--:-- 1034k


waiting
no cerevisiae
no albicans
0 100 0 100 SPBC19C7.05 O60154


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  244k    0  244k    0     0  1810k      0 --:--:-- --:--:-- --:--:-- 1824k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1530k    0 1530k    0     0  7899k      0 --:--:-- --:--:-- --:--:-- 7931k


waiting
no albicans
2338 7.383e-58 0 100 SPBPB2B2.10c Q9HDU5


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  174k    0  174k    0     0  1520k      0 --:--:-- --:--:-- --:--:-- 1527k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  794k    0  794k    0     0  4496k      0 --:--:-- --:--:-- --:--:-- 4513k


waiting
800 3.053e-26 140 6.12e-05 SPBC119.06 O42899


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  285k    0  285k    0     0  1903k      0 --:--:-- --:--:-- --:--:-- 1906k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2250k    0 2250k    0     0  13.4M      0 --:--:-- --:--:-- --:--:-- 13.4M


waiting
no albicans
1715 7.989e-35 0 100 SPBC13A2.03 Q9P381


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  537k    0  537k    0     0  2820k      0 --:--:-- --:--:-- --:--:-- 2828k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 9231k    0 9231k    0     0  3087k      0 --:--:--  0:00:02 --:--:-- 3087k


waiting
waiting
no cerevisiae
no albicans
0 100 0 100 SPCC320.03 O59780


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  223k    0  223k    0     0  1578k      0 --:--:-- --:--:-- --:--:-- 1585k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1296k    0 1296k    0     0  3957k      0 --:--:-- --:--:-- --:--:-- 3953k


waiting
waiting
no albicans
1934 5.137e-54 0 100 SPAC890.07c Q9URX7


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  199k    0  199k    0     0  1265k      0 --:--:-- --:--:-- --:--:-- 1271k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1069k    0 1069k    0     0  5107k      0 --:--:-- --:--:-- --:--:-- 5095k


waiting
waiting
waiting
no albicans
640 8.901e-14 0 100 SPCC31H12.02c O74870


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  307k    0  307k    0     0  1389k      0 --:--:-- --:--:-- --:--:-- 1392k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 2436k    0 2436k    0     0  9073k      0 --:--:-- --:--:-- --:--:-- 9059k


waiting
waiting
waiting
1051 1.247e-29 564 3.54e-18 SPBC2D10.05 O74799


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  225k    0  225k    0     0   485k      0 --:--:-- --:--:-- --:--:--  485k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1480k    0 1480k    0     0  8034k      0 --:--:-- --:--:-- --:--:-- 8048k


waiting
waiting
517 1.481e-23 475 1.239e-23 SPCC550.02c O59800


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  495k    0  495k    0     0  3473k      0 --:--:-- --:--:-- --:--:-- 3486k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 7615k    0 7615k    0     0  15.4M      0 --:--:-- --:--:-- --:--:-- 15.4M


waiting
waiting
waiting
3756 6.093e-100 3223 3.641e-95 SPAC1834.11c Q9P7Q4


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  201k    0  201k    0     0  1033k      0 --:--:-- --:--:-- --:--:-- 1037k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1168k    0 1168k    0     0  5203k      0 --:--:-- --:--:-- --:--:-- 5285k


waiting
waiting
1830 2.986e-33 512 1.899e-11 SPBC530.10c Q09188


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  436k    0  436k    0     0  2422k      0 --:--:-- --:--:-- --:--:-- 2426k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 5238k    0 5238k    0     0  19.7M      0 --:--:-- --:--:-- --:--:-- 19.8M


waiting
waiting
waiting
no albicans
773 1.028e-20 0 100 SPBC691.05c O13621


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  156k    0  156k    0     0   921k      0 --:--:-- --:--:-- --:--:--  923k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  666k    0  666k    0     0  4077k      0 --:--:-- --:--:-- --:--:-- 4063k


waiting
waiting
waiting
125 0.001269 33 3.533 SPBC428.06c O94355


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1079k    0 1079k    0     0  7046k      0 --:--:-- --:--:-- --:--:-- 7099k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 37.7M    0 37.7M    0     0  25.5M      0 --:--:--  0:00:01 --:--:-- 25.5M


waiting
waiting
no cerevisiae
no albicans
0 100 0 100 SPBC119.07 O42900


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  188k    0  188k    0     0   940k      0 --:--:-- --:--:-- --:--:--  942k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  988k    0  988k    0     0  5480k      0 --:--:-- --:--:-- --:--:-- 5494k


waiting
waiting
waiting
1147 8.638e-23 572 3.483e-12 SPAC4G9.20c Q10248


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  687k    0  687k    0     0  4802k      0 --:--:-- --:--:-- --:--:-- 4809k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 14.9M    0 14.9M    0     0  17.8M      0 --:--:-- --:--:-- --:--:-- 17.8M


waiting
waiting
waiting
no cerevisiae
no albicans
0 100 0 100 SPBC23E6.09 O60184


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 75734    0 75734    0     0   599k      0 --:--:-- --:--:-- --:--:--  601k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  106k    0  106k    0     0   764k      0 --:--:-- --:--:-- --:--:--  766k


waiting
waiting
no albicans
52 0.4306 0 100 SPBC947.14c O43089


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  333k    0  333k    0     0  2063k      0 --:--:-- --:--:-- --:--:-- 2059k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3027k    0 3027k    0     0  11.3M      0 --:--:-- --:--:-- --:--:-- 11.4M


waiting
waiting
waiting
3076 4.633e-71 2871 3.635e-66 SPAC328.03 P40387


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 81566    0 81566    0     0   421k      0 --:--:-- --:--:-- --:--:--  421k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  127k    0  127k    0     0  1011k      0 --:--:-- --:--:-- --:--:-- 1022k


waiting
waiting
waiting
no albicans
670 8.638e-18 0 100 SPBC1734.14c P08463


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  646k    0  646k    0     0  3069k      0 --:--:-- --:--:-- --:--:-- 3065k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 12.6M    0 12.6M    0     0  20.0M      0 --:--:-- --:--:-- --:--:-- 20.1M


waiting
0 100 0 100 SPBC336.15 Q1MTN3


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  197k    0  197k    0     0  1257k      0 --:--:-- --:--:-- --:--:-- 1265k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  989k    0  989k    0     0  4161k      0 --:--:-- --:--:-- --:--:-- 4156k


waiting
1624 1.032e-39 1697 1.056e-40 SPBC11B10.09 P04551


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  128k    0  128k    0     0  86386      0 --:--:--  0:00:01 --:--:-- 86953
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  406k    0  406k    0     0  2312k      0 --:--:-- --:--:-- --:--:-- 2320k


waiting
no albicans
352 2.057e-08 0 100 SPBC409.21 Q9UUA4


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 88289    0 88289    0     0   372k      0 --:--:-- --:--:-- --:--:--  374k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  177k    0  177k    0     0  1348k      0 --:--:-- --:--:-- --:--:-- 1358k


waiting
no albicans
224 2.264e-06 0 100 SPCC14G10.03c O74416


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  245k    0  245k    0     0  1444k      0 --:--:-- --:--:-- --:--:-- 1453k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1661k    0 1661k    0     0  8890k      0 --:--:-- --:--:-- --:--:-- 8932k


waiting
824 1.055e-22 849 1.055e-22 SPBC354.03 O43017


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  493k    0  493k    0     0  2691k      0 --:--:-- --:--:-- --:--:-- 2699k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 6975k    0 6975k    0     0  16.7M      0 --:--:-- --:--:-- --:--:-- 16.7M


waiting
no albicans
1603 1.067e-45 0 100 SPBC1706.03 Q9USY7


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  227k    0  227k    0     0  1784k      0 --:--:-- --:--:-- --:--:-- 1789k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1501k    0 1501k    0     0  9065k      0 --:--:-- --:--:-- --:--:-- 9098k


waiting
1384 1.314e-28 590 2.026e-13 SPAC688.09 Q9P6L7


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  341k    0  341k    0     0  2576k      0 --:--:-- --:--:-- --:--:-- 2585k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3494k    0 3494k    0     0  20.7M      0 --:--:-- --:--:-- --:--:-- 20.8M


no albicans
109 0.0005444 0 100 SPAC22H10.07 P40996


In [104]:
# Persist the four conserved-gene Foldseek arrays (e-values and scores for
# S. cerevisiae and C. albicans) as plain-text files in the working directory.
_conserved_foldseek_files = (
    ('conserved_e_vals_cerevisiae_foldseek.txt', conserved_e_vals_cerevisiae_foldseek),
    ('conserved_scores_cerevisiae_foldseek.txt', conserved_scores_cerevisiae_foldseek),
    ('conserved_e_vals_albicans_foldseek.txt', conserved_e_vals_albicans_foldseek),
    ('conserved_scores_albicans_foldseek.txt', conserved_scores_albicans_foldseek),
)
for _out_name, _out_array in _conserved_foldseek_files:
    np.savetxt(_out_name, _out_array)

# Plotting the results



In [341]:
# Assemble one long-format table in which every gene appears once per
# (species, data source) combination. The four combinations are stacked in
# this order, and inside each one the unique genes come first, followed by
# the first N_CONSERVED conserved genes:
#   1. S. cerevisiae / BlastP
#   2. S. cerevisiae / Foldseek
#   3. C. albicans   / BlastP
#   4. C. albicans   / Foldseek
N_CONSERVED = 50      # number of conserved genes included in the comparison
N_COMBINATIONS = 4    # 2 species x 2 data sources

n_unique = len(unique_genes)
# Derived from the slice so the label arrays cannot drift out of step with the data.
n_conserved = len(conserved_genes[:N_CONSERVED])
block_len = n_unique + n_conserved

# Identifiers are the same in every combination, so build one block and repeat it.
all_uniprot_ids = np.tile(
    np.concatenate((unique_genes_uniprot, conserved_genes_uniprot[:N_CONSERVED])),
    N_COMBINATIONS,
)
all_gene_names = np.tile(
    np.concatenate((unique_genes, conserved_genes[:N_CONSERVED])),
    N_COMBINATIONS,
)

# Measurements differ per combination; the order must match the list above.
all_scores = np.concatenate((
    unique_scores_cerevisiae, conserved_scores_cerevisiae[:N_CONSERVED],
    unique_scores_cerevisiae_foldseek, conserved_scores_cerevisiae_foldseek[:N_CONSERVED],
    unique_scores_albicans, conserved_scores_albicans[:N_CONSERVED],
    unique_scores_albicans_foldseek, conserved_scores_albicans_foldseek[:N_CONSERVED],
))
all_e_vals = np.concatenate((
    unique_e_vals_cerevisiae, conserved_e_vals_cerevisiae[:N_CONSERVED],
    unique_e_vals_cerevisiae_foldseek, conserved_e_vals_cerevisiae_foldseek[:N_CONSERVED],
    unique_e_vals_albicans, conserved_e_vals_albicans[:N_CONSERVED],
    unique_e_vals_albicans_foldseek, conserved_e_vals_albicans_foldseek[:N_CONSERVED],
))

# Label columns, laid out to match the stacking order above.
unique_conserved = np.tile(
    np.concatenate((np.full(n_unique, 'unique'), np.full(n_conserved, 'conserved'))),
    N_COMBINATIONS,
)
# BlastP block, Foldseek block, then the same pair again for the second species.
data_sources = np.tile(np.repeat(['BlastP', 'Foldseek'], block_len), 2)
# Species: S. cerevisiae for the first two blocks, C. albicans for the last two.
species = np.repeat(['S. cerevisiae', 'C. albicans'], 2 * block_len)

# One row per (gene, species, data source).
df = pd.DataFrame({
    'uniprot_id': all_uniprot_ids,
    'gene_name': all_gene_names,
    'score': all_scores,
    'e_value': all_e_vals,
    'unique_conserved': unique_conserved,
    'data_source': data_sources,
    'species': species,
})


In [377]:
# Gene identifier -> label for the 14 genes that get their own colour in the
# plots (three entries have no separate label and map to themselves).
pombe_floc_dict = dict([
    ("SPAC186.01", "pfl9"),
    ("SPAC1F8.06", "pfl8"),
    ("SPBC359.04c", "pfl7"),
    ("SPAC977.07c", "pfl6"),
    ("SPBC1289.15", "pfl5"),
    ("SPCC188.09c", "pfl4"),
    ("SPBC947.04", "pfl3"),
    ("SPAP11E10.02c", "mam3"),
    ("SPAPB15E9.01c", "pfl2"),
    ("SPAPB2C8.01", "SPAPB2C8.01"),
    ("SPBC1348.08c", "SPBC1348.08c"),
    ("SPBC21D10.06c", "map4"),
    ("SPBPJ4664.02", "SPBPJ4664.02"),
    ("SPCC1742.01", "gsf2"),
])

# Tag each row by whether its gene_name is one of the dictionary keys; the
# resulting strings are what end up as legend entries.
_in_floc_set = df["gene_name"].isin(list(pombe_floc_dict))
df["color"] = ["True (n=14)" if _hit else "False" for _hit in _in_floc_set]



In [378]:
# Normalise missing and extreme values so every row has a plottable number.
# NOTE: this overwrites df['e_value'] and df['score'] in place.
E_VALUE_CEILING = 100      # also used as the placeholder for a missing e-value
E_VALUE_FLOOR = 10e-150    # i.e. 1e-149; smaller e-values (including 0) are raised to this

# Missing e-value -> ceiling, missing score -> 0
# (presumably rows where no hit was found; confirm against the search cells).
df['e_value'] = df['e_value'].fillna(E_VALUE_CEILING)
df['score'] = df['score'].fillna(0)

# Clamp e-values into [E_VALUE_FLOOR, E_VALUE_CEILING] in one vectorised step.
df['e_value'] = df['e_value'].clip(lower=E_VALUE_FLOOR, upper=E_VALUE_CEILING)

# Save the cleaned table (without the index column).
df.to_csv("analysis.csv", index=False)

In [4]:
# Load the analysis table used by the plotting and inspection cells below.
# NOTE(review): the preceding cell writes "analysis.csv", whereas this cell reads
# "final_analysis.csv" — confirm which file is intended and how the latter is produced.
df = pd.read_csv("final_analysis.csv")

In [10]:
import bokeh.io
from bokeh.plotting import figure, show
from bokeh.models import Text
from scipy import stats
import iqplot
import pandas as pd

# Your DataFrame (df) and other data

# Create the plot
p = iqplot.stripbox(df, q="score", cats=["data_source", "species", "unique_conserved"], color_column="color", q_axis="y", frame_width=500, frame_height=400, spread="jitter", jitter_kwargs={'width': 0.6},
                    marker_kwargs=dict(size=8, alpha=0.5), box_kwargs=dict(line_color="black", line_width=2), whisker_kwargs=dict(line_color="black", line_width=2), median_kwargs=dict(line_color="black", line_width=2), show_legend=True)

# Customizations
p.xaxis.major_label_orientation = 1.2
p.xaxis.axis_label_text_font_size = '15pt'
p.xaxis.major_label_text_font_size = '15pt'
p.yaxis.axis_label_text_font_size = '15pt'
p.xaxis.major_label_text_font_size = '15pt'
p.yaxis.major_label_text_font_size = '15pt'
p.xaxis.axis_label_text_font_style = 'normal'
p.yaxis.axis_label_text_font_style = 'normal'
p.yaxis.axis_label = 'Score'
p.legend.title = 'Flocculin genes'

# Calculate p-values
p_vals = []
p_vals_floc = []
species_data_sources = [
    ('C. albicans', 'BlastP'),
    ('S. cerevisiae', 'BlastP'),
    ('C. albicans', 'Foldseek'),
    ('S. cerevisiae', 'Foldseek')

]

for species, data_source in species_data_sources:
    unique_scores = df[(df['unique_conserved'] == 'unique') & (df['species'] == species) & (df['data_source'] == data_source)]['score']
    unique_floc_scores = df[(df['unique_conserved'] == 'unique') & (df['species'] == species) & (df['data_source'] == data_source) & (df['color'] == "True (n=14)")]['score']
    conserved_scores = df[(df['unique_conserved'] == 'conserved') & (df['species'] == species) & (df['data_source'] == data_source)]['score']
    p_val = stats.mannwhitneyu(unique_scores, conserved_scores, alternative='two-sided')[1]
    p_vals.append(p_val)
    p_val_floc = stats.mannwhitneyu(unique_floc_scores, conserved_scores, alternative='two-sided')[1]
    p_vals_floc.append(p_val_floc)

# Create the text labels
p_vals_labels = ["p = {:.3e}".format(p_val) for p_val in p_vals]
p_vals_labels_floc = ["p = {:.3e}".format(p_val_floc) for p_val_floc in p_vals_floc]

# Adjust text label positions
x_positions = [1.2,4,7,9.8]  # Corresponding to the species and data_source combinations

y_positions = [4100,4300,4100,4300]


# Add text labels to the plot
for x, y, label in zip(x_positions, y_positions, p_vals_labels):
    text = Text(x=x, y=y+250, text=[label], text_font_size='12pt', text_align='center', text_baseline='middle', text_color='black', text_alpha=1)
    p.add_glyph(text)

for x, y, label in zip(x_positions, y_positions, p_vals_labels_floc):
    text = Text(x=x, y=y, text=[label], text_font_size='12pt', text_align='center', text_baseline='middle', text_color='orange', text_alpha=1)
    p.add_glyph(text)


# Fix the y-axis to 0-4800; the p-value labels added above sit at y = 4100-4550,
# i.e. inside this range.
p.y_range = bokeh.models.Range1d(0, 4800)
# Switch the figure to the SVG backend *before* it is wrapped: after the next
# line `p` no longer refers to the figure but to a column layout containing it.
p.output_backend = "svg"
p = bokeh.layouts.column(p)

# Configure Bokeh for inline notebook output. Note that this call alone does not
# display anything, and no show() call follows in the lines below.
bokeh.io.output_notebook()  # only relevant when running inside a Jupyter Notebook
# NOTE(review): Bokeh's export_* helpers typically need selenium plus a webdriver
# installed -- confirm the environment before re-running.
# Write the layout to disk as SVG and PNG under root + "Figures/All figures/".

bokeh.io.export_svg(p, filename=root + "Figures/All figures/blast_p_foldseek_results.svg")
bokeh.io.export_png(p, filename=root + "Figures/All figures/blast_p_foldseek_results.png")


'/Users/bencekover/Library/CloudStorage/OneDrive-Personal/MSci Bahler lab/S.-Pombe-MLPs - Github/Figures/All figures/blast_p_foldseek_results.png'

In [438]:
# "Weird" results are rows whose e-value disagrees with their unique/conserved
# label at a 10^-5 cutoff: labelled 'unique' yet e-value below the cutoff, or
# labelled 'conserved' yet e-value above it.
e_value_cutoff = 10**-5

labelled_unique = df['unique_conserved'] == 'unique'
labelled_conserved = df['unique_conserved'] == 'conserved'
below_cutoff = df['e_value'] < e_value_cutoff
above_cutoff = df['e_value'] > e_value_cutoff

weird_results = df[(labelled_unique & below_cutoff) | (labelled_conserved & above_cutoff)]

# BlastP subset of the weird results (both kinds of disagreement)
from_blastp = weird_results['data_source'] == 'BlastP'
weird_results_blastp = weird_results[from_blastp]

# Foldseek subset: only the "unique but e-value below the cutoff" case
from_foldseek = df['data_source'] == 'Foldseek'
weird_results_foldseek = df[labelled_unique & below_cutoff & from_foldseek]
weird_results_foldseek


Unnamed: 0,uniprot_id,gene_name,score,e_value,unique_conserved,data_source,species,color
78,Q10211,SPAC4H3.03c,323.0,2.916e-10,unique,Foldseek,S. cerevisiae,False


In [439]:
# Display the BlastP rows whose e-value disagrees with their unique/conserved label
weird_results_blastp

Unnamed: 0,uniprot_id,gene_name,score,e_value,unique_conserved,data_source,species,color
8,Q9C0Y2,SPAPB2C8.01,166.0,5.32609e-10,unique,BlastP,S. cerevisiae,True (n=14)
10,Q9URU4,SPBC1289.15,279.0,2.8662200000000005e-23,unique,BlastP,S. cerevisiae,True (n=14)
12,O74308,SPBC15D4.02,142.0,7.38255e-08,unique,BlastP,S. cerevisiae,False
15,O43019,SPBC354.05c,191.0,8.83257e-14,unique,BlastP,S. cerevisiae,False
24,O94532,SPCC895.05,336.0,7.41169e-30,unique,BlastP,S. cerevisiae,False
60,O94355,SPBC428.06c,70.0,9.33,conserved,BlastP,S. cerevisiae,False
64,O43089,SPBC947.14c,50.0,100.0,conserved,BlastP,S. cerevisiae,False
67,Q1MTN3,SPBC336.15,66.0,100.0,conserved,BlastP,S. cerevisiae,False
69,Q9UUA4,SPBC409.21,66.0,22.9384,conserved,BlastP,S. cerevisiae,False
158,Q9C0Y2,SPAPB2C8.01,250.0,7.473729999999999e-20,unique,BlastP,C. albicans,True (n=14)


In [440]:
# How many times does each gene_name occur in the BlastP weird results?
# A count of 2 means the gene appears in two rows of weird_results_blastp.
# NOTE(review): the saved output of this cell sums to 23 entries, whereas the
# table saved for the previous cell shows only 10 rows -- the table may have been
# truncated on export or the outputs may come from different runs; re-run both
# cells on a fresh kernel to confirm.
weird_results_blastp['gene_name'].value_counts()

SPAPB2C8.01     2
SPBC15D4.02     2
SPBC354.05c     2
SPCC895.05      2
SPBC428.06c     2
SPBC947.14c     2
SPBC336.15      2
SPBC409.21      2
SPBC1289.15     1
SPBPJ4664.02    1
SPBC336.07      1
SPBC1706.01     1
SPCC1494.08c    1
SPBC839.09c     1
SPBC19C7.05     1
Name: gene_name, dtype: int64