In [1]:
#Author: Hana Jaafari
#Date: April 28, 2020
#The objective of this script is to set up, on the local machine, the files for a protein family's
#parent protein-pseudogene pairs so that AWSEM simulations can then be run on Nots.

import sys
import requests
import os
import subprocess
from shutil import copyfile
from datetime import datetime

In [2]:
#These are filepaths for my local desktop.
root_directory="/Users/hanajaafari/Desktop"
hmm_script_repository=os.path.join(root_directory,"hmmer-3.2.1/src/hmmer-3.2.1/src")
ncbi_blast_repository=os.path.join(root_directory,"ncbi-blast-2.10.0+/bin")

##These are filepaths for Nots.
#root_directory="/scratch/hkj1"
#hmm_script_repository=os.path.join(root_directory,"hmmer/hmmer-3.3/src")
#ncbi_blast_repository="/opt/apps/software/MPI/GCC/8.3.0/OpenMPI/3.1.4/BLAST+/2.9.0/bin"

#Every path below is derived from root_directory, so switching machines only requires changing the block above.
time_devolution_directory=os.path.join(root_directory,"Time-Devolution-Calc")
openawsem_repository=os.path.join(root_directory,"openawsem")
past_Nots_files=os.path.join(time_devolution_directory,"DCA_Energy_Calculations/11_09_2019_Nots_Pseudogene_Analysis")
family_hmm_repository=os.path.join(past_Nots_files,"11_09_2019_Nots_hmm_files")
family_pseudogene_aligned_files_repository= os.path.join(past_Nots_files,"11_27_2019_family_aligned_pseudogenes")
family_parentprotein_aligned_files_repository= os.path.join(past_Nots_files,"11_09_2019_Nots_ParentProtein_MSA")
family_parentprotein_pseudogene_sequences_repository=os.path.join(past_Nots_files,"11_27_2019_renamed_pseudogene_parentprotein_sequence_fasta")
parentsequences_DCAEnergy_files_respository=os.path.join(past_Nots_files,"11_09_2019_Nots_ParentGene_Energies")

#This is the directory collecting all of the families' results from this run. Its name starts with today's
#date (month_day_year), so a run on a new day gets a new directory.
families_analysis_repository=os.path.join(
    time_devolution_directory,
    "AWSEM_Energy_Calculations/"
    f"{datetime.today().strftime('%m_%d_%Y')}"
    "_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates")

#Create the run directory together with any missing parent directories. An already existing directory is
#fine; any other failure is reported instead of being silently swallowed.
try:
    os.makedirs(families_analysis_repository,exist_ok=True)
except OSError as error:
    print(f"Could not create {families_analysis_repository}: {error}")

In [3]:
#This is a modification of an excerpt from the openawsem script '/Users/hanajaafari/Desktop/openawsem-master/helperFunctions/MultCha_prepFrags_index.py'
#that blasts the pdb sequence. This function will use psiblast to find the known full structure of the given
#parent protein sequence if it exists.
def blast_pdb(protein_identifier,protein_sequence,open_awsem,psiblast_repository):
    """Return the psiblast hits of a protein sequence against the OpenAWSEM 'pdbDB' database.

    Results are cached in './<protein_identifier>_psiblast_identified_structures.txt' (relative to the
    current working directory). When that file exists it is parsed and psiblast is not run again.

    Parameters:
        protein_identifier: label used for the query fasta file, the cache file and the log messages.
        protein_sequence: sequence written to './<protein_identifier>_sequence.fasta' as the query.
        open_awsem: directory that contains the 'pdbDB' blast database.
        psiblast_repository: directory that contains the 'psiblast' executable.

    Returns:
        A list of [subject id, percent identity, query coverage] entries, one per hit. The list is empty
        when psiblast reports no hits or could not be run.
    """
    import shlex

    hits_file=f"./{protein_identifier}_psiblast_identified_structures.txt"
    pdb_ID_log=[]
    if not os.path.exists(hits_file):
        print("#####")
        print(f"Blasting {protein_identifier} to Find Its Complete Structures")
        print("#####")
        print("The protein sequence is", protein_sequence)
        query_file=f"./{protein_identifier}_sequence.fasta"
        with open(query_file,"w") as output:
            output.write(">{}\n{}\n".format(protein_identifier,protein_sequence))

        database=os.path.join(open_awsem,"pdbDB")
        # blast the whole sequence to identify homologs Evalue 0.005
        # The command is an argument list (no shell), so paths containing spaces or shell characters are safe.
        command=[f"{psiblast_repository}/psiblast","-num_iterations","1","-word_size","3","-evalue","0.005",
                 "-outfmt","6 sseqid slen bitscore score evalue pident qcovs","-matrix","BLOSUM62",
                 "-db",database,"-query",query_file]
        print("finding homologs")
        print("executing::: " + " ".join(shlex.quote(argument) for argument in command))
        try:
            completed=subprocess.run(command,stdout=subprocess.PIPE,universal_newlines=True)
        except OSError as error:
            # e.g. the psiblast executable is missing; report it and continue with no hits.
            print(f"Could not run psiblast: {error}")
            homoOut=""
        else:
            homoOut=completed.stdout
            if completed.returncode != 0:
                print(f"psiblast exited with status {completed.returncode}")

        hit_rows=[]
        for line in homoOut.splitlines():
            entries = line.split()
            print(f"homologs: {entries}")
            if entries:
                hit_rows.append(entries)
                # Column 5 is 'pident' and column 6 is 'qcovs' in the -outfmt string above.
                pdb_ID_log.append([entries[0],float(entries[5]),float(entries[6])])
        # The cache is only written when there are hits, and only after every row parsed successfully.
        if hit_rows:
            with open(hits_file,"a") as f:
                for entries in hit_rows:
                    f.write("{}\n".format(entries))
    else:
        with open(hits_file,"r") as f:
            for line in f:
                # Each cached line is the printed form of a Python list; drop the list punctuation.
                fields=line.replace("'","").replace(",","").replace("[","").replace("]","").split()
                if not fields:
                    continue  # tolerate blank lines in the cache file
                pdb_ID_log.append([fields[0],float(fields[5]),float(fields[6])])
    print(f"The homologs for {protein_identifier} are: {pdb_ID_log}")
    return pdb_ID_log

In [4]:
#This function extracts the PDB crystal structures that meet or exceed the percentage identity to a given parent
#protein sequence. If there is not a crystal structure for the parent protein with 100% sequence identity,
#then the parent protein structure will be predicted using AWSEM.
def find_best_pdb_for_sequence(protein_identifier,blast_results,pdb_database,open_awsem):
    """Pick the first exact-match X-ray structure among the blast hits and download its PDB file.

    The result is cached in './<protein_identifier>_psiblast_identified_x-ray_diffraction_structure.txt';
    when that file exists the cached identifier is returned without any lookup or download.

    Parameters:
        protein_identifier: label used for the cache file and the log messages.
        blast_results: list of [subject id, percent identity, query coverage], as returned by blast_pdb.
        pdb_database: directory into which '<PDB ID>.pdb' is downloaded.
        open_awsem: directory holding 'notExistPDBsList', the list of PDB IDs that failed to download.

    Returns:
        The upper-cased subject id (e.g. '1NFB_B') of the selected structure, or None when no hit has
        100% identity and 100% coverage, or none of those hits qualifies.
    """
    cache_file=f"./{protein_identifier}_psiblast_identified_x-ray_diffraction_structure.txt"
    qualified_pdb=None
    if not os.path.exists(cache_file):
        print("#####")
        print(f"Finding Qualifying Structure for {protein_identifier}")
        print("#####")
        if not blast_results:
            return None
        qualified_blast_results=[[i,j,k] for [i,j,k] in blast_results if j == 100.0 and k==100.0]
        if not qualified_blast_results:
            print("None of the blast hits are exact matches!")
            return None
        # Imported only here so that the cached and no-hit paths do not require pypdb or network access.
        import pypdb as py
        import urllib.request
        unavailable_list=f"{open_awsem}/notExistPDBsList"
        #We now loop over to find entries whose crystal structure is determined through X-ray diffraction.
        #Once such an entry has been downloaded, we break from the loop below.
        for [pdb_result,pdb_result_percent_identity,pdb_result_sequence_coverage] in qualified_blast_results:
            pdb_code=pdb_result[:4]
            pdb_information=py.describe_pdb(pdb_code)
            # NOTE(review): assumes describe_pdb returns a flat dict whose values include the experimental
            # method string - confirm for the installed pypdb version.
            if "X-RAY DIFFRACTION" not in pdb_information.values():
                continue
            if pdb_code in _read_text_or_empty(unavailable_list):
                continue  # an earlier download of this entry failed; do not try again
            try:
                urllib.request.urlretrieve(f"https://files.rcsb.org/download/{pdb_code.upper()}.pdb",
                                           f"{pdb_database}/{pdb_code.upper()}.pdb")
            except Exception:
                print(":::Cannot build PDB for PDB ID, failed to download:" + pdb_code)
                _append_line(unavailable_list,pdb_code)
                continue
            # NOTE(review): upper() also upper-cases the chain suffix of the subject id - confirm that
            # lower-case chain identifiers never occur in the database.
            qualified_pdb=pdb_result.upper()
            with open(cache_file,"a") as f:
                f.write("{} {} \n".format(qualified_pdb,pdb_result_percent_identity))
            print("Run was successful")
            break
    else:
        with open(cache_file,"r") as f:
            for line in f:
                fields=line.split()
                if fields:
                    qualified_pdb=fields[0]
    if not qualified_pdb:
        print(f"There are no qualifying crystal PDB structures for {protein_identifier}")
        return None
    print(f"The crystal structure of {protein_identifier} is: {qualified_pdb}")
    return qualified_pdb


def _read_text_or_empty(file_path):
    """Return the whole text of file_path, or '' when the file does not exist yet."""
    try:
        with open(file_path,"r") as handle:
            return handle.read()
    except FileNotFoundError:
        return ""


def _append_line(file_path,text):
    """Append text plus a newline to file_path; a failure to write is reported, not raised."""
    try:
        with open(file_path,"a") as handle:
            handle.write(f"{text}\n")
    except OSError as error:
        print(f"Could not update {file_path}: {error}")

In [5]:
#We set up the necessary files to run the AWSEM simulation on Nots.
def AWSEM_calculations_setup(protein_identifier,open_awsem,parent_protein_folder,pdb_database,
                             fragment_memory,existing_structure,copy_ssweight):
    """Run OpenAWSEM's mm_create_project.py in the current working directory.

    './<protein_identifier>_openawsem_output.txt' receives the script's standard output and doubles as
    the "already done" marker: when it exists, nothing is run. If the script cannot be started or exits
    with a non-zero status, that file is renamed to './<protein_identifier>_openawsem_output_failed.txt'
    so that the next call tries again instead of skipping the step.

    Parameters:
        protein_identifier: project name; with existing_structure its first four characters name the
            PDB file and any remaining characters are passed to the script's '--c' option.
        open_awsem: directory that contains mm_create_project.py.
        parent_protein_folder: folder from which 'ssweight' and 'ssweight.stride' are copied.
        pdb_database: directory that contains '<PDB ID>.pdb' (used when existing_structure is true).
        fragment_memory: when true, '--frag' is passed to the script.
        existing_structure: when true the project is built from the PDB file, otherwise from
            './<protein_identifier>.fasta' with '--predict_ssweight_from_fasta'.
        copy_ssweight: when true, the parent's ssweight files overwrite the ones in this folder.
    """
    log_file=f"./{protein_identifier}_openawsem_output.txt"
    if os.path.exists(log_file):
        return
    print("#####")
    print(f"Preparing OpenAWSEM Files for {protein_identifier}")
    print("#####")

    commands=["python",f"{open_awsem}/mm_create_project.py"]
    if existing_structure:
        commands.extend([f"{pdb_database}/{protein_identifier[:4]}.pdb", "--extended"])
        # NOTE(review): for an identifier such as '1NFB_B' this passes '_B' (with the underscore) as the
        # chain argument - confirm that mm_create_project.py expects that form.
        if protein_identifier[4:]:
            commands.extend(["--c", f"{protein_identifier[4:]}"])
    else:
        commands.extend([f"./{protein_identifier}.fasta", "--predict_ssweight_from_fasta"])
    if fragment_memory:
        commands.append("--frag")
    print("The commands are: ")
    print(commands)

    project_created=False
    try:
        with open(log_file,"w") as output:
            # assumes mm_create_project.py exits with status 0 on success - TODO confirm
            project_created=subprocess.call(commands,stdout=output) == 0
    finally:
        if not project_created and os.path.exists(log_file):
            failed_log=f"./{protein_identifier}_openawsem_output_failed.txt"
            os.replace(log_file,failed_log)
            print(f"mm_create_project.py did not finish successfully; its output was kept in {failed_log}")
    if not project_created:
        return

    #In order to ensure that the difference in AWSEM energy is due to changes in the contact energy,
    #we copy the ssweight and ssweight.stride files below from the parent folder (these can change the AWSEM rama term)
    if copy_ssweight:
        copyfile(f"{parent_protein_folder}/ssweight.stride","./ssweight.stride")
        copyfile(f"{parent_protein_folder}/ssweight","./ssweight")

In [6]:
#Here we align the parent protein crystal structure sequence to the family MSA and mutate it in regions of
#difference between the aligned parent ENSEMBL sequence and the pseudogene sequence.
def align_parentprotein_structure_sequence(protein_family, protein_file_name, hmm_files,family_hmm_files):
    """Align './<first four characters of protein_file_name>.fasta' to the family HMM with hmmalign.

    The alignment is written to './<protein_file_name>_aligned.sto'. When that file already exists
    nothing is run. If hmmalign fails, the partial output file is removed, so that a later call runs the
    alignment again, and subprocess.CalledProcessError is raised.

    Parameters:
        protein_family: name of the family; '<family_hmm_files>/<protein_family>.hmm' is the HMM used.
        protein_file_name: base name for the output file; its first four characters name the input fasta.
        hmm_files: directory that contains the 'hmmalign' executable.
        family_hmm_files: directory that contains the family HMM files.
    """
    import shlex

    aligned_file=f"./{protein_file_name}_aligned.sto"
    if os.path.exists(aligned_file):
        return
    print("#####")
    print(f"Aligning {protein_file_name} to {protein_family} Family MSA")
    print("#####")
    #The fasta file is created during the AWSEM energy calculations by "mm_create_project.py"
    # NOTE(review): only the first four characters of protein_file_name are used for the fasta name,
    # which fits a PDB-style name - confirm this is intended for other file names.
    cmd=" ".join(shlex.quote(part) for part in (
        f"{hmm_files}/hmmalign",
        f"{family_hmm_files}/{protein_family}.hmm",
        f"./{protein_file_name[:4]}.fasta"))
    try:
        with open(aligned_file, "wb", 0) as output_file:
            subprocess.check_call(cmd, stdout=output_file,shell=True)
    except BaseException:
        # Do not leave an empty or partial alignment behind: its existence would skip every retry.
        if os.path.exists(aligned_file):
            os.remove(aligned_file)
        raise

In [7]:
#The function below transforms files from the stockholm to the fasta format.
def pseudofam_convert_stockholm_to_fasta(input_file_name, output_file_name=None):
    """Rewrite a Stockholm alignment file as FASTA using Biopython's AlignIO.

    Parameters:
        input_file_name: path of the Stockholm file to read.
        output_file_name: path of the FASTA file to write. When omitted, a trailing '.sto' of the input
            name is replaced by '.fasta' ('.fasta' is simply appended when the name has no such suffix).
    """
    from Bio import AlignIO

    if output_file_name is None:
        # Remove the '.sto' suffix as a whole; str.rstrip(".sto") would instead strip any trailing
        # '.', 's', 't' and 'o' characters and could eat into the file name itself.
        suffix=".sto"
        stem=input_file_name[:-len(suffix)] if input_file_name.endswith(suffix) else input_file_name
        output_file_name = stem + ".fasta"

    # Plain "r" already gives universal newlines in Python 3; the old "rU" mode was removed in 3.11.
    with open(input_file_name, "r") as input_handle, open(output_file_name, "w") as output_handle:
        alignments = AlignIO.parse(input_handle, "stockholm")
        AlignIO.write(alignments, output_handle, "fasta")

In [8]:
def pseudofam_get_sublist_complement(list_name, indices_list):
    """Return the elements of list_name whose positions are NOT listed in indices_list.

    Parameters:
        list_name: the sequence to filter.
        indices_list: positions (0-based) to leave out.

    Returns:
        A new list with the remaining elements in their original order.
    """
    # A set makes every membership test constant time; fall back to the given container if some
    # entry is not hashable.
    try:
        excluded = set(indices_list)
    except TypeError:
        excluded = indices_list
    return [element for position, element in enumerate(list_name) if position not in excluded]

In [9]:
def pseudofam_filter_inserts_and_gaps(input_file_name, gap_threshold=0.25):
    """Remove insert columns from a FASTA alignment and drop sequences with too many gaps.

    A column is treated as an insert column when any record has a lowercase character in it (a
    character that differs from its own uppercase form; '-' therefore never marks a column). The
    filtered records are written to '<input name without .fasta>_gaps_and_inserts_filtered_up.fasta'.

    Parameters:
        input_file_name: path of the FASTA alignment to read.
        gap_threshold: a filtered sequence is written only if its fraction of '-' is below this value.

    Returns:
        (removed_residues, unaligned_mask, aligned_mask) computed on the LAST record of the alignment
        after its '-' characters were removed: the lowercase residues, their positions, and the
        positions of all remaining characters. All three are empty lists for an empty alignment.
    """
    from Bio import AlignIO

    # Plain "r" already gives universal newlines in Python 3; the old "rU" mode was removed in 3.11.
    with open(input_file_name, "r") as input_handle:
        alignment = AlignIO.read(input_handle, "fasta")

    # Remove the '.fasta' suffix as a whole; str.rstrip(".fasta") strips a set of characters instead.
    suffix=".fasta"
    stem=input_file_name[:-len(suffix)] if input_file_name.endswith(suffix) else input_file_name
    output_file_name=stem+"_gaps_and_inserts_filtered_up.fasta"

    insert_columns=set()
    removed_residues,unaligned_mask,aligned_mask=[],[],[]
    for record in alignment:
        residues=list(record.seq)
        #HJ: This logs the index of every character that is not equal to its uppercase (so essentially
        #any lowercase letter, which represents an unaligned letter). This mask should not remove
        #aligned sequences.
        insert_columns.update(column for column, x in enumerate(residues) if x != x.upper())

        #I define indexes of both the preserved and removed residues in the aligned sequence.
        #This is necessary to "create" a pseudogene sequence from the parent pdb sequence with the
        #substitutions found when comparing the two aligned sequences.
        sequence=[x for x in residues if x != "-"]
        print("The sequence without dashes is ", sequence)
        unaligned_mask=[position for position, x in enumerate(sequence) if x != x.upper()]
        removed_residues=[x for x in sequence if x != x.upper()]
        aligned_mask=[position for position, x in enumerate(sequence) if x == x.upper()]

    sequences_passed_threshold = 0
    with open(output_file_name, "w") as output_handle:
        for record in alignment:
            # Same filtering as pseudofam_get_sublist_complement, done inline against the shared set.
            aligned_sequence=[x for column, x in enumerate(record.seq) if column not in insert_columns]
            if not aligned_sequence:
                continue  # every column was an insert column; nothing to score or write
            if float(aligned_sequence.count("-"))/len(aligned_sequence) < gap_threshold:
                output_handle.write(">%s\n" % record.id + "".join(aligned_sequence)+"\n")
                sequences_passed_threshold += 1
    print("%d sequences passed threshold." % sequences_passed_threshold)
    return removed_residues, unaligned_mask, aligned_mask

In [10]:
#This function seeks to find the substitutions (ignoring any dashes) between the pseudogene and the parent
#structure sequence. This function then builds a pseudogene structure sequence that is the parent structure
#sequence featuring these substitutions.
def mutate_and_map_pdb_sequence(protein_file_name,pseudogene_identifier,protein_family,removed_residues,
                                unaligned_mask,aligned_mask,aligned_protein_repository):
    """Build the pseudogene sequence by applying its substitutions to the parent structure sequence.

    The aligned pseudogene sequence is the line following the last line containing pseudogene_identifier in
    '<aligned_protein_repository>/<protein_family>_pseudogenes_aligned_gaps_and_inserts_filtered_up.fasta';
    the aligned parent sequence is the line following the last '>' line in
    './<protein_file_name>_aligned_gaps_and_inserts_filtered_up.fasta'.

    Parameters:
        protein_file_name: base name of the parent's filtered alignment file in the current directory.
        pseudogene_identifier: text identifying the pseudogene record in the family alignment.
        protein_family: family name used to build the family alignment file name.
        removed_residues, unaligned_mask, aligned_mask: the three values returned by
            pseudofam_filter_inserts_and_gaps for the parent sequence.
        aligned_protein_repository: directory holding the family's filtered pseudogene alignment.

    Returns:
        (sequence, count): the rebuilt full-length sequence and the number of substituted positions.

    Raises:
        ValueError: if either aligned sequence cannot be found in its file.
    """
    #Here we extract the aligned pseudogene sequence and the aligned parent structure sequence
    aligned_protein_sequence=None
    pseudogene_alignment_file=f"{aligned_protein_repository}/{protein_family}_pseudogenes_aligned_gaps_and_inserts_filtered_up.fasta"
    with open(pseudogene_alignment_file,"r") as f:
        for line in f:
            if pseudogene_identifier in line:
                # Strip the line ending so that it is never compared or counted as a residue.
                aligned_protein_sequence=next(f,"").strip()
                print("The aligned pseudogene sequence is: {} \n".format(aligned_protein_sequence))
    if aligned_protein_sequence is None:
        raise ValueError(f"{pseudogene_identifier} was not found in {pseudogene_alignment_file}")

    aligned_parent_structure_sequence=None
    parent_alignment_file=f"./{protein_file_name}_aligned_gaps_and_inserts_filtered_up.fasta"
    with open(parent_alignment_file,"r") as f:
        for line in f:
            if ">" in line:
                aligned_parent_structure_sequence=next(f,"").strip()
                print("The aligned parent sequence is: {} \n".format(aligned_parent_structure_sequence))
    if aligned_parent_structure_sequence is None:
        raise ValueError(f"No sequence record was found in {parent_alignment_file}")

    print("The removed residues for the parent are: {} \n".format(removed_residues))
    print("The indices of the removed residues for the parent are: {} \n".format(unaligned_mask))
    print("The indices of the remaining residues for the parent are: {} \n".format(aligned_mask))
    if len(aligned_parent_structure_sequence) != len(aligned_protein_sequence):
        print("Warning: the aligned parent and pseudogene sequences differ in length; "
              "only the shared leading columns are compared.")
    # A substitution is a column where both sequences have a residue (no dash) and the residues differ.
    substitutions_index=[column for column, (parent_residue, pseudogene_residue) in
                         enumerate(zip(aligned_parent_structure_sequence,aligned_protein_sequence))
                         if parent_residue != pseudogene_residue and
                         parent_residue != "-" and pseudogene_residue != "-"]
    print("The substitution indices are: {} \n".format(substitutions_index))
    #Here we mutate the parent structure sequence based on substitutions identified from comparing
    #the aligned sequences (indices determined immediately above)
    mutated_aligned_residues=list(aligned_parent_structure_sequence)
    for column in substitutions_index:
        mutated_aligned_residues[column]=aligned_protein_sequence[column]
    aligned_parent_structure_sequence_with_gaps="".join(mutated_aligned_residues)
    print("The mutated, aligned parent/homolog sequence is (with gaps): {} \n".format(aligned_parent_structure_sequence_with_gaps))
    aligned_parent_structure_sequence_without_gaps=aligned_parent_structure_sequence_with_gaps.replace("-","")
    print("The mutated, aligned parent/homolog sequence is (without gaps): {} \n".format(aligned_parent_structure_sequence_without_gaps))

    #Here we "build" the pseudogene structure sequence: the unaligned (insert) residues of the parent go
    #back to their original positions, upper-cased, and the mutated aligned residues fill the rest.
    mutated_structure_sequence=[None]*(len(unaligned_mask)+len(aligned_mask))
    for (position, residue) in zip(unaligned_mask,removed_residues):
        mutated_structure_sequence[position]=residue.upper()
    for (position, residue) in zip(aligned_mask,aligned_parent_structure_sequence_without_gaps):
        mutated_structure_sequence[position]=residue
    pseudogene_sequence="".join(mutated_structure_sequence)
    print("The pseudogene sequence is: {} \n".format(pseudogene_sequence))

    return pseudogene_sequence,len(substitutions_index)

In [11]:
def protein_pairs_AWSEM_setup(parent_identifier,protein_family,fragment_memory,
                              open_awsem,hmm_files,family_hmm_files,psiblast_repository,family_repository,
                              aligned_pseudogene_repository,aligned_parentprotein_repository):
    """Look for a complete crystal structure of one parent protein and record it if one is found.

    The function creates '<family_repository>/<parent_identifier>', changes the working directory to
    it, downloads the parent sequence from the Ensembl REST service, blasts it against the PDB database
    and, when a qualifying structure exists, appends '<family> <parent> <structure>' to
    '../../protein_family_parent_protein_crystal_structures.txt' (relative to the parent folder).
    If the sequence cannot be downloaded, the failure is printed and the parent is skipped.

    Parameters:
        parent_identifier: Ensembl identifier of the parent protein.
        protein_family: name of the protein family the parent belongs to.
        open_awsem: OpenAWSEM directory (holds 'pdbDB', 'PDBs' and 'notExistPDBsList').
        psiblast_repository: directory that contains the 'psiblast' executable.
        family_repository: folder of the protein family, in which the parent folder is created.
        fragment_memory, hmm_files, family_hmm_files, aligned_pseudogene_repository,
        aligned_parentprotein_repository: not used by the active code; they are only referenced by the
            commented-out steps that follow.
    """
    #We begin by setting up for the pair's parent protein AWSEM calculation.
    parent_protein_folder=os.path.join(family_repository,parent_identifier)
    os.makedirs(parent_protein_folder,exist_ok=True)
    os.chdir(parent_protein_folder)
    #Here we find the homologs for the parent sequence after extracting it below. As these are sequences I haven't
    #analyzed before, I need to retrieve them from Ensembl.
    server = f"https://rest.ensembl.org/sequence/id/{parent_identifier}?"
    try:
        r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
        # Without this check the body of an error response would be blasted as if it were a sequence.
        r.raise_for_status()
    except requests.RequestException as error:
        print(f"Could not retrieve the sequence of {parent_identifier} from Ensembl: {error}")
        return
    parent_sequence=r.text.strip()

    #We check to see if the parent protein has a complete crystal structure in PDB.
    psiblast_PDB_results=blast_pdb(parent_identifier,parent_sequence,open_awsem,psiblast_repository)
    qualified_PDB_id=find_best_pdb_for_sequence(parent_identifier,psiblast_PDB_results,os.path.join(open_awsem,"PDBs"),open_awsem)
    if qualified_PDB_id:
        with open("../../protein_family_parent_protein_crystal_structures.txt","a") as f:
            f.write("{} {} {} \n".format(protein_family,parent_identifier, qualified_PDB_id))
    #     if qualified_PDB_id:
    #         #Here we modify the PDB file to include only the relevant chains and remove any heteratoms.
    #         file_name=qualified_PDB_id; existing_structure=True
    #     else:
    #         file_name=f"{parent_identifier}_sequence"; existing_structure=False

    #     AWSEM_calculations_setup(file_name,open_awsem,os.getcwd(),os.path.join(open_awsem,"PDBs"),
    #                              fragment_memory,existing_structure,copy_ssweight=False)                                                                                               
    #     #The following lines of code align the parent protein sequence to the family MSA and identify substitution 
    #     #sites between the pseudogene and aligned parent sequence (mapping it to the full parent protein sequence).
    #     align_parentprotein_structure_sequence(protein_family, file_name, hmm_files,family_hmm_files)
    #     pseudofam_convert_stockholm_to_fasta(f"./{file_name}_aligned.sto")            
    #     removed_residues,removed_residue_indices,remaining_residue_indices = pseudofam_filter_inserts_and_gaps(f"./{file_name}_aligned.fasta",
    #                                                                                                              gap_threshold=.25)
    #     pseudogene_sequence, parent_protein_pseudogene_substitution_count= mutate_and_map_pdb_sequence(file_name,
    #                                                                                                    pseudogene_identifier,
    #                                                                                                    protein_family,
    #                                                                                                    removed_residues,
    #                                                                                                    removed_residue_indices,
    #                                                                                                    remaining_residue_indices,
    #                                                                                                    aligned_pseudogene_repository)


    #     ###
    #     #Now we begin setting up for the pair's pseudogene AWSEM calculation.
    #     try:
    #         pseudogene_folder=os.path.join(family_repository,pseudogene_identifier)
    #         os.mkdir(pseudogene_folder)            
    #     except OSError:
    #         pass
    #     os.chdir(pseudogene_folder)
    #     with open(f"./{pseudogene_identifier}.fasta","w") as f:
    #         f.write(">{}\n{}".format(pseudogene_identifier,pseudogene_sequence))
    #     AWSEM_calculations_setup(pseudogene_identifier,open_awsem,parent_protein_folder,os.path.join(open_awsem,"PDBs"),
    #                              fragment_memory,existing_structure=False,copy_ssweight=True)

    #     ###
    #     with open(f"{family_repository}/{protein_family}_parentprotein_pseudogene_pair_substitutions_AWSEM_energies.txt", "a") as f:
    #         f.write("{} {} {} \n".format(parent_identifier,pseudogene_identifier,str(parent_protein_pseudogene_substitution_count)))


In [12]:
def family_protein_pair_AWSEM_setup(protein_family,parent_protein,fragment_memory,
                                    AWSEM_results_directory=families_analysis_repository,
                                    open_awsem=openawsem_repository,hmm_files=hmm_script_repository,
                                    family_hmm_files=family_hmm_repository,psiblast_repository=ncbi_blast_repository,
                                    aligned_pseudogene_repository=family_pseudogene_aligned_files_repository,
                                    aligned_parentprotein_repository=family_parentprotein_aligned_files_repository):
    """Prepare the folder of one protein family and run the setup for its parent protein.

    The family folder '<AWSEM_results_directory>/<protein_family>' is created if needed and becomes the
    working directory; everything else is delegated to protein_pairs_AWSEM_setup. The keyword
    parameters default to the notebook's configured paths.
    """
    banner="#####"
    print(banner)
    print(f"Beginning Setup for AWSEM Calculations of {protein_family} Pseudogene-Parent Protein Pair")
    print(banner)

    #All files produced for the selected protein family are stored in its own folder.
    print(AWSEM_results_directory)
    protein_family_repository=os.path.join(AWSEM_results_directory,protein_family)
    try:
        os.mkdir(protein_family_repository)
    except OSError:
        #Any OSError (for example, the folder already existing) is ignored here.
        pass
    os.chdir(protein_family_repository)

    protein_pairs_AWSEM_setup(parent_identifier=parent_protein,
                              protein_family=protein_family,
                              fragment_memory=fragment_memory,
                              open_awsem=open_awsem,
                              hmm_files=hmm_files,
                              family_hmm_files=family_hmm_files,
                              psiblast_repository=psiblast_repository,
                              family_repository=protein_family_repository,
                              aligned_pseudogene_repository=aligned_pseudogene_repository,
                              aligned_parentprotein_repository=aligned_parentprotein_repository)

### The protein families listed below are extracted from the results Michael found in the PsiDR data. These are families with more than one pseudogene sharing the same parent protein, with indications that the pseudogenes can be translated.

In [13]:
#Each protein family is mapped to the identifier of its parent protein.
qualified_protein_families={
    "PF05649":"ENSP00000302051",
    "PF03719":"ENSP00000341885",
    "PF01431":"ENSP00000302051",
    "PF00333":"ENSP00000341885",
}
os.chdir(families_analysis_repository)

#Run the setup once per family, without fragment memory.
for family_name,parent_protein_identifier in qualified_protein_families.items():
    family_protein_pair_AWSEM_setup(family_name,parent_protein_identifier,False)
            

#####
Beginning Setup for AWSEM Calculations of PF05649 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000302051 to Find Its Complete Structures
#####
The protein sequence is MEPPYSLTAHYDEFQEVKYVSRCGAGGARGASLPPGFPLGAARSATGARSGLPRWNRREVCLLSGLVFAAGLCAILAAMLALKYLGPVAAGGGACPEGCPERKAFARAARFLAANLDASIDPCQDFYSFACGGWLRRHAIPDDKLTYGTIAAIGEQNEERLRRLLARPGGGPGGAAQRKVRAFFRSCLDMREIERLGPRPMLEVIEDCGGWDLGGAEERPGVAARWDLNRLLYKAQGVYSAAALFSLTVSLDDRNSSRYVIRIDQDGLTLPERTLYLAQDEDSEKILAAYRVFMERVLSLLGADAVEQKAQEILQVEQQLANITVSEHDDLRRDVSSMYNKVTLGQLQKITPHLRWKWLLDQIFQEDFSEEEEVVLLATDYMQQVSQLIRSTPHRVLHNYLVWRVVVVLSEHLSPPFREALHELAQEMEGSDKPQELARVCLGQANRHFGMALGALFVHEHFSAASKAKVQQLVEDIKYILGQRLEELDWMDAETRAAARAKLQYMMVMVGYPDFLLKPDAVDKEYEFEVHEKTYFKNILNSIRFSIQLSVKKIRQEVDKSTWLLPPQALNAYYLPNKNQMVFPAGILQPTLYDPDFPQSLNYGGIGTIIGHELTHGYDDWGGQYDRSGNLLHWWTEAS

There are no qualifying crystal PDB structures for ENSP00000341885
#####
Beginning Setup for AWSEM Calculations of PF01431 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000302051 to Find Its Complete Structures
#####
The protein sequence is MEPPYSLTAHYDEFQEVKYVSRCGAGGARGASLPPGFPLGAARSATGARSGLPRWNRREVCLLSGLVFAAGLCAILAAMLALKYLGPVAAGGGACPEGCPERKAFARAARFLAANLDASIDPCQDFYSFACGGWLRRHAIPDDKLTYGTIAAIGEQNEERLRRLLARPGGGPGGAAQRKVRAFFRSCLDMREIERLGPRPMLEVIEDCGGWDLGGAEERPGVAARWDLNRLLYKAQGVYSAAALFSLTVSLDDRNSSRYVIRIDQDGLTLPERTLYLAQDEDSEKILAAYRVFMERVLSLLGADAVEQKAQEILQVEQQLANITVSEHDDLRRDVSSMYNKVTLGQLQKITPHLRWKWLLDQIFQEDFSEEEEVVLLATDYMQQVSQLIRSTPHRVLHNYLVWRVVVVLSEHLSPPFREALHELAQEMEGSDKPQELARVCLGQANRHFGMALGALFVHEHFSAASKAKVQQLVEDIKYILGQRLEELDWMDAETRAAARAKLQYMMVMVGYPDFLLKPDAVDKEYEFEVHEKTYFKNILNSIRFSIQLSVKKIRQEVDKSTWLLPPQALNA

There are no qualifying crystal PDB structures for ENSP00000341885


### As shown above, none of these parent proteins has a complete crystal structure. I repeat the process, this time over a larger set of protein families, to find any family whose parent protein has a complete crystal structure.

In [14]:
#Second run over a larger set of protein families. As in the previous cell, each key is a protein family
#and each value is the identifier of its parent protein; several families share the same parent protein.
qualified_protein_families={'PF15433': 'ENSP00000315397', 'PF00578': 'ENSP00000298510', 'PF10417': 'ENSP00000298510', 'PF00478': 'ENSP00000321584', 'PF00571': 'ENSP00000321584', 'PF13639': 'ENSP00000426503', 'PF00160': 'ENSP00000361918', 'PF00956': 'ENSP00000417919', 'PF01431': 'ENSP00000302051', 'PF05649': 'ENSP00000302051', 'PF00501': 'ENSP00000350012', 'PF00400': 'ENSP00000359149', 'PF05191': 'ENSP00000371230', 'PF00406': 'ENSP00000371230', 'PF03953': 'ENSP00000326042', 'PF00091': 'ENSP00000326042', 'PF07686': 'ENSP00000312158', 'PF11627': 'ENSP00000333504', 'PF00076': 'ENSP00000350877', 'PF00333': 'ENSP00000341885', 'PF03719': 'ENSP00000341885', 'PF03465': 'ENSP00000353741', 'PF03463': 'ENSP00000353741', 'PF03464': 'ENSP00000353741', 'PF10409': 'ENSP00000361021', 'PF00782': 'ENSP00000380530', 'PF00581': 'ENSP00000380530', 'PF00038': 'ENSP00000293308', 'PF16208': 'ENSP00000293308', 'PF01652': 'ENSP00000389624', 'PF11717': 'ENSP00000312244', 'PF05712': 'ENSP00000312244', 'PF02023': 'ENSP00000355053', 'PF13405': 'ENSP00000362807', 'PF13499': 'ENSP00000362807', 'PF00935': 'ENSP00000404375', 'PF03946': 'ENSP00000354739', 'PF00298': 'ENSP00000354739', 'PF14904': 'ENSP00000432790', 'PF17832': 'ENSP00000360365', 'PF01472': 'ENSP00000360365', 'PF00198': 'ENSP00000335304', 'PF00364': 'ENSP00000335304', 'PF01246': 'ENSP00000260443', 'PF12937': 'ENSP00000359149', 'PF07654': 'ENSP00000351621', 'PF00012': 'ENSP00000432884', 'PF08075': 'ENSP00000409773', 'PF12934': 'ENSP00000418823', 'PF12933': 'ENSP00000418823', 'PF03066': 'ENSP00000377408', 'PF00248': 'ENSP00000235835', 'PF00179': 'ENSP00000429419', 'PF00118': 'ENSP00000275603', 'PF13837': 'ENSP00000355053', 'PF00096': 'ENSP00000355053', 'PF00210': 'ENSP00000273550', 'PF03143': 'ENSP00000339063', 'PF03144': 'ENSP00000339063', 'PF00009': 'ENSP00000339063', 'PF01187': 'ENSP00000385714', 'PF01459': 'ENSP00000265333', 'PF01015': 'ENSP00000424547', 'PF01283': 'ENSP00000348849', 'PF00615': 'ENSP00000426255', 'PF00787': 
'ENSP00000426255', 'PF08628': 'ENSP00000426255', 'PF02194': 'ENSP00000426255', 'PF01158': 'ENSP00000378081', 'PF01248': 'ENSP00000230050', 'PF00723': 'ENSP00000362646', 'PF03357': 'ENSP00000223500', 'PF13765': 'ENSP00000312158', 'PF00622': 'ENSP00000312158', 'PF10559': 'ENSP00000243253', 'PF00344': 'ENSP00000243253', 'PF01199': 'ENSP00000378163', 'PF06229': 'ENSP00000226798', 'PF00071': 'ENSP00000387286'}
#Start from the run's results directory; the setup function changes the working directory per family.
os.chdir(families_analysis_repository)

#Run the setup once per family; the third argument (fragment_memory) is False for every family.
for i in qualified_protein_families:
    family_protein_pair_AWSEM_setup(i,qualified_protein_families[i],False)

#####
Beginning Setup for AWSEM Calculations of PF15433 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000315397 to Find Its Complete Structures
#####
The protein sequence is MFPRVSTFLPLRPLSRHPLSSGSPETSAAAIMLLTVRHGTVRYRSSALLARTKNNIQRYFGTNSVICSKKDKQSVRTEETSKETSESQDSEKENTKKDLLGIIKGMKVELSTVNVRTTKPPKRRPLKSLEATLGRLRRATEYAPKKRIEPLSPELVAAASAVADSLPFDKQTTKSELLSQLQQHEEESRAQRDAKRPKISFSNIISDMKVARSATARVRSRPELRIQFDEGYDNYPGQEKTDDLKKRKNIFTGKRLNIFDMMAVTKEAPETDTSPSLWDVEFAKQLATVNEQPLQNGFEELIQWTKEGKLWEFPINNEAGFDDDGSEFHEHIFLEKHLESFPKQGPIRHFMELVTCGLSKNPYLSVKQKVEHIEWFRNYFNEKKDILKESNIQFN
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/op

#####
Blasting ENSP00000298510 to Find Its Complete Structures
#####
The protein sequence is MAAAVGRLLRASVARHVSAIPWGISATAALRPAACGRTSLTNLLCSGSSQAKLFSTSSSCHAPAVTQHAPYFKGTAVVNGEFKDLSLDDFKGKYLVLFFYPLDFTFVCPTEIVAFSDKANEFHDVNCEVVAVSVDSHFSHLAWINTPRKNGGLGHMNIALLSDLTKQISRDYGVLLEGSGLALRGLFIIDPNGVIKHLSVNDLPVGRSVEETLRLVKAFQYVETHGEVCPANWTPDSPTIKPSPAASKEYFQKVNQ
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000298510_sequence.fasta
homologs: ['5jcg_I', '201', '408', '1048', '1.18e-145', '100.000', '76']
homologs: ['5jcg_H', '201', '408', '1048', '1.18e-145', '100.000', '76']
homologs: ['5jcg_G', '201', '408', '1048', '1.18e-145', '100.000', '76']
homologs: ['5jcg_F', '201', '408', '1048', '1.18e-145', '100.000', '76']
homologs: ['5jcg_E', '201', '408', '1048', '1.18e-145', '100.

#####
Blasting ENSP00000321584 to Find Its Complete Structures
#####
The protein sequence is MADYLISGGTSYVPDDGLTAQQLFNCGDGLTYNDFLILPGYIDFTADQVDLTSALTKKITLKTPLVSSPMDTVTEAGMAIAMALTGGIGFIHHNCTPEFQANEVRKVKKYEQGFITDPVVLSPKDRVRDVFEAKARHGFCGIPITDTGRMGSRLVGIISSRDIDFLKEEEHDCFLEEIMTKREDLVVAPAGITLKEANEILQRSKKGKLPIVNEDDELVAIIARTDLKKNRDYPLASKDAKKQLLCGAAIGTHEDDKYRLDLLAQAGVDVVVLDSSQGNSIFQINMIKYIKDKYPNLQVIGGNVVTAAQAKNLIDAGVDALRVGMGSGSICITQEVLACGRPQATAVYKVSEYARRFGVPVIADGGIQNVGHIAKALALGASTVMMGSLLAATTEAPGEYFFSDGIRLKKYRGMGSLDAMDKHLSSQNRYFSEADKIKVAQGVSGAVQDKGSIHKFVPYLIAGIQHSCQDIGAKSLTQVRAMMYSGELKFEKRTSSAQVEGGVHSLHSYEKRLF
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000321584_sequence.fasta
homologs: ['6udq_H', '519', '1056', '2730', '0.0', '100.000', '100']
homologs: ['6udq_D', '519'

Run was successful
The crystal structure of ENSP00000321584 is: 1NFB_B
#####
Beginning Setup for AWSEM Calculations of PF00571 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000321584 to Find Its Complete Structures
#####
The protein sequence is MADYLISGGTSYVPDDGLTAQQLFNCGDGLTYNDFLILPGYIDFTADQVDLTSALTKKITLKTPLVSSPMDTVTEAGMAIAMALTGGIGFIHHNCTPEFQANEVRKVKKYEQGFITDPVVLSPKDRVRDVFEAKARHGFCGIPITDTGRMGSRLVGIISSRDIDFLKEEEHDCFLEEIMTKREDLVVAPAGITLKEANEILQRSKKGKLPIVNEDDELVAIIARTDLKKNRDYPLASKDAKKQLLCGAAIGTHEDDKYRLDLLAQAGVDVVVLDSSQGNSIFQINMIKYIKDKYPNLQVIGGNVVTAAQAKNLIDAGVDALRVGMGSGSICITQEVLACGRPQATAVYKVSEYARRFGVPVIADGGIQNVGHIAKALALGASTVMMGSLLAATTEAPGEYFFSDGIRLKKYRGMGSLDAMDKHLSSQNRYFSEADKIKVAQGVSGAVQDKGSIHKFVPYLIAGIQHSCQDIGAKSLTQVRAMMYSGELKFEKRTSSAQVEGGVHSLHSYEKRLF
finding homologs
executing::: /Users/hanajaafari/Desk

Run was successful
The crystal structure of ENSP00000321584 is: 1NFB_B
#####
Beginning Setup for AWSEM Calculations of PF13639 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000426503 to Find Its Complete Structures
#####
The protein sequence is MSTRKRRGGAINSRQAQKRTREATSTPEISLEAEPIELVETAGDEIVDLTCESLEPVVVDLTHNDSVVIVDERRRPRRNARRLPQDHADSCVVSSDDEELSRDRDVYVTTHTPRNARDEGATGLRPSGTVSCPICMDGYSEIVQNGRLIVSTECGHVFCSQCLRDSLKNANTCPTCRKKINHKRYHPIYI
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000426503_sequence.fasta
homologs: ['4ppe_B', '76', '154', '389', '2.77e-48', '97.260', '38']
homologs: [

#####
Blasting ENSP00000417919 to Find Its Complete Structures
#####
The protein sequence is MSLPESPHSPATLDYALEDPHQGQRSREKSKATEVMADMFDGRLEPIVFPPPRLPEEGVAPQDPADGGHTFHILVDAGRSHGAIKAGQEVTPPPAEGLEAASASLTTDGSLKNGFPGEETHGLGGEKALETCGAGRSESEVIAEGKAEDVKPEECAMFSAPVDEKPGGEEMDVAEENRAIDEVNREAGPGPGPGPLNVGLHLNPLESIQLELDSVNAEADRALLQVERRFGQIHEYYLEQRNDIIRNIPGFWVTAFRHHPQLSAMIRGQDAEMLSYLTNLEVKELRHPRTGCKFKFFFQRNPYFRNKLIVKVYEVRSFGQVVSFSTLIMWRRGHGPQSFIHRNRHVICSFFTWFSDHSLPESDRIAQIIKEDLWSNPLQYYLLGEDAHRARRRLVREPVEIPRPFGFQCG
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000417919_sequence.fasta
homologs: ['2e50_Q', '225', '153', '386', '3.85e-43', '37.766', '44']
homologs: ['2e50_P', '225', '153', '386', '3.85e-43', '37.766', '44']
homologs: ['2e50_B', '225', '153', '386', '3.85e-43', '37.7

#####
Blasting ENSP00000350012 to Find Its Complete Structures
#####
The protein sequence is MNNHVSSKPSTMKLKHTINPILLYFIHFLISLYTILTYIPFYFFSESRQEKSNRIKAKPVNSKPDSAYRSVNSLDGLASVLYPGCDTLDKVFTYAKNKFKNKRLLGTREVLNEEDEVQPNGKIFKKVILGQYNWLSYEDVFVRAFNFGNGLQMLGQKPKTNIAIFCETRAEWMIAAQACFMYNFQLVTLYATLGGPAIVHALNETEVTNIITSKELLQTKLKDIVSLVPRLRHIITVDGKPPTWSEFPKGIIVHTMAAVEALGAKASMENQPHSKPLPSDIAVIMYTSGSTGLPKGVMISHSNIIAGITGMAERIPELGEEDVYIGYLPLAHVLELSAELVCLSHGCRIGYSSPQTLADQSSKIKKGSKGDTSMLKPTLMAAVPEIMDRIYKNVMNKVSEMSSFQRNLFILAYNYKMEQISKGRNTPLCDSFVFRKVRSLLGGNIRLLLCGGAPLSATTQRFMNICFCCPVGQGYGLTESAGAGTISEVWDYNTGRVGAPLVCCEIKLKNWEEGGYFNTDKPHPRGEILIGGQSVTMGYYKNEAKTKADFFEDENGQRWLCTGDIGEFEPDGCLKIIDRKKDLVKLQAGEYVSLGKVEAALKNLPLVDNICAYANSYHSYVIGFVVPNQKELTELARKKGLKGTWEELCNSCEMENEVLKVLSEAAISASLEKFEIPVKIRLSPEPWTPETGLVTDAFKLKRKELKTHYQADIERMYGRK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident 

#####
Blasting ENSP00000359149 to Find Its Complete Structures
#####
The protein sequence is MGSQGRSGPPGNGGPGEGEGGEARKLQEGRVARGKRRKGKGKGKARAGQGGRGSGAEGKPGPQTAKEAAGPGADAGARACPREEAEGGRSVEEGARGIVKGVEGSAGAGKEAQGREYGKKEEWRVRARRREGARPGRAQGRGGQAWADIAGTGVAMAAAAGEEEEEEEAARESAARPAAGPALWRLPEELLLLICSYLDMRALGRLAQVCRWLRRFTSCDLLWRRIARASLNSGFTRLGTDLMTSVPVKERVKVSQNWRLGRCREGILLKWRCSQMPWMQLEDDSLYISQANFILAYQFRPDGASLNRRPLGVFAGHDEDVCHFVLANSHIVSAGGDGKIGIHKIHSTFTVKYSAHEQEVNCVDCKGGIIVSGSRDRTAKVWPLASGRLGQCLHTIQTEDRVWSIAISPLLSSFVTGTACCGHFSPLRIWDLNSGQLMTHLGSDFPPGAGVLDVMYESPFTLLSCGYDTYVRYWDLRTSVRKCVMEWEEPHDSTLYCLQTDGNHLLATGSSYYGVVRLWDRRQRACLHAFPLTSTPLSSPVYCLRLTTKHLYAALSYNLHVLDFQNP
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000359149_sequence.fasta
homologs: ['2ovr_B', '445', '71.2', '173',

#####
Blasting ENSP00000371230 to Find Its Complete Structures
#####
The protein sequence is MGASARLLRAVIMGAPGSGKGTVSSRITTHFELKHLSSGDLLRDNMLRGTEIGVLAKAFIDQGKLIPDDVMTRLALHELKNLTQYSWLLDGFPRTLPQAEALDRAYQIDTVINLNVPFEVIKQRLTARWIHPASGRVYNIEFNPPKTVGIDDLTGEPLIQREDDKPETVIKRLKAYEDQTKPVLEYYQKKGVLETFSGTETNKIWPYVYAFLQTKVPQRSQKASVTP
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000371230_sequence.fasta
homologs: ['1zd8_A', '227', '468', '1203', '2.21e-169', '100.000', '100']
homologs: ['2ak3_B', '226', '438', '1127', '6.48e-158', '92.478', '99']
homologs: ['2ak3_A', '226', '438', '1127', '6.48e-158', '92.478', '99']
homologs: ['2bbw_B', '246', '286', '731', '3.32e-97', '59.729', '97']
homologs: ['2bbw_A', '246', '286', '731', '3.32e-97', '59.729', '97']
homologs: ['2ar7_B', '24

Run was successful
The crystal structure of ENSP00000371230 is: 1ZD8_A
#####
Beginning Setup for AWSEM Calculations of PF00406 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000371230 to Find Its Complete Structures
#####
The protein sequence is MGASARLLRAVIMGAPGSGKGTVSSRITTHFELKHLSSGDLLRDNMLRGTEIGVLAKAFIDQGKLIPDDVMTRLALHELKNLTQYSWLLDGFPRTLPQAEALDRAYQIDTVINLNVPFEVIKQRLTARWIHPASGRVYNIEFNPPKTVGIDDLTGEPLIQREDDKPETVIKRLKAYEDQTKPVLEYYQKKGVLETFSGTETNKIWPYVYAFLQTKVPQRSQKASVTP
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000371230_sequence.fasta
homologs: ['1zd8_A', '227', '468', '1203', 

Run was successful
The crystal structure of ENSP00000371230 is: 1ZD8_A
#####
Beginning Setup for AWSEM Calculations of PF03953 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000326042 to Find Its Complete Structures
#####
The protein sequence is MRECISIHVGQAGVQIGNACWELYCLEHGIQPDGQMPSDKTIGGGDDSFNTFFSETGAGKHVPRAVFVDLEPTVVDEVRTGTYRQLFHPEQLITGKEDAANNYARGHYTIGKEIVDLVLDRIRKLADLCTGLQGFLIFHSFGGGTGSGFASLLMERLSVDYGKKSKLEFAIYPAPQVSTAVVEPYNSILTTHTTLEHSDCAFMVDNEAIYDICRRNLDIERPTYTNLNRLIGQIVSSITASLRFDGALNVDLTEFQTNLVPYPRIHFPLATYAPVISAEKAYHEQLSVAEITNACFEPANQMVKCDPRHGKYMACCMLYRGDVVPKDVNAAIATIKTKRTIQFVDWCPTGFKVGINYQPPTVVPGGDLAKVQRAVCMLSNTTAIAEAWARLDHKFDLMYAKRAFVHWYVGEGMEEGEFSEAREDLAALEKDYEEVGVDSVEAEAEEGEEY
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size

#####
Blasting ENSP00000326042 to Find Its Complete Structures
#####
The protein sequence is MRECISIHVGQAGVQIGNACWELYCLEHGIQPDGQMPSDKTIGGGDDSFNTFFSETGAGKHVPRAVFVDLEPTVVDEVRTGTYRQLFHPEQLITGKEDAANNYARGHYTIGKEIVDLVLDRIRKLADLCTGLQGFLIFHSFGGGTGSGFASLLMERLSVDYGKKSKLEFAIYPAPQVSTAVVEPYNSILTTHTTLEHSDCAFMVDNEAIYDICRRNLDIERPTYTNLNRLIGQIVSSITASLRFDGALNVDLTEFQTNLVPYPRIHFPLATYAPVISAEKAYHEQLSVAEITNACFEPANQMVKCDPRHGKYMACCMLYRGDVVPKDVNAAIATIKTKRTIQFVDWCPTGFKVGINYQPPTVVPGGDLAKVQRAVCMLSNTTAIAEAWARLDHKFDLMYAKRAFVHWYVGEGMEEGEFSEAREDLAALEKDYEEVGVDSVEAEAEEGEEY
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000326042_sequence.fasta
homologs: ['5kx5_C', '451', '924', '2389', '0.0', '97.561', '100']
homologs: ['5kx5_A', '451', '924', '2389', '0.0', '97.561', '100']
homologs: ['4x20_C', '451

#####
Blasting ENSP00000312158 to Find Its Complete Structures
#####
The protein sequence is MESAAALHFSRPASLLLLLLSLCALVSAQFIVVGPTDPILATVGENTTLRCHLSPEKNAEDMEVRWFRSQFSPAVFVYKGGRERTEEQMEEYRGRTTFVSKDISRGSVALVIHNITAQENGTYRCYFQEGRSYDEAILHLVVAGLGSKPLISMRGHEDGGIRLECISRGWYPKPLTVWRDPYGGVAPALKEVSMPDADGLFMVTTAVIIRDKSVRNMSCSINNTLLGQKKESVIFIPESFMPSVSPCAVALPIIVVILMIPIAVCIYWINKLQKEKKILSGEKEFERETREIALKELEKERVQKEEELQVKEKLQEELRWRRTFLHAVDVVLDPDTAHPDLFLSEDRRSVRRCPFRHLGESVPDNPERFDSQPCVLGRESFASGKHYWEVEVENVIEWTVGVCRDSVERKGEVLLIPQNGFWTLEMHKGQYRAVSSPDRILPLKESLCRVGVFLDYEAGDVSFYNMRDRSHIYTCPRSAFSVPVRPFFRLGCEDSPIFICPALTGANGVTVPEEGLTLHRVGTHQSL
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000312158_sequence.fasta
homologs: ['4hh8_A', '224', '218', '556', '5.12e-67', '50.704', '40']
homologs: ['

#####
Blasting ENSP00000333504 to Find Its Complete Structures
#####
The protein sequence is MSKSESPKEPEQLRKLFIGGLSFETTDESLRSHFEQWGTLTDCVVMRDPNTKRSRGFGFVTYATVEEVDAAMNARPHKVDGRVVEPKRAVSREDSQRPGAHLTVKKIFVGGIKEDTEEHHLRDYFEQYGKIEVIEIMTDRGSGKKRGFAFVTFDDHDSVDKIVIQKYHTVNGHNCEVRKALSKQEMASASSSQRGRSGSGNFGGGRGGGFGGNDNFGRGGNFSGRGIGDGYNGFGNDGSNFGGGGSYNDFGNYNNQSSNFGPMKGGNFGGRSSGPYGGGGQYFAKPRNQGGYGGSSSSSSYGSGRRF
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000333504_sequence.fasta
homologs: ['1u1r_A', '196', '411', '1056', '4.43e-146', '100.000', '64']
homologs: ['1u1q_A', '196', '411', '1056', '4.43e-146', '100.000', '64']
homologs: ['1u1p_A', '196', '411', '1056', '4.43e-146', '100.000', '64']
homologs: ['1u1o_A', '196', '411', '1056', '4.43e-146', '100.000', '64']
homologs: 

#####
Blasting ENSP00000350877 to Find Its Complete Structures
#####
The protein sequence is MSYGRPPPDVEGMTSLKVDNLTYRTSPDTLRRVFEKYGRVGDVYIPRDRYTKESRGFAFVRFHDKRDAEDAMDAMDGAVLDGRELRVQMARYGRPPDSHHSRRGPPPRRYGGGGYGRRSRSRSRSRSRSRSRS
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000350877_sequence.fasta
homologs: ['2lec_A', '135', '207', '527', '2.23e-69', '100.000', '76']
homologs: ['2leb_A', '135', '207', '527', '2.23e-69', '100.000', '76']
homologs: ['2lea_A', '135', '207', '527', '2.23e-69', '100.000', '76']
homologs: ['2kn4_A', '158', '193', '490', '1.83e-63', '100.000', '70']
homologs: ['2dnm_A', '103', '148', '374', '1.65e-46', '79.570', '70']
homologs: ['1x5s_A', '102', '78.6', '192', '5.94e-19', '43.478', '69']
homologs: ['5tbx_B', '92', '77.4', '189', '1.25e-18

The homologs for ENSP00000341885 are: [['6y57_SC', 100.0, 100.0], ['6y2l_SC', 100.0, 100.0], ['6y0g_SC', 100.0, 100.0], ['6ip8_3G', 100.0, 100.0], ['6ip6_3G', 100.0, 100.0], ['6ip5_3G', 100.0, 100.0], ['6g5i_C', 100.0, 100.0], ['6g5h_C', 100.0, 100.0], ['6g53_C', 100.0, 100.0], ['6g51_C', 100.0, 100.0], ['6g4s_C', 100.0, 100.0], ['6g18_C', 100.0, 100.0], ['6ek0_SC', 100.0, 100.0], ['5vyc_C6', 100.0, 100.0], ['5vyc_C5', 100.0, 100.0], ['5vyc_C4', 100.0, 100.0], ['5vyc_C3', 100.0, 100.0], ['5vyc_C2', 100.0, 100.0], ['5vyc_C1', 100.0, 100.0], ['5t2c_AJ', 100.0, 100.0], ['5oa3_C', 100.0, 100.0], ['5lks_SC', 100.0, 100.0], ['5flx_C', 100.0, 100.0], ['5aj0_BC', 100.0, 100.0], ['5a2q_C', 100.0, 100.0], ['4v6x_AC', 100.0, 100.0], ['4uje_BC', 100.0, 100.0], ['4ujd_CC', 100.0, 100.0], ['4ujc_CC', 100.0, 100.0], ['4ug0_SC', 100.0, 100.0], ['4d61_C', 100.0, 100.0], ['4d5l_C', 100.0, 100.0], ['6sgc_D1', 98.294, 100.0], ['6hcq_D2', 98.294, 100.0], ['6hcm_D1', 98.294, 100.0], ['6hcj_D2', 98.294, 100.

There are no qualifying crystal PDB structures for ENSP00000341885
#####
Beginning Setup for AWSEM Calculations of PF03719 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
The homologs for ENSP00000341885 are: [['6y57_SC', 100.0, 100.0], ['6y2l_SC', 100.0, 100.0], ['6y0g_SC', 100.0, 100.0], ['6ip8_3G', 100.0, 100.0], ['6ip6_3G', 100.0, 100.0], ['6ip5_3G', 100.0, 100.0], ['6g5i_C', 100.0, 100.0], ['6g5h_C', 100.0, 100.0], ['6g53_C', 100.0, 100.0], ['6g51_C', 100.0, 100.0], ['6g4s_C', 100.0, 100.0], ['6g18_C', 100.0, 100.0], ['6ek0_SC', 100.0, 100.0], ['5vyc_C6', 100.0, 100.0], ['5vyc_C5', 100.0, 100.0], ['5vyc_C4', 100.0, 100.0], ['5vyc_C3', 100.0, 100.0], ['5vyc_C2', 100.0, 100.0], ['5vyc_C1', 100.0, 100.0], ['5t2c_AJ', 100.0, 100.0], ['5oa3_C', 100.0, 100.0], ['5lks_SC', 100.0, 100.0], ['5flx_C', 100.0, 100.0], ['5aj0_BC', 1

There are no qualifying crystal PDB structures for ENSP00000341885
#####
Beginning Setup for AWSEM Calculations of PF03465 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000353741 to Find Its Complete Structures
#####
The protein sequence is MADDPSAADRNVEIWKIKKLIKSLEAARGNGTSMISLIIPPKDQISRVAKMLADEFGTASNIKSRVNRLSVLGAITSVQQRLKLYNKVPPNGLVVYCGTIVTEEGKEKKVNIDFEPFKPINTSLYLCDNKFHTEALTALLSDDSKFGFIVIDGSGALFGTLQGNTREVLHKFTVDLPKKHGRGGQSALRFARLRMEKRHNYVRKVAETAVQLFISGDKVNVAGLVLAGSADFKTELSQSDMFDQRLQSKVLKLVDISYGGENGFNQAIELSTEVLSNVKFIQEKKLIGRYFDEISQDTGKYCFGVEDTLKALEMGAVEILIVYENLDIMRYVLHCQGTEEEKILYLTPEQEKDKSHFTDKETGQEHELIESMPLLEWFANNYKKFGATLEIVTDKSQEGSQFVKGFGGIGGILRYRVDFQGMEYQGGDDEFFDLDDY
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 

Run was successful
The crystal structure of ENSP00000353741 is: 3E1Y_D
#####
Beginning Setup for AWSEM Calculations of PF03464 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000353741 to Find Its Complete Structures
#####
The protein sequence is MADDPSAADRNVEIWKIKKLIKSLEAARGNGTSMISLIIPPKDQISRVAKMLADEFGTASNIKSRVNRLSVLGAITSVQQRLKLYNKVPPNGLVVYCGTIVTEEGKEKKVNIDFEPFKPINTSLYLCDNKFHTEALTALLSDDSKFGFIVIDGSGALFGTLQGNTREVLHKFTVDLPKKHGRGGQSALRFARLRMEKRHNYVRKVAETAVQLFISGDKVNVAGLVLAGSADFKTELSQSDMFDQRLQSKVLKLVDISYGGENGFNQAIELSTEVLSNVKFIQEKKLIGRYFDEISQDTGKYCFGVEDTLKALEMGAVEILIVYENLDIMRYVLHCQGTEEEKILYLTPEQEKDKSHFTDKETGQEHELIESMPLLEWFANNYKKFGATLEIVTDKSQEGSQFVKGFGGIGGILRYRVDFQGMEYQGGDDEFFDLDDY
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.

#####
Blasting ENSP00000380530 to Find Its Complete Structures
#####
The protein sequence is MAGDRLPRKVMDAKKLASLLRGGPGGPLVIDSRSFVEYNSWHVLSSVNICCSKLVKRRLQQGKVTIAELIQPAARSQVEATEPQDVVVYDQSTRDASVLAADSFLSILLSKLDGCFDSVAILTGGFATFSSCFPGLCEGKPAALLPMSLSQPCLPVPSVGLTRILPHLYLGSQKDVLNKDLMTQNGISYVLNASNSCPKPDFICESRFMRVPINDNYCEKLLPWLDKSIEFIDKAKLSSCQVIVHCLAGISRSATIAIAYIMKTMGMSSDDAYRFVKDRRPSISPNFNFLGQLLEYERSLKLLAALQGDPGTPSGTPEPPPSPAAGAPLPRLPPPTSESAATGNAAAREGGLSAGGEPPAPPTPPATSALQQGLRGLHLSSDRLQDTNRLKRSFSLDIKSAYAPSRRPDGPGPPDPGEAPKLCKLDSPSGAALGLSSPSPDSPDAAPEARPRPRRRPRPPAGSPARSPAHSLGLNFGDAARQTPRHGLSALSAPGLPGPGQPAGPGAWAPPLDSPGTPSPDGPWCFSPEGAQGAGGVLFAPFGRAGAPGPGGGSDLRRREAARAEPRDARTGWPEEPAPETQFKRRSCQMEFEEGMVEGRARGEELAALGKQASFSGSVEVIEVS
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000380530

#####
Blasting ENSP00000380530 to Find Its Complete Structures
#####
The protein sequence is MAGDRLPRKVMDAKKLASLLRGGPGGPLVIDSRSFVEYNSWHVLSSVNICCSKLVKRRLQQGKVTIAELIQPAARSQVEATEPQDVVVYDQSTRDASVLAADSFLSILLSKLDGCFDSVAILTGGFATFSSCFPGLCEGKPAALLPMSLSQPCLPVPSVGLTRILPHLYLGSQKDVLNKDLMTQNGISYVLNASNSCPKPDFICESRFMRVPINDNYCEKLLPWLDKSIEFIDKAKLSSCQVIVHCLAGISRSATIAIAYIMKTMGMSSDDAYRFVKDRRPSISPNFNFLGQLLEYERSLKLLAALQGDPGTPSGTPEPPPSPAAGAPLPRLPPPTSESAATGNAAAREGGLSAGGEPPAPPTPPATSALQQGLRGLHLSSDRLQDTNRLKRSFSLDIKSAYAPSRRPDGPGPPDPGEAPKLCKLDSPSGAALGLSSPSPDSPDAAPEARPRPRRRPRPPAGSPARSPAHSLGLNFGDAARQTPRHGLSALSAPGLPGPGQPAGPGAWAPPLDSPGTPSPDGPWCFSPEGAQGAGGVLFAPFGRAGAPGPGGGSDLRRREAARAEPRDARTGWPEEPAPETQFKRRSCQMEFEEGMVEGRARGEELAALGKQASFSGSVEVIEVS
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000380530

#####
Blasting ENSP00000293308 to Find Its Complete Structures
#####
The protein sequence is MSIRVTQKSYKVSTSGPRAFSSRSYTSGPGSRISSSSFSRVGSSNFRGGLGGGYGGASGMGGITAVTVNQSLLSPLVLEVDPNIQAVRTQEKEQIKTLNNKFASFIDKVRFLEQQNKMLETKWSLLQQQKTARSNMDNMFESYINNLRRQLETLGQEKLKLEAELGNMQGLVEDFKNKYEDEINKRTEMENEFVLIKKDVDEAYMNKVELESRLEGLTDEINFLRQLYEEEIRELQSQISDTSVVLSMDNSRSLDMDSIIAEVKAQYEDIANRSRAEAESMYQIKYEELQSLAGKHGDDLRRTKTEISEMNRNISRLQAEIEGLKGQRASLEAAIADAEQRGELAIKDANAKLSELEAALQRAKQDMARQLREYQELMNVKLALDIEIATYRKLLEGEESRLESGMQNMSIHTKTTSGYAGGLSSAYGGLTSPGLSYSLGSSFGSGAGSSSFSRTSSSRAVVVKKIETRDGKLVSESSDVLPK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000293308_sequence.fasta
homologs: ['3tnu_B', '129', '196', '499', '2.02e-60', '77.519', '27']
homologs: ['6uui_C', '120', '157', '397', '1.72e-45', '6

#####
Blasting ENSP00000293308 to Find Its Complete Structures
#####
The protein sequence is MSIRVTQKSYKVSTSGPRAFSSRSYTSGPGSRISSSSFSRVGSSNFRGGLGGGYGGASGMGGITAVTVNQSLLSPLVLEVDPNIQAVRTQEKEQIKTLNNKFASFIDKVRFLEQQNKMLETKWSLLQQQKTARSNMDNMFESYINNLRRQLETLGQEKLKLEAELGNMQGLVEDFKNKYEDEINKRTEMENEFVLIKKDVDEAYMNKVELESRLEGLTDEINFLRQLYEEEIRELQSQISDTSVVLSMDNSRSLDMDSIIAEVKAQYEDIANRSRAEAESMYQIKYEELQSLAGKHGDDLRRTKTEISEMNRNISRLQAEIEGLKGQRASLEAAIADAEQRGELAIKDANAKLSELEAALQRAKQDMARQLREYQELMNVKLALDIEIATYRKLLEGEESRLESGMQNMSIHTKTTSGYAGGLSSAYGGLTSPGLSYSLGSSFGSGAGSSSFSRTSSSRAVVVKKIETRDGKLVSESSDVLPK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000293308_sequence.fasta
homologs: ['3tnu_B', '129', '196', '499', '2.02e-60', '77.519', '27']
homologs: ['6uui_C', '120', '157', '397', '1.72e-45', '6

#####
Blasting ENSP00000389624 to Find Its Complete Structures
#####
The protein sequence is MATVEPETTPTPNPPTTEEEKTESNQEVANPEHYIKHPLQNRWALWFFKNDKSKTWQANLRLISKFDTVEDFWALYNHIQLSSNLMPGCDYSLFKDGIEPMWEDEKNKRGGRWLITLNKQQRRSDLDRFWLETLLCLIGESFDDYSDDVCGAVVNVRAKGDKIAIWTTECENREAVTHIGRVYKERLGLPPKIVIGYQSHADTATKSGSTTKNRFVV
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000389624_sequence.fasta
homologs: ['5t46_C', '220', '456', '1172', '5.32e-165', '100.000', '100']
homologs: ['5t46_A', '220', '456', '1172', '5.32e-165', '100.000', '100']
homologs: ['5ekv_C', '217', '455', '1171', '6.56e-165', '100.000', '100']
homologs: ['5ekv_A', '217', '455', '1171', '6.56e-165', '100.000', '100']
homologs: ['5eir_A', '217', '455', '1171', '6.56e-165', '100.000', '100']
homologs: ['5ei3_A', '

Run was successful
The crystal structure of ENSP00000389624 is: 5T46_C
#####
Beginning Setup for AWSEM Calculations of PF11717 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000312244 to Find Its Complete Structures
#####
The protein sequence is MSASEGMKFKFHSGEKVLCFEPDPTKARVLYDAKIVDVIVGKDEKGRKIPEYLIHFNGWNRSWDRWAAEDHVLRDTDENRRLQRKLARKAVARLRSTGRKKKRCRLPGVDSVLKGLPTEEKDENDENSLSSSSDCSENKDEEISEESDIEEKTEVKEEPELQTRREMEERTITIEIPEVLKKQLEDDCYYINRRKRLVKLPCQTNIITILESYVKHFAINAAFSANERPRHHHVMPHANMNVHYIPAEKNVDLCKEMVDGLRITFDYTLPLVLLYPYEQAQYKKVTSSKFFLPIKESATSTNRSQEELSPSPPLLNPSTPQSTESQPTTGEPATPKRRKAEPEALQSLRRSTRHSANCDRLSESSASPQPKRRQQDTSASMPKLFLHLEKKTPVHSRSSSPIPLTPSKEGSAVFAGFEGRRTNEINEVLSWKLVPDNYPPGDQPPPPSYIYGAQHLLRLFVKLPEILGKMSFSEKNLKALLKHFDLFLRFLAEYHDDFFPESAYVAACEAHYSTKNPRAIY
finding homologs
executing::: /Users/hanajaafa

#####
Blasting ENSP00000355053 to Find Its Complete Structures
#####
The protein sequence is MAMALELQAQASPQPEPEELLIVKLEEDSWGSESKLWEKDRGSVSGPEASRQRFRQFQYRDAAGPHEAFSQLWALCCRWLRPEIRLKEQILELLVLEQFLTILPREVQTWVQARHPESGEEAVALVEDWHRETRTAGQSGLELHTEETRPLKTGEEAQSFQLQPVDPWPEGQSQKKGVKNTCPDLPNHLNAEVAPQPLKESAVLTPRVPTLPKMGSVGDWEVTAESQEALGPGKHAEKELCKDPPGDDCGNSVCLGVPVSKPSNTSEKEQGPEFWGLSLINSGKRSTADYSLDNEPAQALTWRDSRAWEEQYQWDVEDMKVSGVHWGYEETKTFLAILSESPFSEKLRTCHQNRQVYRAIAEQLRARGFLRTLEQCRYRVKNLLRNYRKAKSSHPPGTCPFYEELEALVRARTAIRATDGPGEAVALPRLGYSDAEMDEQEEGGWDPEEMAEDCNGAGLVNVESTQGPRIAGAPALFQSRIAGVHWGYEETKAFLAILSESPFSEKLRTCHQNSQVYRAIAERLCALGFLRTLEQCRYRFKNLLRSYRKAKSSHPPGTCPFYEELDSLMRARAAVRAMGTVREAAGLPRCGQSSAETDAQEAWGEVANEDAVKPSTLCPKAPDMGFEMRHEDEDQISEQDIFEGLPGALSKCPTEAVCQPLDWGEDSENENEDEGQWGNPSQEQWQESSSEEDLEKLIDHQGLYLAEKPYKCDTCMKSFSRSSHFIAHQRIHTGEKPYKCLECGKNFSDRSNLNTHQRIHTGEKPYKCLECGKSFSDHSNLITHQRIHTGEKPYKCGECWKSFNQSSNLLKHQRIHLGGNPDQCSEPGGNFAQSPSFSAHWRNSTEETAPEQPQSISKDLNSPGPHSTNSGEKLYECSECGRSFSKSSALISHQRIHTGEKPYECAE

homologs: ['6jnn_N', '101', '54.7', '130', '4.73e-08', '37.179', '27']
homologs: ['6jnn_N', '101', '54.7', '130', '4.96e-08', '35.526', '27']
homologs: ['6jnn_N', '101', '54.3', '129', '6.71e-08', '38.158', '27']
homologs: ['6jnn_N', '101', '53.9', '128', '9.25e-08', '40.789', '27']
homologs: ['6jnn_N', '101', '51.6', '122', '6.88e-07', '39.474', '27']
homologs: ['6jnn_N', '101', '50.8', '120', '1.19e-06', '35.526', '27']
homologs: ['6jnn_B', '101', '60.1', '144', '6.84e-10', '42.105', '27']
homologs: ['6jnn_B', '101', '54.7', '130', '4.73e-08', '37.179', '27']
homologs: ['6jnn_B', '101', '54.7', '130', '4.96e-08', '35.526', '27']
homologs: ['6jnn_B', '101', '54.3', '129', '6.71e-08', '38.158', '27']
homologs: ['6jnn_B', '101', '53.9', '128', '9.25e-08', '40.789', '27']
homologs: ['6jnn_B', '101', '51.6', '122', '6.88e-07', '39.474', '27']
homologs: ['6jnn_B', '101', '50.8', '120', '1.19e-06', '35.526', '27']
homologs: ['6jnn_A', '101', '60.1', '144', '6.84e-10', '42.105', '27']
homolo

homologs: ['2el6_A', '46', '40.0', '92', '0.002', '58.621', '14']
homologs: ['2el6_A', '46', '39.7', '91', '0.002', '58.621', '14']
homologs: ['2el6_A', '46', '39.7', '91', '0.003', '55.172', '14']
homologs: ['2epv_A', '44', '41.6', '96', '5.93e-04', '64.286', '13']
homologs: ['2epv_A', '44', '41.2', '95', '6.61e-04', '60.714', '13']
homologs: ['2epv_A', '44', '39.3', '90', '0.003', '57.143', '13']
homologs: ['2epv_A', '44', '38.9', '89', '0.004', '60.714', '13']
homologs: ['2epv_A', '44', '38.9', '89', '0.005', '60.714', '13']
homologs: ['2en2_A', '42', '40.4', '93', '0.001', '54.839', '3']
homologs: ['2eps_A', '54', '40.4', '93', '0.002', '52.941', '6']
homologs: ['2eps_A', '54', '40.0', '92', '0.003', '50.000', '6']
homologs: ['2emz_A', '46', '40.4', '93', '0.002', '56.667', '6']
homologs: ['2emz_A', '46', '39.3', '90', '0.004', '50.000', '6']
homologs: ['2en8_A', '46', '40.0', '92', '0.002', '60.000', '3']
homologs: ['2dlk_A', '79', '40.8', '94', '0.002', '38.596', '5']
homologs: [

#####
Blasting ENSP00000362807 to Find Its Complete Structures
#####
The protein sequence is MASYPYRQGCPGAAGQAPGAPPGSYYPGPPNSGGQYGSGLPPGGGYGGPAPGGPYGPPAGGGPYGHPNPGMFPSGTPGGPYGGAAPGGPYGQPPPSSYGAQQPGLYGQGGAPPNVDPEAYSWFQSVDSDHSGYISMKELKQALVNCNWSSFNDETCLMMINMFDKTKSGRIDVYGFSALWKFIQQWKNLFQQYDRDRSGSISYTELQQALSQMGYNLSPQFTQLLVSRYCPRSANPAMQLDRFIQVCTQLQVLTEAFREKDTAVQGNIRLSFEDFVTMTASRML
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000362807_sequence.fasta
homologs: ['3wxa_B', '172', '133', '335', '1.36e-37', '42.515', '58']
homologs: ['3wxa_A', '172', '133', '335', '1.36e-37', '42.515', '58']
homologs: ['2znd_A', '172', '133', '335', '1.36e-37', '42.515', '58']
homologs: ['2zn9_B', '172', '133', '335', '1.36e-37', '42.515', '58']
homologs: ['2zn9_A', '172', '133', '335', '1.

#####
Blasting ENSP00000362807 to Find Its Complete Structures
#####
The protein sequence is MASYPYRQGCPGAAGQAPGAPPGSYYPGPPNSGGQYGSGLPPGGGYGGPAPGGPYGPPAGGGPYGHPNPGMFPSGTPGGPYGGAAPGGPYGQPPPSSYGAQQPGLYGQGGAPPNVDPEAYSWFQSVDSDHSGYISMKELKQALVNCNWSSFNDETCLMMINMFDKTKSGRIDVYGFSALWKFIQQWKNLFQQYDRDRSGSISYTELQQALSQMGYNLSPQFTQLLVSRYCPRSANPAMQLDRFIQVCTQLQVLTEAFREKDTAVQGNIRLSFEDFVTMTASRML
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000362807_sequence.fasta
homologs: ['3wxa_B', '172', '133', '335', '1.36e-37', '42.515', '58']
homologs: ['3wxa_A', '172', '133', '335', '1.36e-37', '42.515', '58']
homologs: ['2znd_A', '172', '133', '335', '1.36e-37', '42.515', '58']
homologs: ['2zn9_B', '172', '133', '335', '1.36e-37', '42.515', '58']
homologs: ['2zn9_A', '172', '133', '335', '1.

#####
Blasting ENSP00000404375 to Find Its Complete Structures
#####
The protein sequence is MIAPTDSHEEVRSGTSYILPFASRFLSFRADSAHASMVNVPKTRRTFCKKCGKHQPHKVTQYKKGKDSLYAQGKRRYDRKQSGYGGQTKPIFRKKAKTTKKIVLRLECVEPNCRSKRMLAIKRCKHFELGGDKKRKGQVIQF
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000404375_sequence.fasta
homologs: ['6y57_Lo', '106', '216', '551', '2.50e-73', '100.000', '75']
homologs: ['6y2l_Lo', '106', '216', '551', '2.50e-73', '100.000', '75']
homologs: ['6y0g_Lo', '106', '216', '551', '2.50e-73', '100.000', '75']
homologs: ['6t59_o3', '106', '216', '551', '2.50e-73', '100.000', '75']
homologs: ['6sgc_o2', '106', '216', '551', '2.50e-73', '100.000', '75']
homologs: ['6p5n_Ao', '106', '216', '551', '2.50e-73', '100.000', '75']
homologs: ['6p5k_Ao', '106', '216',

#####
Blasting ENSP00000354739 to Find Its Complete Structures
#####
The protein sequence is MPPKFDPNEIKVVYLRCTGGEVGATSALAPKIGPLGLSPKKVGDDIAKATGDWKGLRITVKLTIQNRQAQIEVVPSASALIIKALKEPPRDRKKQKNIKHSGNITFDEIVNIARQMRHRSLARELSGTIKEILGTAQSVGCNVDGRHPHDIIDDINSGAVECPAS
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000354739_sequence.fasta
homologs: ['6t59_t3', '165', '336', '862', '1.59e-119', '100.000', '100']
homologs: ['6sgc_t2', '165', '336', '862', '1.59e-119', '100.000', '100']
homologs: ['6hcq_u3', '165', '336', '862', '1.59e-119', '100.000', '100']
homologs: ['6hcm_t3', '165', '336', '862', '1.59e-119', '100.000', '100']
homologs: ['6hcj_u3', '165', '336', '862', '1.59e-119', '100.000', '100']
homologs: ['6hcf_t3', '165', '336', '862', '1.59e-119', '100.000', '100']


There are no qualifying crystal PDB structures for ENSP00000354739
#####
Beginning Setup for AWSEM Calculations of PF00298 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000354739 to Find Its Complete Structures
#####
The protein sequence is MPPKFDPNEIKVVYLRCTGGEVGATSALAPKIGPLGLSPKKVGDDIAKATGDWKGLRITVKLTIQNRQAQIEVVPSASALIIKALKEPPRDRKKQKNIKHSGNITFDEIVNIARQMRHRSLARELSGTIKEILGTAQSVGCNVDGRHPHDIIDDINSGAVECPAS
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000354739_sequence.fasta
homologs: ['6t59_t3', '165', '336', '862', '1.59e-119', '100.000', '100']
homologs: ['6sgc_t2', '165', '336',

There are no qualifying crystal PDB structures for ENSP00000354739
#####
Beginning Setup for AWSEM Calculations of PF14904 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000432790 to Find Its Complete Structures
#####
The protein sequence is APEENAGTELLLQGFERRFLAVRTLRSFPWQSLEAKLRDSSDSELLRDILQKSSGGSVTLSKSTAIISHGTTGLVTWDAALYLAEWAIENPAAFINR
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000432790_sequence.fasta
The homologs for ENSP00000432790 are: []
#####
Finding Qualifying Structure for ENSP00000432790
#####
#####
Beginning Setup for AWSEM Calculations of PF17832 Pseudogene-Parent P

homologs: ['6h05_A', '426', '778', '2010', '0.0', '100.000', '85']
homologs: ['6pbr_F', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['6pbr_E', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['6pbr_D', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['6pbr_C', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['6pbr_B', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['6pbr_A', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['1scz_A', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['1e2o_A', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['1c4t_C', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['1c4t_B', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['1c4t_A', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['1b5s_E', '242', '163', '412', '2.77e-46', '37.344', '53']
homologs: ['1b5s_D', '242', '163', '412', '2.77e-46', '37.344', '53']
homologs: ['1b5s_C', '2

#####
Blasting ENSP00000335304 to Find Its Complete Structures
#####
The protein sequence is MLSRSRCVSRAFSRSLSAFQKGNCPLGRRSLPGVSLCQGPGYPNSRKVVINNSVFSVRFFRTTAVCKDDLVTVKTPAFAESVTEGDVRWEKAVGDTVAEDEVVCEIETDKTSVQVPSPANGVIEALLVPDGGKVEGGTPLFTLRKTGAAPAKAKPAEAPAAAAPKAEPTAAAVPPPAAPIPTQMPPVPSPSQPPSGKPVSAVKPTVAPPLAEPGAGKGLRSEHREKMNRMRQRIAQRLKEAQNTCAMLTTFNEIDMSNIQEMRARHKEAFLKKHNLKLGFMSAFVKASAFALQEQPVVNAVIDDTTKEVVYRDYIDISVAVATPRGLVVPVIRNVEAMNFADIERTITELGEKARKNELAIEDMDGGTFTISNGGVFGSLFGTPIINPPQSAILGMHGIFDRPVAIGGKVEVRPMMYVALTYDHRLIDGREAVTFLRKIKAAVEDPRVLLLDL
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000335304_sequence.fasta
homologs: ['6h05_A', '426', '778', '2010', '0.0', '100.000', '85']
homologs: ['6pbr_F', '233', '292', '747', '1.81e-96', '59.829', '52']
homologs: ['6pbr_E'

#####
Blasting ENSP00000260443 to Find Its Complete Structures
#####
The protein sequence is MRIEKCYFCSGPIYPGHGMMFVRNDCKVFRFCKSKCHKNFKKKRNPRKVRWTKAFRKAAGKELTVDNSFEFEKRRNEPIKYQRELWNKTIDAMKRVEEIKQKRQAKFIMNRLKKNKELQKVQDIKEVKQNIHLIRAPLAGKGKQLEEKMVQQLQEDVDMEDAP
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000260443_sequence.fasta
homologs: ['6n8l_u', '199', '178', '451', '1.65e-56', '60.741', '83']
homologs: ['6n8k_u', '199', '178', '451', '1.65e-56', '60.741', '83']
homologs: ['6n8j_u', '199', '178', '451', '1.65e-56', '60.741', '83']
homologs: ['6ft6_u', '199', '178', '451', '1.65e-56', '60.741', '83']
homologs: ['6em5_u', '199', '178', '451', '1.65e-56', '60.741', '83']
homologs: ['6em4_u', '199', '178', '451', '1.65e-56', '60.741', '83']
homologs: ['6em1_u', '199'

#####
Blasting ENSP00000359149 to Find Its Complete Structures
#####
The protein sequence is MGSQGRSGPPGNGGPGEGEGGEARKLQEGRVARGKRRKGKGKGKARAGQGGRGSGAEGKPGPQTAKEAAGPGADAGARACPREEAEGGRSVEEGARGIVKGVEGSAGAGKEAQGREYGKKEEWRVRARRREGARPGRAQGRGGQAWADIAGTGVAMAAAAGEEEEEEEAARESAARPAAGPALWRLPEELLLLICSYLDMRALGRLAQVCRWLRRFTSCDLLWRRIARASLNSGFTRLGTDLMTSVPVKERVKVSQNWRLGRCREGILLKWRCSQMPWMQLEDDSLYISQANFILAYQFRPDGASLNRRPLGVFAGHDEDVCHFVLANSHIVSAGGDGKIGIHKIHSTFTVKYSAHEQEVNCVDCKGGIIVSGSRDRTAKVWPLASGRLGQCLHTIQTEDRVWSIAISPLLSSFVTGTACCGHFSPLRIWDLNSGQLMTHLGSDFPPGAGVLDVMYESPFTLLSCGYDTYVRYWDLRTSVRKCVMEWEEPHDSTLYCLQTDGNHLLATGSSYYGVVRLWDRRQRACLHAFPLTSTPLSSPVYCLRLTTKHLYAALSYNLHVLDFQNP
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000359149_sequence.fasta
homologs: ['2ovr_B', '445', '71.2', '173',

#####
Blasting ENSP00000351621 to Find Its Complete Structures
#####
The protein sequence is MEPAGPAPGRLGPLLCLLLAASCAWSGVAGEEELQVIQPDKSVLVAAGETATLRCTATSLIPVGPIQWFRGAGPGRELIYNQKEGHFPRVTTVSDLTKRNNMDFSIRIGNITPADAGTYYCVKFRKGSPDDVEFKSGAGTELSVRAKPSAPVVSGPAARATPQHTVSFTCESHGFSPRDITLKWFKNGNELSDFQTNVDPVGESVSYSIHSTAKVVLTREDVHSQVICEVAHVTLQGDPLRGTANLSETIRVPPTLEVTQQPVRAENQVNVTCQVRKFYPQRLQLTWLENGNVSRTETASTVTENKDGTYNWMSWLLVNVSAHRDDVKLTCQVEHDGQPAVSKSHDLKVSAHPKEQGSNTAAENTGSNERNIYIVVGVVCTLLVALLMAALYLVRIRQKKAQGSTSSTRLHEPEKNAREITQDTNDITYADLNLPKGKKPAPQAAEPNNHTEYASIQTSPQPASEDTLTYADLDMVHLNRTPKQPAPKPEPSFSEYASVQVPRK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000351621_sequence.fasta
homologs: ['2wng_A', '327', '636', '1641', '0.0', '95.938', '63']
homologs: ['4i2x_F', '328', '553', '142

#####
Blasting ENSP00000432884 to Find Its Complete Structures
#####
The protein sequence is MNPTNTVFDAKRLIGRRFDDAVVQSDMKHWPFMVVNDAGRPKVQVEYKGETKSFYPEEVSSMVLTKMKEIAEAYLGKTVTNAVVTVPAYFNDSQRQATKDAGTIAGLNVLRIINEPTAAAIAYGLDKKVGAERNVLIFDLGGGTFDVSILTIEDGIFEVKSTAGDTHLGGEDFDNRMVNHFIAEFKRKHKKDISENKRAVRRLRTACERAKRTLSSSTQASIEIDSLYEGIDFYTSITRARFEELNADLFRGTLDPVEKALRDAKLDKSQIHDIVLVGGSTRIPKIQKLLQDFFNGKELNKSINPDEAVAYG
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000432884_sequence.fasta
homologs: ['3c7n_B', '554', '644', '1662', '0.0', '100.000', '100']
homologs: ['3cqx_B', '386', '643', '1659', '0.0', '100.000', '100']
homologs: ['3cqx_A', '386', '643', '1659', '0.0', '100.000', '100']
homologs: ['1ngg_A', '386', '643', '1659', '0.0', '100.000', '100']
homologs: ['6b1i_B', '400

Run was successful
The crystal structure of ENSP00000432884 is: 3C7N_B
#####
Beginning Setup for AWSEM Calculations of PF08075 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000409773 to Find Its Complete Structures
#####
The protein sequence is XPDITEEEMRKLFEKYGKAGEVFIHKDKGFGFIRLVERAVVIVDDRGRPSGKGIVEFSGKPAARKALDRCSEGSFLLTTFPRPVTVEPMDQLDDEEGLPEKLVIKNQQFHKEREQPPRFAQPGSFEYEYAMRWKALIEMEKQQQDQVDRNIKEAREKLEMEMEAARHEHQVMLMRQDLMRRQEELRRMEELHNQEVQKRKQLELRQEEERRRREEEMRRQQEEMMRRQQEGFKG
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000409773_sequence.fasta
homologs: ['3sde_B', '261', '288', '

homologs: ['4qho_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4ie7_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4ie6_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4ie5_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4ie4_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4ie0_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4idz_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4cxy_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4cxx_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4cxw_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['3lfm_A', '495', '996', '2574', '0.0', '99.580', '94']
homologs: ['4zs3_A', '478', '995', '2572', '0.0', '99.580', '94']
homologs: ['4zs2_A', '478', '995', '2572', '0.0', '99.580', '94']
homologs: ['4qkn_A', '476', '991', '2562', '0.0', '99.578', '94']
homologs: ['5dab_A', '479', '990', '2560', '0.0', '99.160', '94']
homologs: 

#####
Blasting ENSP00000235835 to Find Its Complete Structures
#####
The protein sequence is MLSAASRVVSRAAVHCALRSPPPEARALAMSRPPPPRVASVLGTMEMGRRMDAPASAAAVRAFLERGHTELDTAFMYSDGQSETILGGLGLGLGGGDCRVKIATKANPWDGKSLKPDSVRSQLETSLKRLQCPQVDLFYLHAPDHGTPVEETLHACQRLHQEGKFVELGLSNYASWEVAEICTLCKSNGWILPTVYQGMYNATTRQVETELFPCLRHFGLRFYAYNPLAGGLLTGKYKYEDKDGKQPVGRFFGNSWAETYRNRFWKEHHFEAIALVEKALQAAYGASAPSVTSAALRWMYHHSQLQGAHGDAVILGMSSLEQLEQNLAATEEGPLEPAVVDAFNQAWHLVAHECPNYFR
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000235835_sequence.fasta
homologs: ['2bp1_D', '360', '748', '1930', '0.0', '100.000', '100']
homologs: ['2bp1_C', '360', '748', '1930', '0.0', '100.000', '100']
homologs: ['2bp1_B', '360', '748', '1930', '0.0', '100.000', '100']
homologs: ['2bp1_A', '360', '748', '1930', '0.

Run was successful
The crystal structure of ENSP00000235835 is: 2BP1_D
#####
Beginning Setup for AWSEM Calculations of PF00179 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000429419 to Find Its Complete Structures
#####
The protein sequence is MTLTRWTGMIIGPPRTNYENRIYSLKVECGPKYPEAPPSVRFVTKINMNGINNSSGMVDARSIPVLAKWQNSYSIKVVLQELRRLMMSKENMKLPQPPEGQTYNN
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000429419_sequence.fasta
homologs: ['5ait_G', '147', '217', '553', '1.13e-73', '100.000', '100']
homologs: ['5ait_D', '147', '217', '553', '1.13e-73', '100.000', '100']
homologs: ['1j7d_A', 

Run was successful
The crystal structure of ENSP00000429419 is: 5AIT_G
#####
Beginning Setup for AWSEM Calculations of PF00118 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000275603 to Find Its Complete Structures
#####
The protein sequence is MAAVKTLNPKAEVARAQAALAVNISAARGLQDVLRTNLGPKGTMKMLVSGAGDIKLTKDGNVLLHEMQIQHPTASLIAKVATAQDDITGDGTTSNVLIIGELLKQADLYISEGLHPRIITEGFEAAKEKALQFLEEVKVSREMDRETLIDVARTSLRTKVHAELADVLTEAVVDSILAIKKQDEPIDLFMIEIMEMKHKSETDTSLIRGLVLDHGARHPDMKKRVEDAYILTCNVSLEYEKTEVNSGFFYKSAEEREKLVKAERKFIEDRVKKIIELKRKVCGDSDKGFVVINQKGIDPFSLDALSKEGIVALRRAKRRNMERLTLACGGVALNSFDDLSPDCLGHAGLVYEYTLGEEKFTFIEKCNNPRSVTLLIKGPNKHTLTQIKDAVRDGLRAVKNAIDDGCVVPGAGAVEVAMAEALIKHKPSVKGRAQLGVQAFADALLIIPKVLAQNSGFDLQETLVKIQAEHSESGQLVGVDLNTGEPMVAAEVGVWDNYCVKKQLLHSCTVIATNILLVDEIMRAGMSSLKG
finding homologs
executing::: /Users

There are no qualifying crystal PDB structures for ENSP00000275603
#####
Beginning Setup for AWSEM Calculations of PF13837 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000355053 to Find Its Complete Structures
#####
The protein sequence is MAMALELQAQASPQPEPEELLIVKLEEDSWGSESKLWEKDRGSVSGPEASRQRFRQFQYRDAAGPHEAFSQLWALCCRWLRPEIRLKEQILELLVLEQFLTILPREVQTWVQARHPESGEEAVALVEDWHRETRTAGQSGLELHTEETRPLKTGEEAQSFQLQPVDPWPEGQSQKKGVKNTCPDLPNHLNAEVAPQPLKESAVLTPRVPTLPKMGSVGDWEVTAESQEALGPGKHAEKELCKDPPGDDCGNSVCLGVPVSKPSNTSEKEQGPEFWGLSLINSGKRSTADYSLDNEPAQALTWRDSRAWEEQYQWDVEDMKVSGVHWGYEETKTFLAILSESPFSEKLRTCHQNRQVYRAIAEQLRARGFLRTLEQCRYRVKNLLRNYRKAKSSHPPGTCPFYEELEALVRARTAIRATDGPGEAVALPRLGYSDAEMDEQEEGGWDPEEMAEDCNGAGLVNVESTQGPRIAGAPALFQSRIAGVHWGYEETKAFLAILSESPFSEKLRTCHQNSQVYRAIAERLCALGFLRTLEQCRYRFKNLLRSYRKAKSSHPPGTCPFYEELDSLMRAR

homologs: ['1f2i_K', '73', '48.9', '115', '2.62e-06', '41.071', '30']
homologs: ['1f2i_K', '73', '42.0', '97', '7.12e-04', '41.026', '30']
homologs: ['1f2i_K', '73', '40.0', '92', '0.004', '37.500', '30']
homologs: ['1f2i_J', '73', '59.7', '143', '4.79e-10', '48.214', '30']
homologs: ['1f2i_J', '73', '57.8', '138', '2.07e-09', '50.000', '30']
homologs: ['1f2i_J', '73', '55.8', '133', '1.10e-08', '43.636', '30']
homologs: ['1f2i_J', '73', '53.9', '128', '4.38e-08', '44.643', '30']
homologs: ['1f2i_J', '73', '53.1', '126', '1.03e-07', '44.643', '30']
homologs: ['1f2i_J', '73', '50.8', '120', '6.71e-07', '42.857', '30']
homologs: ['1f2i_J', '73', '50.4', '119', '7.19e-07', '41.818', '30']
homologs: ['1f2i_J', '73', '48.9', '115', '2.62e-06', '41.071', '30']
homologs: ['1f2i_J', '73', '42.0', '97', '7.12e-04', '41.026', '30']
homologs: ['1f2i_J', '73', '40.0', '92', '0.004', '37.500', '30']
homologs: ['1f2i_I', '73', '59.7', '143', '4.79e-10', '48.214', '30']
homologs: ['1f2i_I', '73', '57

homologs: ['2enc_A', '46', '39.3', '90', '0.003', '54.839', '22']
homologs: ['2en7_A', '44', '43.9', '102', '7.17e-05', '67.857', '13']
homologs: ['2en7_A', '44', '43.9', '102', '7.84e-05', '67.857', '13']
homologs: ['2en7_A', '44', '41.2', '95', '7.74e-04', '64.286', '13']
homologs: ['2en7_A', '44', '40.8', '94', '0.001', '64.286', '13']
homologs: ['2en7_A', '44', '40.8', '94', '0.001', '60.714', '13']
homologs: ['2el4_A', '46', '43.9', '102', '7.30e-05', '63.333', '22']
homologs: ['2el4_A', '46', '43.5', '101', '1.04e-04', '63.333', '22']
homologs: ['2el4_A', '46', '43.5', '101', '1.27e-04', '60.000', '22']
homologs: ['2el4_A', '46', '43.1', '100', '1.77e-04', '60.000', '22']
homologs: ['2el4_A', '46', '42.4', '98', '2.92e-04', '60.000', '22']
homologs: ['2el4_A', '46', '42.4', '98', '3.25e-04', '60.000', '22']
homologs: ['2el4_A', '46', '42.0', '97', '3.66e-04', '60.000', '22']
homologs: ['2el4_A', '46', '40.0', '92', '0.002', '60.000', '22']
homologs: ['2emc_A', '46', '43.9', '102'

#####
Blasting ENSP00000355053 to Find Its Complete Structures
#####
The protein sequence is MAMALELQAQASPQPEPEELLIVKLEEDSWGSESKLWEKDRGSVSGPEASRQRFRQFQYRDAAGPHEAFSQLWALCCRWLRPEIRLKEQILELLVLEQFLTILPREVQTWVQARHPESGEEAVALVEDWHRETRTAGQSGLELHTEETRPLKTGEEAQSFQLQPVDPWPEGQSQKKGVKNTCPDLPNHLNAEVAPQPLKESAVLTPRVPTLPKMGSVGDWEVTAESQEALGPGKHAEKELCKDPPGDDCGNSVCLGVPVSKPSNTSEKEQGPEFWGLSLINSGKRSTADYSLDNEPAQALTWRDSRAWEEQYQWDVEDMKVSGVHWGYEETKTFLAILSESPFSEKLRTCHQNRQVYRAIAEQLRARGFLRTLEQCRYRVKNLLRNYRKAKSSHPPGTCPFYEELEALVRARTAIRATDGPGEAVALPRLGYSDAEMDEQEEGGWDPEEMAEDCNGAGLVNVESTQGPRIAGAPALFQSRIAGVHWGYEETKAFLAILSESPFSEKLRTCHQNSQVYRAIAERLCALGFLRTLEQCRYRFKNLLRSYRKAKSSHPPGTCPFYEELDSLMRARAAVRAMGTVREAAGLPRCGQSSAETDAQEAWGEVANEDAVKPSTLCPKAPDMGFEMRHEDEDQISEQDIFEGLPGALSKCPTEAVCQPLDWGEDSENENEDEGQWGNPSQEQWQESSSEEDLEKLIDHQGLYLAEKPYKCDTCMKSFSRSSHFIAHQRIHTGEKPYKCLECGKNFSDRSNLNTHQRIHTGEKPYKCLECGKSFSDHSNLITHQRIHTGEKPYKCGECWKSFNQSSNLLKHQRIHLGGNPDQCSEPGGNFAQSPSFSAHWRNSTEETAPEQPQSISKDLNSPGPHSTNSGEKLYECSECGRSFSKSSALISHQRIHTGEKPYECAE

homologs: ['1f2i_J', '73', '42.0', '97', '7.12e-04', '41.026', '30']
homologs: ['1f2i_J', '73', '40.0', '92', '0.004', '37.500', '30']
homologs: ['1f2i_I', '73', '59.7', '143', '4.79e-10', '48.214', '30']
homologs: ['1f2i_I', '73', '57.8', '138', '2.07e-09', '50.000', '30']
homologs: ['1f2i_I', '73', '55.8', '133', '1.10e-08', '43.636', '30']
homologs: ['1f2i_I', '73', '53.9', '128', '4.38e-08', '44.643', '30']
homologs: ['1f2i_I', '73', '53.1', '126', '1.03e-07', '44.643', '30']
homologs: ['1f2i_I', '73', '50.8', '120', '6.71e-07', '42.857', '30']
homologs: ['1f2i_I', '73', '50.4', '119', '7.19e-07', '41.818', '30']
homologs: ['1f2i_I', '73', '48.9', '115', '2.62e-06', '41.071', '30']
homologs: ['1f2i_I', '73', '42.0', '97', '7.12e-04', '41.026', '30']
homologs: ['1f2i_I', '73', '40.0', '92', '0.004', '37.500', '30']
homologs: ['1f2i_H', '73', '59.7', '143', '4.79e-10', '48.214', '30']
homologs: ['1f2i_H', '73', '57.8', '138', '2.07e-09', '50.000', '30']
homologs: ['1f2i_H', '73', '55

homologs: ['2ema_A', '46', '46.6', '109', '8.18e-06', '64.516', '22']
homologs: ['2ema_A', '46', '46.6', '109', '9.96e-06', '64.516', '22']
homologs: ['2ema_A', '46', '45.8', '107', '1.53e-05', '67.742', '22']
homologs: ['2ema_A', '46', '45.1', '105', '2.79e-05', '64.516', '22']
homologs: ['2elz_A', '46', '47.4', '111', '4.33e-06', '63.333', '25']
homologs: ['2elz_A', '46', '45.8', '107', '2.00e-05', '66.667', '25']
homologs: ['2elz_A', '46', '45.4', '106', '2.25e-05', '60.000', '25']
homologs: ['2elz_A', '46', '45.1', '105', '3.75e-05', '63.333', '25']
homologs: ['2elz_A', '46', '43.9', '102', '8.72e-05', '60.000', '25']
homologs: ['2elz_A', '46', '41.6', '96', '6.35e-04', '57.143', '25']
homologs: ['2elz_A', '46', '41.2', '95', '8.44e-04', '51.613', '25']
homologs: ['2elz_A', '46', '41.2', '95', '8.69e-04', '53.333', '25']
homologs: ['2elz_A', '46', '39.3', '90', '0.003', '50.000', '25']
homologs: ['2eof_A', '44', '47.4', '111', '5.43e-06', '65.517', '27']
homologs: ['2eof_A', '44', 

#####
Blasting ENSP00000273550 to Find Its Complete Structures
#####
The protein sequence is MTTASTSQVRQNYHQDSEAAINRQINLELYASYVYLSMSYYFDRDDVALKNFAKYFLHQSHEEREHAEKLMKLQNQRGGRIFLQDIKKPDCDDWESGLNAMECALHLEKNVNQSLLELHKLATDKNDPHLCDFIETHYLNEQVKAIKELGDHVTNLRKMGAPESGLAEYLFDKHTLGDSDNES
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000273550_sequence.fasta
homologs: ['6m54_X', '183', '382', '982', '3.38e-137', '100.000', '100']
homologs: ['6m54_W', '183', '382', '982', '3.38e-137', '100.000', '100']
homologs: ['6m54_V', '183', '382', '982', '3.38e-137', '100.000', '100']
homologs: ['6m54_U', '183', '382', '982', '3.38e-137', '100.000', '100']
homologs: ['6m54_T', '183', '382', '982', '3.38e-137', '100.000', '100']
homologs: ['6m54_S', '183', '382', '982', '3.38e-137', '100.0

Run was successful
The crystal structure of ENSP00000273550 is: 4YKH_A
#####
Beginning Setup for AWSEM Calculations of PF03143 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000339063 to Find Its Complete Structures
#####
The protein sequence is MGKEKTHINIVVIGHVDSGKSTTTGHLIYKCGGIDKRTIEKFEKEAAEMGKGSFKYAWVLDKLKAERERGITIDISLWKFETSKYYVTIIDAPGHRDFIKNMITGTSQADCAVLIVAAGVGEFEAGISKNGQTREHALLAYTLGVKQLIVGVNKMDSTEPPYSQKRYEEIVKEVSTYIKKIGYNPDTVAFVPISGWNGDNMLEPSANMPWFKGWKVTRKDGNASGTTLLEALDCILPPTRPTDKPLRLPLQDVYKIGGIGTVPVGRVETGVLKPGMVVTFAPVNVTTEVKSVEMHHEALSEALPGDNVGFNVKNVSVKDVRRGNVAGDSKNDPPMEAAGFTAQVIILNHPGQISAGYAPVLDCHTAHIACKFAELKEKIDRRSGKKLEDGPKFLKSGDAAIVDMVPGKPMCVESFSDYPPLGRFAVRDMRQTVAVGVIKAVDKKAAGAGKVTKSAQKAQKAK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 

There are no qualifying crystal PDB structures for ENSP00000339063
#####
Beginning Setup for AWSEM Calculations of PF03144 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000339063 to Find Its Complete Structures
#####
The protein sequence is MGKEKTHINIVVIGHVDSGKSTTTGHLIYKCGGIDKRTIEKFEKEAAEMGKGSFKYAWVLDKLKAERERGITIDISLWKFETSKYYVTIIDAPGHRDFIKNMITGTSQADCAVLIVAAGVGEFEAGISKNGQTREHALLAYTLGVKQLIVGVNKMDSTEPPYSQKRYEEIVKEVSTYIKKIGYNPDTVAFVPISGWNGDNMLEPSANMPWFKGWKVTRKDGNASGTTLLEALDCILPPTRPTDKPLRLPLQDVYKIGGIGTVPVGRVETGVLKPGMVVTFAPVNVTTEVKSVEMHHEALSEALPGDNVGFNVKNVSVKDVRRGNVAGDSKNDPPMEAAGFTAQVIILNHPGQISAGYAPVLDCHTAHIACKFAELKEKIDRRSGKKLEDGPKFLKSGDAAIVDMVPGKPMCVESFSDYPPLGRFAVRDMRQTVAVGVIKAVDKKAAGAGKVTKSAQKAQKAK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -w

There are no qualifying crystal PDB structures for ENSP00000339063
#####
Beginning Setup for AWSEM Calculations of PF00009 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000339063 to Find Its Complete Structures
#####
The protein sequence is MGKEKTHINIVVIGHVDSGKSTTTGHLIYKCGGIDKRTIEKFEKEAAEMGKGSFKYAWVLDKLKAERERGITIDISLWKFETSKYYVTIIDAPGHRDFIKNMITGTSQADCAVLIVAAGVGEFEAGISKNGQTREHALLAYTLGVKQLIVGVNKMDSTEPPYSQKRYEEIVKEVSTYIKKIGYNPDTVAFVPISGWNGDNMLEPSANMPWFKGWKVTRKDGNASGTTLLEALDCILPPTRPTDKPLRLPLQDVYKIGGIGTVPVGRVETGVLKPGMVVTFAPVNVTTEVKSVEMHHEALSEALPGDNVGFNVKNVSVKDVRRGNVAGDSKNDPPMEAAGFTAQVIILNHPGQISAGYAPVLDCHTAHIACKFAELKEKIDRRSGKKLEDGPKFLKSGDAAIVDMVPGKPMCVESFSDYPPLGRFAVRDMRQTVAVGVIKAVDKKAAGAGKVTKSAQKAQKAK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -w

There are no qualifying crystal PDB structures for ENSP00000339063
#####
Beginning Setup for AWSEM Calculations of PF01187 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000385714 to Find Its Complete Structures
#####
The protein sequence is MQLFLLAVSVPLPAMPFLELDTNLPANRVPAGLEKRLCAAAASILGKPADRVNVTVRPGLAMALSGSTEPCAQLSISSIGVVGTAEDNRSHSAHFFEFLTKELALGQDRILIRFFPLESWQIGKIGTVMTFL
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000385714_sequence.fasta
homologs: ['6c5f_C', '117', '239', '611', '1.53e-82', '100.000', '89']
homologs: ['6c5f_B', '117', '239', '611', '1.53e-82', '100.000', '89']


#####
Blasting ENSP00000265333 to Find Its Complete Structures
#####
The protein sequence is MAVPPTYADLGKSARDVFTKGYGFGLIKLDLKTKSENGLEFTSSGSANTETTKVTGSLETKYRWTEYGLTFTEKWNTDNTLGTEITVEDQLARGLKLTFDSSFSPNTGKKNAKIKTGYKREHINLGCDMDFDIAGPSIRGALVLGYEGWLAGYQMNFETAKSRVTQSNFAVGYKTDEFQLHTNVNDGTEFGGSIYQKVNKKLETAVNLAWTAGNSNTRFGIAAKYQIDPDACFSAKVNNSSLIGLGYTQTLKPGIKLTLSALLDGKNVNAGGHKLGLGLEFQA
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000265333_sequence.fasta
homologs: ['5xdo_B', '295', '578', '1490', '0.0', '100.000', '100']
homologs: ['5xdo_A', '295', '578', '1490', '0.0', '100.000', '100']
homologs: ['5xdn_B', '295', '578', '1490', '0.0', '100.000', '100']
homologs: ['5xdn_A', '295', '578', '1490', '0.0', '100.000', '100']
homologs: ['6tir_A', '291', '578', '1489', '0.0', '100

There are no qualifying crystal PDB structures for ENSP00000424547
#####
Beginning Setup for AWSEM Calculations of PF01283 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000348849 to Find Its Complete Structures
#####
The protein sequence is MTKKRRNNGRAKKGRGHVQPIRCTNCARCVPKDKAIKKFVIRNIVEAAAVRDISEASVFDAYVLPKLYVKLHYCVSCAIHSKVVRNRSREARKDRTPPPRFRPAGAAPRPPPKPM
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000348849_sequence.fasta
homologs: ['6y57_Sa', '115', '230', '586', '5.65e-79', '100.000', '100']
homologs: ['6y2l_Sa', '115', '230', '586', '5.65e-79', '100.000', '100']
homologs: ['6

There are no qualifying crystal PDB structures for ENSP00000348849
#####
Beginning Setup for AWSEM Calculations of PF00615 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000426255 to Find Its Complete Structures
#####
The protein sequence is MDKALKEVFDYSYRDYILSWYGNLSRDEGQLYHLLLEDFWEIARQLHHRLSHVDVVKVVCNDVVRTLLTHFCDLKAANARHEEQPRPFVLHACLRNSDDEVRFLQTCSRVLVFCLLPSKDVQSLSLRIMLAEILTTKVLKPVVELLSNPDYINQMLLAQLAYREQMNEHHKRAYTYAPSYEDFIKLINSNSDVEFLKQLRYQIVVEIIQATTISSFPQLKRHKGKETAAMKADLLRARNMKRYINQLTVAKKQCEKRIRILGGPAYDQQEDGALDEGEGPQSQKILQFEDILANTFYREHFGMYMERMDKRALISFWESVEHLKNANKNEIPQLVGEIYQNFFVESKEISVEKSLYKEIQQCLVGNKGIEVFYKIQEDVYETLKDRYYPSFIVSDLYEKLLIKEEEKHASQMISNKDEMGPRDEAGEEAVDDGTNQINEQASFAVNKLRELNEKLEYKRQALNSIQNAPKPDKKIVSKLKDEIILIEKERTDLQLHMARTDWWCENLGMWKASITSGEVTEENGEQLPCYFVMVSLQEVGGVETKNWTVPRRLSEFQNLHRKLSECVPSLKK

homologs: ['5woe_A', '125', '256', '655', '2.45e-80', '100.000', '15']
homologs: ['5xdz_A', '121', '184', '468', '9.65e-54', '71.429', '14']
homologs: ['5xdz_B', '122', '184', '468', '1.09e-53', '71.429', '14']
homologs: ['4pqp_D', '128', '53.1', '126', '2.95e-07', '31.250', '14']
homologs: ['4pqp_C', '128', '53.1', '126', '2.95e-07', '31.250', '14']
homologs: ['4pqp_B', '128', '53.1', '126', '2.95e-07', '31.250', '14']
homologs: ['4pqp_A', '128', '53.1', '126', '2.95e-07', '31.250', '14']
homologs: ['4pqo_A', '128', '53.1', '126', '2.95e-07', '31.250', '14']
homologs: ['4bgj_A', '147', '53.1', '126', '4.16e-07', '30.147', '15']
homologs: ['2a72_B', '146', '51.2', '121', '1.79e-06', '28.906', '15']
homologs: ['2a72_A', '146', '51.2', '121', '1.79e-06', '28.906', '15']
homologs: ['6n9g_B', '469', '54.3', '129', '2.41e-06', '29.104', '15']
homologs: ['6n9g_A', '469', '54.3', '129', '2.41e-06', '29.104', '15']
homologs: ['2d9j_A', '139', '50.4', '119', '3.01e-06', '30.435', '13']
homologs

There are no qualifying crystal PDB structures for ENSP00000378081
#####
Beginning Setup for AWSEM Calculations of PF01248 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000230050 to Find Its Complete Structures
#####
The protein sequence is MAEEGIAAGGVMDVNTALQEVLKTALIHDGLARGIREAAKALDKRQAHLCVLASNCDEPMYVKLVEALCAEHQINLIKVDDNKKLGEWVGLCKIDREGKPRKVVGCSCVVVKDYGKESQAKDVIEEYFKCKK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000230050_sequence.fasta
homologs: ['6yam_O', '132', '265', '678', '1.91e-92', '100.000', '100']
homologs: ['6yal_O', '132', '265', '678', '1.91e-92', '100.000', '100'

Run was successful
The crystal structure of ENSP00000230050 is: 4KZZ_M
#####
Beginning Setup for AWSEM Calculations of PF00723 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000362646 to Find Its Complete Structures
#####
The protein sequence is MRSRSNSGVRLDGYARLVQQTILCHQNPVTGLLPASYDQKDAWVRDNVYSILAVWGLGLAYRKNADRDEDKAKAYELEQSVVKLMRGLLHCMIRQVDKVESFKYSQSTKDSLHAKYNTKTCATVVGDDQWGHLQLDATSVYLLFLAQMTASGLHIIHSLDEVNFIQNLVFYIEAAYKTADFGIWERGDKTNQGISELNASSVGMAKAALEALDELDLFGVKGGPQSVIHVLADEVQHCQSILNSLLPRASTSKEVDASLLSVVSFPAFAVEDSQLVELTKQEIITKLQGRYGCCRFLRDGYKTPKEDPNRLYYEPAELKLFENIECEWPLFWTYFILDGVFSGNAEQVQEYKEALEAVLIKGKNGVPLLPELYSVPPDRVDEEYQNPHTVDRVPMGKLPHMWGQSLYILGSLMAEGFLAPGEIDPLNRRFSTVPKPDVVVQVSILAETEEIKTILKDKGIYVETIAEVYPIRVQPARILSHIYSSLGCNNRMKLSGRPYRHMGVLGTSKLYDIRKTIFTFTPQFIDQQQFYLALDNKMIVEMLRTDLSYLCSRWRMTGQPTITFPISH

#####
Blasting ENSP00000312158 to Find Its Complete Structures
#####
The protein sequence is MESAAALHFSRPASLLLLLLSLCALVSAQFIVVGPTDPILATVGENTTLRCHLSPEKNAEDMEVRWFRSQFSPAVFVYKGGRERTEEQMEEYRGRTTFVSKDISRGSVALVIHNITAQENGTYRCYFQEGRSYDEAILHLVVAGLGSKPLISMRGHEDGGIRLECISRGWYPKPLTVWRDPYGGVAPALKEVSMPDADGLFMVTTAVIIRDKSVRNMSCSINNTLLGQKKESVIFIPESFMPSVSPCAVALPIIVVILMIPIAVCIYWINKLQKEKKILSGEKEFERETREIALKELEKERVQKEEELQVKEKLQEELRWRRTFLHAVDVVLDPDTAHPDLFLSEDRRSVRRCPFRHLGESVPDNPERFDSQPCVLGRESFASGKHYWEVEVENVIEWTVGVCRDSVERKGEVLLIPQNGFWTLEMHKGQYRAVSSPDRILPLKESLCRVGVFLDYEAGDVSFYNMRDRSHIYTCPRSAFSVPVRPFFRLGCEDSPIFICPALTGANGVTVPEEGLTLHRVGTHQSL
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000312158_sequence.fasta
homologs: ['4hh8_A', '224', '218', '556', '5.12e-67', '50.704', '40']
homologs: ['

#####
Blasting ENSP00000243253 to Find Its Complete Structures
#####
The protein sequence is MAIKFLEVIKPFCVILPEIQKPERKIQFKEKVLWTAITLFIFLVCCQIPLFGIMSSDSADPFYWMRVILASNRGTLMELGISPIVTSGLIMQLLAGAKIIEVGDTPKDRALFNGAQKLFGMIITIGQSIVYVMTGMYGDPSEMGAGICLLITIQLFVAGLIVLLLDELLQKGYGLGSGISLFIATNICETIVWKAFSPTTVNTGRGMEFEGAIIALFHLLATRTDKVRALREAFYRQNLPNLMNLIATIFVFAVVIYFQGFRVDLPIKSARYRGQYNTYPIKLFYTSNIPIILQSALVSNLYVISQMLSARFSGNLLVSLLGTWSDTSSGGPARAYPVGGLCYYLSPPESFGSVLEDPVHAVVYIVFMLGSCAFFSKTWIEVSGSSAKDVAKQLKEQQMVMRGHRETSMVHELNRYIPTAAAFGGLCIGALSVLADFLGAIGSGTGILLAVTIIYQYFEIFVKEQSEVGSMGALLF
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000243253_sequence.fasta
homologs: ['4cg7_A', '476', '966', '2496', '0.0', '99.790', '100']
homologs: ['4cg6_A', '476', '966', '2496', '0.0', '99.790', '100']

There are no qualifying crystal PDB structures for ENSP00000378163
#####
Beginning Setup for AWSEM Calculations of PF06229 Pseudogene-Parent Protein Pair
#####
/Users/hanajaafari/Desktop/Time-Devolution-Calc/AWSEM_Energy_Calculations/06_24_2020_Global_AWSEM_Energy_Calculation_Search_Complete_Coverage_Translated_Pseudogene_Candidates
#####
Blasting ENSP00000226798 to Find Its Complete Structures
#####
The protein sequence is MAEYSYVKSTKLVLKGTKTKSKKKKSKDKKRKREEDEETQLDIVGIWWTVTNFGEISGTIAIEMDKGTYIHALDNGLFTLGAPHKEVDEGPSPPEQFTAVKLSDSRIALKSGYGKYLGINSDGLVVGRSDAIGPREQWEPVFQNGKMALLASNSCFIRCNEAGDIEAKSKTAGEEEMIKIRSCAERETKKKDDIPEEDKGNVKQCEINYVKKFQSFQDHKLKISKEDSKILKKARKDGFLHETLLDRRAKLKADRYCK
finding homologs
executing::: /Users/hanajaafari/Desktop/ncbi-blast-2.10.0+/bin/psiblast -num_iterations 1 -word_size 3 -evalue 0.005 -outfmt '6 sseqid slen bitscore score evalue pident qcovs' -matrix BLOSUM62 -db /Users/hanajaafari/Desktop/openawsem/pdbDB -query ./ENSP00000226798_sequence.fasta
homologs: ['2yug

## 14 protein families have a parent protein with a crystal structure. 

One of the families, PF00406, is the adenylate kinase family. Adenylate kinase monitors cellular adenine nucleotide levels and interconverts ATP, ADP, and AMP. Catalysis involves a lid domain that is open in the unbound state and closes after ATP and AMP bind, which pushes out water and increases the efficiency of the reaction. The parent protein, ENSP00000371230 (also associated with PF05191), is associated with the pseudogene transcript ENST00000432033. Below we retrieve the pseudogene sequence and analyze it with EMBOSS Transeq.

In [15]:
# Fetch the sequence of pseudogene transcript ENST00000432033 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000432033?"
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
# Fail loudly on an HTTP error (unknown ID, rate limiting, outage); otherwise the
# error body would be stored below as if it were the sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'GAAGCACCTCTCTAGCGGAGACCTGCTCCAGGATCAACATGCTGCGGGGCACAGAAATTAATGTGTTAGCTAACGCTTTCATTGACCAAAGGAAACTCATCCGAGATGATGTCATGACTCAGCTGGCCCTATATGAGCTGAAAAATCTCACTTAGTATAGCTAGCTGTTGGGTGGTTTTCCATGGACACTTCCACAGGCAGAAGCCCTAGACTAAGCTTCTTAGACAGACAAAGTGATTAGCCCGAATGTGCCCTTTGAGGTCATTAAACAATGCCTTACTGCTCGCTAGATTCATCCTGCCAGTGGCTGAGTCTACAACATTGAATTCAACCCTCCCAAAACTGTGGGCACTGATGATCTGACTGGGGATCCTCTCATTCAGCATGAGGATGATAAACCAGAGACGGTTATCAAAAGACTAAAGGCTTATGAAGCCCAAACAAAGCCAGTCCTGGAATATTACCAAGCTTTTCTACAAACTAAAGTTCCACAAAGAAGCCAGAAAGCTGCCATTACTC'

The retrieved sequence above, when translated, shows many premature stop codons (represented by asterisks). The stop codons can be seen when clicking this cell.

EAPL\*RRPAPGSTCCGAQKLMC\*LTLSLTKGNSSEMMS\*LSWPYMS\*KISLSIASCWVVF
HGHFHRQKP\*TKLLRQTK\*LARMCPLRSLNNALLLARFILPVAESTTLNSTLPKLWALMI
\*LGILSFSMRMINQRRLSKD\*RLMKPKQSQSWNITKLFYKLKFHKEARKLPLL

Families PF00571 and PF00478 share the parent protein ENSP00000321584, whose associated pseudogene transcript is ENST00000440965. As above, we can analyze this pseudogene transcript:

In [16]:
# Fetch the sequence of pseudogene transcript ENST00000440965 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000440965?"
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
# Fail loudly on an HTTP error (unknown ID, rate limiting, outage); otherwise the
# error body would be stored below as if it were the sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'ATGAGGGACTACCTATCAGCAGCAGCACCGGCTACGTGGTCGAGGACGGGTTCACTGCGCTGAAGGGGTGGGTTGCCCCTCCACACCTGTGGGTTTTTCTCGTTAGGTGGAACGAGAGACTTGGAAAAGAAAGACACAGACGCAAAGTATAGAGAAAGAAAAAAGGGGGCCCAGGGGACCGGCGTTCAGCATACGGAGGATCCCGCCGGCCTCTTAGTTCCCTTAGTATTTATTGATCATTTTTAGGTGTTTCTCGGAGAGGGGGGTGTGGCAGGGTCATAGAATAATAGTGGAGAGAAGGTCAGCAGATAAACACGTGAACAAAGGTCTCTGCATCAAGAACAAGGTAAAGAATTAAGTGCTGTGCTTCAGATATGCATACACATAAACATCTCAATGACTTAAAGAGCAGTAGTGCTGCCCGCATGTCCCACCTCCAGCCCTAAGGCGGTTTTCCCCTATCTCAGCAGATGGAACATACAATCAGGTTTTACACCGAGACATTCCATTGCCCAGGGACTGGCAGGAGACAGATGCCTTCCTCTTCTCTCAACTGCAAACAGGCGTTCCTTCCTCTTTTACTAATCCTCCTCAGCACAGACCCTTTATGGGTGTCGGGCTGGGGGACGGTCAGGTCTTTCCCTTCCCATGAGGCCATATTTCAGACTATCACATGGGGAGAAACCTTGGACAATACCTGGCTTTCCTAGGCAGAGGTCCCTGCGGCCTTCCGCAGTGTTTGTGTCCCTGGGTACTTGAGATTAGGGAGTGGTGATGACTCTTAACGAGCACGCTGCCTTCAAGCATCTGTTTAACAAAGCACATCTTGCACAGCCCTTAATCCACTTAACCCTGAGTTGACATAGCACATGTTTCAGGGAGCACAGGGTTGGGGGTAAGGTTACAGATTAACAGCATCTCGAGGCAGAAGAATTTTTCTTAGTACAGAACAAAATGGAGTCTCCTATGTCTACTTCTTTCTACACAGACACAGTAACA

As above, the pseudogene, when translated, contains many premature stop codons:
MRDYLSAAAPATWSRTGSLR\*RGGLPLHTCGFFSLGGTRDLEKKDTDAKYRERKKGAQGT
GVQHTEDPAGLLVPLVFIDHF\*VFLGEGGVAGS\*NNSGEKVSR\*TREQRSLHQEQGKELS
AVLQICIHINISMT\*RAVVLPACPTSSPKAVFPYLSRWNIQSGFTPRHSIAQGLAGDRCL
PLLSTANRRSFLFY\*SSSAQTLYGCRAGGRSGLSLPMRPYFRLSHGEKPWTIPGFPRQRS
LRPSAVFVSLGT\*D\*GVVMTLNEHAAFKHLFNKAHLAQPLIHLTLS\*HSTCFREHRVGGK
VTD\*QHLEAEEFFLVQNKMESPMSTSFYTDTVTI\*SLFLFPTPCSSSSPVTRDSPTTTS\*
FSQDS\*TS\*LMRWT\*PQP\*PNGHSEDAADLLPHGHCDRGRPGHRDGSDGRYWFHSPQLHP
RVPGQ\*GAEGQEV\*TGLYHTPRGAEPLAHCG\*CVGGQDASWLLWHPHH\*DGYHGQQAGGH
RHLPRHRLSC\*EGPHHPPQ\*GDDAKDQASGGSSSVRLKEANEILQLSKKGSCLSSMIAMS
WWPLSPAPR\*RTETTLWPPRIPMSSCWAGQLWVPMRMTNTTWTCSPR\*ASMS\*AWTRPKG
TRCIRSPWCITSNKSNPTSR\*LGGTW\*QQPRPTT\*LTLVWMGWAGAWTARLHLHQPGSDS
LQSAPGHCCVQGGQAYPELWCAHHSRWWHPDHGACGQGPGPRSLHSDDGLPAGRHHGGPR
RVLLLRRNAAQEVPGHGLTGCHGEEQQQPETILQRRG\*GEDHAGCLGLHPGQRVHSEVRA
LPHSGHPAWLPGYRGPQPVCPSVHDVLRGAQV\*EADHVSPDRRWHPWPALLREVAVLX

## Iterating over other parent proteins from list of 14

In [42]:
#Collect the unique parent protein identifiers from the crystal structure table.
#The identifier is taken from the second whitespace-separated column of each line.
#NOTE(review): the relative path is resolved against the kernel's working directory.
parents=set()
with open("../../protein_family_parent_protein_crystal_structures.txt","r") as f:
    for line in f:
        fields=line.split()
        #Skip blank or single-column lines instead of failing with an IndexError.
        if len(fields)<2:
            continue
        parents.add(fields[1])
#Drop the two parents that are not iterated over below.
#NOTE(review): presumably these were already examined earlier in the notebook - confirm.
#remove() is kept on purpose: a KeyError here means the input table no longer lists them.
for excluded_parent in ("ENSP00000321584","ENSP00000371230"):
    parents.remove(excluded_parent)
parents

{'ENSP00000230050',
 'ENSP00000235835',
 'ENSP00000265333',
 'ENSP00000273550',
 'ENSP00000353741',
 'ENSP00000389624',
 'ENSP00000429419',
 'ENSP00000432884'}

### Protein Family: PF01248 (Ribosomal S12/Ribosomal L30), Parent Protein: ENSP00000230050, Pseudogene Transcript: ENST00000451515

In [35]:
#Fetch the nucleotide sequence of pseudogene transcript ENST00000451515 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000451515?"
#The timeout (in seconds) keeps the cell from hanging indefinitely if the server does not respond.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
#Fail loudly on an HTTP error so that an error message is never stored as if it were a sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'CATGGCCGAGGAAGGCAGTGCTGCTGGAGGTGTAATGGACATTAATACTGTTTTACAGGAGGTGCTGAAGACCGCCCTCATCCATGATGGCCTAGCATATGAAATTTGCAAAGCTGCCAAAGCCTCAGACAAGTGCCAAGCCCATCTTTGTGTGCTGTGTGTGCTTGCATCCAACTGTGATGAGCCTATGTATGTCAAGTTGGTGGAGGCCCTTTGTGCTGAACACCAAATCAACCTAATTAAGGTTGATGACCAGAAACTAGGGGAATCGGTAGGCCTCTGTAAAACTGACAGAGAGGGGAAACCGTGTAAAGTGGTTGGTTGAAGTTGTATAGTAGTTACGAACTATGGCAAGGAGTCTCAGGCCAAGGATGTCATTGAAGAGTACTTCAAATGCAAGA'

Pseudogene Translated:
HGRGRQCCWRCNGH\*YCFTGGAEDRPHP\*WPSI\*NLQSCQSLRQVPSPSLCAVCACIQL\*
\*AYVCQVGGGPLC\*TPNQPN\*G\*\*PETRGIGRPL\*N\*QRGETV\*SGWLKLYSSYELWQGV
SGQGCH\*RVLQMQX

### Protein Family: PF00248 (Aldo/Keto Reductase), Parent Protein: ENSP00000235835, Pseudogene Transcript: ENST00000460134

In [36]:
#Fetch the nucleotide sequence of pseudogene transcript ENST00000460134 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000460134?"
#The timeout (in seconds) keeps the cell from hanging indefinitely if the server does not respond.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
#Fail loudly on an HTTP error so that an error message is never stored as if it were a sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'ATGTCCCGGCCCTCCACGGTGTTGGGTGCCATGGAGATGGGGCGCCGTAGTCACCCGCGCCTTCCTGGAGCACGGCCACACCAAGATAGACACGGCCTTCGTGTACAGCGACAGCCAGTCCAAGACCATCCTGGGTGGCCTGAAAATTGATACCAAGGCCAATCCATTGTTTGGGAACTCCCTGAAACCTGACAGTCTCCGGTTCCAGCTGGAGACGTCACTGAAGTGGCTGCAGTGTCCCCGAGTGGACCTCTTCTACCTGCTCAGGCCAGACCACAGCACCCCGGTGGAAGAGACACTGAGTGCCTGCCACCAGCTGCACCAGGAGGGCAAGTTCCTGGATCTTGGCCTCTCCAACTATGCCACCTGGGAAGTGGCCAAGATCTGTAACCTCTGCAAGAGCAATGGCTGGACCCTGCCCACTGTGTACCAGGGCATGTACAACGCCACCACCCAGCAGGTGAAAACGGAGCTCTTCCCCTGCCTCAGGCACTCTGGACTGAGGTTCTATGCCTTCAACCCTCTGGCTGGGGACCTGCTGACTGGCAAGTACAAGTATAAGGACAAGGACAGGAAACAGCCCATGGGCCGCTTCTTTGGGACTCAATGGGCAGAGATCTACAGGAATCGCTTCTGGAAGAAGCACCACTTCGATGGCATTGCCCTGGTAGAGAAGGCCCTGCAGGCCGCGTATGGCGCCAGCGCCCCCAGCATGACCTCGGCTGCCCTCTGGTGGATGTACCACCACTCACAGCTGCAGGGTGCCCACGGGGACGCAGTCATCCTGGGCATGTCCAGCTTGGAGCAGTTGGAGCAGAACTTGGCAGCGGCAGAGGAAGGGCCCCTGGAGCCAGATGTTGTGGAGGCCTTTAATCAAGCCTGGCAATTGGTTGCTCACGAATGTCCCAACTACTT'

Pseudogene Translated: MSRPSTVLGAMEMGRRSHPRLPGARPHQDRHGLRVQRQPVQDHPGWPEN\*YQGQSIVWEL
PET\*QSPVPAGDVTEVAAVSPSGPLLPAQARPQHPGGRDTECLPPAAPGGQVPGSWPLQL
CHLGSGQDL\*PLQEQWLDPAHCVPGHVQRHHPAGENGALPLPQALWTEVLCLQPSGWGPA
DWQVQV\*GQGQETAHGPLLWDSMGRDLQESLLEEAPLRWHCPGREGPAGRVWRQRPQHDL
GCPLVDVPPLTAAGCPRGRSHPGHVQLGAVGAELGSGRGRAPGARCCGGL\*SSLAIGCSR
MSQLL

### Protein Family: PF01459 (Eukaryotic Porin), Parent Protein: ENSP00000265333, Pseudogene Transcript: ENST00000524809

In [38]:
#Fetch the nucleotide sequence of pseudogene transcript ENST00000524809 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000524809?"
#The timeout (in seconds) keeps the cell from hanging indefinitely if the server does not respond.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
#Fail loudly on an HTTP error so that an error message is never stored as if it were a sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'ATGGCTGTGCCACCCAAGTATGCCAATCTTGGCAAATCTGCCAGGAATGTCTTCAACAAGGGCTACGGATATGGCCTAATAAACCTTTTGAAAACAAAATCTGAGAATGCATTGGAATTTGAAAGCTTAGGCTCAGCCAACACTGAAATCACCAAAGTGACAGGCAGTCATGGAGACCAAGTACAGATGGACTGAGTACGGCCTGATGTTTATGAAGTGAAACACACAACACACCAGGACTGAGATTACTGTGGAAGATCATCTTGCAAGTGGACTGAAGCTGACCCTCAATTCACCCTTGTCACCTAATACTGGGGGAAAAATGCTAAAATCAACACCAGGTACAAGCAGGAGCACATCAACTTGGGCTGTAACCTGGATTTTGACATCACTGGACCTTCCATCTGGGGCGCTCTGGTGCTGGGTTACAAGGGTTGACTGGCTGGCTACCAGATAAATTTTGAGACTGCAAAATCCTGAGTGACCCAGAGCAACTCTGCAGTTGGTTACAAGTCTGATGAATTCCAACTTCACACTAACGTAAATAATGGGACAGTTTGGTGGCTCCATTTACCAGAAGGTGAGTAAGAAGTTGGAGACCACTGTCAATTTCACCTGGACAGCAGGAAACAGTAACACTAATACGGTTTGTCTGTGTCCCCACCCAAATCTAATCTTGAATTGTAGCTCCCATAATTCCCACGTGTGTGGGACTGACCTGGTGGGAGATAATTGAATCATGGGGGCAGTTTCCCCATACTGTTCTCATGGTAGTGAATGTCTCACGGGATCTGATGGTTTTATAAGGGGAAACTCCTTTCACTTGGCTTTCATTTTCTCTCCCGCCTGCCACCATGTAAGATGTGGCTTTCGCCTTCCCCCACAATGGTGAGGCCTCTCCAGCCACGTGGAACTGTGTGAGCCCATTAAACCTTTTCCTTTATAAATTACCCAGTCCCAAGGTGTATCTTTACTAGCAGTGTGAAAACAGACTAATAC

Pseudogene Translated: MAVPPKYANLGKSARNVFNKGYGYGLINLLKTKSENALEFESLGSANTEITKVTGSHGDQ
VQMD\*VRPDVYEVKHTTHQD\*DYCGRSSCKWTEADPQFTLVT\*YWGKNAKINTRYKQEHI
NLGCNLDFDITGPSIWGALVLGYKG\*LAGYQINFETAKS\*VTQSNSAVGYKSDEFQLHTN
VNNGTVWWLHLPEGE\*EVGDHCQFHLDSRKQ\*H\*YGLSVSPPKSNLEL\*LP\*FPRVWD\*P
GGR\*LNHGGSFPILFSW\*\*MSHGI\*WFYKGKLLSLGFHFLSRLPPCKMWLSPSPTMVRPL
QPRGTV\*AH\*TFSFINYPVPRCIFTSSVKTD\*YKHSLWNTQVSDQP\*HLLLG\*VNNSSLT
GLEYTETLKPGIKMTLSALLDGKNNNASGYTLGLELKX

### Protein Family: PF00210 (Ferritin), Parent Protein: ENSP00000273550, Pseudogene Transcript: ENST00000437933

In [39]:
#Fetch the nucleotide sequence of pseudogene transcript ENST00000437933 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000437933?"
#The timeout (in seconds) keeps the cell from hanging indefinitely if the server does not respond.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
#Fail loudly on an HTTP error so that an error message is never stored as if it were a sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'ATGACGACTGCGCGTCCCCCTCGCAGGTGCGCCAGAACTACCGCCATCAACCGCCAGATCAACGTGGAGCTCTATGCCTCCTACATCTACCTGTCCATGTCTTACTACTTTGACCACAATGATGTGGCTTTGAAGAACTTTGCCAAATACTTTCTTCACCAATCTCATGAGGAGAGGGAACATGCCAAGAAACTGATGAAGCTGCAGAACCAATGAGGTGGCTGAATCTTCTTCCTTCGGGACTTCAAGAAACCAGACCATGATGACTGGGAGAGCAGGCTGAATGCGATGGAGCATGCATTACATTTGGAAAAACTTGGGAATCAGTCACTACTGGAACTGCACAAACTGGCCACCGACAAAATTACCCCCATTTGTGTGACTTCACTGAAACACATTACCTGAATGAGCAGGGGAAATCCATCAAGAATTGGGTGACCATGTGACCAACTTGCGCAAGATGAGGGCACCCGAATCTGGCTTGGCAGAATATTTCTCTGACAAGCACACCCCGGGGAGACAGTGATAATGA'

Pseudogene Translated: MTTARPPRRCARTTAINRQINVELYASYIYLSMSYYFDHNDVALKNFAKYFLHQSHEERE
HAKKLMKLQNQ\*GG\*IFFLRDFKKPDHDDWESRLNAMEHALHLEKLGNQSLLELHKLATD
KITPICVTSLKHIT\*MSRGNPSRIG\*PCDQLAQDEGTRIWLGRIFL\*QAHPGETVIMX

### Protein Family: PF03465 (eRF1 domain 3), PF03464 (eRF1 domain 2), PF03463 (eRF1 domain 1); Parent Protein: ENSP00000353741; Pseudogene Transcript: ENST00000478788

In [41]:
#Fetch the nucleotide sequence of pseudogene transcript ENST00000478788 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000478788?"
#The timeout (in seconds) keeps the cell from hanging indefinitely if the server does not respond.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
#Fail loudly on an HTTP error so that an error message is never stored as if it were a sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'ATGGTGGACGACCCCAGTGCTGTGGACAGGAACGTGGAGTTCTGGAAGATCAAGAAGCTCATTAAGAGCTTGGAGGCGGGCTGGGCGCCGTGGCTCACGCCTGTAATCCCAGCACTTTGAGGCCGAGGCGGGCAGATCACCTGAGGTCAGGAGTTCAAGACCTGCCTGGCCAATATGGTGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCTGGGTGTGGTGGCACATGCCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGAAGAATCTTGTGAACCCGGGAGGCTTATGTGGCAGTGAGGCAAGATCACGCCACTGTACTCCAGCCTGGGCGACAGAGCTGTCTCAAAAAAAAAAAAAAAAAAAGGAGCTTGGAGGCAGCCTGCAGCAATGGCACTAGCATGATATCACTGATCATTCCTCCCAAAGACCAGATTTCACCAGTGGCAAAAATGTTATCAGATGAGTTTGGAACTGCATCTAACATTAAGGCAGGAGTAAACCGCCATCCAGTCCTGGGAGCCATTACATCTATACAACAAAGGCTCAAACTTTATAACAAAGTACCTCCAGATGGTCTGGTTGTTTACTGTGGAACAACTGTAACAGAAGAAGGAAAGGAAAAGAAAGTCAACGTTGACTTTGAATCTTTCAAACCAATTAATACGTCATTGTATTGTGTGACAACAAATTCCATAAAGGGGCTCTTACAGCACTACTTTCAGATGACAGCAAGTTTGGATTCATTGTAATAGATGGTAGTGGTGCACTTTTTGGCACACTCCAAGGAAACACAAGAGAAGTCCTGCACAAATTCACTGTGGATCTCCCAAAGAAACACAGTAGAGGCGTTTTGCCCGTTTAAAAATGGAAAAGCGACATAACTATGTTCAGAAAGTAGCAGAGACTGCTGTGCAGCTATTTCTGGGGACAAAGTGAATGTGGCTGGTCTAGTTTTAGCTGGATCTGCTGACTTTGAACTAAGTCAATCTGAT

Pseudogene Translated: MVDDPSAVDRNVEFWKIKKLIKSLEAGWAPWLTPVIPAL\*GRGGQIT\*GQEFKTCLANMV
KPCLY\*KYKN\*LGVVAHACSPSYLGG\*GRRIL\*TREAYVAVRQDHATVLQPGRQSCLKKK
KKKRSLEAACSNGTSMISLIIPPKDQISPVAKMLSDEFGTASNIKAGVNRHPVLGAITSI
QQRLKLYNKVPPDGLVVYCGTTVTEEGKEKKVNVDFESFKPINTSLYCVTTNSIKGLLQH
YFQMTASLDSL\*\*MVVVHFLAHSKETQEKSCTNSLWISQRNTVEAFCPFKNGKAT\*LCSE
SSRDCCAAISGDKVNVAGLVLAGSADFELSQSDMFDQRLQSKVLKLVDISYGGENGFNQA
TELSTEVLSEVKFIQEKKSIERYFDEITQDTRKYCFGVEDTLKTLEKGAVEILMVYENLD
IMRYVLYCQGTEKEKIL\*LQSKKRINLISQTKRPDRNMSLSRACPCWNGLLTTIKNLELC
WK\*SQIKCKTGLGL\*KDLVELEVSCSTE\*ISREWNTKEETMNFLTLMTX

### Protein Family: PF01652 (Initiation Factor 4E), Parent Protein: ENSP00000389624, Pseudogene Transcript: ENST00000422311

In [43]:
#Fetch the nucleotide sequence of pseudogene transcript ENST00000422311 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000422311?"
#The timeout (in seconds) keeps the cell from hanging indefinitely if the server does not respond.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
#Fail loudly on an HTTP error so that an error message is never stored as if it were a sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'ACTGTCAAACCAGAAACCACCCCTACTCCTAATCCCCCTGTGAAGAGGAGAAAACAGAATCTAATCAGGAGGTTGCTAACCCAGAACACTATATTAAACATCCTCTACAGAACAGATGGGCACTCTGGTTTTTTTAAAAAAGATAAAAGCAAAACTTGACAAGCAAACCTGCCGCTGATCTCTAAGTTTGATCCTGTTGACTTTTGGGCTCTGTACAACCATATCCAGTTGTCTAGTAATTTAATGTCTGGCTGTGACTACTCACTTTTTAAGGATGGTGTTGAGCCCATGTGGGAAGATGAGAAAAACAAACAGGGAGGATGATGGCTAATTTACATTGAACAAACAGCAGAGATGAAGGGACCTCAATCACTTTTGGCTAGAGACTCTACTGTGTCTTATTGGAGAATCTTTTGATGACTCAGTGATGATATATGTGCAGCTGTTCTTAATGTTAGAGCTAAAGGTGATAAGATAGCAGTATGGACTACTGAATGTGAAAACAGAGAAGCTGTTACACATATAGAGAGGGTATACAAGGAAAGGTTAGGACTTCCTCCAAACATCGTGATTGGTTATCAGTCCCATGCAGACACAGCTACTAAAAACAGCTCCACCACTAAAAATAGGTTTGTTGTTTAA'

Pseudogene Translated: TVKPETTPTPNPPVKRRKQNLIRRLLTQNTILNILYRTDGHSGFFKKDKSKT\*QANLPLI
SKFDPVDFWALYNHIQLSSNLMSGCDYSLFKDGVEPMWEDEKNKQGG\*WLIYIEQTAEMK
GPQSLLARDSTVSYWRIF\*\*LSDDICAAVLNVRAKGDKIAVWTTECENREAVTHIERVYK
ERLGLPPNIVIGYQSHADTATKNSSTTKNRFVV\*

### Protein Family: PF00179 (Ubiquitin Conjugating Enzyme), Parent Protein: ENSP00000429419, Pseudogene Transcript: ENST00000433562

In [44]:
#Fetch the nucleotide sequence of pseudogene transcript ENST00000433562 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000433562?"
#The timeout (in seconds) keeps the cell from hanging indefinitely if the server does not respond.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
#Fail loudly on an HTTP error so that an error message is never stored as if it were a sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'CATGATGACATCACTCCAGATGGTATCTTCCAGTTCTCAGGTCAAAACCTTTGAGTTATGCTTATGCAGCTATTGATAGGCAATGGTATGATTAACTGGGGCCTTAAAGATGATGAAGATATGACATTTACAAGGTGGACAGGCATGATTATTGGGCCACCAAGGACAAATTATGAAAAGAGAATATAGGCCGGGCATGGCAGTTCACACCTATAATCCCAGAACTTTGGGAGGCCAAGGAGGGCAGATCACGAGGTCAGGAGTTTGAGACTAGTCTGCCCAACATAGTGAAACCTCATCTCTACTAAAAATACAAAAAATTAGCTGGGTGTGGTGGTGTGCACCTGTAATCCCAGCTACTCGGGAAGCTGAGGCAGGAGAATCACATGAACCTAGGAAGCAGAGGTTGCAGTGAGCCGAGATCACGCCACTGCGCTCCCGTGTGGGCCACAGCGCGAGCCTCTGTCTCAAAAATAAAAAAAAAGAATATATAGCCTGAAAATAGAAAGTGGACCCAAATACCCAGAAGCTCCCCCATCAGTTAGATTTGTAACAAGAATTAATATGAATGGAATAAATAACTCCAGTGGGATGGTGGATGACCGCAGCACACCAGTGTTAGCAAAATGACAAAATTCATATAGCATTAAAGTTGTACTTCAGGAGCTAAGACGTCTAATGATGTCCAAAGAAAATATGAAGCTTCCACAGCCACTGGAAGGACAAACATACAACA'

Pseudogene Translated: HDDITPDGIFQFSGQNL\*VMLMQLLIGNGMINWGLKDDEDMTFTRWTGMIIGPPRTNYEK
RI\*AGHGSSHL\*SQNFGRPRRADHEVRSLRLVCPT\*\*NLISTKNTKN\*LGVVVCTCNPSY
SGS\*GRRIT\*T\*EAEVAVSRDHATALPCGPQREPLSQK\*KKRIYSLKIESGPKYPEAPPS
VRFVTRINMNGINNSSGMVDDRSTPVLAK\*QNSYSIKVVLQELRRLMMSKENMKLPQPLE
GQTYNX

### Protein Family: PF00012 (HSP70-heat shock proteins), Parent Protein: ENSP00000432884; Pseudogene Transcript: ENST00000401676 (deleted?)

In [45]:
#Fetch the nucleotide sequence of pseudogene transcript ENST00000401676 from the Ensembl REST API as plain text.
server = "https://rest.ensembl.org/sequence/id/ENST00000401676?"
#The timeout (in seconds) keeps the cell from hanging indefinitely if the server does not respond.
r = requests.get(server, headers={ "Content-Type" : "text/plain"}, timeout=60)
#The section header flags this transcript as possibly deleted, so the status check matters here:
#fail loudly on an HTTP error so that an error message is never stored as if it were a sequence.
r.raise_for_status()
pseudogene_sequence=r.text
pseudogene_sequence

'GCAGTTGTCACTGATCTTGGCACCACCTATTCCTCTGTAGGTGTCTTCCAGCACAGGACAGTAGAGATCATCACCACTCGGCAAGAAAACCAAGCTATGTTGCTTTTAGCTGACACAGAAAGATTGATTAGTGATGGTTTAGGAATCAAATTCAGTGGACTCCATCAACACAGTTTTAGATAACAGAGGTGTAATTAGACACCGATTTGACAGTCCAACTGTCTGGTCAAACATGAAAAGCACTGGACTTTCCTGGTTGTCGACTGTGCACACAGGCTGAAGATCCAGAGGAGGAAAGGGAGGGATACTAAAAGCTTTATACAGTTGAGGTATTTCCTGTGGTTGTGTTAGCTATTGCTGTGTAACAAATTACCCCAAAACTTGGTGGACTGAAACACCACTTACTGGCTGGGCACGGTGGCTCACGCCTGTTACCCCAGCACTTTGGTAGGCCAAGGCAGGCAGGTCACATGAGGCCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAAACCCCATAAATTAGTCGGGCATGGTGGCATGCACCTGTAGTCCCAAGTAGCTGACTCTGGAGGCTGAGGCATGAGAATCACTTGAACCCAGGAGGCAGAGGTTGGACTGAGCTGAGACTGCACCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTGTCTCAAAAAATAAAAAGAAAAAAGAAACACCACTTACTATTTAAGTTTTTGTGGGTCAGGAATCAGTGCAGCTTAGCTGGGTCCTCTGATTCTGTGTTTCTCACAAGGCTGCAGTCATCTCAGAGATTCACTTCCTAGTTCACTCGTGGTTATTGGCTGGATTTGGTTCCTCTCAGGCTATTAGATTGAGGCCTCAGTTCCTCATAAGCTGATGTCCAGAAGCTTCTCCATAGAGCATCTCACAACATGGCAGTGTGCTTCATCAGATGAACAAGTAGGGGAGTGATATGGTTTAGATCTGTATCCCTGCCCAAATCTTGTGTCGAATG

Pseudogene Translated: AVVTDLGTTYSSVGVFQHRTVEIITTRQENQAMLLLADTERLISDGLGIKFSGLHQHSFR
\*QRCN\*TPI\*QSNCLVKHEKHWTFLVVDCAHRLKIQRRKGRDTKSFIQLRYFLWLC\*LLL
CNKLPQNLVD\*NTTYWLGTVAHACYPSTLVGQGRQVT\*GQEFKTSLANMVKPHKLVGHGG
MHL\*SQVADSGG\*GMRIT\*TQEAEVGLS\*DCTTALQPG\*QSETVSKNKKKKETPLTI\*VF
VGQESVQLSWVL\*FCVSHKAAVISEIHFLVHSWLLAGFGSSQAIRLRPQFLIS\*CPEASP
\*SISQHGSVLHQMNK\*GSDMV\*ICIPAQILCRMVMPVLEVGPGGGVTGSSG\*ISHEWFGT
IPLVLFL\*Q\*VLVRLFKSV\*HLHLPLFALLRFCHVRYLLSLHLPL\*L\*VSGGFPRSQAHA
SIMLPVEQCRTMSQLGLFSL\*ITPSQVFLYSSERLD\*YREQERLSGREC\*\*NRSHSLLQP
NHRSDFCCALFVKNKLGPGTVAHAYNPSPLGG\*DGRVT\*GQEFKTSLVNIVIPPPISLKK
KKN\*KASYGQSWWLTPVIPALREAEVSGSPEVRSSRPAWLTW\*NPVSTKNTKISWAWWWT
PVIPVTWEAEAGKSLEPKGGRGCSEPRSAWMKQQNFVSRKKKASY\*TEPTLEGRG\*HKSE
HQEVGSLWVAS\*KSAYHIGFEKDGENSGNLLYDP\*PTRSSYYQLL\*WLPASGHQSWWNHY
\*SHCVLNCQQAKCRWSQLRRER\*CQI\*\*SDLLPGWHFLSVHPQYSRWNY\*SQVCN\*GYPP
E\*RVP\*QLSGQLFHG\*DQLQL\*KGHPLRTSRLPKPLLPLLNVLFILVSRPI\*D\*VPP\*RG
SHLYLCAHEQLEESKATPVPGTLHLWRKP\*AMSPRHITSPSRVALFISPRFKISYX