In [13]:
import pandas as pd
import tkinter as tk
from tkinter import filedialog

def get_file_path():
    """
    Ask the user to choose an Excel file through a file-open dialog.

    A hidden Tk root window is created only to host the dialog and is
    destroyed again before returning, so repeated calls (e.g. re-running
    the notebook cell) do not accumulate Tk instances.

    Returns:
    str: The path chosen by the user; an empty value if the dialog is cancelled.
    """
    root = tk.Tk()
    try:
        # Hide the empty root window so only the dialog itself is shown.
        root.withdraw()
        return filedialog.askopenfilename(
            title="Select Excel File",
            filetypes=[("Excel Files", "*.xlsx *.xls")],
        )
    finally:
        # Always release the Tk root, even if the dialog raises.
        root.destroy()

def print_sequences(excel_file):
    """
    Reads an Excel file with 'Entry Name' and 'Sequence' columns,
    then prints each entry as a '# <Entry Name>' line followed by its
    sequence in double quotes.

    Every quoted sequence is followed by a comma except the last one, so
    the output can be pasted as the body of a list literal.

    Parameters:
    excel_file (str): Path to the Excel file.

    Returns:
    None. All output (including error messages) is written to stdout.
    """
    try:
        # Read the Excel file
        df = pd.read_excel(excel_file)

        # Ensure required columns exist
        required_columns = ['Entry Name', 'Sequence']
        if not all(col in df.columns for col in required_columns):
            print("Error: The Excel file must contain 'Entry Name' and 'Sequence' columns.")
            return

        # Decide on the trailing comma by row *position*, not by index label,
        # so the result does not depend on the frame having a default index.
        last_position = len(df) - 1
        pairs = zip(df['Entry Name'], df['Sequence'])
        for position, (entry_name, sequence) in enumerate(pairs):
            separator = "," if position < last_position else ""
            print(f"# {entry_name}")
            print(f"\"{sequence}\"{separator}")

    except FileNotFoundError:
        print("Error: The specified Excel file was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

def print_entry_names(excel_file):
    """
    Print every value of the 'Entry Name' column wrapped in double quotes,
    one per line, with a comma after each line except the last.

    Parameters:
    excel_file (str): Path to the Excel file.

    Returns:
    None. Output and error messages are written to stdout.
    """
    try:
        table = pd.read_excel(excel_file)

        # Guard clause: nothing to list without the column.
        if 'Entry Name' not in table.columns:
            print("Error: The Excel file must contain 'Entry Name' column.")
            return

        quoted_names = map('"{}"'.format, table['Entry Name'])
        print(",\n".join(quoted_names))

    except FileNotFoundError:
        print("Error: The specified Excel file was not found.")
    except Exception as err:
        print(f"An error occurred: {err}")

In [60]:
# Ask for the workbook once; the cells below all reuse this path.
excel_file_path = get_file_path()

In [62]:
# Count how many unique entry names are in the file.
def count_entry_names(excel_file):
    """
    Print the number of distinct values in the 'Entry Name' column.

    Parameters:
    excel_file (str): Path to the Excel file.

    Returns:
    None. The count, or an error message, is written to stdout.
    """
    try:
        table = pd.read_excel(excel_file)

        # Guard clause: bail out early when the column is absent.
        if 'Entry Name' not in table.columns:
            print("Error: The Excel file must contain 'Entry Name' column.")
            return

        distinct_total = table['Entry Name'].nunique()
        print(f"Number of unique entry names: {distinct_total}")

    except FileNotFoundError:
        print("Error: The specified Excel file was not found.")
    except Exception as err:
        print(f"An error occurred: {err}")

# Report how many distinct entry names the selected workbook contains.
count_entry_names(excel_file_path)

Number of unique entry names: 628


In [63]:
# Print each entry as a "# <Entry Name>" line followed by its quoted sequence.
print_sequences(excel_file_path)


# TM129_HUMAN
"MDSPEVTFTLAYLVFAVCFVFTPNEFHAAGLTVQNLLSGWLGSEDAAFVPFHLRRTAATLLCHSLLPLGYYVGMCLAASEKRLHALSQAPEAWRLFLLLAVTLPSIACILIYYWSRDRWACHPLARTLALYALPQSGWQAVASSVNTEFRRIDKFATGAPGARVIVTDTWVMKVTTYRVHVAQQQDVHLTVTESRQHELSPDSNLPVQLLTIRVASTNPAVQAFDIWLNSTEYGELCEKLRAPIRRAAHVVIHQSLGDLFLETFASLVEVNPAYSVPSSQELEACIGCMQTRASVKLVKTCQEAATGECQQCYCRPMWCLTCMGKWFASRQDPLRPDTWLASRVPCPTCRARFCILDVCTVR",
# ELOV7_HUMAN
"MAFSDLTSRTVHLYDNWIKDADPRVEDWLLMSSPLPQTILLGFYVYFVTSLGPKLMENRKPFELKKAMITYNFFIVLFSVYMCYEFVMSGWGIGYSFRCDIVDYSRSPTALRMARTCWLYYFSKFIELLDTIFFVLRKKNSQVTFLHVFHHTIMPWTWWFGVKFAAGGLGTFHALLNTAVHVVMYSYYGLSALGPAYQKYLWWKKYLTSLQLVQFVIVAIHISQFFFMEDCKYQFPVFACIIMSYSFMFLLLFLHFWYRAYTKGQRLPKTVKNGTCKNKDN",
# NBAS_HUMAN
"MAAPESGPALSPGTAEGEEETILYDLLVNTEWPPETEVQPRGNQKHGASFIITKAIRDRLLFLRQYIWYSPAPFLLPDGLVRLVNKQINWHLVLASNGKLLAAVQDQCVEIRSAKDDFTSIIGKCQVPKDPKPQWRRVAWSYDCTLLAYAESTGTVRVFDLMGSELFVISPASSFIGDLSYAIAGLIFLEYKASAQWSAELLVINYRGELRSYLVSVGTNQSYQESHCFSFSSHYPHGINTAIYHPGHRLLLVGGCETAEVGMSKASSCGLSAWRVLSGSPYYKQVTNGGDGVTAVPKTLGLLRMLS

In [64]:
# Print every entry name in double quotes, comma-separated, one per line.
print_entry_names(excel_file_path)

"TM129_HUMAN",
"ELOV7_HUMAN",
"NBAS_HUMAN",
"PEDS1_HUMAN",
"UBXN8_HUMAN",
"RN103_HUMAN",
"PGRC1_HUMAN",
"DNM1L_HUMAN",
"PLOD2_HUMAN",
"SCD_HUMAN",
"TOR1A_HUMAN",
"EI24_HUMAN",
"RGRF2_HUMAN",
"MGST3_HUMAN",
"CLGN_HUMAN",
"SC16A_HUMAN",
"PLCB_HUMAN",
"DEGS1_HUMAN",
"SPTC1_HUMAN",
"GPR37_HUMAN",
"INSI1_HUMAN",
"TPSN_HUMAN",
"ZW10_HUMAN",
"HTRA2_HUMAN",
"ERD23_HUMAN",
"G6PT1_HUMAN",
"CALU_HUMAN",
"CREB3_HUMAN",
"EXTL3_HUMAN",
"AT10A_HUMAN",
"MARH6_HUMAN",
"PLOD3_HUMAN",
"UD19_HUMAN",
"ICMT_HUMAN",
"DPM1_HUMAN",
"TMCC2_HUMAN",
"RTN2_HUMAN",
"PDCD6_HUMAN",
"ERN1_HUMAN",
"ERLN1_HUMAN",
"PR15A_HUMAN",
"SOAT2_HUMAN",
"WFS1_HUMAN",
"ERG24_HUMAN",
"AT10B_HUMAN",
"UFL1_HUMAN",
"ERLN2_HUMAN",
"SC31A_HUMAN",
"RTN3_HUMAN",
"STBD1_HUMAN",
"VAPB_HUMAN",
"SGPL1_HUMAN",
"ACSL3_HUMAN",
"FTCD_HUMAN",
"NB5R3_HUMAN",
"HMDH_HUMAN",
"RPN1_HUMAN",
"CP17A_HUMAN",
"SRPRA_HUMAN",
"STS_HUMAN",
"HMOX1_HUMAN",
"BCL2_HUMAN",
"MGST1_HUMAN",
"CP2D6_HUMAN",
"BIP_HUMAN",
"CP19A_HUMAN",
"PA2GA_HUMAN",
"ENPL_HUMAN",
"CD1D_H

In [10]:
# Print the list of compartments; a compartment is any column after the 'Sequence' column.
def print_compartments(excel_file):
    """
    Print the compartment column names of an Excel sheet, one per line,
    each wrapped in double quotes and preceded by a 'Compartments:' header.

    A compartment is any column positioned after the 'Sequence' column.

    Parameters:
    excel_file (str): Path to the Excel file.

    Returns:
    None. Output and error messages are written to stdout.
    """
    try:
        df = pd.read_excel(excel_file)

        # Check explicitly so a missing column yields a readable message
        # instead of the bare KeyError text that get_loc would produce.
        if 'Sequence' not in df.columns:
            print("Error: The Excel file must contain 'Sequence' column.")
            return

        sequence_index = df.columns.get_loc('Sequence')
        compartments = df.columns[sequence_index + 1:]  # Get all columns after 'Sequence'

        if len(compartments) == 0:
            print("No compartments found.")
            return

        print("Compartments:")
        for compartment in compartments:
            print(f"\"{compartment}\"")

    except FileNotFoundError:
        print("Error: The specified Excel file was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

# List the compartment columns (those after 'Sequence') of the selected workbook.
print_compartments(excel_file_path)

Compartments:
"cytosol"
"ER"
"mitochondrion"
"nucleus"
"stress_granule"
"Excitatory Synapse"
"Inhibitory Synapses"
"Dopaminergic Synapses"
"synapse"
