In [1]:
import os  # imported here because this cell runs before the main import cell

# Path of the local protgps repository; it is appended to sys.path so that the
# `protgps` package can be imported. Set the PROTGPS_PARENT_DIR environment
# variable to point at a different checkout without editing the notebook;
# otherwise the original default location is used.
PROTGPS_PARENT_DIR = os.environ.get("PROTGPS_PARENT_DIR", "/home/shd-sun-lab/protgps")

In [2]:
import sys
import os
sys.path.append(PROTGPS_PARENT_DIR) # append the path of protgps
from argparse import Namespace
import pickle
from tqdm import tqdm
import pandas as pd
import torch 
from protgps.utils.loading import get_object
from tkinter import Tk, filedialog

In [3]:
# Run on the first CUDA GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [4]:
# Compartment labels. When the results table is built, the label at position k
# names column k of the score matrix, so the order here matters.
# NOTE(review): presumably this order must match the order of the model's
# output logits -- confirm against the training configuration.
COMPARTMENT_CLASSES = ["cytosol", "ER", "mitochondrion", "nucleus", "synapse"]

def load_model(snargs):
    """
    Build the lightning module named in ``snargs`` and restore it from a checkpoint.

    Args:
        snargs: namespace-like object providing ``model_path`` (checkpoint file),
            ``lightning_name`` (name looked up via ``get_object`` in the
            "lightning" registry) and ``relax_checkpoint_matching`` (when true,
            the checkpoint is loaded with ``strict=False``).

    Returns:
        Whatever ``load_from_checkpoint`` returns for the constructed module.
    """
    checkpoint_file = snargs.model_path
    lightning_factory = get_object(snargs.lightning_name, "lightning")
    # The module is first constructed from the args, then its
    # load_from_checkpoint is invoked with the same args.
    untrained = lightning_factory(snargs)
    enforce_strict = not snargs.relax_checkpoint_matching
    return untrained.load_from_checkpoint(
        checkpoint_path=checkpoint_file,
        strict=enforce_strict,
        args=snargs,
    )

@torch.no_grad()
def predict_condensates(model, sequences, batch_size=1, round=True):
    """
    Score sequences with the classifier, ``batch_size`` sequences at a time.

    Each slice of ``sequences`` is passed to ``model.model`` as ``{"x": slice}``;
    the sigmoid of the returned ``'logit'`` entry is moved to the CPU and the
    per-batch results are stacked vertically.

    Args:
        model: object whose ``model`` attribute is callable as described above.
        sequences: sliceable collection of inputs (a list of strings in this notebook).
        batch_size: number of sequences per forward pass.
        round: when true, scores are rounded to 3 decimal places.

    Returns:
        CPU tensor of stacked sigmoid scores, one row per input sequence
        (assuming the model emits one logit row per sequence -- TODO confirm).
    """
    batch_starts = range(0, len(sequences), batch_size)
    per_batch_scores = []
    for start in tqdm(batch_starts, ncols=100):
        chunk = sequences[start : start + batch_size]
        logits = model.model({"x": chunk})["logit"]
        per_batch_scores.append(torch.sigmoid(logits).to("cpu"))
    stacked = torch.vstack(per_batch_scores)
    return torch.round(stacked, decimals=3) if round else stacked

In [None]:
# Use file dialogs to select the pickled .args file and the .ckpt checkpoint.
# One hidden Tk root owns both dialogs and is destroyed afterwards so that no
# Tcl interpreter / invisible window is left behind by this cell.
dialog_root = Tk()
dialog_root.withdraw()  # Hide the main tkinter window
try:
    args_path = filedialog.askopenfilename(
        parent=dialog_root,
        title="Select .args file",
        filetypes=[("Args files", "*.args")],
    )
    # A cancelled dialog returns an empty value; stop with a clear message
    # instead of failing later on open("").
    if not args_path:
        raise FileNotFoundError("No .args file selected")

    # Prompt to select the .ckpt file
    ckpt_path = filedialog.askopenfilename(
        parent=dialog_root,
        title="Select .ckpt file",
        filetypes=[("Checkpoint files", "*.ckpt")],
    )
    if not ckpt_path:
        raise FileNotFoundError("No .ckpt file selected")
finally:
    dialog_root.destroy()

# Load args.
# SECURITY NOTE: pickle.load can execute arbitrary code; only open .args files
# that come from a trusted source.
with open(args_path, "rb") as args_file:
    args = Namespace(**pickle.load(args_file))
args.model_path = ckpt_path

# Point the pretrained hub directory at the ESM2 checkpoints inside the
# protgps checkout (derived from PROTGPS_PARENT_DIR instead of repeating the
# absolute path).
args.pretrained_hub_dir = os.path.join(PROTGPS_PARENT_DIR, "checkpoints", "esm2")

# Load the model, switch it to evaluation mode, and move it to the chosen device.
model = load_model(args)
model.eval()
model = model.to(device)

In [None]:
# Sequences to predict: one string per protein, in the same order as
# `protein_names` below. Replace the placeholders with real sequences.
sequences = [
    "Sequence1 here",  # sequence for Gene_name1
    "Sequence2 here",  # sequence for Gene_name2
]

# Add protein names corresponding to the sequences (entry i labels sequences[i]).
protein_names = [
    "Gene_name1 here",
    "Gene_name2 here",
]

# Fail fast on a name/sequence mismatch; otherwise the error only surfaces when
# the results table is built, after the model has already been run.
if len(sequences) != len(protein_names):
    raise ValueError(
        f"Got {len(sequences)} sequences but {len(protein_names)} protein names; "
        "the two lists must have the same length."
    )

In [None]:
# Score every entry of `sequences`, one sequence per forward pass; the result
# has one row of scores per sequence.
scores = predict_condensates(model=model, sequences=sequences, batch_size=1)

In [8]:
# Column-oriented results: protein names, their sequences, then one
# "<COMPARTMENT>_Score" column per entry of COMPARTMENT_CLASSES, taken from
# the score-matrix column at the same position.
data = {
    "Protein": protein_names,
    "sequences": sequences,
    **{
        f"{compartment.upper()}_Score": scores[:, column].tolist()
        for column, compartment in enumerate(COMPARTMENT_CLASSES)
    },
}

In [None]:
# Preview the predictions as a table; the bare expression is the cell's
# displayed output.
pd.DataFrame(data)

In [77]:
# Build the predictions table from `data` (assumed to be defined by the cells above).
df = pd.DataFrame(data)

# Prompt user to choose save location and filename. The hidden Tk root that
# owns the dialog is destroyed afterwards so it does not linger in the kernel.
save_root = Tk()
save_root.withdraw()  # Hide the root window
try:
    save_path = filedialog.asksaveasfilename(
        parent=save_root,
        defaultextension=".xlsx",
        filetypes=[("Excel files", "*.xlsx")],
        title="Save predictions as...",
        initialfile="Predictions.xlsx",  # Default filename suggestion
    )
finally:
    save_root.destroy()

# Save to Excel
if save_path:  # Only save if user didn't cancel
    df.to_excel(save_path, index=False)