In [None]:
# First, let's set up our base directories and sample data
import os
from pathlib import Path

# Base directory structure
workdir = "/home/ec2-user/SageMaker/Proteus/notebooks"
BASE_DIR = Path(workdir)
FEATURE_DIR = BASE_DIR / "features"
OUTPUT_DIR = BASE_DIR / "outputs/geopoc"
PDB_DIR = BASE_DIR / "data/catsol_pdbs"

# Create directories if they don't exist
for dir_path in [FEATURE_DIR, OUTPUT_DIR]:
    dir_path.mkdir(parents=True, exist_ok=True)

# Real sequences for testing
sequences = [
    'MTKEEGRTYFESLCEEEQSLQESQTHLLNILDILSVLADPRSSDDLLTESLKKLPDLHRELINSSIRLRYDKYQTREAQLLEDTKTGRDVAAGVQNPKSISEYYSTFEHLNRDTLRYINLLKRLSVDLAKQVEVSDPSVTVYEMDKWVPSEKLQGILEQYCAPDTDIRGVDAQIKNYLDQIKMARAKFGLENKYSLKERLSTLTKELNHWRKEWDDIEMLMFGDDAHSMKKMIQKIDSLKSEINAPSESYPVDKEGDIVLE',
    'MGNIMSASFAPECTDLKTKYDSCFNEWYSEKFLKGKSVENECSKQWYAYTTCVNAALVKQGIKPALDEAREEAPFENGGKLKEVDK'
]

sequence_ids = ['seq1', 'seq2']

## Predict Optimal Temperature

In [None]:
# Import the GeoPoc predictor used for the temperature, pH, and salt predictions
from proteus.fitness_predictors import GeoPocPredictor

# Temporary directory for the temperature predictor.
# It is derived from BASE_DIR so it stays in the same project tree as the other
# paths in this notebook; the previous value pointed into a different checkout
# (".../SageMaker/Protscout/...") while everything else lives under ".../SageMaker/Proteus/...".
# NOTE(review): assumes parent_temp_dir is only a scratch location — confirm nothing
# else reads from the old path.
GEOPOC_TMP_DIR = BASE_DIR / "tmp" / "geopoc"
GEOPOC_TMP_DIR.mkdir(parents=True, exist_ok=True)

# Initialize predictor for temperature predictions
# (this one runs on CPU, unlike the pH and salt predictors, which use CUDA)
temp_predictor = GeoPocPredictor(
    task="temp",
    device="cpu",
    save_directory=str(OUTPUT_DIR),
    pdb_dir=str(PDB_DIR),
    parent_temp_dir=str(GEOPOC_TMP_DIR),
    docker_image="ghcr.io/new-atlantis-labs/geopoc:latest"
)

# Get temperature predictions for the sample sequences
temp_predictions = temp_predictor.infer_fitness(
    sequences=sequences,
    sequence_ids=sequence_ids,
    generation_id="001"
)

print("Temperature predictions:", temp_predictions)

Temperature predictions: tensor([[33.2223],
        [17.3567]])


## Classify Optimal pH

In [3]:
# pH task: collect the predictor settings first, then build the predictor from them
ph_predictor_kwargs = {
    "task": "pH",
    "device": "cuda",
    "gpu_id": "0",
    "save_directory": str(OUTPUT_DIR),
    "pdb_dir": str(PDB_DIR),
    "docker_image": "ghcr.io/new-atlantis-labs/geopoc:latest",
}
ph_predictor = GeoPocPredictor(**ph_predictor_kwargs)

# Run inference on the sample sequences and print the result
ph_predictions = ph_predictor.infer_fitness(
    sequences=sequences, sequence_ids=sequence_ids, generation_id="001"
)
print("pH predictions:", ph_predictions)

pH predictions: tensor([[7.],
        [7.]])


## Classify Optimal Salt Concentration

In [4]:
# Salt task: collect the predictor settings first, then build the predictor from them
salt_predictor_kwargs = {
    "task": "salt",
    "device": "cuda",
    "gpu_id": "0",
    "save_directory": str(OUTPUT_DIR),
    "pdb_dir": str(PDB_DIR),
    "docker_image": "ghcr.io/new-atlantis-labs/geopoc:latest",
}
salt_predictor = GeoPocPredictor(**salt_predictor_kwargs)

# Run inference on the sample sequences and print the result
salt_predictions = salt_predictor.infer_fitness(
    sequences=sequences, sequence_ids=sequence_ids, generation_id="001"
)
print("Salt concentration predictions:", salt_predictions)

Salt concentration predictions: tensor([[2.0250],
        [5.0000]])
