In [12]:
!cd .. && pip uninstall -y protscout && poetry build && pip install -q dist/protscout-0.1.0-py3-none-any.whl

Found existing installation: protscout 0.1.0
Uninstalling protscout-0.1.0:
  Successfully uninstalled protscout-0.1.0
Building [36mprotscout[39m ([39;1m0.1.0[39;22m)
Building [34msdist[39m
  - Building [34msdist[39m
  - Built [32mprotscout-0.1.0.tar.gz[39m
Building [34mwheel[39m
  - Building [34mwheel[39m
  - Built [32mprotscout-0.1.0-py3-none-any.whl[39m


In [5]:
import os
import glob
from pathlib import Path
from protscout.predictor_helpers import read_fasta_to_dict

# --- Paths -------------------------------------------------------------------
# NOTE: these absolute paths are specific to one SageMaker instance; adjust
# them when running the notebook elsewhere.
BASE_DIR = Path("/home/ec2-user/SageMaker/ProtScout")
FEATURE_DIR = BASE_DIR / "outputs/geopoc/features"
OUTPUT_DIR = BASE_DIR / "outputs/geopoc"

FASTA_PATH = "/home/ec2-user/SageMaker/mangrove-plastic-degrading/data/protein_sequences_plastic_degrading_representatives/PET.faa"
PDB_DIR = "/home/ec2-user/SageMaker/mangrove-plastic-degrading/outputs/structures/PET"

# Create the output directories if they don't exist yet.
for dir_path in [FEATURE_DIR, OUTPUT_DIR]:
    dir_path.mkdir(parents=True, exist_ok=True)

# --- Inputs ------------------------------------------------------------------
# Sequences keyed by id, as returned by read_fasta_to_dict.
sequence_dict = read_fasta_to_dict(FASTA_PATH)
sequence_ids, sequences = list(sequence_dict.keys()), list(sequence_dict.values())

# Map sequence id -> PDB path for every "<id>.pdb" in PDB_DIR whose id is also
# in the FASTA file. Membership is tested against the dict (constant-time)
# instead of scanning the id list once per file.
pdb_files = {}
for filepath in sorted(glob.glob(os.path.join(PDB_DIR, "*.pdb"))):
    structure_id = os.path.splitext(os.path.basename(filepath))[0]
    if structure_id in sequence_dict:
        pdb_files[structure_id] = filepath

# The prediction cells look up pdb_files[seq_id] for every sequence id, so a
# missing structure would otherwise surface later as a bare KeyError.
# Fail here with an actionable message instead.
missing_structures = [seq_id for seq_id in sequence_ids if seq_id not in pdb_files]
if missing_structures:
    raise FileNotFoundError(
        f"No PDB file found in {PDB_DIR} for {len(missing_structures)} sequence id(s): "
        f"{missing_structures}"
    )

sequence_ids

## Predict Optimal Temperature

In [6]:
# GeoPoc predictor configured for the "temp" (optimal temperature) task.
from protscout.predictors import GeoPocPredictor

# Constructor settings collected in one mapping, then unpacked into the predictor.
temp_predictor_config = dict(
    task="temp",
    device="cuda",
    save_directory=str(OUTPUT_DIR),
    parent_temp_dir="/home/ec2-user/SageMaker/ProtScout/outputs/tmp/geopoc",
    docker_image="ghcr.io/new-atlantis-labs/geopoc:latest",
    model_weights_dir="/home/ec2-user/SageMaker/models",
)
temp_predictor = GeoPocPredictor(**temp_predictor_config)

# Both lists follow the iteration order of sequence_dict, so entry i of each
# refers to the same sequence id.
temp_sequences = list(sequence_dict.values())
temp_pdb_paths = [pdb_files[seq_id] for seq_id in sequence_dict.keys()]

# Run inference for the temperature task.
temp_predictions = temp_predictor.infer_fitness(
    sequences=temp_sequences,
    pdb_files=temp_pdb_paths,
    generation_id="001",
)

print("Temperature predictions:", temp_predictions)

Temperature predictions: tensor([[31.2698],
        [25.5224],
        [25.8323],
        [25.9285],
        [25.1254],
        [27.3422],
        [26.1657],
        [26.6567],
        [31.9171],
        [28.0328]])


## Classify Optimal pH

In [7]:
# GeoPoc predictor configured for the "pH" task.
# Constructor settings collected in one mapping, then unpacked into the predictor.
ph_predictor_config = dict(
    task="pH",
    device="cuda",
    save_directory=str(OUTPUT_DIR),
    parent_temp_dir="/home/ec2-user/SageMaker/ProtScout/outputs/tmp/geopoc",
    docker_image="ghcr.io/new-atlantis-labs/geopoc:latest",
    model_weights_dir="/home/ec2-user/SageMaker/models",
)
ph_predictor = GeoPocPredictor(**ph_predictor_config)

# Both lists follow the iteration order of sequence_dict, so entry i of each
# refers to the same sequence id.
ph_sequences = list(sequence_dict.values())
ph_pdb_paths = [pdb_files[seq_id] for seq_id in sequence_dict.keys()]

# Run inference for the pH task.
ph_predictions = ph_predictor.infer_fitness(
    sequences=ph_sequences,
    pdb_files=ph_pdb_paths,
    generation_id="001",
)

print("pH predictions:", ph_predictions)

pH predictions: tensor([[ 7.0000],
        [11.5000],
        [ 7.0000],
        [ 7.0000],
        [ 7.0000],
        [ 7.0000],
        [ 7.0000],
        [ 7.0000],
        [11.5000],
        [ 7.0000]])


## Classify Optimal Salt Concentration

In [8]:
# GeoPoc predictor configured for the "salt" (salt concentration) task.
# Constructor settings collected in one mapping, then unpacked into the predictor.
salt_predictor_config = dict(
    task="salt",
    device="cuda",
    save_directory=str(OUTPUT_DIR),
    parent_temp_dir="/home/ec2-user/SageMaker/ProtScout/outputs/tmp/geopoc",
    docker_image="ghcr.io/new-atlantis-labs/geopoc:latest",
    model_weights_dir="/home/ec2-user/SageMaker/models",
)
salt_predictor = GeoPocPredictor(**salt_predictor_config)

# Both lists follow the iteration order of sequence_dict, so entry i of each
# refers to the same sequence id.
salt_sequences = list(sequence_dict.values())
salt_pdb_paths = [pdb_files[seq_id] for seq_id in sequence_dict.keys()]

# Run inference for the salt task.
salt_predictions = salt_predictor.infer_fitness(
    sequences=salt_sequences,
    pdb_files=salt_pdb_paths,
    generation_id="001",
)

print("Salt concentration predictions:", salt_predictions)

Salt concentration predictions: tensor([[5.0000],
        [5.0000],
        [5.0000],
        [2.0250],
        [5.0000],
        [5.0000],
        [5.0000],
        [2.0250],
        [5.0000],
        [5.0000]])
