# Benchmark property models

Specifically, EpHod and TemBERTure are benchmarked against GeoPoc.

In [3]:
from proteus.utils import read_fasta_to_dict

# Parse the benchmark FASTA file into a mapping of record id -> sequence string.
sequences_dict = read_fasta_to_dict("test/sequences.fasta")

# Keep just the sequence strings (in the mapping's iteration order) as a list.
sequences = [*sequences_dict.values()]

# Last expression of the cell: display the parsed records.
sequences_dict

{'Q2YPV0': 'MTAIIDIVGREILDSRGNPTVEVDVVLEDGSFGRAAVPSGASTGAHEAVELRDGGSRYLGKGVEKAVEVVNGKIFDAIAGMDAESQLLIDQTLIDLDGSANKGNLGANAILGVSLAVAKAAAQASGLPLYRYVGGTNAHVLPVPMMNIINGGAHADNPIDFQEFMILPVGATSIREAVRYGSEVFHTLKKRLKDAGHNTNVGDEGGFAPNLKNAQAALDFIMESIEKAGFKPGEDIALGLDCAATEFFKDGNYVYEGERKTRDPKAQAKYLAKLASDYPIVTIEDGMAEDDWEGWKYLTDLIGNKCQLVGDDLFVTNSARLRDGIRLGVANSILVKVNQIGSLSETLDAVETAHKAGYTAVMSHRSGETEDSTIADLAVATNCGQIKTGSLARSDRTAKYNQLIRIEEELGKQARYAGRSALKLL',
 'A0A142LT01': 'MRMDKNYMIAMHNPATIDHPNQPNEFVEFPVYTVLIDHPEGKILFDTACNPNSMGPEGRWGEFTQKAFPINMPEECYLHHRLEELNVRPEDIKYVVASHLHLDHAGCLELFTNATIIVQEDEFNGTLQTYARNVKDGAYVWGDIDMWIKNNLQWRLIKRGEDQVKLADGIQVLNFGSGHAWGMLGLHIQMPDTGGIILASDAIYTAESFGPPVKPPGIIYDSVGYNTTVERIRRLANETNSQVWFGHDPIQFKSFRKSTEGYYE',
 'Q9LCC6': 'MNTDVRIEKDFLGEKEIPKDAYYGVQTIRATENFPITGYRIHPELIKSLGIVKKSAALANMEVGLLDKEVGQYIVKAADEVIEGKWNDQFIVDPIQGGAGTSINMNANEVIANRALELMGEEKGNYSKISPNSHVNMSQSTNDAFPTATHIAVLSLLNQLIETTKYMQQEFMKKADEFAGVIKMGRTHLQDAVPILLGQEFEAYARVIARDIERIANTRNNLYDINMGATAVGTGLNADPEYISIVTEHLAKFSGHPLRSAQHLV

## EpHod

In [None]:
%%bash

# Run EpHod on the test sequences inside its Docker image.
#
# Fix: the original passed --fasta_path/--save_dir as "test/..." paths that
# were not under any bind mount, so the container could not see the host
# FASTA file and the predictions CSV was written to the container's own
# filesystem and thrown away by --rm. The host ./test directory is now
# mounted at /app/test and both arguments use absolute in-container paths,
# so the input is the same file the notebook reads from the host and the CSV
# lands in ./test/outputs on the host.

# Abort the cell on the first failing command, unset variable, or pipe error
# so a failed run is not mistaken for a successful one.
set -euo pipefail

# Host directory holding the EpHod model weights. Override by exporting
# EPHOD_MODELS_DIR before starting the kernel; the default is the path used
# originally.
EPHOD_MODELS_DIR="${EPHOD_MODELS_DIR:-/home/robaina/Documents/NewAtlantis/tools/EpHod/models}"

# Make sure the host output directory exists before Docker touches it.
mkdir -p "$(pwd)/test/outputs"

# NOTE(review): the /app/example and /app/output mounts are kept from the
# original invocation for compatibility; none of the arguments below
# reference them -- confirm whether the image needs them.
docker run --rm \
  -v "$(pwd)/example:/app/example" \
  -v "${EPHOD_MODELS_DIR}:/app/models" \
  -v "$(pwd)/outputs:/app/output" \
  -v "$(pwd)/test:/app/test" \
  ephod \
  --fasta_path "/app/test/sequences.fasta" \
  --save_dir "/app/test/outputs" \
  --csv_name "predictions_ephod.csv" \
  --verbose 1 \
  --weights_dir "/app/models"

## TemBERTure

In [None]:
from proteus.fitness_predictors import ThermostabilityPredictor


# Build the thermostability predictor: device="cpu", backed by the
# "temberture:latest" Docker image, with verbose=True.
predictor = ThermostabilityPredictor(
    weight=1.0, device="cpu", docker_image="temberture:latest", verbose=True
)

# Score the sequences loaded at the top of the notebook and print one
# predicted Tm per sequence. Any exception raised while predicting or
# printing is reported as a single message rather than a traceback.
try:
    predictions = predictor.infer_fitness(sequences)

    print("\nPredicted melting temperatures (average of three replicas):")
    paired = zip(sequences, predictions)
    for rank, (sequence, tm) in enumerate(paired, start=1):
        print(f"\nSequence {rank} (length: {len(sequence)})")
        # .item() extracts the scalar value from each prediction for formatting.
        print(f"Predicted Tm: {tm.item():.2f}°C")
except Exception as err:
    print(f"Error during prediction: {err}")

## GeoPoc: pH and temperature

In [None]:
%%bash

# Run GeoPoc over the same input FASTA once per prediction task
# ("temp" first, then "pH"), using identical mounts and options each time.

# Host directory whose contents are bind-mounted into the container:
#   <tests_dir>/          -> /app/GeoPoc/input     (contains test.fasta)
#   <tests_dir>/outputs   -> /app/GeoPoc/output
#   <tests_dir>/features  -> /app/GeoPoc/features
tests_dir=/home/ec2-user/SageMaker/GeoPoc/tests

for task in temp pH; do
  # 2>&1 folds stderr into stdout so everything appears in the cell output.
  docker run --rm --gpus "all" \
    -v "${tests_dir}/:/app/GeoPoc/input" \
    -v "${tests_dir}/outputs:/app/GeoPoc/output" \
    -v "${tests_dir}/features:/app/GeoPoc/features" \
    ghcr.io/new-atlantis-labs/geopoc:latest \
    -i /app/GeoPoc/input/test.fasta \
    --feature_path /app/GeoPoc/features/ \
    -o /app/GeoPoc/output/ \
    --task "${task}" \
    --gpu 0 2>&1
done