In [16]:
from keras.models import load_model
from keras.layers import LSTM, Bidirectional, Dense, Conv1D, MaxPooling1D, Flatten, Dropout, Embedding, Input
import tensorflow as tf

# Load the Keras model stored in "model 1.h5" (path is relative to the
# notebook's working directory).
# NOTE(review): the global name `model` is rebound by later cells that load
# "model 2.h5" and "model 3.h5", so which network `model` refers to depends on
# the order in which cells were executed.
model = load_model("model 1.h5")

# Print the layer-by-layer summary as a quick check that loading succeeded.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 1024, 128)         512       
                                                                 
 batch_normalization (BatchN  (None, 1024, 128)        512       
 ormalization)                                                   
                                                                 
 max_pooling1d (MaxPooling1D  (None, 512, 128)         0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 512, 128)          0         
                                                                 
 conv1d_1 (Conv1D)           (None, 512, 128)          49280     
                                                                 
 batch_normalization_1 (Batc  (None, 512, 128)         5

In [17]:
import numpy as np
from tensorflow.keras.models import load_model
import joblib
from transformers import BertModel, BertTokenizer
import torch

# === Load model and scaler ===
# `model` is the Keras network read from 'model 1.h5'; `scaler` is the object
# stored in 'scaler.pkl' (predict_therapeutic calls `.transform` on it).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a trusted source.
model = load_model('model 1.h5')
scaler = joblib.load('scaler.pkl')

# === Load ProtBERT model ===
# Tokenizer and PyTorch encoder for the "Rostlab/prot_bert" checkpoint.
# do_lower_case=False keeps the upper-case residue letters as typed.
tokenizer = BertTokenizer.from_pretrained("Rostlab/prot_bert", do_lower_case=False)
bert_model = BertModel.from_pretrained("Rostlab/prot_bert")

# === ProtBERT Preprocessing ===
def embed_with_protbert(sequence: str) -> np.ndarray:
    """Embed one peptide sequence with ProtBERT and mean-pool the residues.

    The sequence is split into space-separated single letters, tokenized,
    passed through `bert_model` without gradient tracking, and the hidden
    states of every position except the first and last are averaged.

    Args:
        sequence: Amino-acid letters, e.g. "ACDEFG". The caller is expected
            to have normalised case already (the driver upper-cases input).

    Returns:
        A NumPy array reshaped to a single row, `(1, hidden_size)`
        (the downstream reshape in `predict_therapeutic` uses 1024).

    Raises:
        ValueError: If `sequence` is empty or only whitespace. Without this
            check the `[1:-1]` slice is empty and the mean is NaN, which only
            fails later (and obscurely) in the scaler/model.
    """
    if not sequence or not sequence.strip():
        raise ValueError("sequence must contain at least one amino acid")

    # NOTE(review): the ProtBERT model card recommends mapping the rare
    # residues U/Z/O/B to X before tokenizing. That is not done here so the
    # features stay identical to the existing pipeline — confirm against the
    # preprocessing used when the classifier was trained.

    # ProtBERT's tokenizer expects residues separated by spaces.
    spaced_seq = ' '.join(sequence)
    encoded_input = tokenizer(spaced_seq, return_tensors='pt', padding=True)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = bert_model(**encoded_input)

    # Drop the batch dimension, strip the first and last positions
    # ([CLS] and [SEP]), then average over the remaining tokens.
    embeddings = outputs.last_hidden_state.squeeze(0)[1:-1].mean(dim=0)
    return embeddings.cpu().numpy().reshape(1, -1)  # shape (1, 1024)

# === Predict function ===
def predict_therapeutic(sequence: str):
    """Classify a peptide as "Therapeutic" or "Non-Therapeutic".

    Embeds the sequence with `embed_with_protbert`, standardises the vector
    with the loaded `scaler`, reshapes it to `(1, 1024, 1)` for the network,
    and maps the arg-max class (1 -> therapeutic, anything else -> not) to a
    label string.
    """
    standardized = scaler.transform(embed_with_protbert(sequence))
    # The model is fed one sample with a trailing channel axis.
    model_input = standardized.reshape(1, 1024, 1)

    class_scores = model.predict(model_input)
    winning_class = np.argmax(class_scores, axis=1)[0]
    if winning_class == 1:
        return "Therapeutic"
    return "Non-Therapeutic"

# === Main Driver ===
if __name__ == "__main__":
    # Read the sequence, then normalise it: trim whitespace, force upper case.
    user_seq = input("Enter your peptide sequence: ")
    user_seq = user_seq.strip().upper()

    # Run the full embed -> scale -> classify pipeline and report the label.
    result = predict_therapeutic(user_seq)
    print(f"\n🧬 Prediction: {result}")


🧬 Prediction: Therapeutic


In [18]:
from keras.models import load_model
from keras.layers import LSTM, Bidirectional, Dense, Conv1D, MaxPooling1D, Flatten, Dropout, Embedding, Input
import tensorflow as tf

# Load the Keras model stored in "model 2.h5" (path is relative to the
# notebook's working directory).
# NOTE(review): this rebinds the global `model` that earlier cells pointed at
# "model 1.h5"; functions defined earlier that read `model` will now see this
# network unless their cell is re-run.
model = load_model("model 2.h5")

# Print the layer-by-layer summary as a quick check that loading succeeded.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 48, 32)            128       
                                                                 
 max_pooling1d (MaxPooling1D  (None, 24, 32)           0         
 )                                                               
                                                                 
 lstm (LSTM)                 (None, 64)                24832     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 8)                 520       
                                                                 
Total params: 25,480
Trainable params: 25,480
Non-trainable params: 0
____________________________________________________

In [23]:
import numpy as np
import joblib
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel

# Load Model 2 (rebinds the global `model` used by predict_category)
model = tf.keras.models.load_model("model 2.h5")

# Load dependencies with joblib: `scaler` and `pca` are applied, in that
# order, by predict_category via their `.transform` methods.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a trusted source.
scaler = joblib.load("scaler 2.pkl")
pca = joblib.load("pca_model.pkl")

# Load label encoder and category mapping.
# `label_encoder.inverse_transform` turns a class index into a label, and
# `category_mapping.get` turns that label into the displayed category name.
with open("label_encoder.pkl", "rb") as f:
    label_encoder = joblib.load(f)

with open("category_mapping.pkl", "rb") as f:
    category_mapping = joblib.load(f)

# Load ProtBERT model and tokenizer.
# from_pt=True builds the TensorFlow model from the checkpoint's PyTorch
# weights; do_lower_case=False keeps upper-case residue letters as typed.
tokenizer = BertTokenizer.from_pretrained("Rostlab/prot_bert", do_lower_case=False)
protbert = TFBertModel.from_pretrained("Rostlab/prot_bert", from_pt=True)

# 🧬 ProtBERT embedding function
def preprocess_protbert(sequence):
    """Embed one peptide sequence with the TensorFlow ProtBERT model.

    The sequence is stripped, upper-cased, split into space-separated single
    letters, tokenized as a batch of one, and run through `protbert` on the
    CPU. The last hidden state is averaged over axis 1 (every token position,
    nothing is sliced off).

    Args:
        sequence: Amino-acid letters, e.g. "acdefg" or " ACDEFG ".

    Returns:
        The mean-pooled embedding as a NumPy array with a leading batch
        dimension of 1.

    Raises:
        ValueError: If the sequence is empty after stripping. Previously an
            empty string still produced an embedding (built only from the
            tokens the tokenizer adds itself), so a category was predicted
            for no input at all.
    """
    cleaned = sequence.strip().upper()
    if not cleaned:
        raise ValueError("sequence must contain at least one amino acid")

    # ProtBERT's tokenizer expects residues separated by spaces.
    formatted_seq = " ".join(cleaned)
    tokens = tokenizer([formatted_seq], return_tensors="tf", padding=True)
    with tf.device("/CPU:0"):  # Use CPU for compatibility with TF models
        output = protbert(**tokens)
    embeddings = tf.reduce_mean(output.last_hidden_state, axis=1).numpy()
    return embeddings

# 🧠 Model 2 Prediction Pipeline
def predict_category(sequence: str):
    """Predict the peptide category name for a sequence with Model 2.

    Pipeline: ProtBERT embedding -> `scaler.transform` -> `pca.transform` ->
    `model.predict` -> arg-max class index -> `label_encoder` label ->
    `category_mapping` lookup. Labels missing from the mapping are reported
    as "Unknown Category".
    """
    # Feature preparation: embed, standardise, then project with PCA.
    features = pca.transform(scaler.transform(preprocess_protbert(sequence)))

    # Pick the highest-scoring class of the single sample in the batch.
    class_scores = model.predict(features)
    best_index = np.argmax(class_scores, axis=1)[0]

    # Translate index -> encoded label -> human-readable category.
    label = label_encoder.inverse_transform([best_index])[0]
    return category_mapping.get(label, "Unknown Category")

# Run the model on user input
if __name__ == "__main__":
    # Read the sequence, then normalise it: trim whitespace, force upper case.
    user_seq = input("Enter peptide sequence: ")
    user_seq = user_seq.strip().upper()

    # Run the Model 2 pipeline and report the predicted category.
    category_result = predict_category(user_seq)
    print(f"\n🧬 Model 2 Prediction: {category_result}")

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint


🧬 Model 2 Prediction: Anti Fungal Peptide


In [29]:
import h5py

# Inspect the HDF5 checkpoint directly, without building the Keras model
with h5py.File("Model_3.h5", "r") as file:
    print("🗂️ Model keys (groups):", list(file.keys()))

    # The serialized architecture, when present, is a root-level attribute
    if "model_config" not in file.attrs:
        print("\n⚠️ No model_config attribute found.")
    else:
        print("\n✅ Found 'model_config'!")
        print(file.attrs["model_config"][:500])  # print first 500 chars

    # Iterating the 'model_weights' group yields the names of its members
    print("\n📋 Layers in model:")
    for layer in file["model_weights"]:
        print("🔹", layer)

🗂️ Model keys (groups): ['model_weights', 'optimizer_weights']

✅ Found 'model_config'!
{"class_name": "Sequential", "config": {"name": "sequential", "trainable": true, "dtype": {"module": "keras", "class_name": "DTypePolicy", "config": {"name": "float32"}, "registered_name": null}, "layers": [{"class_name": "InputLayer", "config": {"batch_shape": [null, 38], "dtype": "float32", "sparse": false, "name": "input_layer"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": {"module": "keras", "class_name": "DTypePolicy", "config": {"name": "float32"}, "reg

📋 Layers in model:
🔹 dense
🔹 dense_1
🔹 dense_2
🔹 top_level_model_weights


In [3]:
import numpy as np
import pickle
from keras.models import load_model
from Bio.SeqUtils.ProtParam import ProteinAnalysis

# Load model and scaler (rebinds the globals `model` and `scaler`).
# NOTE(review): the HDF5 inspection cell opens "Model_3.h5", whose
# model_config reports an input batch_shape of [null, 38], while this cell
# loads "model 3.h5" and the code below feeds it 5 features — confirm these
# are different files and that this is the intended model.
# NOTE(review): pickle.load can execute arbitrary code — only load
# "scaler 3.pkl" from a trusted source.
model = load_model("model 3.h5")
with open("scaler 3.pkl", "rb") as f:
    scaler = pickle.load(f)

# Feature labels, in the same order as the values returned by
# get_bio_features (they are paired with zip when printed).
feature_names = [
    "Molecular Weight",
    "Aromaticity",
    "Instability Index",
    "Isoelectric Point",
    "Hydrophobicity (GRAVY)"
]

# Function to extract features (printing is done by the caller)
def get_bio_features(sequence):
    """Return five ProteinAnalysis descriptors for `sequence` as a list.

    Order of the returned values: molecular weight, aromaticity,
    instability index, isoelectric point, GRAVY.
    """
    protein = ProteinAnalysis(sequence)
    # Bound methods are listed in output order and invoked one by one.
    descriptor_methods = (
        protein.molecular_weight,
        protein.aromaticity,
        protein.instability_index,
        protein.isoelectric_point,
        protein.gravy,
    )
    return [compute() for compute in descriptor_methods]

# Prompt for a sequence, then normalise it: trim whitespace, force upper case
input_sequence = input("🔡 Enter a peptide sequence (only standard amino acids): ")
input_sequence = input_sequence.strip().upper()

# Accept only non-empty sequences built from the 20 standard residue letters
valid_amino_acids = set("ACDEFGHIKLMNPQRSTVWY")
if input_sequence and set(input_sequence) <= valid_amino_acids:
    # Compute the descriptors and show them next to their labels
    features = get_bio_features(input_sequence)

    print("\n🔬 Extracted Biological Features:")
    for name, value in zip(feature_names, features):
        print(f"{name}: {value:.4f}")

    # Standardise the single feature row, then read the one scalar output
    features_scaled = scaler.transform([features])
    predicted_score = model.predict(features_scaled)[0][0]

    print(f"\n🎯 Predicted Biological Score: {predicted_score:.4f}")
else:
    print("❌ Invalid sequence. Please enter a valid peptide sequence using only standard amino acids (ACDEFGHIKLMNPQRSTVWY).")



🔬 Extracted Biological Features:
Molecular Weight: 59908.9278
Aromaticity: 0.1243
Instability Index: 53.0476
Isoelectric Point: 8.0923
Hydrophobicity (GRAVY): -0.3132

🎯 Predicted Biological Score: 656.3642
