# In [None]:
import torch
from torch import nn


class CNNLSTM(nn.Module):
    """Token classifier: embedding -> 1-D convolution -> LSTM -> linear -> sigmoid.

    Args:
        vocab_size: Number of distinct token indices the embedding accepts.
        embedding_dim: Size of each embedding vector (conv input channels).
        cnn_filters: Number of convolution output channels (LSTM input size).
        lstm_hidden: Hidden size of the LSTM.
        num_classes: Number of outputs produced per sequence.
    """

    def __init__(self, vocab_size, embedding_dim, cnn_filters, lstm_hidden, num_classes):
        super().__init__()
        # The attribute names are the state_dict key prefixes, so they must
        # stay as they are for previously saved weights to load.
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.conv = nn.Conv1d(in_channels=embedding_dim, out_channels=cnn_filters, kernel_size=20)
        self.lstm = nn.LSTM(input_size=cnn_filters, hidden_size=lstm_hidden, batch_first=True)
        self.fc = nn.Linear(in_features=lstm_hidden, out_features=num_classes)

    def forward(self, x):
        """Return sigmoid scores for a batch of token-index sequences.

        Args:
            x: Integer tensor of token indices, laid out as (batch, seq_len).
                The convolution uses kernel size 20 with no padding, so
                seq_len must be at least 20.

        Returns:
            Tensor of shape (batch, num_classes) holding sigmoid outputs.
        """
        embedded = self.embedding(x)
        # Conv1d wants channels before the sequence axis.
        conv_out = self.conv(embedded.transpose(1, 2))
        # The batch_first LSTM wants the sequence axis before the features.
        lstm_out, _state = self.lstm(conv_out.transpose(1, 2))
        # Only the LSTM output at the final time step feeds the classifier.
        final_step = lstm_out[:, -1, :]
        logits = self.fc(final_step)
        return torch.sigmoid(logits)


def load_model(model_path, model):
    """Load saved weights from ``model_path`` into ``model`` and return it.

    Args:
        model_path: Location of a file holding a state dict saved with
            ``torch.save``.
        model: Module whose parameters are overwritten in place.

    Returns:
        The same ``model`` object, now carrying the loaded weights.
    """
    # NOTE: torch.load deserializes the file via pickle; only open
    # checkpoints that come from a trusted source.
    cpu = torch.device('cpu')
    # Map every tensor to the CPU so checkpoints saved on a GPU still load.
    state_dict = torch.load(model_path, map_location=cpu)
    model.load_state_dict(state_dict)
    return model


# Integer index for each amino-acid one-letter code. Index 0 is not assigned
# to any residue because encode_sequence uses it as the padding value; 'X' is
# also the index used for any symbol that is not listed here.
aa_to_int = {
    'A': 1, 'R': 2, 'N': 3, 'D': 4, 'C': 5, 'E': 6, 'Q': 7, 'G': 8,
    'H': 9, 'I': 10, 'L': 11, 'K': 12, 'M': 13, 'F': 14, 'P': 15,
    'S': 16, 'T': 17, 'W': 18, 'Y': 19, 'V': 20, 'U': 21, 'X': 22,
}


def encode_sequence(seq, max_length):
    """Encode an amino-acid sequence as a fixed-length batch of one row.

    Each residue letter is mapped through ``aa_to_int`` (case-insensitively);
    letters that are not in the table get the index of ``'X'``. The result is
    always exactly ``max_length`` long: shorter sequences are right-padded
    with 0 and longer sequences are truncated to their first ``max_length``
    residues.

    Args:
        seq: Sequence string (or iterable of one-character strings).
        max_length: Non-negative length of the encoded output.

    Returns:
        A ``torch.long`` tensor of shape ``(1, max_length)``.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        raise ValueError(f"max_length must be non-negative, got {max_length}")

    unknown = aa_to_int['X']
    encoded_seq = [aa_to_int.get(aa.upper(), unknown) for aa in seq]
    # Truncate so an over-long sequence cannot produce a longer tensor than
    # requested (a negative pad count would otherwise just add no padding).
    encoded_seq = encoded_seq[:max_length]
    padding = [0] * (max_length - len(encoded_seq))
    return torch.tensor([encoded_seq + padding], dtype=torch.long)


def predict(model, sequence, max_length):
    """Score one sequence with ``model`` and return the result as a percentage.

    Args:
        model: Module that is called on the encoded sequence and must return
            a single-element tensor.
        sequence: Amino-acid sequence to score.
        max_length: Length passed to ``encode_sequence`` for padding.

    Returns:
        The model output multiplied by 100, as a Python float.
    """
    # Put the module in evaluation mode before scoring.
    model.eval()
    inputs = encode_sequence(sequence, max_length)
    # No gradients are needed for inference.
    with torch.no_grad():
        score = model(inputs)
    return score.item() * 100


# --- Inference settings ----------------------------------------------------
# Path of the saved state dict; empty placeholder to be filled in before running.
model_path = r""
# Every sequence is padded to this many positions before it is scored.
max_length = 1000

# --- Architecture hyperparameters ------------------------------------------
# load_state_dict needs these to agree with the shapes stored in the checkpoint.
embedding_dim = 8
cnn_filters = 64
lstm_hidden = 128
num_classes = 1
# One index per table entry plus index 0, which is used for padding.
vocab_size = len(aa_to_int) + 1


# Build the network, then overwrite its freshly initialised weights.
model = CNNLSTM(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    cnn_filters=cnn_filters,
    lstm_hidden=lstm_hidden,
    num_classes=num_classes,
)
model = load_model(model_path=model_path, model=model)


# Sequence to score; empty placeholder to be filled in before running.
unknown_sequence = ""


percentage = predict(model=model, sequence=unknown_sequence, max_length=max_length)
# The message reads: "The probability that this amino-acid sequence is an
# antioxidant protein is: <percentage>%".
print(f"该氨基酸序列为抗氧化蛋白的可能性为：{percentage:.2f}%")