In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
import os

# I'm using TATA box promoters as the positive class since they're well-characterized
# and represent a clear biological signal for transcription initiation
promoter_file = "human_TATA_5000.fa"
non_promoter_file = "human_nonprom_big.fa"

# Standardizing to 200bp ensures all models see the same input length
# This length captures typical promoter regions while remaining computationally efficient
seq_len = 200

# Encoding nucleotides as integers: A=1, C=2, G=3, T=4, any other character (including N) = 5.
# ID 0 is reserved as the embedding padding index (the models use padding_idx=0), but
# this preprocessing never emits it: center_crop_or_pad pads with "N", which
# encode_sequence maps to UNK_ID, so padded positions share the "unknown" embedding.
# This mapping allows the embedding layer to learn meaningful representations
BASE_TO_ID = {"A": 1, "C": 2, "G": 3, "T": 4}
UNK_ID = 5
# Six IDs in total (0-5); passed to the models as the embedding table size.
vocab_size = 6


def read_fasta(path):
    """Parse a FASTA file into a list of sequence strings.

    Header lines (starting with ">") delimit records and are discarded; the
    sequence lines of each record are stripped and concatenated. Blank lines are
    ignored, and a header with no sequence lines produces no entry.
    """
    records = []
    chunks = []

    def flush():
        # Emit the record collected so far, if it has any sequence lines
        if chunks:
            records.append("".join(chunks))
            chunks.clear()

    with open(path, "r") as handle:
        for raw in handle:
            text = raw.strip()
            if text == "":
                continue
            if text.startswith(">"):
                flush()
                continue
            chunks.append(text)

    # The last record has no trailing header to close it
    flush()
    return records


def center_crop_or_pad(seq, length, pad_char="N"):
    """Return `seq` resized to exactly `length` characters.

    Sequences that are too long keep a window around their midpoint; sequences
    that are too short are padded on both sides with `pad_char` (the extra
    character goes on the right when the padding is odd).
    """
    n = len(seq)
    surplus = n - length

    if surplus == 0:
        return seq

    if surplus < 0:
        # Too short: split the missing characters between the two ends
        deficit = -surplus
        left_pad = deficit // 2
        right_pad = deficit - left_pad
        return pad_char * left_pad + seq + pad_char * right_pad

    # Too long: take `length` characters around the midpoint
    first = max(0, n // 2 - length // 2)
    return seq[first:first + length]


def encode_sequence(seq):
    """Translate a DNA string into integer token IDs, one per character.

    Matching is case-insensitive; characters missing from BASE_TO_ID become UNK_ID.
    """
    token_ids = []
    for nucleotide in seq.upper():
        token_ids.append(BASE_TO_ID.get(nucleotide, UNK_ID))
    return token_ids


promoter_seqs = read_fasta(promoter_file)
non_promoter_seqs = read_fasta(non_promoter_file)

print(f"Loaded {len(promoter_seqs)} promoter sequences and {len(non_promoter_seqs)} non-promoter sequences.")

# Keep the same number of sequences per class (the first `num_samples` of each file)
# so the model cannot win by always predicting the majority class
num_samples = min(len(promoter_seqs), len(non_promoter_seqs))
promoter_seqs, non_promoter_seqs = promoter_seqs[:num_samples], non_promoter_seqs[:num_samples]

# Promoters (label 1) come first, followed by non-promoters (label 0);
# every sequence is resized to seq_len and then integer-encoded
all_seqs = np.array(
    [encode_sequence(center_crop_or_pad(s, seq_len)) for s in promoter_seqs + non_promoter_seqs],
    dtype=np.int64,
)
all_labels = np.array(
    [1] * len(promoter_seqs) + [0] * len(non_promoter_seqs),
    dtype=np.int64,
)

print(f"Final encoded dataset shape: {all_seqs.shape}, labels shape: {all_labels.shape}")

# Stratifying on the labels keeps the class ratio identical in the train and test parts
X_train, X_test, y_train, y_test = train_test_split(
    all_seqs, all_labels, test_size=0.2, random_state=42, stratify=all_labels
)

print(f"X_train: {X_train.shape}, X_test: {X_test.shape}")

# Persist the split together with the encoding parameters needed to rebuild a model
processed_path = "dna_dataset_processed.npz"
np.savez(
    processed_path,
    X_train=X_train, X_test=X_test,
    y_train=y_train, y_test=y_test,
    seq_len=seq_len, vocab_size=vocab_size,
)

print(f"Processed dataset saved to {processed_path}")




Loaded 2067 promoter sequences and 27731 non-promoter sequences.
Final encoded dataset shape: (4134, 200), labels shape: (4134,)
X_train: (3307, 200), X_test: (827, 200)
Processed dataset saved to dna_dataset_processed.npz


In [None]:
class DNASequenceDataset(Dataset):
    """Map-style dataset pairing integer-encoded DNA sequences with class labels.

    Args:
        sequences: Array-like of integer token IDs, one row per sample
            (NumPy array of any integer dtype, nested list, or tensor).
        labels: Array-like of integer class labels, one per sample.

    Raises:
        ValueError: If `sequences` and `labels` do not contain the same
            number of samples.
    """

    def __init__(self, sequences, labels):
        # int64 ("long") is what embedding lookups and CrossEntropyLoss targets expect.
        # as_tensor with an explicit dtype converts any integer input; clone() makes the
        # dataset own its storage so later edits to the caller's array cannot alter it.
        self.sequences = torch.as_tensor(sequences, dtype=torch.long).clone()
        self.labels = torch.as_tensor(labels, dtype=torch.long).clone()

        # Fail at construction time rather than with an IndexError mid-epoch
        if len(self.sequences) != len(self.labels):
            raise ValueError(
                f"sequences and labels must have the same length, "
                f"got {len(self.sequences)} and {len(self.labels)}"
            )

    def __len__(self):
        """Number of samples."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return the `(sequence, label)` tensors for sample `idx`."""
        return self.sequences[idx], self.labels[idx]

# Seed PyTorch's global RNG before anything stochastic happens, so that DataLoader
# shuffling and the weight initialisation of the models built in later cells are
# repeatable under Restart & Run All (same value as the train/test split's random_state).
# NOTE(review): some CUDA kernels may still be non-deterministic — confirm if exact
# run-to-run equality on GPU is required.
SEED = 42
torch.manual_seed(SEED)

train_dataset = DNASequenceDataset(X_train, y_train)
test_dataset = DNASequenceDataset(X_test, y_test)

# Batch size of 32 provides a good balance between memory usage and gradient stability
# Only the training data is shuffled; evaluation order stays fixed
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Prefer the GPU when one is visible, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Step 4: Define LSTM Model
class DNAClassifierLSTM(nn.Module):
    """
    Recurrent (LSTM) classifier over integer-encoded DNA sequences.

    Token IDs are embedded, fed through a stacked LSTM, and the final hidden
    state of the top layer is passed to a small two-layer head that emits one
    logit per class.
    """
    def __init__(self, vocab_size, embedding_dim=64, hidden_dim=128,
                 num_layers=2, num_classes=2, dropout=0.3, bidirectional=True):
        super().__init__()

        # Index 0 is treated as padding by the embedding table
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # Dropout between recurrent layers is only requested for stacked LSTMs
        recurrent_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=recurrent_dropout,
            bidirectional=bidirectional,
        )

        # A bidirectional LSTM contributes one hidden vector per direction
        directions = 2 if bidirectional else 1
        self.fc1 = nn.Linear(hidden_dim * directions, 64)
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        _, (h_n, _) = self.lstm(self.embedding(x))

        if not self.lstm.bidirectional:
            summary = h_n[-1]
        else:
            # The last two entries are the top layer's forward and backward
            # final states; join them along the feature axis
            summary = torch.cat((h_n[-2], h_n[-1]), dim=1)

        features = self.relu(self.fc1(summary))
        return self.fc2(self.dropout(features))


In [None]:
def train_model(model, train_loader, test_loader, num_epochs=50, patience=10, device='cuda'):
    """Train a classifier with Adam, StepLR decay, gradient clipping and early stopping.

    Args:
        model: `nn.Module` mapping a batch of inputs to per-class logits.
        train_loader: Iterable of `(inputs, labels)` batches used for optimisation.
        test_loader: Iterable of `(inputs, labels)` batches evaluated after every
            epoch. Its loss drives early stopping and best-checkpoint selection.
            NOTE(review): when the held-out test loader is passed here, the final
            test metrics are also the model-selection metrics; consider a
            separate validation split.
        num_epochs: Maximum number of epochs to run.
        patience: Number of consecutive epochs without a validation-loss
            improvement of more than 1e-4 after which training stops.
        device: Device the model and batches are moved to.

    Returns:
        `(model, history)`: the model with the weights of the epoch that had
        the lowest validation loss, and a dict with per-epoch lists under the
        keys 'train_loss', 'train_acc', 'val_loss' and 'val_acc' (accuracies in
        percent, losses averaged over batches).
    """
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # StepLR multiplies the learning rate by 0.1 every 5 epochs
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_state_dict = None

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for sequences, labels in train_loader:
            sequences, labels = sequences.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(sequences)
            loss = criterion(outputs, labels)
            loss.backward()

            # Gradient clipping prevents exploding gradients in RNN architectures
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            train_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

        train_acc = 100 * train_correct / train_total
        avg_train_loss = train_loss / len(train_loader)

        # ---- validation pass (no gradients, dropout disabled) ----
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for sequences, labels in test_loader:
                sequences, labels = sequences.to(device), labels.to(device)
                outputs = model(sequences)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                predicted = outputs.argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_acc = 100 * val_correct / val_total
        avg_val_loss = val_loss / len(test_loader)

        scheduler.step()

        history['train_loss'].append(avg_train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(avg_val_loss)
        history['val_acc'].append(val_acc)

        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        # Early stopping monitors validation loss to avoid overfitting
        if avg_val_loss < best_val_loss - 1e-4:
            best_val_loss = avg_val_loss
            epochs_no_improve = 0
            # state_dict() hands back references to the live tensors, which the
            # optimizer keeps overwriting in place. Clone them so this is a real
            # snapshot of the best epoch instead of an alias of the current weights.
            best_state_dict = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            epochs_no_improve += 1
            print(f'No improvement for {epochs_no_improve} epoch(s)')
            if epochs_no_improve >= patience:
                print(f'Early stopping triggered at epoch {epoch+1}')
                break

        print('-' * 60)

    # Roll back to the best checkpoint seen during training
    if best_state_dict is not None:
        model.load_state_dict(best_state_dict)

    return model, history


In [None]:
# Reference model: a two-layer LSTM that reads each sequence in one direction only.
# Its score is the yardstick for the bidirectional variants trained later.
model_lstm = DNAClassifierLSTM(
    vocab_size, embedding_dim=64, hidden_dim=128,
    num_layers=2, num_classes=2, bidirectional=False,
)

# At most 50 epochs; stop after 10 epochs without a validation-loss improvement
trained_lstm, history_lstm = train_model(
    model_lstm, train_loader, test_loader, num_epochs=50, patience=10, device=device
)


Epoch [1/50]
Train Loss: 0.6334, Train Acc: 65.13%
Val Loss: 0.6140, Val Acc: 67.23%
------------------------------------------------------------
Epoch [2/50]
Train Loss: 0.6008, Train Acc: 68.43%
Val Loss: 0.6058, Val Acc: 69.29%
------------------------------------------------------------
Epoch [3/50]
Train Loss: 0.5798, Train Acc: 70.28%
Val Loss: 0.5532, Val Acc: 70.62%
------------------------------------------------------------
Epoch [4/50]
Train Loss: 0.5495, Train Acc: 71.48%
Val Loss: 0.5325, Val Acc: 73.28%
------------------------------------------------------------
Epoch [5/50]
Train Loss: 0.5368, Train Acc: 73.45%
Val Loss: 0.5877, Val Acc: 64.09%
No improvement for 1 epoch(s)
------------------------------------------------------------
Epoch [6/50]
Train Loss: 0.5294, Train Acc: 71.15%
Val Loss: 0.5457, Val Acc: 70.98%
No improvement for 2 epoch(s)
------------------------------------------------------------
Epoch [7/50]
Train Loss: 0.5188, Train Acc: 73.90%
Val Loss: 0.5

In [None]:
# Collect test-set predictions of the unidirectional LSTM.
# Note: `all_labels` is rebound here to the list of test labels.
trained_lstm.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_inputs, batch_targets in test_loader:
        logits = trained_lstm(batch_inputs.to(device))
        # Predicted class = index of the largest logit
        top = torch.max(logits, 1)
        all_preds.extend(top.indices.cpu().numpy())
        all_labels.extend(batch_targets.numpy())

accuracy_lstm = accuracy_score(all_labels, all_preds)
print(f"\n[Model 1 - LSTM] Test Accuracy: {accuracy_lstm:.4f}")

print("\n[Model 1 - LSTM] Classification Report:")
lstm_report = classification_report(all_labels, all_preds, target_names=['Non-Promoter', 'Promoter'])
print(lstm_report)

# Persist the weights only (state_dict), not the whole module
torch.save(trained_lstm.state_dict(), 'dna_lstm_classifier_lstm.pth')
print("Model 1 saved to dna_lstm_classifier_lstm.pth")


[Model 1 - LSTM] Test Accuracy: 0.7437

[Model 1 - LSTM] Classification Report:
              precision    recall  f1-score   support

Non-Promoter       0.69      0.88      0.77       414
    Promoter       0.83      0.61      0.70       413

    accuracy                           0.74       827
   macro avg       0.76      0.74      0.74       827
weighted avg       0.76      0.74      0.74       827

Model 1 saved to dna_lstm_classifier_lstm.pth


In [None]:
# Same architecture as the baseline but reading each sequence in both directions,
# so the summary vector sees context on either side of a motif
model_bilstm = DNAClassifierLSTM(
    vocab_size, embedding_dim=64, hidden_dim=128,
    num_layers=2, num_classes=2, bidirectional=True,
)

trained_bilstm, history_bilstm = train_model(
    model_bilstm, train_loader, test_loader, num_epochs=50, patience=10, device=device
)

# Collect test-set predictions
trained_bilstm.eval()
all_preds_bi = []
all_labels_bi = []

with torch.no_grad():
    for batch_inputs, batch_targets in test_loader:
        logits = trained_bilstm(batch_inputs.to(device))
        # Predicted class = index of the largest logit
        top = torch.max(logits, 1)
        all_preds_bi.extend(top.indices.cpu().numpy())
        all_labels_bi.extend(batch_targets.numpy())

accuracy_bilstm = accuracy_score(all_labels_bi, all_preds_bi)
print(f"\n[Model 2 - BiLSTM] Test Accuracy: {accuracy_bilstm:.4f}")

print("\n[Model 2 - BiLSTM] Classification Report:")
bilstm_report = classification_report(all_labels_bi, all_preds_bi, target_names=['Non-Promoter', 'Promoter'])
print(bilstm_report)

# Persist the weights only (state_dict), not the whole module
torch.save(trained_bilstm.state_dict(), 'dna_lstm_classifier_bilstm.pth')
print("Model 2 saved to dna_lstm_classifier_bilstm.pth")



Epoch [1/50]
Train Loss: 0.6106, Train Acc: 65.92%
Val Loss: 0.6508, Val Acc: 61.79%
------------------------------------------------------------
Epoch [2/50]
Train Loss: 0.5884, Train Acc: 69.01%
Val Loss: 0.5588, Val Acc: 70.74%
------------------------------------------------------------
Epoch [3/50]
Train Loss: 0.5642, Train Acc: 71.73%
Val Loss: 0.5478, Val Acc: 72.19%
------------------------------------------------------------
Epoch [4/50]
Train Loss: 0.5858, Train Acc: 69.40%
Val Loss: 0.5620, Val Acc: 71.70%
No improvement for 1 epoch(s)
------------------------------------------------------------
Epoch [5/50]
Train Loss: 0.5650, Train Acc: 70.58%
Val Loss: 0.5415, Val Acc: 72.43%
------------------------------------------------------------
Epoch [6/50]
Train Loss: 0.5391, Train Acc: 72.69%
Val Loss: 0.5693, Val Acc: 69.53%
No improvement for 1 epoch(s)
------------------------------------------------------------
Epoch [7/50]
Train Loss: 0.5384, Train Acc: 72.75%
Val Loss: 0.5

In [None]:
# GRU is computationally more efficient than LSTM while often achieving similar performance
# The simplified gating mechanism may help with faster convergence
class DNAClassifierGRU(nn.Module):
    """GRU-based classifier over integer-encoded DNA sequences.

    Token IDs are embedded, fed through a stacked GRU, and the final hidden
    state of the top layer is passed to a two-layer head that emits one logit
    per class.
    """
    def __init__(self, vocab_size, embedding_dim=64, hidden_dim=128,
                 num_layers=2, num_classes=2, dropout=0.3, bidirectional=True):
        super().__init__()

        # Index 0 is treated as padding by the embedding table
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # Dropout between recurrent layers is only requested for stacked GRUs
        recurrent_dropout = dropout if num_layers > 1 else 0
        self.gru = nn.GRU(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=recurrent_dropout,
            bidirectional=bidirectional,
        )

        # A bidirectional GRU contributes one hidden vector per direction
        directions = 2 if bidirectional else 1
        self.fc1 = nn.Linear(hidden_dim * directions, 64)
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        _, h_n = self.gru(self.embedding(x))

        if not self.gru.bidirectional:
            summary = h_n[-1]
        else:
            # The last two entries are the top layer's forward and backward final states
            summary = torch.cat((h_n[-2], h_n[-1]), dim=1)

        features = self.relu(self.fc1(summary))
        return self.fc2(self.dropout(features))

# Bidirectional two-layer GRU with the same sizes as the LSTM models
model_gru = DNAClassifierGRU(
    vocab_size, embedding_dim=64, hidden_dim=128,
    num_layers=2, num_classes=2, bidirectional=True,
)

trained_gru, history_gru = train_model(
    model_gru, train_loader, test_loader, num_epochs=50, patience=10, device=device
)

# Collect test-set predictions
trained_gru.eval()
all_preds_gru = []
all_labels_gru = []

with torch.no_grad():
    for batch_inputs, batch_targets in test_loader:
        logits = trained_gru(batch_inputs.to(device))
        # Predicted class = index of the largest logit
        top = torch.max(logits, 1)
        all_preds_gru.extend(top.indices.cpu().numpy())
        all_labels_gru.extend(batch_targets.numpy())

accuracy_gru = accuracy_score(all_labels_gru, all_preds_gru)
print(f"\n[Model 3 - GRU] Test Accuracy: {accuracy_gru:.4f}")

print("\n[Model 3 - GRU] Classification Report:")
gru_report = classification_report(all_labels_gru, all_preds_gru, target_names=['Non-Promoter', 'Promoter'])
print(gru_report)

# Persist the weights only (state_dict), not the whole module
torch.save(trained_gru.state_dict(), 'dna_lstm_classifier_gru.pth')
print("Model 3 saved to dna_lstm_classifier_gru.pth")



Epoch [1/50]
Train Loss: 0.6237, Train Acc: 64.38%
Val Loss: 0.5802, Val Acc: 70.50%
------------------------------------------------------------
Epoch [2/50]
Train Loss: 0.5534, Train Acc: 71.82%
Val Loss: 0.5264, Val Acc: 72.07%
------------------------------------------------------------
Epoch [3/50]
Train Loss: 0.5163, Train Acc: 74.48%
Val Loss: 0.4706, Val Acc: 77.15%
------------------------------------------------------------
Epoch [4/50]
Train Loss: 0.4776, Train Acc: 76.93%
Val Loss: 0.3713, Val Acc: 83.31%
------------------------------------------------------------
Epoch [5/50]
Train Loss: 0.3157, Train Acc: 86.12%
Val Loss: 0.2129, Val Acc: 92.14%
------------------------------------------------------------
Epoch [6/50]
Train Loss: 0.1933, Train Acc: 92.56%
Val Loss: 0.1793, Val Acc: 93.71%
------------------------------------------------------------
Epoch [7/50]
Train Loss: 0.1682, Train Acc: 93.62%
Val Loss: 0.1723, Val Acc: 93.59%
---------------------------------------

In [None]:
# Hybrid CNN-LSTM architecture: CNNs detect local motifs, then LSTM models sequence-level dependencies
# This combines the strengths of both architectures for DNA sequence analysis
class DNAClassifierCNNLSTM(nn.Module):
    """Convolution + LSTM classifier over integer-encoded DNA sequences.

    Token IDs are embedded, scanned by a 1-D convolution, max-pooled by a factor
    of two, and the pooled feature sequence is summarised by an LSTM whose final
    hidden state feeds a two-layer head that emits one logit per class.
    """
    def __init__(self, vocab_size, embedding_dim=64, num_filters=128,
                 kernel_size=7, hidden_dim=128, num_layers=1,
                 num_classes=2, dropout=0.3, bidirectional=True):
        super().__init__()

        # Index 0 is treated as padding by the embedding table
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # Motif detector: each filter slides a kernel_size-wide window along the sequence
        self.conv1 = nn.Conv1d(embedding_dim, num_filters, kernel_size,
                               padding=kernel_size // 2)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(kernel_size=2)

        # The recurrent layer reads the pooled filter activations as a sequence;
        # dropout between layers is only requested when layers are stacked
        recurrent_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            num_filters,
            hidden_dim,
            num_layers,
            batch_first=True,
            bidirectional=bidirectional,
            dropout=recurrent_dropout,
        )

        # A bidirectional LSTM contributes one hidden vector per direction
        directions = 2 if bidirectional else 1
        self.fc1 = nn.Linear(hidden_dim * directions, 64)
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        # Conv1d wants channels before positions, so swap the last two axes
        channels_first = self.embedding(x).transpose(1, 2)
        motif_maps = self.pool(self.relu(self.conv1(channels_first)))

        # Swap back so the (batch_first) LSTM sees positions before features
        _, (h_n, _) = self.lstm(motif_maps.transpose(1, 2))

        if not self.lstm.bidirectional:
            summary = h_n[-1]
        else:
            # The last two entries are the top layer's forward and backward final states
            summary = torch.cat((h_n[-2], h_n[-1]), dim=1)

        features = self.relu(self.fc1(summary))
        return self.fc2(self.dropout(features))

# 128 width-7 filters feeding a single bidirectional LSTM layer
model_cnnlstm = DNAClassifierCNNLSTM(
    vocab_size, embedding_dim=64, num_filters=128, kernel_size=7,
    hidden_dim=128, num_layers=1, num_classes=2, bidirectional=True,
)

trained_cnnlstm, history_cnnlstm = train_model(
    model_cnnlstm, train_loader, test_loader, num_epochs=50, patience=10, device=device
)

# Collect test-set predictions
trained_cnnlstm.eval()
all_preds_cnnlstm = []
all_labels_cnnlstm = []

with torch.no_grad():
    for batch_inputs, batch_targets in test_loader:
        logits = trained_cnnlstm(batch_inputs.to(device))
        # Predicted class = index of the largest logit
        top = torch.max(logits, 1)
        all_preds_cnnlstm.extend(top.indices.cpu().numpy())
        all_labels_cnnlstm.extend(batch_targets.numpy())

accuracy_cnnlstm = accuracy_score(all_labels_cnnlstm, all_preds_cnnlstm)
print(f"\n[Model 4 - CNN-LSTM] Test Accuracy: {accuracy_cnnlstm:.4f}")

print("\n[Model 4 - CNN-LSTM] Classification Report:")
cnnlstm_report = classification_report(all_labels_cnnlstm, all_preds_cnnlstm, target_names=['Non-Promoter', 'Promoter'])
print(cnnlstm_report)

# Persist the weights only (state_dict), not the whole module
torch.save(trained_cnnlstm.state_dict(), 'dna_lstm_classifier_cnnlstm.pth')
print("Model 4 saved to dna_lstm_classifier_cnnlstm.pth")



Epoch [1/50]
Train Loss: 0.5886, Train Acc: 68.43%
Val Loss: 0.5425, Val Acc: 71.70%
------------------------------------------------------------
Epoch [2/50]
Train Loss: 0.5371, Train Acc: 73.15%
Val Loss: 0.6007, Val Acc: 67.59%
No improvement for 1 epoch(s)
------------------------------------------------------------
Epoch [3/50]
Train Loss: 0.5270, Train Acc: 73.90%
Val Loss: 0.5607, Val Acc: 75.09%
No improvement for 2 epoch(s)
------------------------------------------------------------
Epoch [4/50]
Train Loss: 0.5130, Train Acc: 74.72%
Val Loss: 0.5011, Val Acc: 75.82%
------------------------------------------------------------
Epoch [5/50]
Train Loss: 0.4723, Train Acc: 76.84%
Val Loss: 0.4623, Val Acc: 77.99%
------------------------------------------------------------
Epoch [6/50]
Train Loss: 0.4337, Train Acc: 79.38%
Val Loss: 0.4339, Val Acc: 78.60%
------------------------------------------------------------
Epoch [7/50]
Train Loss: 0.4009, Train Acc: 81.13%
Val Loss: 0.4

In [None]:
# Rank the four trained models by test accuracy and save the winner's weights.
# Guard first: every evaluation cell must have been run in this kernel session.
required_accuracies = ['accuracy_lstm', 'accuracy_bilstm', 'accuracy_gru', 'accuracy_cnnlstm']
missing = list(filter(lambda key: key not in globals(), required_accuracies))

if len(missing) > 0:
    raise RuntimeError(f"Missing accuracy values. Please run evaluation cells first. Missing: {missing}")

model_accuracies = dict(zip(
    ['LSTM', 'BiLSTM', 'GRU', 'CNN-LSTM'],
    [accuracy_lstm, accuracy_bilstm, accuracy_gru, accuracy_cnnlstm],
))

for name in model_accuracies:
    acc = model_accuracies[name]
    print(f"{name}: {acc:.4f}")

# On a tie, the model listed first wins
best_model_name, best_accuracy = max(model_accuracies.items(), key=lambda item: item[1])
print(f"\nBest model: {best_model_name} with accuracy {best_accuracy:.4f}")

name_to_model = dict(zip(
    ['LSTM', 'BiLSTM', 'GRU', 'CNN-LSTM'],
    [trained_lstm, trained_bilstm, trained_gru, trained_cnnlstm],
))

best_model = name_to_model[best_model_name]

# Persist the weights only (state_dict), not the whole module
best_model_path = 'dna_best_model.pth'
torch.save(best_model.state_dict(), best_model_path)
print(f"Best model saved to {best_model_path}")



LSTM: 0.7437
BiLSTM: 0.7304
GRU: 0.9516
CNN-LSTM: 0.8210

Best model: GRU with accuracy 0.9516
Best model saved to dna_best_model.pth
