In [1]:
import os
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from BassetModel import DNASeqClassifier, DNADataset, train_model, validate_model
import pickle
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

In [2]:
def load_data(filename):
    """Read a pickled object from ``filename`` and return it.

    Args:
        filename: Path of the pickle file; it is opened in binary mode.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.

    Note:
        Unpickling can execute arbitrary code, so this should only be pointed
        at files from a trusted source.
    """
    with open(filename, 'rb') as pickle_file:
        return pickle.load(pickle_file)

def setup_data_loaders(dataframe, train_fraction=0.8, batch_size=100, num_workers=4, seed=None):
    """Wrap ``dataframe`` in a ``DNADataset`` and build train/test ``DataLoader``s.

    The dataset is randomly partitioned into a training part and a held-out
    part. With the default arguments the behaviour matches the previous
    hard-coded version (80/20 split, batches of 100, 4 workers, unseeded split).

    Args:
        dataframe: Object passed unchanged to ``DNADataset``.
        train_fraction: Fraction of samples assigned to the training split;
            must lie in ``[0, 1]``. The training size is rounded down.
        batch_size: Batch size used by both loaders.
        num_workers: Number of worker processes used by both loaders.
        seed: Optional integer. When given, the split is drawn from a dedicated
            ``torch.Generator`` seeded with this value, so the same samples end
            up in the same split on every run. When ``None`` the global torch
            RNG is used, as before.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its samples.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(f"train_fraction must be in [0, 1], got {train_fraction!r}")

    dataset = DNADataset(dataframe)
    train_size = int(train_fraction * len(dataset))
    # The remainder goes to the test split so the two sizes always sum to len(dataset).
    test_size = len(dataset) - train_size

    # Only pass a generator when a seed was requested; otherwise random_split
    # keeps using its default (global) generator.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], **split_kwargs
    )

    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )
    test_loader = DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )
    return train_loader, test_loader

def run_notebook(notebook_path):
    """Execute every cell of a Jupyter notebook with a ``python3`` kernel.

    The notebook runs with the directory that contains it as its working
    directory, so relative paths used inside the notebook resolve next to the
    notebook file. The executed notebook is not written back to disk.

    Args:
        notebook_path: Path to the ``.ipynb`` file to execute.

    Raises:
        Exception: Any error raised during execution is reported on stdout and
            then re-raised with its original traceback.
    """
    # Read explicitly as UTF-8 rather than relying on the platform default encoding.
    with open(notebook_path, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    # Working directory for the kernel: the notebook's own folder. (The previous
    # value was a literal placeholder path that does not exist.)
    notebook_dir = os.path.dirname(os.path.abspath(notebook_path))

    # Each cell is given up to 600 seconds before execution is aborted.
    ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
    try:
        ep.preprocess(nb, {'metadata': {'path': notebook_dir}})
        print("Notebook executed successfully.")
    except Exception as e:
        print("Error executing the notebook.")
        print(e)
        # Bare raise keeps the original traceback intact.
        raise

In [3]:
# Pickled dataset consumed below. When it is missing, the preparation notebook
# is executed first (presumably it writes this file -- confirm in that notebook).
pkl_file = 'encoded_df.pkl'
if not os.path.exists(pkl_file):
    print(f"{pkl_file} not found. Running DataPreparation.ipynb...")
    run_notebook('DataPreparation.ipynb')

# --- Data ---------------------------------------------------------------
print("Loading encoded data from Data Preparation Step...")
encoded_df = load_data(pkl_file)
print("Data loaded successfully.")

print("Setting up data loaders...")
train_loader, test_loader = setup_data_loaders(encoded_df)
print("Data loaders ready.")

# --- Device -------------------------------------------------------------
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using: {device}")

# --- Model, loss, optimizer ---------------------------------------------
model = DNASeqClassifier(sequence_length=800)
model = model.to(device)

# NOTE(review): BCELoss expects probabilities in [0, 1]; confirm the model's
# final layer applies a sigmoid.
criterion = torch.nn.BCELoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.02,
    momentum=0.9,
)

Loading encoded data from Data Preparation Step...
Data loaded successfully.
Setting up data loaders...
Data loaders ready.
Using: cuda


In [4]:
# Toggle: set to False to skip writing the trained weights to disk.
saveModel = True

# Train on the training loader for 20 epochs.
print("Starting training...")
train_model(model, train_loader, criterion, optimizer, device, num_epochs=20)
print("Training completed.")

# Evaluate on the held-out loader.
print("Starting validation...")
validate_model(model, test_loader, criterion, device)
print("Validation completed.")

# Persist only the parameters (state_dict), not the whole model object.
if saveModel:
    print("Saving the model...")
    torch.save(obj=model.state_dict(), f='dna_seq_classifier.pth')
    print("Model saved successfully.")

Starting training...


Epoch 1/20: 100%|██████████| 3116/3116 [01:24<00:00, 36.98it/s, loss=0.0191] 


Epoch 1/20, Loss: 0.0631


Epoch 2/20: 100%|██████████| 3116/3116 [01:22<00:00, 37.79it/s, loss=0.0108]  


Epoch 2/20, Loss: 0.0297


Epoch 3/20: 100%|██████████| 3116/3116 [01:22<00:00, 37.71it/s, loss=0.0318]  


Epoch 3/20, Loss: 0.0219


Epoch 4/20: 100%|██████████| 3116/3116 [01:22<00:00, 37.86it/s, loss=0.000615]


Epoch 4/20, Loss: 0.0171


Epoch 5/20: 100%|██████████| 3116/3116 [01:22<00:00, 37.80it/s, loss=0.00857] 


Epoch 5/20, Loss: 0.0147


Epoch 6/20: 100%|██████████| 3116/3116 [01:22<00:00, 37.71it/s, loss=0.00216] 


Epoch 6/20, Loss: 0.0123


Epoch 7/20: 100%|██████████| 3116/3116 [01:22<00:00, 37.57it/s, loss=0.0108]  


Epoch 7/20, Loss: 0.0105


Epoch 8/20: 100%|██████████| 3116/3116 [01:22<00:00, 37.55it/s, loss=0.00576] 


Epoch 8/20, Loss: 0.0092


Epoch 9/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.29it/s, loss=0.000567]


Epoch 9/20, Loss: 0.0085


Epoch 10/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.37it/s, loss=0.0329]  


Epoch 10/20, Loss: 0.0077


Epoch 11/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.29it/s, loss=0.000221]


Epoch 11/20, Loss: 0.0070


Epoch 12/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.26it/s, loss=0.0572]  


Epoch 12/20, Loss: 0.0057


Epoch 13/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.43it/s, loss=0.00109] 


Epoch 13/20, Loss: 0.0058


Epoch 14/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.22it/s, loss=0.00883] 


Epoch 14/20, Loss: 0.0051


Epoch 15/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.35it/s, loss=0.00623] 


Epoch 15/20, Loss: 0.0047


Epoch 16/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.35it/s, loss=0.0472]  


Epoch 16/20, Loss: 0.0050


Epoch 17/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.25it/s, loss=3.85e-5] 


Epoch 17/20, Loss: 0.0044


Epoch 18/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.45it/s, loss=0.000163]


Epoch 18/20, Loss: 0.0041


Epoch 19/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.28it/s, loss=0.00105] 


Epoch 19/20, Loss: 0.0039


Epoch 20/20: 100%|██████████| 3116/3116 [01:23<00:00, 37.21it/s, loss=0.000664]


Epoch 20/20, Loss: 0.0036
Training completed.
Starting validation...


Validating: 100%|██████████| 779/779 [00:06<00:00, 118.65it/s, accuracy=99.60%, loss=0.00867] 


Validation Loss: 0.0118, Accuracy: 99.60%
Validation completed.
Saving the model...
Model saved successfully.
