In [1]:
import torch
from enformer_pytorch import Enformer
from enformer_pytorch.finetune import HeadAdapterWrapper
import sys; sys.path.append('../')
from enformer_utils.data.bigwig import BigwigFetcher
from enformer_utils.data.fasta import FastaFetcher
from enformer_utils.data.bed import BedIterator

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!nvidia-smi

Thu Apr  6 03:14:35 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1080         Off| 00000000:02:00.0 Off |                  N/A |
| 35%   45C    P0               41W / 180W|      0MiB /  8192MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce GTX 1080 Ti      Off| 00000000:83:00.0 Off |  

In [3]:
device = torch.device('cuda:0')
torch.cuda.set_device(device=device)

# Check if GPU is available
if torch.cuda.is_available():
    # Get the device name
    device_name = torch.cuda.get_device_name(torch.cuda.current_device())
    print('The current GPU: ', device_name)
else:
    print('GPU is not aviailable')

The current GPU:  NVIDIA GeForce GTX 1080 Ti


In [4]:
# assign the data files for training
bigwig_files = [
    "../data/LW-2W-1-Muscle-H3K4me3-AD1_merge_LW-2W-2-Muscle-H3K4me3-AD7.nodup.tagAlign_x_LW-2W-2-Muscle-Input-AD9_merge_LW-2W-1-Muscle-Input-AD13.nodup.tagAlign.pval.signal.bw",
    "../data/MS-2W-2-Muscle-H3K27ac-AD1_merge_MS-2W-4-Muscle-H3K27ac-AD22.nodup.tagAlign_x_MS-2W-2-Muscle-Input-AD12-1_merge_MS-2W-4-Muscle-Input-AD25.nodup.tagAlign.pval.signal.bw",
]

fa_file = "../data/Sus_scrofa.Sscrofa11.1.dna.toplevel.chr.fa"
target_region_bed = "../data/target_regions.bed"

In [5]:
enformer = Enformer.from_pretrained('EleutherAI/enformer-official-rough')

model = HeadAdapterWrapper(
    enformer = enformer,
    num_tracks = len(bigwig_files),
    post_transformer_embed = False,
).cuda()

In [6]:
epochs = 10
bin_size = 200

fa_fetcher = FastaFetcher(fa_file)
bigwig_fetcher = BigwigFetcher(bigwig_files, bins=bin_size)
target_region_iter = BedIterator(target_region_bed)

for epoch in range(epochs):
    for region in target_region_iter:
        chrom, start, end = region
        seq = fa_fetcher.fetch_tensor(chrom, start, end).cuda()
        target = bigwig_fetcher.fetch_tensor(chrom, start, end).cuda()
        loss = model(seq, target=target)
        print(f"Epoch: {epoch}, {chrom}:{start}-{end}, loss: {loss}")
        loss.backward()


Epoch: 0, chr1:0-98304, loss: 2.1053252595515883
Epoch: 0, chr1:98304-196608, loss: 1.7726745579496668
Epoch: 0, chr1:196608-294912, loss: 3.0932596674492374
Epoch: 0, chr1:294912-393216, loss: 0.9147566630917908
Epoch: 0, chr1:393216-491520, loss: 1.121840956972941
Epoch: 0, chr1:491520-589824, loss: 3.039342317576095
Epoch: 0, chr1:589824-688128, loss: 0.8242230878449354
Epoch: 0, chr1:688128-786432, loss: 0.861079661131393
Epoch: 0, chr1:786432-884736, loss: 1.4838105301601985
Epoch: 0, chr1:884736-983040, loss: 0.9021866240476862
Epoch: 0, chr1:983040-1081344, loss: 0.8179367523388418
Epoch: 0, chr1:1081344-1179648, loss: 0.8174810998310738
Epoch: 0, chr1:1179648-1277952, loss: 0.8499894725652004
Epoch: 0, chr1:1277952-1376256, loss: 1.1409548687903963
Epoch: 0, chr1:1376256-1474560, loss: 0.8734160058737024
Epoch: 0, chr1:1474560-1572864, loss: 1.1333819258690778
Epoch: 0, chr1:1572864-1671168, loss: 1.1755563072518558
Epoch: 0, chr1:1671168-1769472, loss: 1.6620199251723415
Epoch