In [2]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('..')
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [3]:
from rnacomp.fastai_fit import *
from rnacomp.dataset import RNA_DatasetV1, LenMatchBatchSampler, DeviceDataLoader
from rnacomp.models import GraphTransformer
import gc

import torch
from x_transformers import TransformerWrapper, Decoder, Encoder
from transformers.optimization import (
    get_cosine_schedule_with_warmup,
)



caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
    
def loss(pred,target):
    p = pred[target['mask'][:,:pred.shape[1]]]
    y = target['react'][target['mask']].clip(0,1)
    loss = F.l1_loss(p, y, reduction='none')
    loss = loss[~torch.isnan(loss)].mean()
    
    return loss

def loss_unc(pred,target):
    p = pred[target['mask'][:,:pred.shape[1]]]
    y = target['react'][target['mask']]#.clip(0,1)
    loss = F.l1_loss(p, y, reduction='none')
    loss = loss[~torch.isnan(loss)].mean()
    
    return loss

class MAE(Metric):
    def __init__(self): 
        self.reset()
        
    def reset(self): 
        self.x,self.y = [],[]
        
    def accumulate(self, learn):
        x = learn.pred[learn.y['mask'][:,:learn.pred.shape[1]]].clip(0,1)
        y = learn.y['react'][learn.y['mask']].clip(0,1)
        self.x.append(x)
        self.y.append(y)

    @property
    def value(self):
        x,y = torch.cat(self.x,0),torch.cat(self.y,0)
        loss = F.l1_loss(x, y, reduction='none')
        loss = loss[~torch.isnan(loss)].mean()
        return loss
    
def loss_laplace(pred,target):
    p = pred[target['mask'][:,:pred.shape[1]]].float()
    y = target['react'][target['mask']].clip(0,1).float()
    y_err = target['react_err'][target['mask']].float()
    
    loss = F.l1_loss(p, y, reduction='none')
    m = ~torch.isnan(loss)
    loss = (loss[m]/torch.sqrt(1.0+torch.nan_to_num(y_err[m],100.0).clip(0,100.0))).mean()
    
    return loss


In [5]:
from timm.models.layers import drop_path, to_2tuple, trunc_normal_
from einops import rearrange, repeat, reduce, pack, unpack
from einops.layers.torch import Rearrange, Reduce
from typing import Union, Tuple

class DropPath(nn.Module):
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)
    
    def extra_repr(self) -> str:
        return 'p={}'.format(self.drop_prob)
    
class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
    
class RotaryEmbedding(nn.Module):
    def __init__(self, dim, scale_base = 512, use_xpos = True):
        super().__init__()
        inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim))
        self.register_buffer("inv_freq", inv_freq)

        self.use_xpos = use_xpos
        self.scale_base = scale_base
        scale = (torch.arange(0, dim, 2) + 0.4 * dim) / (1.4 * dim)
        self.register_buffer('scale', scale)

    def forward(self, seq_len, device='cuda'):
        t = torch.arange(seq_len, device = device).type_as(self.inv_freq)
        freqs = torch.einsum('i , j -> i j', t, self.inv_freq)
        freqs = torch.cat((freqs, freqs), dim = -1)

        if not self.use_xpos:
            return freqs, torch.ones(1, device = device)

        power = (t - (seq_len // 2)) / self.scale_base
        scale = self.scale ** rearrange(power, 'n -> n 1')
        scale = torch.cat((scale, scale), dim = -1)

        return freqs, scale

def rotate_half(x):
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)


def apply_rotary_pos_emb(pos, t, scale = 1.):
    return (t * pos.cos() * scale) + (rotate_half(t) * pos.sin() * scale)

class Conv1D(nn.Conv1d):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.src_key_padding_mask = None

    def forward(self, x, src_key_padding_mask=None):
        if src_key_padding_mask is not None: 
            self.src_key_padding_mask = src_key_padding_mask
        if self.src_key_padding_mask is not None:
            x = torch.where(self.src_key_padding_mask.unsqueeze(-1
                    ).expand(-1,-1,x.shape[-1]).bool(), torch.zeros_like(x), x)
        return super().forward(x.permute(0,2,1)).permute(0,2,1)
    
class ResBlock(nn.Sequential):
    def __init__(self, d_model):
        super().__init__(nn.LayerNorm(d_model), nn.GELU(), 
                         Conv1D(d_model,d_model,3,padding=1))
        self.src_key_padding_mask = None
        
    def forward(self, x, src_key_padding_mask=None):
        self[-1].src_key_padding_mask = src_key_padding_mask if \
            src_key_padding_mask is not None else self.src_key_padding_mask
        return x + super().forward(x)
    
class Extractor(nn.Sequential):
    def __init__(self, d_model, in_ch=4):
        super().__init__(nn.Embedding(in_ch,d_model//4), 
                Conv1D(d_model//4,d_model,7,padding=3), 
                ResBlock(d_model))
        
    def forward(self, x, src_key_padding_mask=None):
        for i in [1,2]:
            self[i].src_key_padding_mask = src_key_padding_mask
        return super().forward(x)

#BEiTv2 block
class Block(nn.Module):
    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., init_values=None, act_layer=nn.GELU, norm_layer=nn.LayerNorm,
                 window_size=None, attn_head_dim=None, **kwargs):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = nn.MultiheadAttention(dim, num_heads, dropout=drop, batch_first=True)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

        if init_values is not None:
            self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)),requires_grad=True)
            self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)),requires_grad=True)
        else:
            self.gamma_1, self.gamma_2 = None, None
            
        self.emb = RotaryEmbedding(dim)

    def forward(self, x, attn_mask=None, key_padding_mask=None):
        q = k = v = self.norm1(x)
        positions, scale = self.emb(x.shape[1],x.device)
        q = apply_rotary_pos_emb(positions, q, scale)
        k = apply_rotary_pos_emb(positions, k, scale ** -1)
        
        if self.gamma_1 is None:
            x = x + self.drop_path(self.attn(q,k,v,
                            attn_mask=attn_mask,
                            key_padding_mask=key_padding_mask,
                            need_weights=False)[0])
            x = x + self.drop_path(self.mlp(self.norm2(x)))
        else:
            x = x + self.drop_path(self.gamma_1 * self.attn(q,k,v,
                            attn_mask=attn_mask,
                            key_padding_mask=key_padding_mask,
                            need_weights=False)[0])
            x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
        return x
    
class Block_conv(Block):
    def __init__(self, dim, mlp_ratio, *args, **kwargs):
        super().__init__(dim, *args, **kwargs)
        self.mlp.fc1= Conv1D(dim,dim,3,padding=1)
        self.mlp.fc2 = Conv1D(dim,dim,3,padding=1)

    def forward(self, *args, key_padding_mask=None, **kwargs):
        self.mlp.fc1.src_key_padding_mask = key_padding_mask
        self.mlp.fc2.src_key_padding_mask = key_padding_mask
        return super().forward(*args, **kwargs)
    
class RNA_Model(nn.Module):
    def __init__(self, dim=192, depth=12, head_size=32, **kwargs):
        super().__init__()
        #self.emb = nn.Sequential(nn.Embedding(4,dim//4), Conv1D(dim//4,dim,7,padding=3),
        #                        nn.LayerNorm(dim), nn.GELU(), Conv1D(dim,dim,3,padding=1))
        self.extractor = Extractor(dim)
        #self.pos_enc = SinusoidalPosEmb(dim)
        
        self.blocks = nn.ModuleList([ 
            Block_conv(
                dim=dim, num_heads=dim//head_size, mlp_ratio=4, drop_path=0.2*(i/(depth-1)), init_values=1,
                drop=0.1)
            for i in range(depth)])
        
        #self.transformer = nn.TransformerEncoder(
        #    TransformerEncoderLayer_conv(d_model=dim, nhead=dim//head_size, dim_feedforward=4*dim,
        #        dropout=0.1, activation=nn.GELU(), batch_first=True, norm_first=True), depth)
        self.proj_out = nn.Linear(dim,2)
    
    def forward(self, x0):
        mask = x0['mask']
        L0 = mask.shape[1]
        Lmax = mask.sum(-1).max()
        mask = mask[:,:Lmax]
        x = x0['seq'][:,:Lmax]
        
        #pos = torch.arange(Lmax, device=x.device).unsqueeze(0)
        #pos = self.pos_enc(pos)
        #self.emb[1].src_key_padding_mask=~mask
        #self.emb[4].src_key_padding_mask=~mask
        #x = self.emb(x)
        x = self.extractor(x, src_key_padding_mask=~mask)
        #x = x + pos
        
        #x = self.transformer(x, src_key_padding_mask=~mask)
        
        for blk in self.blocks:
            x = blk(x,key_padding_mask=~mask)   
        x = self.proj_out(x)
        
        x = F.pad(x,(0,0,0,L0-Lmax,0,0))
        return x

In [6]:
class CFG:
    path = Path("../data/split")
    pathbb = Path("../data/Ribonanza_bpp_files")
    split_id = 'v0'
    bs = 1024
    num_workers = 8
    device = 'cuda'
    adjnact_prob = 0.5
    seed = 2023
    folder = Path('exp_test')
    out = 'exp_05_lbs_asam'
    
    dim = 192
    depth = 12
    dim_head = 32
    
    
    lr=5e-4
    wd =5e-4
    warm_up_pct = 0.02
    epochs = 16

seed_everything(CFG.seed)
os.makedirs(CFG.out, exist_ok=True)

In [7]:
fns = list(CFG.pathbb.rglob("*.txt"))
bpp_df = pd.DataFrame({"bpp": fns})
bpp_df['sequence_id'] = bpp_df['bpp'].apply(lambda x: x.stem)
df_train = pd.read_parquet(CFG.path / f"train_data_{CFG.split_id}.parquet")
df_valid = pd.read_parquet(CFG.path / f"valid_data_{CFG.split_id}.parquet")
df_train = df_train.merge(bpp_df, on="sequence_id", how="left").reset_index(drop=True)
df_valid = df_valid.merge(bpp_df, on="sequence_id", how="left").reset_index(drop=True)


ds_train = RNA_DatasetV1(df_train)
ds_train_len = RNA_DatasetV1(df_train, mask_only=True)
sampler_train = torch.utils.data.RandomSampler(ds_train_len)
len_sampler_train = LenMatchBatchSampler(sampler_train, batch_size=CFG.bs,
            drop_last=True)
dl_train = DeviceDataLoader(torch.utils.data.DataLoader(ds_train, 
            batch_sampler=len_sampler_train, num_workers=CFG.num_workers,
            persistent_workers=True), CFG.device)


ds_val = RNA_DatasetV1(df_valid)
ds_val_len = RNA_DatasetV1(df_valid,mask_only=True)
sampler_val = torch.utils.data.SequentialSampler(ds_val_len)
len_sampler_val = LenMatchBatchSampler(sampler_val, batch_size=CFG.bs, 
               drop_last=True)
dl_val= DeviceDataLoader(torch.utils.data.DataLoader(ds_val, 
               batch_sampler=len_sampler_val, num_workers=CFG.num_workers), CFG.device)
gc.collect()

0

<function fastai.optimizer.RAdam(params: 'Tensor | Iterable', lr: 'float | slice', mom: 'float' = 0.9, sqr_mom: 'float' = 0.99, eps: 'float' = 1e-05, wd: 'Real' = 0.0, beta: 'float' = 0.0, decouple_wd: 'bool' = True) -> 'Optimizer'>

In [11]:

custom_model = RNA_Model()
opt = torch.optim.SGD(custom_model.parameters(), lr=CFG.lr, weight_decay=CFG.wd)
loss_func = loss_laplace
warmup_steps = int(len(dl_train) * CFG.warm_up_pct * CFG.epochs)
total_steps = int(len(dl_train) * CFG.epochs)
scheduler = get_cosine_schedule_with_warmup(
    opt,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

In [12]:
class SaveModelEpoch:
    def __init__(self, folder, exp_name, best=-np.inf):
        self.best = best
        self.folder = Path(folder)
        self.exp_name = exp_name

    def __call__(self, score, model, epoch):
        self.best = score
        print(f"Better model found at epoch {epoch} with value: {self.best}.")
        torch.save(model.state_dict(), f"{self.folder/self.exp_name}_{epoch}.pth")

        
def custom_metric(x, y):
        loss = F.l1_loss(x, y, reduction='none')
        loss = loss[~torch.isnan(loss)].mean()
        return loss.item()
        

def fit(
    epochs,
    model,
    train_dl,
    valid_dl,
    loss_fn,
    opt,
    metric,
    folder="models",
    exp_name="exp_00",
    device=None,
    sched=None,
    save_md=SaveModelEpoch,
):
    if device is None:
        device = (
            torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        )

    os.makedirs(folder, exist_ok=True)

    mb = master_bar(range(epochs))
    mb.write(["epoch", "train_loss", "valid_loss", "val_metric"], table=True)
    model.to(device)  # we have to put our model on gpu
    scaler = torch.cuda.amp.GradScaler()  # this for half precision training
    save_md = save_md(folder, exp_name)

    for i in mb:  # iterating  epoch
        trn_loss, val_loss = 0.0, 0.0
        model.train()  # set model for training
        for batch in progress_bar(train_dl, parent=mb):
            # putting batches to device
            x, y = batch
            with torch.cuda.amp.autocast():  # half precision
                out = model(x)  # forward pass
                loss = loss_fn(out, y)  # calulation loss

            trn_loss += loss.item()

            scaler.scale(loss).backward()  # backward
            scaler.step(opt)  # optimzers step
            scaler.update()  # for half precision
            opt.zero_grad()  # zeroing optimizer
            if sched is not None:
                sched.step()  # scuedular step

        trn_loss /= mb.child.total

        # putting model in eval mode
        model.eval()
        gt = []
        pred = []
        # after epooch is done we can run a validation dataloder and see how are doing
        with torch.no_grad():
            for batch in progress_bar(valid_dl, parent=mb):
                x, y = batch
                with torch.cuda.amp.autocast():  # half precision
                    out = model(x)  # forward pass
                    loss = loss_fn(out, y)  # calulation loss
                val_loss += loss.item()

                gt.append(y['react'][y['mask']].clip(0,1).detach())
                pred.append(out[y['mask'][:,:out.shape[1]]].clip(0,1).detach())
        # calculating metric
        metric_ = metric(torch.cat(pred), torch.cat(gt))
        # saving model if necessary
        save_md(metric_, model, i)
        val_loss /= mb.child.total
        res = pd.DataFrame(
            {
                "epoch": [i],
                "train_loss": [trn_loss],
                "valid_loss": [val_loss],
                "metric": [metric_],
            }
        )
        res.to_csv(f"{Path(folder)/exp_name}_{i}.csv", index=False)
        mb.write(
            [
                i,
                f"{trn_loss:.6f}",
                f"{val_loss:.6f}",
                f"{metric_:.6f}",
            ],
            table=True,
        )
        gc.collect()
    print("Training done")

In [13]:
fit(    train_dl = dl_train,
        valid_dl = dl_val,
        epochs=CFG.epochs,
        model=custom_model,
        loss_fn=loss_laplace,
        opt=opt,
        metric=custom_metric,
        folder=CFG.folder/CFG.out,
        exp_name=f"{CFG.out}",
        device=CFG.device,
        sched=scheduler,
    )


epoch,train_loss,valid_loss,val_metric
0,0.254781,0.249498,0.325098
1,0.212609,0.237302,0.308779
2,0.205578,0.228639,0.296958
3,0.200865,0.221793,0.287387
4,0.19754,0.217176,0.280869
5,0.195017,0.211003,0.271894
6,0.192988,0.20747,0.266651
7,0.191508,0.203579,0.260649
8,0.190316,0.201192,0.25684
9,0.189457,0.198993,0.253089


Better model found at epoch 0 with value: 0.3250979466443659.
Better model found at epoch 1 with value: 0.30877905826308033.
Better model found at epoch 2 with value: 0.29695820473827766.
Better model found at epoch 3 with value: 0.2873868415822642.
Better model found at epoch 4 with value: 0.2808691671122807.
Better model found at epoch 5 with value: 0.2718935343029247.
Better model found at epoch 6 with value: 0.2666510134021157.
Better model found at epoch 7 with value: 0.2606492465488646.
Better model found at epoch 8 with value: 0.2568397928802657.
Better model found at epoch 9 with value: 0.25308930475239655.
Better model found at epoch 10 with value: 0.2500666057521908.
Better model found at epoch 11 with value: 0.24816982319985417.
Better model found at epoch 12 with value: 0.2465007240385073.
Better model found at epoch 13 with value: 0.24584259637350458.
Better model found at epoch 14 with value: 0.24538674807148417.
Better model found at epoch 15 with value: 0.24533563495958