In [1]:
import hydra
import wandb
import torch
from torch.cuda.amp import autocast
from torch.cuda.amp import GradScaler
import genova
import numpy as np
import pandas as pd
from omegaconf import OmegaConf, open_dict
from genova.utils.BasicClass import Residual_seq
from torch.utils.data import DataLoader

In [2]:
# Initialise Hydra against the 'configs' directory and compose 'config.yaml'.
# Keyword arguments keep the calls independent of positional parameter order,
# which is not guaranteed to be the same in every Hydra release.
# NOTE(review): hydra.initialize raises if it is called twice in one kernel —
# restart the kernel (or clear GlobalHydra) before re-running this cell.
hydra.initialize(config_path='configs')
cfg = hydra.compose(config_name='config.yaml')
# open_dict allows the `task` key to be set even if cfg is in struct mode.
with open_dict(cfg):
    cfg.task = 'optimum_path'

In [3]:
# Read the dataset index (indexed by 'Spec Index') and keep only the rows whose
# 'MSGP File Name' is '1_3.msgp'.
spec_header = (
    pd.read_csv(
        '/home/z37mao/genova_dataset_index.csv',
        low_memory=False,
        index_col='Spec Index',
    )
    .loc[lambda index_df: index_df['MSGP File Name'] == '1_3.msgp']
)
# Optional additional filter on 'Node Number', currently disabled:
#spec_header = spec_header[spec_header['Node Number']<=512]

In [4]:
# Build the model from the composed config and move it to the GPU.
model = genova.models.Genova(cfg).to('cuda')
# Dataset built from the filtered index.
# presumably dataset_dir_path is the root under which the indexed files live — TODO confirm
ds = genova.data.GenovaDataset(cfg,spec_header=spec_header,dataset_dir_path='/home/z37mao/')
# NOTE(review): the meaning of 0.95 and [0,128,256,512] is not visible here —
# presumably a GPU-memory fraction and size-bucket boundaries; confirm against
# GenovaBatchSampler. The sampler also receives the model itself.
sampler = genova.data.GenovaBatchSampler(cfg,'cuda',0.95,spec_header,[0,128,256,512], model)
collate_fn = genova.data.GenovaCollator(cfg)
# batch_sampler hands batch formation to `sampler`, so no batch_size/shuffle is passed.
dl = DataLoader(ds, batch_sampler=sampler, collate_fn=collate_fn, pin_memory=True, num_workers=4, prefetch_factor=4)
# Rebinds `dl` to the prefetching wrapper; later cells iterate this object and unpack
# (encoder_input, decoder_input, graph_probability, label, label_mask) from each item.
# presumably it moves batches to 'cuda' ahead of use — TODO confirm in DataPrefetcher
dl = genova.data.DataPrefetcher(dl,'cuda')
# KLDivLoss takes log-probabilities as its input; the training loop applies
# log_softmax to the model output before calling it.
loss_fn = torch.nn.KLDivLoss(reduction='batchmean')
optimizer = torch.optim.Adam(model.parameters(),lr=2e-4)
# Gradient scaler used together with autocast() in the training loop.
scaler = GradScaler()

In [5]:
def train(dl,loss_fn,optimizer,scaler,model,num_epochs=40,log_every=100):
    """Run mixed-precision training and periodically yield the mean loss.

    This is a generator: nothing runs until it is iterated, and training
    pauses at every ``yield`` until the caller requests the next item.

    Args:
        dl: Iterable that is re-iterated once per epoch; each item unpacks into
            ``(encoder_input, decoder_input, graph_probability, label, label_mask)``.
        loss_fn: Callable ``loss_fn(log_probs, target)`` returning a scalar tensor.
        optimizer: Optimizer whose gradients are reset before every step.
        scaler: Gradient scaler exposing ``scale``, ``step`` and ``update``.
        model: Callable accepting ``encoder_input``, ``decoder_input`` and
            ``graph_probability`` keyword arguments.
        num_epochs: Number of passes over ``dl``.
        log_every: Number of optimisation steps covered by each reported mean.

    Yields:
        ``(mean_loss, total_step)`` after every ``log_every``-th step.
        ``mean_loss`` is a Python float averaged over exactly the last
        ``log_every`` steps; ``total_step`` is the 1-based count of steps done.

    Raises:
        RuntimeError: If the loss of a step is NaN.
    """
    total_step = 1
    loss_cum = 0.0
    for epoch in range(0, num_epochs):
        print('new epoch')
        for encoder_input, decoder_input, graph_probability, label, label_mask in dl:
            optimizer.zero_grad()
            with autocast():
                output = model(encoder_input=encoder_input, decoder_input=decoder_input, graph_probability=graph_probability)
                output = output.log_softmax(-1)
                # Only the positions selected by label_mask contribute to the loss.
                loss = loss_fn(output[label_mask],label[label_mask])
            # NaN never compares equal to anything (including itself), so an
            # `!= float('nan')` check can never fail; test for NaN explicitly.
            if torch.isnan(loss):
                raise RuntimeError('loss is NaN at step %d' % total_step)
            # Accumulate a plain float so the running sum does not keep every
            # step's autograd graph alive.
            loss_cum += loss.item()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # Report after the step has been counted so the mean really covers
            # `log_every` losses, then start a fresh window.
            if total_step % log_every == 0:
                yield loss_cum/log_every, total_step
                loss_cum = 0.0
            total_step += 1

In [8]:
# Pull a single batch from the loader for interactive inspection.
# next(iter(...)) raises StopIteration right here if the loader yields nothing,
# instead of silently leaving these names unbound (or stale from an earlier run).
encoder_input, decoder_input, graph_probability, label, label_mask = next(iter(dl))

In [10]:
# Sum of label_mask for the batch currently bound in the notebook. label_mask is
# used to index output/label when the loss is computed, so this is presumably the
# number of positions that contribute to the loss — TODO confirm it is boolean.
label_mask.sum()

tensor(55, device='cuda:0')

In [6]:
# train() is a generator function, so this call only creates the generator;
# no training step runs until `a` is iterated.
a=train(dl,loss_fn,optimizer,scaler,model)

In [7]:
# Drive the training generator: each iteration resumes training until the next
# report and prints the yielded (loss, total_step) pair. Interrupting the cell
# pauses training; the model and optimizer objects keep their current state.
for loss, total_step in a:
    # Manual alternative for advancing by a single report:
    #loss, total_step = next(a)
    print(loss, total_step)

new epoch


KeyboardInterrupt: 

In [None]:
# Display the model's current state dict for inspection.
# NOTE(review): this echoes every tensor into the cell output, which can be very large.
model.state_dict()

In [None]:
# Fetch one batch again for the manual forward pass below.
# next(iter(...)) fails immediately with StopIteration on an empty loader rather
# than leaving the batch variables unbound or holding values from a previous run.
encoder_input, decoder_input, graph_probability, label, label_mask = next(iter(dl))

In [None]:
# Manual forward pass on the current batch under autocast, mirroring the loss
# computation of the training loop (no backward pass or optimizer step here).
with autocast():
    output = model(
        encoder_input=encoder_input,
        decoder_input=decoder_input,
        graph_probability=graph_probability,
    ).log_softmax(-1)
    # Restrict both prediction and target to the positions selected by label_mask.
    loss = loss_fn(output[label_mask], label[label_mask])

In [None]:
# Display the value currently bound to `loss` (set by whichever cell assigned it last).
loss

In [None]:
import os

In [None]:
# Scratch check that the checkpoint directory exists. The original `os.(` was a
# syntax error; os.path.exists is the call used for the same purpose elsewhere
# in this notebook.
if os.path.exists('/home/z37mao/genova/save'):
    print('kfjsadlkf')

In [None]:
# Evaluates to True/False depending on whether the named file is present in the
# save directory (scratch check with a placeholder file name).
os.path.exists(os.path.join('/home/z37mao/genova/save','fjklsfj.pt'))

In [None]:
# Write a checkpoint holding everything needed to resume mixed-precision training:
# model weights, optimizer state, and the GradScaler state (without it a resumed
# run would start again from the scaler's initial loss scale).
torch.save({'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scaler_state_dict': scaler.state_dict()}, '/home/z37mao/genova/save/test.pt')

In [None]:
# Read the checkpoint back and display its contents as a round-trip check.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
torch.load('/home/z37mao/genova/save/test.pt')

In [None]:
# `DDP` was not imported in any cell of this notebook, so the bare call raised
# NameError; bind it to PyTorch's DistributedDataParallel here.
from torch.nn.parallel import DistributedDataParallel as DDP

# NOTE(review): DistributedDataParallel needs torch.distributed to be initialised
# (init_process_group) before it is constructed — no such call is visible in this
# notebook, so confirm it happens elsewhere before relying on this cell.
DDP(model,device_ids=[0])