In [None]:
import os
# Expose only GPU 0 to this process. This is set here, ahead of the torch
# import in the following cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

#### 定义采样生成序列的函数

In [None]:
import os
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from torch import optim
from tqdm import tqdm
import logging
from torch.utils.tensorboard import SummaryWriter

from utils import *
from modules import UNet
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import StepLR

# Configure the root logger: INFO level, "HH:MM:SS - LEVEL: message" lines.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s: %(message)s",
    level=logging.INFO,
    datefmt="%H:%M:%S",
)


class Diffusion:
    """Gaussian diffusion process with a linear beta schedule.

    On construction the per-step ``beta``, ``alpha = 1 - beta`` and the
    cumulative product ``alpha_hat`` are precomputed and stored on ``device``.
    The class offers forward noising (``noise_images``), random timestep draws
    (``sample_timesteps``) and reverse-process sampling (``sample``).

    Args:
        noise_steps: Number of entries in the noise schedule.
        beta_start: First value of the linear beta schedule.
        beta_end: Last value of the linear beta schedule.
        protein_high: Size of the third dimension of generated samples.
        protein_width: Size of the fourth dimension of generated samples.
        device: Device holding the schedule and the generated samples.
    """

    def __init__(self, noise_steps=1000, beta_start=1e-4, beta_end=0.02, protein_high=560, protein_width=8, device="cuda"):
        self.noise_steps = noise_steps
        self.beta_start = beta_start
        self.beta_end = beta_end
        self.protein_high = protein_high
        self.protein_width = protein_width
        self.device = device

        self.beta = self.prepare_noise_schedule().to(device)
        self.alpha = 1. - self.beta
        # alpha_hat[t] is the product of alpha[0..t].
        self.alpha_hat = torch.cumprod(self.alpha, dim=0)

    def prepare_noise_schedule(self):
        """Return ``noise_steps`` betas spaced linearly from ``beta_start`` to ``beta_end``."""
        return torch.linspace(self.beta_start, self.beta_end, self.noise_steps)

    @staticmethod
    def _per_sample(schedule, t, ndim):
        """Index ``schedule`` at ``t`` and reshape to broadcast over an ``ndim``-D batch."""
        # One coefficient per batch element, with singleton trailing dims so it
        # broadcasts over every non-batch dimension. For ndim == 4 this is the
        # same shape as indexing with [:, None, None, None].
        return schedule[t].reshape(-1, *([1] * (ndim - 1)))

    def noise_images(self, x, t):
        """Diffuse ``x`` forward to the timesteps ``t``.

        Args:
            x: Batch of clean samples with the batch dimension first; any
                number of trailing dimensions is accepted.
            t: 1-D integer tensor of timestep indices, one per batch element.

        Returns:
            Tuple ``(noisy_x, noise)`` where ``noise`` is the standard-normal
            tensor that was mixed into ``x``.
        """
        alpha_hat = self._per_sample(self.alpha_hat, t, x.dim())
        noise = torch.randn_like(x)
        noisy = torch.sqrt(alpha_hat) * x + torch.sqrt(1. - alpha_hat) * noise
        return noisy, noise

    def sample_timesteps(self, n):
        """Draw ``n`` timestep indices uniformly from ``[1, noise_steps)``.

        The result is created with torch's default device; callers move it
        where they need it.
        """
        return torch.randint(low=1, high=self.noise_steps, size=(n,))

    def sample(self, model, n):
        """Generate ``n`` new sequences by running the reverse process.

        Starts from standard-normal noise of shape
        ``(n, 1, protein_high, protein_width)`` and applies the denoising
        update for timesteps ``noise_steps - 1`` down to ``1``.

        Args:
            model: Callable ``model(x, t)`` returning the predicted noise for
                batch ``x`` at integer timesteps ``t``. It is switched to eval
                mode while sampling.
            n: Number of sequences to generate.

        Returns:
            Tensor of shape ``(n, 1, protein_high, protein_width)`` on
            ``self.device``.
        """
        logging.info(f"Sampling {n} new sequences......")
        # Remember the caller's mode so sampling does not silently flip an
        # eval-mode model into training mode.
        was_training = getattr(model, "training", True)
        model.eval()
        try:
            with torch.no_grad():
                x = torch.randn((n, 1, self.protein_high, self.protein_width)).to(self.device)
                steps = reversed(range(1, self.noise_steps))
                # reversed(range) has no len(), so give tqdm the total explicitly.
                for i in tqdm(steps, total=max(self.noise_steps - 1, 0), position=0):
                    # Build the timestep batch directly on the target device
                    # instead of creating it on CPU and copying every step.
                    t = torch.full((n,), i, dtype=torch.long, device=self.device)
                    predicted_noise = model(x, t)
                    alpha = self._per_sample(self.alpha, t, x.dim())
                    alpha_hat = self._per_sample(self.alpha_hat, t, x.dim())
                    beta = self._per_sample(self.beta, t, x.dim())
                    # No fresh noise is added on the final step (i == 1).
                    noise = torch.randn_like(x) if i > 1 else torch.zeros_like(x)
                    x = 1 / torch.sqrt(alpha) * (x - ((1 - alpha) / (torch.sqrt(1 - alpha_hat))) * predicted_noise) + torch.sqrt(beta) * noise
        finally:
            # Restore the previous mode even if the model raised mid-sampling.
            model.train(was_training)
        return x


#### 使用训练好的模型进行采样，循环生成序列并保存到fasta文件中去

In [None]:
import os
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from torch import optim
from tqdm import tqdm
import logging
from torch.utils.tensorboard import SummaryWriter

from utils import *
from modules import UNet

# Load the pre-trained model, then repeatedly sample batches of sequences and
# write each batch to its own FASTA file.
device = "cuda:0"
model = UNet().to(device)
model_path = "model_pt/P450Diffusion_pre_trained_model.pt"
save_fasta_path = "P450Diffusion_pre_trained_model_gen_sequences/"
num_batches = 400   # number of FASTA files to write
batch_size = 32     # sequences generated per file

# map_location makes the load independent of the device the checkpoint was
# saved from (only GPU 0 is visible to this process).
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
ckpt = torch.load(model_path, map_location=device)
model.load_state_dict(ckpt)
diffusion = Diffusion(protein_high=560, protein_width=8, device=device)

# save_sequence comes from utils and its directory handling is not visible
# here, so make sure the output directory exists before the long sampling run.
os.makedirs(save_fasta_path, exist_ok=True)

for i in range(num_batches):
    sample_sequences = diffusion.sample(model, n=batch_size)
    save_sequence(sample_sequences, os.path.join(save_fasta_path, f"gen_{i}.fasta"))
print("over!")