In [1]:
# Make the project's modules importable from this notebook. The notebook must sit in the
# same folder as the function files (or package folders). To find that folder's path, locate
# the folder in the file browser on the left, right-click it and copy its path.
import sys

PROJECT_DIR = '/content/drive/MyDrive/JPMorgan/自写代码/TensorFlow_codes'

# Guarded so that re-running this cell does not keep adding duplicate entries to sys.path.
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

In [None]:
pip install einops

In [None]:
pip install wandb

In [None]:
pip install pytorch_lightning

In [None]:
pip install pykeops

In [6]:
pip install -q -U tensorflow-addons

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m1.0/1.1 MB[0m [31m35.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [7]:
import os
import argparse
import json
import numpy as np
#------------------------------------------------------------------
import tensorflow as tf
from keras.models import Sequential

import torch
#------------------------------------------------------------------
from utils.util import get_mask_mnr, get_mask_bm, get_mask_rm
from utils.util import find_max_epoch, print_size, sampling, calc_diffusion_hyperparams

from imputers.DiffWaveImputer import DiffWaveImputer
from imputers.SSSDSAImputer import SSSDSAImputer
from imputers.SSSDS4Imputer import SSSDS4Imputer

from sklearn.metrics import mean_squared_error
from statistics import mean



[KeOps] Compiling cuda jit compiler engine ... OK
[pyKeOps] Compiling nvrtc binder for python ... OK


In [8]:
def generate(output_directory,
             num_samples,
             ckpt_path,
             data_path,
             ckpt_iter,
             use_model,
             masking,
             missing_k,
             only_generate_missing):
    """
    Impute the test data with a trained diffusion model and report the MSE.

    The test array is split into 4 batches. For every batch a mask is built,
    `sampling` is run conditioned on the batch and the mask, and the generated
    samples, the original batch and the mask are saved as .npy files. The mean
    squared error over the positions where the mask is 0 is collected per
    batch and the average over all batches is printed at the end.

    Besides its arguments, this function reads the module-level names
    `diffusion_config`, `diffusion_hyperparams` and `model_config`, which must
    be defined before it is called.

    Parameters:
    output_directory (str):           save generated samples, originals and masks under this path
    num_samples (int):                number of samples to generate per batch
    ckpt_path (str):                  checkpoint path
    data_path (str):                  path to the test dataset, a numpy array (.npy)
    ckpt_iter (int or 'max'):         the pretrained checkpoint to be loaded;
                                      automatically selects the maximum iteration if 'max' is selected
    use_model (int):                  0:DiffWave. 1:SSSDSA. 2:SSSDS4.
    masking (str):                    'mnr': missing not at random, 'bm': black-out, 'rm': random missing
    missing_k (int):                  k missing time points for each channel across the length.
    only_generate_missing (int):      0:all sample diffusion.  1:only apply diffusion to missing portions of the signal

    Returns:
    None. Results are written to disk and the averaged MSE is printed.

    Raises:
    ValueError:  if `use_model` or `masking` is not one of the supported values.
    Exception:   if the checkpoint weights cannot be loaded (the original error is chained).
    """
    # Local import: the file's import cell does not import `time`.
    import time

    model_classes = {0: DiffWaveImputer, 1: SSSDSAImputer, 2: SSSDS4Imputer}
    mask_builders = {'mnr': get_mask_mnr, 'bm': get_mask_bm, 'rm': get_mask_rm}

    # Fail fast on unsupported options instead of crashing later on an unbound
    # `net` / `mask` variable.
    if use_model not in model_classes:
        raise ValueError('Model chosen not available: use_model={!r} '
                         '(expected 0, 1 or 2).'.format(use_model))
    if masking not in mask_builders:
        raise ValueError("Unknown masking {!r} (expected 'mnr', 'bm' or 'rm').".format(masking))
    build_mask = mask_builders[masking]

    # generate experiment (local) path
    local_path = "T{}_beta0{}_betaT{}".format(diffusion_config["T"],
                                              diffusion_config["beta_0"],
                                              diffusion_config["beta_T"])

    # Get shared output_directory ready
    output_directory = os.path.join(output_directory, local_path)
    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
        os.chmod(output_directory, 0o775)
    print("output directory", output_directory, flush=True)

    # predefine model
    net = model_classes[use_model](**model_config)
    print_size(net)

    # load checkpoint
    ckpt_path = os.path.join(ckpt_path, local_path)
    print(ckpt_path)
    if ckpt_iter == 'max':
        ckpt_iter = find_max_epoch(ckpt_path)
    print('Print Ckpt_iter: ', ckpt_iter)

    model_path = os.path.join(ckpt_path, '{}.weight'.format(ckpt_iter))
    try:
        net.load_weights(model_path, by_name=False)
    except Exception as err:
        # Chain the original error so the real cause (missing file, shape
        # mismatch, ...) stays visible in the traceback.
        raise Exception('No valid model found at {}'.format(model_path)) from err
    print('Successfully loaded model at iteration {}'.format(ckpt_iter))

    ### Custom data loading and reshaping ###

    # Split the test array into 4 equal batches along the first axis.
    # np.split raises if that axis is not divisible by 4.
    testing_data = np.load(data_path)
    testing_data = np.array(np.split(testing_data, 4, 0))
    testing_data = tf.convert_to_tensor(testing_data, dtype=tf.float32)
    print('Data loaded')

    all_mse = []

    print('This is the net used: ', net)

    for i, batch in enumerate(testing_data):
        print('Len testing data: ', len(testing_data))  # number of batches in the test data
        print('This is number i++++++++++++++++++: ', i)  # index of the batch currently being processed

        # Build one mask from the first sample of the batch, swap its two
        # axes, then add a leading batch axis and repeat it for every sample.
        mask_T = build_mask(batch[0], missing_k)
        mask = tf.transpose(mask_T, perm=[1, 0])
        mask = tf.expand_dims(mask, axis=0)
        mask = tf.repeat(mask, repeats=batch.shape[0], axis=0)

        # Swap the last two axes of the batch so that it matches the mask layout.
        batch = tf.transpose(batch, perm=[0, 2, 1])

        sample_channels = batch.shape[1]
        sample_length = batch.shape[2]

        # Wall-clock timing; does not require a CUDA-enabled PyTorch build.
        start_time = time.perf_counter()
        generated_audio = sampling(net, (num_samples, sample_channels, sample_length),
                                   diffusion_hyperparams,
                                   cond=batch,
                                   mask=mask,
                                   only_generate_missing=only_generate_missing)
        elapsed_seconds = int(time.perf_counter() - start_time)

        print('generated {} utterances of random_digit at iteration {} in {} seconds'.format(num_samples,
                                                                                             ckpt_iter,
                                                                                             elapsed_seconds))

        generated_audio = generated_audio.numpy()
        batch = batch.numpy()
        mask = mask.numpy()

        for prefix, array in (('imputation', generated_audio),
                              ('original', batch),
                              ('mask', mask)):
            np.save(os.path.join(output_directory, '{}{}.npy'.format(prefix, i)), array)

        print('saved generated samples at iteration %s' % ckpt_iter)

        # Score only the positions where the mask is 0.
        # NOTE(review): this indexing presumably needs num_samples to equal the
        # batch size so that the mask and generated_audio shapes agree - confirm.
        missing = ~mask.astype(bool)
        mse = mean_squared_error(generated_audio[missing], batch[missing])
        all_mse.append(mse)

    print('Total MSE:', mean(all_mse))

In [9]:
if __name__ == "__main__":
    # Command-line style options, used here only for their defaults.
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', type=str, default='/content/drive/MyDrive/JPMorgan/自写代码/TensorFlow_codes/config/config_DiffWave.json',
                        help='JSON file for configuration')
    parser.add_argument('-ckpt_iter', '--ckpt_iter', default='max',
                        help='Which checkpoint to use; assign a number or "max"')
    parser.add_argument('-n', '--num_samples', type=int, default=500,
                        help='Number of utterances to be generated')
    # Parse an empty argument list so that sys.argv is ignored and the
    # defaults above always apply.
    args = parser.parse_args([])

    # Parse configs. The names assigned below live at module level because
    # generate() reads them as globals.
    with open(args.config, encoding='utf-8') as f:
        config = json.load(f)
    print(config)

    gen_config = config['gen_config']

    train_config = config["train_config"]  # training parameters

    trainset_config = config["trainset_config"]  # to load trainset

    diffusion_config = config["diffusion_config"]  # basic hyperparameters

    diffusion_hyperparams = calc_diffusion_hyperparams(
        **diffusion_config)  # dictionary of all diffusion hyperparameters

    # Pick the model section of the config that matches the chosen model.
    if train_config['use_model'] == 0:
        model_config = config['wavenet_config']
    elif train_config['use_model'] == 1:
        model_config = config['sashimi_config']
    elif train_config['use_model'] == 2:
        model_config = config['wavenet_config']
    else:
        # Without this, model_config would be left undefined (or stale from a
        # previous run of this cell) and fail later in a confusing way.
        raise ValueError('Unsupported use_model {!r} in train_config (expected 0, 1 or 2).'
                         .format(train_config['use_model']))

    

{'diffusion_config': {'T': 10, 'beta_0': 0.0001, 'beta_T': 0.02}, 'wavenet_config': {'in_channels': 14, 'out_channels': 14, 'num_res_layers': 12, 'res_channels': 256, 'skip_channels': 256, 'dilation_cycle': 12, 'diffusion_step_embed_dim_in': 128, 'diffusion_step_embed_dim_mid': 512, 'diffusion_step_embed_dim_out': 512}, 'train_config': {'output_directory': '/content/drive/MyDrive/JPMorgan/自写代码/TensorFlow_codes/Results_CSDI/Mujoco/train_90/', 'ckpt_iter': 'max', 'iters_per_ckpt': 100, 'iters_per_logging': 100, 'n_iters': 500, 'learning_rate': 0.0002, 'only_generate_missing': 0, 'use_model': 0, 'masking': 'rm', 'missing_k': 90}, 'trainset_config': {'train_data_path': '/content/drive/MyDrive/JPMorgan/自写代码/Data/Mujoco/train_mujoco.npy', 'test_data_path': '/content/drive/MyDrive/JPMorgan/自写代码/Data/Mujoco/test_mujoco.npy', 'segment_length': 100, 'sampling_rate': 100}, 'gen_config': {'output_directory': '/content/drive/MyDrive/JPMorgan/自写代码/TensorFlow_codes/Results_CSDI/Mujoco/test_90', 'ckpt

In [10]:
# Run the imputation: gen_config supplies the generation settings, the rest
# comes from the parsed arguments and the train/trainset sections of the config.
generate(
    **gen_config,
    ckpt_iter=args.ckpt_iter,
    num_samples=args.num_samples,
    use_model=train_config["use_model"],
    data_path=trainset_config["test_data_path"],
    masking=train_config["masking"],
    missing_k=train_config["missing_k"],
    only_generate_missing=train_config["only_generate_missing"],
)

output directory /content/drive/MyDrive/JPMorgan/自写代码/TensorFlow_codes/Results_CSDI/Mujoco/test_90/T10_beta00.0001_betaT0.02
/content/drive/MyDrive/JPMorgan/自写代码/TensorFlow_codes/Results_CSDI/Mujoco/train_90/T10_beta00.0001_betaT0.02
Print Ckpt_iter:  600
Successfully loaded model at iteration 600
Data loaded
This is the net used:  <imputers.DiffWaveImputer.DiffWaveImputer object at 0x7f0983e76580>
Len testing data:  4
This is number i++++++++++++++++++:  0
begin sampling, total number of reverse steps = 10
This is the shape of h: ---------------------------------- (500, 256, 100)
This is the shape of h: ---------------------------------- (500, 256, 100)
This is the shape of h: ---------------------------------- (500, 256, 100)
This is the shape of h: ---------------------------------- (500, 256, 100)
This is the shape of h: ---------------------------------- (500, 256, 100)
This is the shape of h: ---------------------------------- (500, 256, 100)
This is the shape of h: -------------

In [None]:
# PyTorch: 2x3 tensor of ones, with the shape passed as a single tuple.
torch.ones((2, 3))

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [None]:
# PyTorch: 2x3 tensor of ones, with the shape passed as separate integer arguments.
torch.ones(2, 3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [None]:
# TensorFlow: 2x3 float32 tensor of ones (shape given as a list, dtype as second argument).
tf.ones([2,3],tf.float32)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 1., 1.],
       [1., 1., 1.]], dtype=float32)>

In [None]:
# PyTorch: square root of a scalar tensor built from the integer 4; displays as tensor(2.).
torch.sqrt(torch.tensor(4))

tensor(2.)

In [None]:
# TensorFlow: square root of a scalar tensor built from the float 4.0; result is a float32 2.0.
tf.math.sqrt(tf.convert_to_tensor(4.0))

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>