In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import os
import timegan_model
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from scipy import stats
import torch.utils.data.dataset as Dataset
import torch.utils.data.dataloader as DataLoader


In [2]:
class subDataset(Dataset.Dataset):
    """Map-style dataset that pairs each sample with its label.

    ``Data`` and ``Label`` are any indexable containers (e.g. NumPy arrays)
    addressed with the same integer index. Items are returned as float32
    tensors on the CPU; moving them to another device is left to the caller.
    """

    def __init__(self, Data, Label):
        """Store the sample container and the label container as given."""
        self.Data = Data
        self.Label = Label

    def __len__(self):
        """Return the number of samples (the length of ``Data``)."""
        return len(self.Data)

    @staticmethod
    def _to_float_tensor(value):
        """Copy ``value`` into a new float32 tensor, preserving its shape."""
        if isinstance(value, torch.Tensor):
            return value.detach().clone().to(torch.float32)
        # torch.tensor() always interprets its argument as data. The legacy
        # torch.Tensor(...) constructor treats an integer scalar as a *size*
        # and returns uninitialised memory, which silently corrupts scalar
        # labels.
        return torch.tensor(value, dtype=torch.float32)

    def __getitem__(self, index):
        """Return ``(data, label)`` for ``index`` as float32 tensors."""
        data = self._to_float_tensor(self.Data[index])
        label = self._to_float_tensor(self.Label[index])
        return data, label

def weight_init(model):
    """Re-initialise the parameters of ``model`` in place, by layer type.

    * Conv / BatchNorm / InstanceNorm layers: weight ~ N(0, 0.02), following
      the original "according to the DCGAN paper" note.
    * ``nn.Linear``: Xavier-uniform weight, zero bias.
    * ``nn.RNN`` / ``nn.LSTM`` / ``nn.GRU``: Xavier-uniform input-hidden and
      hidden-hidden weights, input-hidden bias filled with 1, hidden-hidden
      bias filled with 0.

    Layers of any other type are left untouched. Returns ``None``.
    """
    normal_init_types = (nn.Conv2d, nn.Conv1d, nn.ConvTranspose2d,
                         nn.BatchNorm1d, nn.BatchNorm2d, nn.InstanceNorm1d)
    recurrent_types = (nn.RNN, nn.LSTM, nn.GRU)

    with torch.no_grad():
        for m in model.modules():
            if isinstance(m, normal_init_types):
                # Normalisation layers built with affine=False (the default
                # for InstanceNorm1d) have weight=None; skip them instead of
                # failing on `None.data`.
                # NOTE(review): normalisation scales are drawn zero-centred
                # like the conv weights; the PyTorch DCGAN tutorial uses
                # mean 1.0 for BatchNorm — confirm which is intended.
                if getattr(m, 'weight', None) is not None:
                    nn.init.normal_(m.weight.data, 0, 0.02)

            elif isinstance(m, nn.Linear):
                for name, param in m.named_parameters():
                    if 'weight' in name:
                        torch.nn.init.xavier_uniform_(param)
                    elif 'bias' in name:
                        param.data.fill_(0)

            elif isinstance(m, recurrent_types):
                for name, param in m.named_parameters():
                    if 'weight_ih' in name or 'weight_hh' in name:
                        torch.nn.init.xavier_uniform_(param.data)
                    elif 'bias_ih' in name:
                        param.data.fill_(1)
                    elif 'bias_hh' in name:
                        param.data.fill_(0)

def get_data(path):
    """Load ``data_pd_train.npz`` from ``path`` and return the third sample group.

    The archive must contain ``arr_0`` (features, indexed as
    ``[sample, step, channel]``) and ``arr_1`` (attributes, indexed as
    ``[sample, attribute]``). Samples are grouped by the last feature
    channel, rounded to 3 decimals ("ratio"):

    1. ratio is 0 at every step;
    2. otherwise, ratio is 1 on every step of the window ``[6:18]``;
    3. everything else.

    Only group 3 is returned, as ``(features, attributes)`` with the sample
    axis first. For the ``(N, 24, 11)`` / ``(N, 8)`` layout that the earlier
    hard-coded reshapes assumed, the result is unchanged. An empty group
    (including group 3 itself) now yields zero-length arrays instead of
    raising ``AttributeError`` on ``None``.
    """
    archive_path = os.path.join(path, "data_pd_train.npz")
    # NpzFile keeps the underlying zip open until it is closed, so the arrays
    # are materialised inside the `with` block.
    with np.load(archive_path) as data_npz:
        data_feature = data_npz['arr_0']
        data_attribute = data_npz['arr_1']

    ratio = np.round(data_feature[:, :, -1], 3)
    all_zero = np.all(ratio == 0, axis=1)
    # The earlier code OR-ed the windows [6:19], [5:19], [6:18] and [5:18].
    # [6:18] is contained in each of the others, so the disjunction holds
    # exactly when the [6:18] window is all ones.
    core_all_one = np.all(ratio[:, 6:18] == 1, axis=1)

    # Group 3 is whatever is neither all-zero nor all-one over the window.
    group_3 = ~(all_zero | core_all_one)

    # Boolean indexing copies the selected samples in a single pass, which
    # replaces the per-sample np.concatenate (quadratic) of the earlier code.
    data_fea_3 = data_feature[group_3]
    data_att_3 = data_attribute[group_3]
    return data_fea_3, data_att_3

In [3]:
if __name__ == '__main__':
    # Generate synthetic sequences with the trained generator -> supervisor ->
    # recovery chain, post-process them against the real sequences, and
    # collect real/fake arrays for the cells below.

    torch.manual_seed(10)
    # The post-processing below draws from np.random, so NumPy is seeded too;
    # torch.manual_seed alone does not make those draws reproducible.
    np.random.seed(10)

    device = "cpu"
    print(f"Using {device} device")

    FEATURE_DIM =11
    ATT_DIM =8
    BATCH_SIZE = 3
    SEQ_LEN = 24
    HIDDEN_DIM = 256
    EMB_DIM = 11
    NUM_LAYERS = 3
    NOISE_DIM = FEATURE_DIM

    data_fea_3,data_att_3 = get_data(path = "/remote-home/21310019/2024/cloudtype_pd_other_GANs")
    train_set = subDataset(data_fea_3,data_att_3)
    # drop_last=True keeps every batch at exactly BATCH_SIZE samples.
    train_data = DataLoader.DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=0, drop_last=True)

    recovery = timegan_model.RecoveryNetwork(feature_dim=FEATURE_DIM,
                                             hidden_dim=HIDDEN_DIM,
                                             num_layers=NUM_LAYERS,emb_dim=EMB_DIM)

    generator = timegan_model.GeneratorNetwork(noise_dim=NOISE_DIM+ATT_DIM,hidden_dim=HIDDEN_DIM,num_layers=NUM_LAYERS,emb_dim=EMB_DIM)
    supervisor = timegan_model.SupervisorNetwork(hidden_dim=HIDDEN_DIM,emb_dim=EMB_DIM,num_layers=NUM_LAYERS)

    gen_weight_path = '/remote-home/21310019/2024/cloudtype_pd_other_GANs/time_gan/model/joint_weight/'

    generator.load_state_dict(torch.load(gen_weight_path+'gen_0600epoch.pth',map_location=device))
    recovery.load_state_dict(torch.load(gen_weight_path+'rec_0600epoch.pth',map_location=device))
    supervisor.load_state_dict(torch.load(gen_weight_path+'sup_0600epoch.pth',map_location=device))

    # Inference only: put the networks in eval mode so that any dropout or
    # normalisation layers they contain behave deterministically.
    for net in (generator, supervisor, recovery):
        net.to(device)
        net.eval()

    def moving_average(interval, window):
        """Smooth ``interval`` by convolving it with ``window`` (same length out)."""
        return np.convolve(interval, window, 'same')

    fake_ratio_list = []
    real_ratio_list = []

    class_list = []

    fake_cp_list = []
    real_cp_list = []

    # No gradients are needed for generation.
    with torch.no_grad():
        for seq , label in train_data:

            seq = seq.to(device)
            label = label.to(device)
            batch_size = seq.shape[0]
            seq_len = seq.shape[1]

            Z = torch.rand(batch_size, seq_len, NOISE_DIM)
            Z = Z.to(device)

            # Repeat each label along the time axis: [batch, seq_len, ATT_DIM]
            label_repeat = torch.repeat_interleave(label,seq_len,0).reshape(batch_size,seq_len,ATT_DIM)
            # Noise and label concatenated on the last axis
            gen_input= torch.cat([Z,label_repeat],dim = -1).to(device)

            # Generator forward pass
            E_hat = generator(gen_input)
            H_hat = supervisor(E_hat)

            # Synthetic data: all channels but the last are treated as class
            # scores, the last channel as the continuous "ratio".
            g_output_feature = recovery(H_hat).detach().cpu().numpy()
            g_output_feature_dis = g_output_feature[:,:,:-1]
            g_output_feature_con = g_output_feature[:,:,-1]

            batch_fake_discrete = []
            batch_real_discrete = []
            batch_fake_continuous = []
            batch_real_continuous = []
            class_label_ = np.argmax(label.cpu().numpy(),axis = 1)
            batch_data_feature = seq.cpu().numpy()

            # Iterate over the samples actually present in this batch.
            for i in range(batch_size):

                batch_data_feature_con = batch_data_feature[i,:,-1]
                fake_ = moving_average(g_output_feature_con[i,:],[0.5,0.5])
                for j in range(seq_len):
                    real_j = batch_data_feature_con[j]
                    gap = real_j-fake_[j]
                    if real_j == 1 or real_j == 0:
                        # Where the real ratio is exactly 0 or 1, copy it.
                        fake_[j] = real_j
                    elif gap>0.6:
                        fake_[j] = fake_[j]+np.random.uniform(0.6,0.67)
                    elif 0.5 < gap <= 0.6:
                        # Upper bound is inclusive so a gap of exactly 0.6 is
                        # no longer skipped by both branches.
                        fake_[j] = fake_[j]+np.random.uniform(0.5,0.6)

                batch_fake_continuous.append(fake_)
                batch_real_continuous.append(batch_data_feature_con)

                # Per-step class index: [seq_len, channels-1] -> [seq_len]
                fake_sample_discrete = np.argmax(g_output_feature_dis[i,:,:],axis=1)
                real_sample_discrete = np.argmax(batch_data_feature[i,:,:-1],axis = 1)
                # The index sequence is smoothed twice with a 2-tap average.
                fake_sample_discrete = moving_average(fake_sample_discrete, [0.5,0.5])
                fake_sample_discrete = moving_average(fake_sample_discrete, [0.5,0.5])
                for j in range(seq_len):
                    if real_sample_discrete[j] == 0 and batch_data_feature_con[j]==1 :
                        fake_sample_discrete[j] = 0
                    elif real_sample_discrete[j] == 7 or real_sample_discrete[j] == 8:
                        # NOTE(review): classes 7 and 8 are always copied from
                        # the real sequence — confirm what these indices mean.
                        fake_sample_discrete[j] = real_sample_discrete[j]
                    else:
                        # Truncate the smoothed value back to an integer index.
                        fake_sample_discrete[j] = int(fake_sample_discrete[j])
                batch_fake_discrete.append(fake_sample_discrete)
                batch_real_discrete.append(real_sample_discrete)

            fake_cp_list.append(np.array(batch_fake_discrete))   # [batch, seq_len]
            real_cp_list.append(np.array(batch_real_discrete))
            class_list.append(np.array(class_label_))            # [batch]
            fake_ratio_list.append(np.array(batch_fake_continuous))
            real_ratio_list.append(np.array(batch_real_continuous))

    print(np.array(real_ratio_list).shape)

    # Flatten the (num_batches, batch, seq_len) stacks to (num_samples, seq_len).
    fake_ratio_arr = np.array(fake_ratio_list).reshape(-1,SEQ_LEN)
    real_ratio_arr = np.array(real_ratio_list).reshape(-1,SEQ_LEN)

    fake_cp_arr = np.array(fake_cp_list).reshape(-1,SEQ_LEN)
    real_cp_arr = np.array(real_cp_list).reshape(-1,SEQ_LEN)

    class_arr  = np.array(class_list).reshape(-1,)
    print("class_arr.shape:",class_arr.shape,
          "fake_ratio_arr.shape:",fake_ratio_arr.shape)

    

Using cpu device


(533, 3, 24)
class_arr.shape: (1599,) fake_ratio_arr.shape: (1599, 24)


In [4]:
# Persist the post-processed synthetic ratio sequences under key ``arr_0``.
with open(
    '/remote-home/21310019/2024/cloudtype_pd_other_GANs/time_gan/generated_ratio_Z_asinput.npz',
    mode='wb',
) as npz_out:
    np.savez(npz_out, arr_0=fake_ratio_arr)

In [12]:
# Persist the synthetic ratio sequences under key ``arr_0``.
# NOTE(review): this cell carries execution count 12 and stores the same
# variable as the preceding save cell under a different filename. Presumably
# the array was regenerated between the two runs — confirm, because a fresh
# Restart & Run All writes identical content to both files.
with open(
    '/remote-home/21310019/2024/cloudtype_pd_other_GANs/time_gan/generated_ratio_only.npz',
    mode='wb',
) as npz_out:
    np.savez(npz_out, arr_0=fake_ratio_arr)