In [4]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import pickle 

import os, time
import math

import argparse
import tabulate

import utils, models, ml_algorithm
import wandb
from torch.utils.data import DataLoader, random_split, ConcatDataset, Subset, TensorDataset
from collections import defaultdict
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression

import numpy as np
import pandas as pd

import pickle
import random
import math
from torch.utils.data import Dataset
from torch.distributions import Normal
from collections import defaultdict
from prettytable import PrettyTable
warnings.filterwarnings('ignore')

## MinMax Scaling Functions ------------------------------------
def minmax_col(data, name):
    """Min-max scale the column ``name`` of ``data`` in place.

    Args:
        data: DataFrame holding the column; it is modified in place.
        name: label of the column to scale.

    Returns:
        (minval, maxval): the column minimum and maximum before scaling,
        so the transform can be undone later.
    """
    minval, maxval = data[name].min(), data[name].max()
    span = maxval - minval
    # A constant column has zero range; dividing by it would turn every
    # value into NaN, so map such a column to 0 instead.
    scale = span if span != 0 else 1
    data[name] = (data[name] - minval) / scale
    return minval, maxval

def minmax_tensor(tensor):
    """Min-max scale a tensor using its global minimum and maximum.

    Args:
        tensor: tensor to scale; it is not modified.

    Returns:
        (normalized, minvals, maxvals): the scaled tensor plus the global
        minimum and maximum that were used.
    """
    minvals = tensor.min()
    maxvals = tensor.max()

    span = maxvals - minvals
    # Zero range (all values equal) would give 0/0 = NaN; scale by 1 so the
    # result is all zeros instead.
    scale = span + 1 if span == 0 else span
    normalized = (tensor - minvals) / scale
    return normalized, minvals, maxvals

def restore_minmax(data, minv, maxv):
    """Undo a min-max scaling: ``data * (maxv - minv) + minv``.

    Args:
        data: scaled values.
        minv: minimum used when scaling; ``None`` is treated as 0.
        maxv: maximum used when scaling; ``None`` is treated as 0.

    Returns:
        The values mapped back to the original range.
    """
    # Identity checks instead of `== None`: an elementwise `==` on a
    # multi-element array bound cannot be used as a condition.
    if minv is None:
        minv = 0
    if maxv is None:
        maxv = 0
    return (data * (maxv - minv)) + minv

In [8]:
class Tabledata(Dataset):
    """Per-cluster dataset built from a flat patient table.

    Each item is one cluster, padded to 124 rows. The constructor scales the
    continuous columns of ``data`` IN PLACE (via ``minmax_col``), so the
    caller's DataFrame is modified.

    Column layout read by position: column 0 is the cluster id, columns 1-5
    the continuous features, columns 6-12 the categorical features and
    column 13 the day offset. The table must also have columns named
    'cluster', 'y', 'd', 'age', 'dis', 'danger', 'CT_R' and 'CT_E'.

    Args:
        model_name: selects the feature layout. 'iTransformer' keeps all five
            continuous columns and uses no treatment split; 'cevae' uses
            'danger' as the single treatment and drops only 'dis'; any other
            name uses ['dis', 'danger'] as treatments and drops both.
        data: source DataFrame (mutated, see above).
        scale: accepted for interface compatibility; not read here.
        binary_t: binary treatments are not supported; True raises.

    Raises:
        NotImplementedError: if ``binary_t`` is True.
    """
    def __init__(self, model_name, data, scale='minmax', binary_t=False):
        if binary_t:
            # The original raised a plain string here, which Python reports as
            # "TypeError: exceptions must derive from BaseException".
            raise NotImplementedError('do not use binary t')

        self.use_treatment = not (model_name == 'iTransformer')
        # zero templates used to pad every cluster to 124 rows
        self.diff_tensor = torch.zeros([124,1])
        if self.use_treatment:
            if not model_name == 'cevae':
                self.cont_tensor = torch.zeros([124,3])
            else:
                self.cont_tensor = torch.zeros([124,4])
        else:
            self.cont_tensor = torch.zeros([124,5])

        self.cat_tensor = torch.zeros([124,7])
        # label of a cluster = ('y', 'd') of the last row of that cluster
        yd=[]
        for _, group in data.groupby('cluster'):
            yd.append(group[['y', 'd']].tail(1))
        yd = pd.concat(yd)

        ## data preprocessing ##
        # normalise the continuous columns #
        for c in ["age", "dis", "danger", "CT_R", "CT_E"]:
            # dis : 0~6
            # danger : 3~11
            minmax_col(data, c)
        # keep the (min, max) of the labels so predictions can be un-scaled
        self.a_y, self.b_y = minmax_col(yd,"y")
        self.a_d, self.b_d = minmax_col(yd,"d")

        ## split the table by feature type and store ##
        self.cluster = data.iloc[:,0].values.astype('float32')

        if model_name == 'cevae':
            self.treatment = data['danger'].values.astype('float32')
        else:
            self.treatment = data[['dis', 'danger']].values.astype('float32')

        if self.use_treatment:
            drop_col = ['dis'] if model_name == 'cevae' else ['dis', 'danger']
            self.cont_X = data.iloc[:, 1:6].drop(columns=drop_col).values.astype('float32')
        else:
            self.cont_X = data.iloc[:, 1:6].values.astype('float32')

        self.cat_X = data.iloc[:, 6:13].astype('category')
        self.diff_days = data.iloc[:, 13].values.astype('float32')

        y = torch.tensor(yd['y'].values.astype('float32'))
        d = torch.tensor(yd['d'].values.astype('float32'))

        self.yd = torch.stack([y, d], dim=1)

        # encode the categorical columns as integer codes #
        self.cat_cols = self.cat_X.columns
        self.cat_map = {col: {cat: i for i, cat in enumerate(self.cat_X[col].cat.categories)} for col in self.cat_cols}
        self.cat_X = self.cat_X.apply(lambda x: x.cat.codes)
        self.cat_X = torch.from_numpy(self.cat_X.to_numpy()).long()

    def __len__(self):
        """Number of distinct cluster ids."""
        return len(np.unique(self.cluster))

    def __getitem__(self, index):
        '''
        Return the padded tensors of the cluster whose id equals ``index``.

        NOTE(review): rows are selected with ``self.cluster == index`` and the
        label with ``self.yd[index]``, so this assumes cluster ids are exactly
        0..N-1 -- confirm against the data.

            [batch x padding x embedding]
            cont_tensor_p : padded patient-level continuous features
            cont_tensor_c : padded cluster-level continuous features
            cat_tensor_p : padded patient-level categorical codes
            cat_tensor_c : padded cluster-level categorical codes
            data_len : number of valid (non-padding) rows in the cluster
            yd : label (y, d)
            diff_tensor : padded day offsets of the cluster
            treatment : mean of the treatment values over the cluster rows
        '''
        rows = self.cluster == index

        diff_days = torch.from_numpy(self.diff_days[rows]).unsqueeze(1)
        diff_tensor = self.diff_tensor.clone()
        diff_tensor[:diff_days.shape[0]] = diff_days

        cont_X = torch.from_numpy(self.cont_X[rows])
        data_len = cont_X.shape[0]
        cont_tensor = self.cont_tensor.clone()
        cont_tensor[:cont_X.shape[0],] = cont_X

        cat_X = self.cat_X[rows]
        cat_tensor = self.cat_tensor.clone()
        cat_tensor[:cat_X.shape[0],] = cat_X

        # first 5 categorical / first 3 continuous columns are patient-level,
        # the remainder is cluster-level
        cat_tensor_p = cat_tensor[:, :5]
        cat_tensor_c = cat_tensor[:, 5:]
        cont_tensor_p = cont_tensor[:, :3]
        cont_tensor_c = cont_tensor[:, 3:]
        yd = self.yd[index]

        treatment = torch.mean(torch.tensor(self.treatment[rows]), dim=0) # t1: dis|t2: danger
        return cont_tensor_p, cont_tensor_c, cat_tensor_p, cat_tensor_c, data_len, yd, diff_tensor, treatment


In [9]:
# Build the cluster dataset with the 'cevae' feature layout.
raw_table = pd.read_csv('/data1/bubble3jh/cluster-regression/data/'+f"data_cut_0.csv")
dataset = Tabledata('cevae', raw_table, 'minmax')

# Split into train / validation / test; the split sizes come from the project helper.
# NOTE(review): no generator/seed is passed to random_split here, so the split
# depends on the global torch RNG state -- confirm this is intended.
split_sizes = utils.data_split_num(dataset)
train_dataset, val_dataset, test_dataset = random_split(dataset, split_sizes)

tr_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
print(f"Number of training Clusters : {len(train_dataset)}")

val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
# The whole test split is served as one single batch.
test_dataloader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

print("Successfully load data!")

Number of training Clusters : 1270
Successfully load data!


In [52]:
class CEVAEEmbedding(torch.nn.Module):
    '''
    Embed the padded continuous and categorical inputs of a cluster and
    concatenate them along the feature axis.

        treatments : 'single' builds an MLP for the cluster-level continuous
                     input; any other value skips it and doubles the width of
                     the patient-level MLP instead
        output_size : size of the embedding output
        disable_embedding : if True, continuous inputs are concatenated raw
                            instead of being embedded
        disable_pe : if True, no positional embedding (indexed by diff_days)
                     is added along the sequence
        reduction : "none" : return the un-reduced sequence
                    anything else is forwarded to models.reduction_cluster
                    ("mean" : mean over the cluster,
                     "date" : mean per date within the cluster)
        shift : with reduction "none", additionally return the positional
                embedding of index 5 as a start token
        use_treatment : with treatments='single', the cluster-level MLP takes
                        1 input feature if True, else 2
    '''
    def __init__(self,treatments, output_size=128, disable_embedding=False, disable_pe=True, reduction="date", shift=False, use_treatment = False):
        super().__init__()
        self.shift = shift
        self.reduction = reduction
        self.disable_embedding = disable_embedding
        self.disable_pe = disable_pe
        activation = nn.ELU()
        if not disable_embedding:
            print("Embedding applied to data")
            # four equally sized parts are concatenated in forward()
            nn_dim = emb_hidden_dim = emb_dim = output_size//4
            if treatments=='single':
                self.cont_c_NN = nn.Sequential(nn.Linear(1 if use_treatment else 2, emb_hidden_dim),
                                    activation,
                                    nn.Linear(emb_hidden_dim, nn_dim))
            else:
                # no cluster-level MLP: the patient MLP fills its share too
                nn_dim = nn_dim * 2
                self.cont_c_NN = None
            self.cont_p_NN = nn.Sequential(nn.Linear(3 , emb_hidden_dim),
                                        activation,
                                        nn.Linear(emb_hidden_dim, nn_dim))
        else:
            # NOTE(review): `emb_dim` is only assigned in the branch above, so
            # disable_embedding=True fails with NameError at the first
            # nn.Embedding below. The intended width is not visible here
            # (emb_dim_p / emb_dim_c are never read) -- needs the author.
            emb_dim_p = 5
            emb_dim_c = 2
        self.lookup_gender  = nn.Embedding(2, emb_dim)
        self.lookup_korean  = nn.Embedding(2, emb_dim)
        self.lookup_primary  = nn.Embedding(2, emb_dim)
        self.lookup_job  = nn.Embedding(11, emb_dim)
        self.lookup_rep  = nn.Embedding(34, emb_dim)
        self.lookup_place  = nn.Embedding(19, emb_dim)
        self.lookup_add  = nn.Embedding(31, emb_dim)
        if not disable_pe:
            if shift:
                # one extra index (5) is reserved for the start token
                self.positional_embedding  = nn.Embedding(6, output_size)
            else:
                self.positional_embedding  = nn.Embedding(5, output_size)

    def forward(self, cont_p, cont_c, cat_p, cat_c, val_len, diff_days):
        """Embed one batch.

        Returns, when ``reduction == "none"``, ``((x, diff_days, val_len),
        start_token)`` where ``start_token`` is None unless ``shift`` is set;
        otherwise whatever ``models.reduction_cluster`` returns.
        """
        if not self.disable_embedding:
            cont_p_emb = self.cont_p_NN(cont_p)
            cont_c_emb = self.cont_c_NN(cont_c) if self.cont_c_NN is not None else None

        a1_embs = self.lookup_gender(cat_p[:,:,0].to(torch.int))
        a2_embs = self.lookup_korean(cat_p[:,:,1].to(torch.int))
        a3_embs = self.lookup_primary(cat_p[:,:,2].to(torch.int))
        a4_embs = self.lookup_job(cat_p[:,:,3].to(torch.int))
        a5_embs = self.lookup_rep(cat_p[:,:,4].to(torch.int))
        a6_embs = self.lookup_place(cat_c[:,:,0].to(torch.int))
        a7_embs = self.lookup_add(cat_c[:,:,1].to(torch.int))

        # average the categorical embeddings within each group
        cat_p_emb = torch.mean(torch.stack([a1_embs, a2_embs, a3_embs, a4_embs, a5_embs]), axis=0)
        cat_c_emb = torch.mean(torch.stack([a6_embs, a7_embs]), axis=0)

        if not self.disable_embedding:
            # cont_c_emb is None when there is no cluster-level MLP
            tensors_to_concat = [tensor for tensor in [cat_p_emb, cat_c_emb, cont_p_emb, cont_c_emb] if tensor is not None]
            x = torch.cat(tensors_to_concat, dim=2)
        else:
            x = torch.cat((cat_p_emb, cat_c_emb, cont_p, cont_c), dim=2)

        if not self.disable_pe:
            x = x + self.positional_embedding(diff_days.int().squeeze(2))

        if self.reduction == "none":
            if self.shift:
                # Build the index on the same device as the activations rather
                # than hard-coding CUDA, so the module also runs on CPU.
                start_idx = torch.tensor([5], device=x.device)
                return (x, diff_days, val_len), self.positional_embedding(start_idx)
            else:
                return (x, diff_days, val_len), None
        else:
            return models.reduction_cluster(x, diff_days, val_len, self.reduction)
        

class CETransformer(nn.Module):
    """Transformer encoder over padded cluster sequences with chained heads.

    The encoder output is pooled to one latent vector ``z`` per cluster. From
    ``z`` the model predicts treatment 1, then treatment 2 (from ``z`` plus
    the embedded treatment 1), then the (y, d) outcome, and finally
    reconstructs the embedded input sequence through a linear decoder.

    All hyper-parameters are fixed in ``__init__``. Some sub-modules
    (``fc_mu``, ``transformer_decoder``, ``max_pool``) are not used in
    ``forward`` but are still registered, so they remain part of the
    state_dict.
    """
    def __init__(self):
        super(CETransformer, self).__init__()
        d_model=64
        nhead=4
        d_hid=128
        nlayers=5
        dropout=0.1
        pred_layers=1
        self.shift = False
        self.unidir = False
        self.is_variational = False
        self.is_synthetic = False

        if self.is_variational:
            print("variational z sampling")
        else:
            print("determinant z ")

        if self.unidir:
            print("unidirectional attention applied")
        else:
            print("maxpool applied")
        self.embedding = CEVAEEmbedding(treatments='double',output_size=d_model, disable_embedding = False, disable_pe=False, reduction="none", shift= False, use_treatment=True)

        encoder_layers = models.TransformerEncoderLayer(d_model, nhead, d_hid, dropout, batch_first=True, norm_first=True)
        self.transformer_encoder = models.customTransformerEncoder(encoder_layers, nlayers, d_model, pred_layers=pred_layers, residual_t=False, residual_x=True)

        # Variational Z
        self.fc_mu = nn.Linear(d_model, d_model)
        self.fc_logvar = nn.Linear(d_model, d_model)

        decoder_layers = models.TransformerDecoderLayer(d_model, nhead, d_hid, dropout, batch_first=True, norm_first=True)
        self.transformer_decoder = models.TransformerDecoder(decoder_layers, nlayers)
        self.max_pool = nn.MaxPool1d(kernel_size=124, stride=1)

        self.d_model = d_model

        self.z2t = models.MLP(d_model, d_model//2, 1, num_layers=pred_layers)
        self.t1_emb = models.MLP(1, d_model//2, d_model, num_layers=pred_layers)
        self.t2_emb = models.MLP(1, d_model//2, d_model, num_layers=pred_layers)
        self.zt12t2 = models.MLP(d_model, d_model//2, 1, num_layers=pred_layers)
        self.zt2yd = models.MLP(d_model, d_model//2, 2, num_layers=pred_layers)

        self.linear_decoder = models.MLP(d_model, d_model, d_model, num_layers=1) # Linear

    def generate_square_subsequent_mask(self, sz):
        """Return a (sz, sz) bool mask that is True strictly above the diagonal.

        True marks positions that must not be attended to (the future).
        """
        return torch.triu(torch.ones(sz, sz), diagonal=1).bool()

    def init_weights(self, c=None):
        """Re-initialise the encoder/decoder and all prediction heads.

        Args:
            c: unused; kept so existing call sites keep working.
        """
        initrange = 0.1
        # For embedding layers
        if hasattr(self.embedding, 'weight'):
            self.embedding.weight.data.uniform_(-initrange, initrange)

        # For transformer encoder and decoder
        for module in [self.transformer_encoder, self.transformer_decoder]:
            for param in module.parameters():
                if param.dim() > 1:
                    nn.init.xavier_uniform_(param)

        # For the MLP heads. The original list referenced `self.t_emb`, which
        # does not exist (the attributes are t1_emb / t2_emb), and skipped
        # zt12t2. Walking .modules() finds every Linear regardless of how the
        # MLP stores its layers.
        for mlp in [self.z2t, self.t1_emb, self.t2_emb, self.zt12t2, self.zt2yd]:
            for layer in mlp.modules():
                if isinstance(layer, nn.Linear):
                    layer.weight.data.uniform_(-initrange, initrange)
                    if layer.bias is not None:
                        layer.bias.data.zero_()

    def forward(self, cont_p, cont_c, cat_p, cat_c, val_len, diff_days, is_MAP=False):
        """Encode a batch, predict treatments and outcome, reconstruct the input.

        Args:
            cont_p, cont_c, cat_p, cat_c: padded feature tensors, passed
                straight to the embedding.
            val_len: number of valid (non-padding) rows per cluster.
            diff_days: per-row day index, used for the positional embedding.
            is_MAP: if True, use the latent mean directly without sampling.

        Returns:
            ``(x, x_recon, (enc_yd, enc_t), (dec_yd, dec_t), (z_mu, z_logvar))``
            where ``x`` is the embedded input with padding rows zeroed and
            ``enc_t`` / ``dec_t`` are the two treatment predictions
            concatenated along dim 1.
        """
        # Encoder
        if self.embedding.reduction != "none":
            x = self.embedding(cont_p, cont_c, cat_p, cat_c, val_len, diff_days).unsqueeze(1)
        else:
            (x, diff_days, _), _ = self.embedding(cont_p, cont_c, cat_p, cat_c, val_len, diff_days)

        # Every helper tensor is created on the device of the activations
        # instead of hard-coding CUDA, so the model also runs on CPU.
        device = x.device
        val_len = val_len.to(device)
        positions = torch.arange(x.size(1), device=device)
        index_tensor = positions[None, :, None]
        # True marks padding rows
        src_key_padding_mask = ~(positions[None, :] < val_len[:, None])
        src_mask = self.generate_square_subsequent_mask(x.size(1)).to(device) if self.unidir else None

        # Z ------
        # CETransformer encoder
        z, (enc_t1, enc_t2), enc_yd = self.transformer_encoder(x, mask=src_mask, src_key_padding_mask=src_key_padding_mask, val_len=val_len)
        if self.unidir:
            # use the last non-padding position of every sequence
            idx = val_len - 1
            z = z[torch.arange(z.size(0), device=z.device), idx]
        else:
            # max-pool over the sequence after zeroing the padding rows
            # NOTE(review): zeroed padding rows still take part in the max,
            # so a feature that is negative on every valid row pools to 0.
            # Left unchanged because it alters the model's outputs.
            val_mask = torch.arange(z.size(1), device=z.device)[None, :] < val_len[:, None]
            valid_z = z * val_mask[:, :, None].float()
            z = valid_z.max(dim=1)[0]

        # the latent mean is the pooled encoder output itself
        z_mu, z_logvar = z, self.fc_logvar(z)

        if is_MAP:
            z=z_mu
        elif self.is_variational:
            z = models.reparametrize(z_mu, z_logvar)
        else:
            # logvar = -100 makes the sampled noise negligible (deterministic z)
            z_logvar = torch.full_like(z_mu, -100.0)
            z = models.reparametrize(z_mu, z_logvar)

        # Flatten everything but the batch dim. Unlike a bare .squeeze() this
        # keeps the batch dim when the batch holds a single cluster.
        z_vec = z.reshape(z.size(0), -1)
        dec_t1 = self.z2t(z_vec)
        t1_emb = self.t1_emb(dec_t1)
        dec_t2 = self.zt12t2(z_vec + t1_emb)
        t2_emb = self.t2_emb(dec_t2)

        # Linear Decoder
        dec_yd = self.zt2yd(z_vec + t1_emb + t2_emb)

        pos_embeddings = self.embedding.positional_embedding(diff_days.squeeze(-1).long()) if not self.is_synthetic else torch.zeros_like(z.unsqueeze(1))

        z_expanded = z.unsqueeze(1) + pos_embeddings  # [batch_size, seq_len, hidden_dim]
        z_expanded = torch.where(index_tensor < val_len[:, None, None], z_expanded, torch.zeros_like(z_expanded))

        z_flat = z_expanded.view(-1, z.shape[-1])  # [batch_size * seq_len, hidden_dim]
        x_recon_flat = self.linear_decoder(z_flat)  # [batch_size * seq_len, hidden_dim]

        x_recon = x_recon_flat.view(z_expanded.shape)  # [batch_size, seq_len, hidden_dim]

        # zero the padding rows of both the target and the reconstruction
        x = torch.where(index_tensor < val_len[:, None, None], x, torch.zeros_like(x))
        x_recon = torch.where(index_tensor < val_len[:, None, None], x_recon, torch.zeros_like(x_recon))

        return x, x_recon, (enc_yd, torch.cat([enc_t1, enc_t2], dim=1)), (dec_yd, torch.cat([dec_t1, dec_t2], dim=1)), (z_mu, z_logvar)

class CEVAE(nn.Module):
    """CEVAE-style model: embedding -> encoder -> latent z -> decoder.

    All hyper-parameters are fixed in ``__init__``. Only one treatment is
    predicted; the second treatment slot in the outputs is filled with zeros.
    """
    def __init__(self):
        super(CEVAE, self).__init__()
        d_model=128
        d_hid=128
        nlayers=4
        dropout=0.1
        pred_layers=3
        self.shift = False
        self.unidir = False
        self.is_variational = False

        self.embedding = CEVAEEmbedding(treatments='single', output_size=d_model, disable_embedding = False, disable_pe=True, reduction="mean", shift= self.shift, use_treatment=True)

        self.encoder = models.CEVAE_Encoder(input_dim=d_model, latent_dim=d_hid, hidden_dim=d_model, shared_layers=nlayers, t_pred_layers=pred_layers , pred_layers=pred_layers, drop_out=dropout, t_embed_dim=d_hid, yd_embed_dim=d_hid)
        self.decoder = models.CEVAE_Decoder(latent_dim=d_hid, output_dim=d_model, hidden_dim=d_hid, t_pred_layers=pred_layers, shared_layers=nlayers, drop_out=dropout, t_embed_dim=d_hid)

    def forward(self, cont_p, cont_c, cat_p, cat_c, _len, diff, t_gt=None, is_MAP=False):
        """Run the encoder/decoder on one batch.

        Args:
            cont_p, cont_c, cat_p, cat_c, _len, diff: passed straight to the
                embedding.
            t_gt: optional treatment value handed to the encoder and decoder.
            is_MAP: if True, use the latent mean directly without sampling.

        Returns:
            ``(x, x_reconstructed, (enc_yd_pred, enc_t), (dec_yd_pred, dec_t),
            (z_mu, z_logvar))`` where ``enc_t`` / ``dec_t`` stack the treatment
            prediction with a zero tensor of the same shape along dim 1.
        """
        x = self.embedding(cont_p, cont_c, cat_p, cat_c, _len, diff)
        z_mu, z_logvar, enc_yd_pred, enc_t_pred = self.encoder(x, t_gt)

        # Sample z using the reparametrization trick
        if is_MAP:
            z=z_mu
        elif self.is_variational:
            z = models.reparametrize(z_mu, z_logvar)
        else:
            # logvar = -100 makes the sampled noise negligible (deterministic z).
            # full_like already allocates on z_mu's device, so no .cuda() is
            # needed -- and dropping it lets the model run on CPU too.
            z_logvar = torch.full_like(z_mu, -100.0)
            z = models.reparametrize(z_mu, z_logvar)

        # Decode z to get the reconstruction of x
        dec_t_pred, dec_yd_pred, x_reconstructed = self.decoder(z, t_gt)

        # pad the single treatment prediction with a zero second treatment
        enc_t = torch.stack([enc_t_pred, torch.zeros_like(enc_t_pred)], dim=1)
        dec_t = torch.stack([dec_t_pred, torch.zeros_like(dec_t_pred)], dim=1)
        return x, x_reconstructed, (enc_yd_pred, enc_t), (dec_yd_pred, dec_t), (z_mu, z_logvar)
    
    
class iTransformer(nn.Module):
    """Encoder-only iTransformer-style regressor returning one (y, d) pair per cluster.

    Args:
        input_size: embedding width produced by the table embedding.
        hidden_size: model width used by the encoder layers and the projector.
        output_size: number of predicted targets.
        num_layers: number of encoder layers.
        num_heads: attention heads per layer.
        drop_out: dropout used in attention and in the encoder layers.
    """
    def __init__(self, input_size=128, hidden_size=128, output_size=2, num_layers=3, num_heads=8, drop_out=0.0):
        super(iTransformer, self).__init__()
        self.is_synthetic = False
        self.max_len = 124 # hard-coding (seq_len)

        # NOTE(review): the recorded run of this notebook failed to load the
        # iTransformer checkpoint because embedding.cont_c_NN.0.weight is
        # [32, 1] in the checkpoint but [32, 2] in this model; the embedding
        # arguments here probably need to match the training configuration.
        self.embedding = models.TableEmbedding_iTrans(output_size=input_size, disable_pe=True, use_treatment=True)

        # Encoder-only architecture
        self.encoder = models.Encoder_iTrans(
            [
                models.EncoderLayer_iTrans(
                    models.AttentionLayer_iTrans(
                        models.FullAttention(False, attention_dropout=drop_out), hidden_size, num_heads),
                    hidden_size,
                    dropout=drop_out,
                ) for _ in range(num_layers)
            ],
            norm_layer=torch.nn.LayerNorm(hidden_size)
        )
        self.projector = nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, cont_p, cont_c, cat_p, cat_c, val_len, diff_days):
        """Predict the targets of every cluster in the batch.

        Returns:
            Tensor of shape (B, output_size): the projection taken at the last
            sequence position. The batch dim is kept even when B == 1.
        """
        # Embedding
        # B L N -> B N E                (B L N -> B L E in the vanilla Transformer)
        (embedded, diff_days, _), _ = self.embedding(cont_p, cont_c, cat_p, cat_c, val_len, diff_days)  # (B, L, E) == (B, N, E)

        B, L, E = embedded.shape
        N = L
        # B: batch_size;    E: d_model;
        # L: seq_len;       S: pred_len;
        # N: == L

        # B N E -> B N E                (B L E -> B L E in the vanilla Transformer)
        enc_out = self.encoder(embedded, attn_mask=None)

        # B N E -> B N S -> B S N
        dec_out = self.projector(enc_out).permute(0, 2, 1)[:, :, :N] # filter the covariates # (B, 2, L) == (B, 2, N)
        # Drop only the trailing length-1 axis. A bare .squeeze() would also
        # drop the batch axis for a single-cluster batch and return shape (2,).
        return dec_out[:,:,-1:].squeeze(-1)

In [53]:
@torch.no_grad()
def test(model_name, data, model, scaling, a_y, b_y, a_d, b_d, use_treatment=False, MC_sample=1):
    """Evaluate ``model`` on one batch and return summed MAE / squared-error metrics.

    Args:
        model_name: 'cet', 'cevae' or 'iTransformer'; only read when
            ``use_treatment`` is True.
        data: one batch, unpacked by ``utils.data_load``.
        model: the network to evaluate (switched to eval mode here).
        scaling, a_y, b_y, a_d, b_d: forwarded to ``utils.reverse_scaling`` to
            map predictions and labels back before computing the errors.
        use_treatment: if True the batch carries a ground-truth treatment
            (first extra element returned by ``utils.data_load``).
        MC_sample: number of forward passes averaged for 'cet' / 'cevae'.

    Returns:
        * ``(mae_d, mae_y, rmse_d, rmse_y, batch_num, out, y)`` -- errors are
          Python floats summed over the batch; the "rmse" values are summed
          squared errors (no mean, no square root). ``out`` is the model's
          (y, d) prediction before reverse scaling.
        * for 'cet' / 'cevae' with ``use_treatment``: the same tuple followed
          by ``loss_t1, loss_t2``. The encoder or decoder head with the lower
          summed y+d MAE is the one reported.
        * ``(0, batch_num, out, y)`` if the total MAE or squared error is NaN.
          NOTE: this tuple is shorter than the others; callers must check.

    Raises:
        ValueError: if ``use_treatment`` is True and ``model_name`` is not one
            of the supported names.
    """
    criterion_mae = nn.L1Loss(reduction="sum")
    criterion_rmse = nn.MSELoss(reduction="sum")

    model.eval()

    # `val_len` rather than `len`, so the builtin is not shadowed
    batch_num, cont_p, cont_c, cat_p, cat_c, val_len, y, diff_days, *rest = utils.data_load(data)

    if use_treatment:
        gt_t = rest[0]
        if model_name=='cet' or model_name=='cevae':
            # running sums of (x, x_reconstructed, enc_yd_pred, enc_t_pred, dec_yd_pred, dec_t_pred)
            accumulated_outputs = [0] * 6
            for _ in range(MC_sample):
                out = model(cont_p, cont_c, cat_p, cat_c, val_len, diff_days)
                x, x_reconstructed, (enc_yd_pred, enc_t_pred), (dec_yd_pred, dec_t_pred), _ = out

                # accumulate predictions
                outputs = [x, x_reconstructed, enc_yd_pred, enc_t_pred, dec_yd_pred, dec_t_pred]
                accumulated_outputs = [accumulated + output for accumulated, output in zip(accumulated_outputs, outputs)]

            # calculate average
            avg_outputs = [accumulated / MC_sample for accumulated in accumulated_outputs]
            x, x_reconstructed, enc_yd_pred, enc_t_pred, dec_yd_pred, dec_t_pred = avg_outputs

            # enc loss
            enc_pred_y, enc_pred_d, gt_y, gt_d = utils.reverse_scaling(scaling, enc_yd_pred, y, a_y, b_y, a_d, b_d)
            enc_loss_y = criterion_mae(enc_pred_y, gt_y)
            enc_loss_d = criterion_mae(enc_pred_d, gt_d)
            if not model_name=='cet':
                # single-treatment model: column 0 is compared with the whole gt_t
                enc_loss_t2 = criterion_mae(enc_t_pred[:,0].squeeze(), gt_t)
                enc_loss_t1 = torch.zeros_like(enc_loss_t2)
            else:
                enc_loss_t1 = criterion_mae(enc_t_pred[:,0].squeeze(), gt_t[:,0])
                enc_loss_t2 = criterion_mae(enc_t_pred[:,1].squeeze(), gt_t[:,1])

            # dec loss
            dec_pred_y, dec_pred_d, gt_y, gt_d = utils.reverse_scaling(scaling, dec_yd_pred, y, a_y, b_y, a_d, b_d)
            dec_loss_y = criterion_mae(dec_pred_y, gt_y)
            dec_loss_d = criterion_mae(dec_pred_d, gt_d)
            if not model_name=='cet':
                dec_loss_t2 = criterion_mae(dec_t_pred[:,0].squeeze(), gt_t)
                dec_loss_t1 = torch.zeros_like(dec_loss_t2)
            else:
                dec_loss_t1 = criterion_mae(dec_t_pred[:,0].squeeze(), gt_t[:,0])
                dec_loss_t2 = criterion_mae(dec_t_pred[:,1].squeeze(), gt_t[:,1])

            # report whichever head (encoder / decoder) has the lower y+d MAE
            if enc_loss_y + enc_loss_d > dec_loss_y + dec_loss_d:
                mae_y, mae_d, loss_t1, loss_t2 = dec_loss_y, dec_loss_d, dec_loss_t1, dec_loss_t2
                rmse_y, rmse_d = criterion_rmse(dec_pred_y, gt_y), criterion_rmse(dec_pred_d, gt_d)
                out = dec_yd_pred
            else:
                mae_y, mae_d, loss_t1, loss_t2 = enc_loss_y, enc_loss_d, enc_loss_t1, enc_loss_t2
                rmse_y, rmse_d = criterion_rmse(enc_pred_y, gt_y), criterion_rmse(enc_pred_d, gt_d)
                out = enc_yd_pred
            mae = mae_y + mae_d
            rmse = rmse_y + rmse_d
        elif model_name == 'iTransformer':
            yd_pred = model(cont_p, cont_c, cat_p, cat_c, val_len, diff_days)

            pred_y, pred_d, gt_y, gt_d = utils.reverse_scaling(scaling, yd_pred, y, a_y, b_y, a_d, b_d)

            # MAE
            mae_y = criterion_mae(pred_y, gt_y)
            mae_d = criterion_mae(pred_d, gt_d)
            mae = mae_y + mae_d

            # RMSE
            rmse_y = criterion_rmse(pred_y, gt_y)
            rmse_d = criterion_rmse(pred_d, gt_d)
            rmse = rmse_y + rmse_d

            if not torch.isnan(mae) and not torch.isnan(rmse):
                return mae_d.item(), mae_y.item(), rmse_d.item(), rmse_y.item(), batch_num, yd_pred, y
            else:
                return 0, batch_num, yd_pred, y
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `mae`; fail with a clear message instead.
            raise ValueError(f"unsupported model_name for use_treatment=True: {model_name!r}")
    else:
        out = model(cont_p, cont_c, cat_p, cat_c, val_len, diff_days)
        # a single-sample batch may come back as shape (2,); restore the batch dim
        if out.shape == torch.Size([2]):
            out = out.unsqueeze(0)
        pred_y, pred_d, gt_y, gt_d = utils.reverse_scaling(scaling, out, y, a_y, b_y, a_d, b_d)
        # MAE
        mae_y = criterion_mae(pred_y, gt_y)
        mae_d = criterion_mae(pred_d, gt_d)
        mae = mae_y + mae_d

        # RMSE
        rmse_y = criterion_rmse(pred_y, gt_y)
        rmse_d = criterion_rmse(pred_d, gt_d)
        rmse = rmse_y + rmse_d

    if not torch.isnan(mae) and not torch.isnan(rmse):
        if use_treatment:
            return mae_d.item(), mae_y.item(), rmse_d.item(), rmse_y.item(), batch_num, out, y, loss_t1, loss_t2
        else:
            return mae_d.item(), mae_y.item(), rmse_d.item(), rmse_y.item(), batch_num, out, y
    else:
        return 0, batch_num, out, y

In [54]:
import torch
import os
import pandas as pd
import numpy as np

# Run on the GPU when one is present; models and inputs must share a device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialise the models
cet = CETransformer()
cevae = CEVAE()
itrans = iTransformer()

# Load the trained weights
model_path = "/data1/bubble3jh/cluster-regression/best_model_errorcase/seed_1000"
# test() dispatches on the short names 'cet' / 'cevae' / 'iTransformer', so
# each model INSTANCE is kept next to the name test() expects.
eval_models = [
    ('cet', cet, "best_cet-adam-0.01-0.01-0.1-1000-date0_best_val.pt"),
    ('cevae', cevae, "best_cevae-adam-0.01-0.0001-0.1-1000-date0_best_val.pt"),
    ('iTransformer', itrans, "iTransformer-adam-0.001-0.0001-date0_best_val.pt"),
]
# NOTE(review): the recorded run of this cell stopped at load_state_dict for
# iTransformer (embedding.cont_c_NN.0.weight is [32, 1] in the checkpoint but
# [32, 2] in the model). The iTransformer embedding has to be built with the
# configuration used for training; that cannot be resolved from this notebook.
# NOTE(review): `dataset` above is built with the 'cevae' layout, whose
# treatment is a single value per cluster, while test() indexes gt_t[:, 0] and
# gt_t[:, 1] for 'cet' -- confirm each model is fed a matching dataset.
for _, model, ckpt_name in eval_models:
    # torch.load unpickles the file: only load checkpoints from a trusted source.
    checkpoint = torch.load(os.path.join(model_path, ckpt_name), map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)

# Collected per-sample results
results = []

# Run the evaluation
label_scaler = test_dataset.dataset  # holds the min/max used to scale y and d
for itr, batch_data in enumerate(test_dataloader):
    for model_name, model, _ in eval_models:
        outputs = test(
            model_name, batch_data, model, 'minmax', label_scaler.a_y, label_scaler.b_y,
            label_scaler.a_d, label_scaler.b_d, use_treatment=not model_name=='iTransformer', MC_sample=1
        )
        if len(outputs) == 4:
            # test() returns the short tuple (0, batch_num, out, y) when a metric is NaN
            print(f"skipping {model_name} on batch {itr}: NaN metric")
            continue
        te_num_data, te_predicted, te_ground_truth = outputs[4:7]

        # test() only returns errors summed over the batch, so the per-sample
        # errors are recomputed here from the returned predictions.
        # assumes reverse_scaling yields one value per sample -- TODO confirm
        pred_y, pred_d, gt_y, gt_d = utils.reverse_scaling(
            'minmax', te_predicted, te_ground_truth, label_scaler.a_y, label_scaler.b_y,
            label_scaler.a_d, label_scaler.b_d
        )
        pred_y, pred_d, gt_y, gt_d = (t.reshape(-1).cpu() for t in (pred_y, pred_d, gt_y, gt_d))
        err_y = pred_y - gt_y
        err_d = pred_d - gt_d

        # Store one record per sample
        for i in range(te_num_data):
            results.append({
                'model': model.__class__.__name__,
                'data_index': itr * len(test_dataset) + i,
                'predicted_y': pred_y[i].item(),
                'ground_truth_y': gt_y[i].item(),
                'mae_loss_y': err_y[i].abs().item(),
                'mse_loss_y': (err_y[i] ** 2).item(),
                'mae_loss_d': err_d[i].abs().item(),
                'mse_loss_d': (err_d[i] ** 2).item(),
                # store further per-sample information here if needed
            })

# Convert the results to a pandas DataFrame
df = pd.DataFrame(results)

# Combine the y and d losses into one overall score
def combined_loss(row):
    """Mean of the y and d absolute errors (works on a row or a whole frame)."""
    return (row['mae_loss_y'] + row['mae_loss_d']) / 2  # simple example: average of the two MAEs

# Computed once on the full frame, before slicing, so no slice is assigned into.
df['combined_loss'] = combined_loss(df)

# Split the results per model
df_cet = df[df['model'] == 'CETransformer'].copy()
df_cevae = df[df['model'] == 'CEVAE'].copy()
df_itransformer = df[df['model'] == 'iTransformer'].copy()

# Performance threshold (losses below a model's 25th percentile count as "good")
threshold_cet = np.percentile(df_cet['combined_loss'], 25)
threshold_cevae = np.percentile(df_cevae['combined_loss'], 25)
threshold_itransformer = np.percentile(df_itransformer['combined_loss'], 25)

# One row per sample, one column per model. The per-model frames have
# disjoint row indexes, so their boolean masks cannot be combined directly;
# aligning on data_index makes the comparison sample-by-sample.
loss_by_model = df.pivot(index='data_index', columns='model', values='combined_loss')
good_cet = loss_by_model['CETransformer'] < threshold_cet
good_cevae = loss_by_model['CEVAE'] < threshold_cevae
good_itransformer = loss_by_model['iTransformer'] < threshold_itransformer
bad_cevae = loss_by_model['CEVAE'] >= threshold_cevae
bad_itransformer = loss_by_model['iTransformer'] >= threshold_itransformer

# Cases where CET and CEVAE do well but iTransformer does not
ids_bad_itransformer = loss_by_model.index[good_cet & good_cevae & bad_itransformer]
good_cet_cevae_bad_itransformer = df_cet[df_cet['data_index'].isin(ids_bad_itransformer)]

# Cases where CET and iTransformer do well but CEVAE does not
ids_bad_cevae = loss_by_model.index[good_cet & good_itransformer & bad_cevae]
good_cet_itransformer_bad_cevae = df_cet[df_cet['data_index'].isin(ids_bad_cevae)]

print("CET와 CEVAE에서 좋고 iTransformer에서 나쁜 케이스 수:", len(good_cet_cevae_bad_itransformer))
print("CET와 iTransformer에서 좋고 CEVAE에서 나쁜 케이스 수:", len(good_cet_itransformer_bad_cevae))

# Save the selected cases
good_cet_cevae_bad_itransformer.to_csv('good_cet_cevae_bad_itransformer.csv', index=False)
good_cet_itransformer_bad_cevae.to_csv('good_cet_itransformer_bad_cevae.csv', index=False)

determinant z 
maxpool applied
Embedding applied to data
Embedding applied to data


RuntimeError: Error(s) in loading state_dict for iTransformer:
	size mismatch for embedding.cont_c_NN.0.weight: copying a param with shape torch.Size([32, 1]) from checkpoint, the shape in current model is torch.Size([32, 2]).