In [1]:
"""
This is the code for real data analysis with one dimensional response
"""
import sys
import os
# Make the parent of the working directory importable so that the
# project-local packages used by this notebook can be imported.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# In case the relative lookup above does not work, append the absolute path of
# the project root instead, e.g.:
#     sys.path.append("/absolute/path/to/project_root")

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau 
from sklearn.model_selection import train_test_split

In [2]:
from utils.basic_utils import setup_seed, get_dimension
from models.generator import generator_fnn
from models.discriminator import discriminator_fnn
from utils.training_utils import train_WGR_fnn
from utils.evaluation_utils import eva_G_UniY

In [3]:
import argparse

if 'ipykernel_launcher.py' in sys.argv[0]:  #if not work in jupyter, you can delete this part
    import sys
    sys.argv = [sys.argv[0]] 


parser = argparse.ArgumentParser(description='Implementation of WGR for dataset with one dimensional response Y')

parser.add_argument('--Xdim', default=100, type=int, help='dimensionality of X')
parser.add_argument('--Ydim', default=1, type=int, help='dimensionality of Y')

parser.add_argument('--noise_dim', default=55, type=int, help='dimensionality of noise vector')
parser.add_argument('--noise_dist', default='gaussian', type=str, help='distribution of noise vector')

parser.add_argument('--train', default=40000, type=int, help='size of train dataset')
parser.add_argument('--val', default=3500, type=int, help='size of validation dataset')
parser.add_argument('--test', default=10000, type=int, help='size of test dataset')

parser.add_argument('--train_batch', default=64, type=int, metavar='BS', help='batch size while training')
parser.add_argument('--val_batch', default=100, type=int, metavar='BS', help='batch size while validation')
parser.add_argument('--test_batch', default=100, type=int, metavar='BS', help='batch size while testing')
parser.add_argument('--epochs', default=50, type=int, help='number of epochs to train')

args = parser.parse_args()

print(args)

Namespace(Xdim=100, Ydim=1, noise_dim=55, noise_dist='gaussian', train=40000, val=3500, test=10000, train_batch=64, val_batch=100, test_batch=100, epochs=50)


In [4]:
# Load the CT dataset and discard its first column
# (presumably a saved row index -- confirm against the CSV).
all_CT = pd.read_csv("../data/CT.csv").iloc[:, 1:]


In [5]:
# Seed first: neither split below passes random_state, so (per scikit-learn)
# both draw from NumPy's global random state.
# NOTE(review): presumably setup_seed seeds NumPy's global RNG -- confirm in utils.basic_utils.
setup_seed(1234)  
# Split the data into training, validation and test sets. The order of the two
# calls matters because both consume the same global random stream.
train_val_data, test_data = train_test_split(all_CT, test_size=args.test)  # integer test_size: hold out args.test rows for testing
train_data, val_data = train_test_split(train_val_data, test_size=args.val)  # then args.val of the remaining rows for validation

In [6]:
def _features_and_target(frame):
    """Return (features, target) float32 tensors for a DataFrame.

    Features are every column except the last; the target is the last column.
    """
    values = frame.values
    return (
        torch.tensor(values[:, :-1], dtype=torch.float32),
        torch.tensor(values[:, -1], dtype=torch.float32),
    )


# Turn each pandas split into PyTorch tensors.
X_train, y_train = _features_and_target(train_data)
X_val, y_val = _features_and_target(val_data)
X_test, y_test = _features_and_target(test_data)

In [7]:
# Overwrite the parsed Xdim / Ydim with the dimensions derived from the
# training tensors, then show them.
args.Xdim, args.Ydim = get_dimension(X_train), get_dimension(y_train)
print(args.Xdim, args.Ydim)

384 1


In [8]:
# Pair each feature tensor with its response tensor in a TensorDataset.
train_dataset, val_dataset, test_dataset = (
    TensorDataset(features, response)
    for features, response in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

In [9]:
# Wrap the datasets in mini-batch loaders. The training and validation loaders
# shuffle; the test loader does not.
loader_train = DataLoader(
    train_dataset,
    batch_size=args.train_batch,
    shuffle=True,
)
loader_val = DataLoader(
    val_dataset,
    batch_size=args.val_batch,
    shuffle=True,
)
loader_test = DataLoader(
    test_dataset,
    batch_size=args.test_batch,
    shuffle=False,
)

In [10]:

# Build the generator and the discriminator; the discriminator's input width
# is Xdim + Ydim.
G_net = generator_fnn(
    Xdim=args.Xdim,
    Ydim=args.Ydim,
    noise_dim=args.noise_dim,
    hidden_dims=[128, 64],
)
D_net = discriminator_fnn(
    input_dim=args.Xdim + args.Ydim,
    hidden_dims=[128, 64],
)

# Both networks are optimized with Adam using identical settings.
_adam_kwargs = dict(lr=0.001, betas=(0.9, 0.999))
D_solver = optim.Adam(D_net.parameters(), **_adam_kwargs)
G_solver = optim.Adam(G_net.parameters(), **_adam_kwargs)

In [11]:
# Train the generator / discriminator pair with train_WGR_fnn.
# The epoch count is taken from the parsed --epochs option (default 50) rather
# than a hard-coded literal, so the command-line setting is actually honoured.
trained_G, trained_D = train_WGR_fnn(
    D=D_net,
    G=G_net,
    D_solver=D_solver,
    G_solver=G_solver,
    loader_train=loader_train,
    loader_val=loader_val,
    noise_dim=args.noise_dim,
    Xdim=args.Xdim,
    Ydim=args.Ydim,
    J_size=100,
    lambda_w=0.1,
    lambda_l=0.9,
    batch_size=args.train_batch,
    save_path='./',
    start_eva=2000,
    eva_iter=100,
    model_type='CT',
    device='cpu',
    num_epochs=args.epochs,
)

Mean L1 Loss: 47.040485, Mean L2 Loss: 2712.421387
Epoch 0 - D Loss: -0.6777, G Loss: 224.9659
Epoch 1 - D Loss: -0.0735, G Loss: 28.6452
Epoch 2 - D Loss: -0.0640, G Loss: 14.3801
Mean L1 Loss: 2.478719, Mean L2 Loss: 12.523778
Epoch 3, Iter 2000, D Loss: -0.0607, G Loss: 8.4871, L1: 2.4787, L2: 12.5238
Saved best model with L2: 12.5238
Mean L1 Loss: 2.469465, Mean L2 Loss: 12.204628
Epoch 3, Iter 2100, D Loss: -0.0558, G Loss: 8.3594, L1: 2.4695, L2: 12.2046
Saved best model with L2: 12.2046
Mean L1 Loss: 2.234936, Mean L2 Loss: 10.218590
Epoch 3, Iter 2200, D Loss: -0.0554, G Loss: 8.0822, L1: 2.2349, L2: 10.2186
Saved best model with L2: 10.2186
Mean L1 Loss: 2.183106, Mean L2 Loss: 9.822016
Epoch 3, Iter 2300, D Loss: -0.0519, G Loss: 7.9448, L1: 2.1831, L2: 9.8220
Saved best model with L2: 9.8220
Mean L1 Loss: 2.068280, Mean L2 Loss: 8.714594
Epoch 3, Iter 2400, D Loss: -0.0519, G Loss: 7.6567, L1: 2.0683, L2: 8.7146
Saved best model with L2: 8.7146
Mean L1 Loss: 1.997804, Mean L

In [12]:
# Evaluate the generator on the held-out test split.
# NOTE(review): this passes G_net rather than the trained_G returned by
# train_WGR_fnn -- confirm both refer to the model that should be evaluated.
CT_numerical_Results = eva_G_UniY(
    G=G_net,
    x=X_test,
    y=y_test,
    noise_dim=args.noise_dim,
    test_size=args.test,
    J_t_size=500,
)

L1 Loss: 0.4530401825904846
L2 Loss: 0.4241640567779541
CP: 0.9598000049591064
PI length: 2.444796085357666
std of LBE: 0.3187739849090576
std of UBE: 0.3126869201660156
