In [14]:
import torch
import torch.nn.functional as F
from ours.encoder import TransformerEncoder, RNNEncoder, TransformerEncoderVAE
from ours.decoder import TransformerDecoder, RNNDecoder

In [15]:
# encoder = TransformerEncoderVAE(num_encoder_layers=2,
#                                 nhead=8,
#                                 vocab_size=7,
#                                 embedding_size=64,
#                                 dropout=0.1,
#                                 activation='relu',
#                                 dim_feedforward=256,
#                                 batch_first=True,
#                                 mlp_layers=1,
#                                 mlp_hidden_size=200,
#                                 mlp_dropout=0)
# encoder = RNNEncoder(layers=2, vocab_size=7, hidden_size=64, dropout=0.0, mlp_layers=3, mlp_hidden_size=200, mlp_dropout=0.0)
# decoder = RNNDecoder(layers=2, vocab_size=7, hidden_size=64, dropout=0.0, length=6, gpu=0)
# Build the Transformer encoder used for this smoke test. All sizes are small,
# hand-picked values; keywords are grouped as: vocabulary/embedding, transformer
# stack, regularisation, then the mlp_* settings.
encoder = TransformerEncoder(
    vocab_size=7, embedding_size=64,
    num_encoder_layers=2, nhead=8, dim_feedforward=256,
    activation='relu', dropout=0.1, batch_first=True,
    mlp_layers=3, mlp_hidden_size=200, mlp_dropout=0,
)
# Matching decoder: same vocabulary, embedding and transformer sizes as the
# encoder. `length=6` equals the sequence length of the toy batch defined below
# and `gpu=0` is the same device index the tensors are later moved to.
decoder = TransformerDecoder(
    vocab_size=7, embedding_size=64,
    num_decoder_layers=2, nhead=8, dim_feedforward=256,
    activation='relu', dropout=0.1, batch_first=True,
    length=6, gpu=0,
)

In [16]:
# Toy batch: two token sequences of length 6 plus one float score per sequence.
encoder_input = torch.tensor(
    [
        [0, 1, 2, 3, 5, 5],
        [1, 2, 3, 4, 5, 5],
    ],
    dtype=torch.long,
)
encoder_target = torch.tensor([[0.91], [0.94]], dtype=torch.float32)

# Decoder input = encoder sequence shifted right by one position with token 5
# prepended (presumably the start/padding id — confirm against the vocabulary).
decoder_input = torch.cat(
    (
        torch.full((encoder_input.size(0), 1), 5, dtype=torch.long),
        encoder_input[:, :-1],
    ),
    dim=1,
)
# Decoder target carries the same token ids as the encoder input (independent copy).
decoder_target = encoder_input.clone()

In [17]:
# Move the toy batch and both models onto GPU 0 (the decoder above is also
# constructed with gpu=0, so the index is kept fixed here).
device = torch.device("cuda", 0)

encoder_input = encoder_input.to(device)
# The regression target is plain label data: it is moved as-is, without
# requires_grad_(), so autograd does not track gradients with respect to labels.
encoder_target = encoder_target.to(device)
decoder_input = decoder_input.to(device)
decoder_target = decoder_target.to(device)

encoder = encoder.to(device)
decoder = decoder.to(device)

In [18]:
# One Adam optimizer per model, built with identical hyper-parameters
# (learning rate 0.001, no weight decay); encoder first, then decoder.
encoder_optimizer, decoder_optimizer = (
    torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0)
    for model in (encoder, decoder)
)

In [46]:
# Joint training on the toy batch: MSE on the encoder's predicted value plus
# NLL on the decoder output, mixed with fixed weights.
NUM_EPOCHS = 1000
LOG_EVERY = 100          # print every N epochs instead of flooding the cell with 1000 lines
MSE_WEIGHT, NLL_WEIGHT = 0.8, 0.2
MAX_GRAD_NORM = 5.0

for epoch in range(NUM_EPOCHS):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # Call the module itself rather than .forward so any registered hooks run.
    encoder_outputs, encoder_hidden, feat_emb, predict_value, mu, logvar = encoder(encoder_input)
    decoder_outputs = decoder.forward_train_valid(decoder_input, encoder_outputs)

    # kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    loss_1 = F.mse_loss(predict_value.squeeze(), encoder_target.squeeze())
    # Flatten to (tokens, vocab) vs (tokens,) for the per-token loss.
    # nll_loss expects log-probabilities — assumes forward_train_valid returns them; confirm.
    loss_2 = F.nll_loss(
        decoder_outputs.reshape(-1, decoder_outputs.size(-1)),
        decoder_target.reshape(-1),
    )
    loss = MSE_WEIGHT * loss_1 + NLL_WEIGHT * loss_2
    loss.backward()

    # Clip each model's gradient norm separately before stepping.
    torch.nn.utils.clip_grad_norm_(encoder.parameters(), MAX_GRAD_NORM)
    torch.nn.utils.clip_grad_norm_(decoder.parameters(), MAX_GRAD_NORM)
    encoder_optimizer.step()
    decoder_optimizer.step()

    # .item() yields plain Python floats (no device/tensor repr in the log).
    if epoch % LOG_EVERY == 0 or epoch == NUM_EPOCHS - 1:
        print(f"epoch {epoch:4d}  loss={loss.item():.6f}  mse={loss_1.item():.6f}  nll={loss_2.item():.6f}")


TypeError: generate_square_subsequent_mask() missing 1 required positional argument: 'sz'

In [None]:
# Run one forward pass through encoder and decoder with the current weights.
encoder_outputs, encoder_hidden, feat_emb, predict_value, mu, logvar = encoder.forward(encoder_input)
decoder_outputs = decoder.forward_train_valid(decoder_input, encoder_outputs)
# torch.max along dim 2 returns (values, indices); only the indices are kept,
# then laid out with the same first two dimensions as the encoder input.
_, feat = torch.max(decoder_outputs, dim=2, keepdim=True)
feat = feat.reshape(*encoder_input.shape[:2])

In [None]:
# Display the predicted value returned by the most recent encoder forward pass.
predict_value

In [None]:
# Seed step-wise decoding with a single column of token 5 — the same id that
# leads every row of decoder_input. The column is sized from the batch rather
# than hard-coded to two rows, and is created directly on the batch's device
# instead of going through a fixed .cuda(0).
input_id = torch.full(
    (encoder_input.size(0), 1),
    5,
    dtype=torch.long,
    device=encoder_input.device,
)

In [None]:
# Step-wise decoding: query the decoder with the current prefix, show what it
# returned, then append the newest (last) column to the prefix for the next step.
for step in range(decoder.length):
    output_id = decoder.forward_step(encoder_outputs, input_id)
    print(output_id)
    newest_column = output_id[:, -1].reshape(-1, 1)
    input_id = torch.cat([input_id, newest_column], dim=1)

In [None]:
# Display the decoder output from the final iteration of the decoding loop.
output_id

In [None]:
# NOTE(review): `out` is not assigned in any visible cell — this line depends on
# leftover kernel state and will raise NameError on a fresh run; confirm its source.
# Flatten `out` to rows of width decoder.embedding_size, project through
# decoder.out, and take the log-softmax across dim 1.
flat_out = out.contiguous().view(-1, decoder.embedding_size)
predict_softmax = F.log_softmax(decoder.out(flat_out), dim=1)

In [None]:
# Keep only the index of the largest log-probability in each row (dim 1),
# retaining the reduced dimension so the result is a column.
_, next_input_id = torch.max(predict_softmax, dim=1, keepdim=True)

In [None]:
# Display the selected ids as a 2 x 5 grid.
# NOTE(review): the shape is hard-coded; the decoder above is built with
# length=6, so confirm that 5 columns is what is intended here.
next_input_id.reshape(2, 5)

In [None]:
# Collect the decoder's output at every step, feeding each output straight back
# in as the next input.
# NOTE(review): input_id is not reset in this cell, so it starts from whatever
# value earlier cells left behind — confirm that is intended.
infer_res = []
for step in range(decoder.length):
    input_id = decoder.forward_step(encoder_outputs, input_id)
    infer_res += [input_id]

In [None]:
# Join the per-step outputs side by side along dim 1.
new_gen = torch.cat(tuple(infer_res), dim=1)

In [None]:
# Drop size-1 dimensions and copy the generated ids into a fresh Python list.
new_gen_list = list(new_gen.data.squeeze().tolist())

In [None]:
# Display the generated ids as a plain Python list.
new_gen_list

In [None]:
import feature_env
import torch
import pickle

In [None]:
# Select which dataset / experiment run to inspect.
data_name = "german_credit"
exp_name = "transformerVae_eta_0.01"
# Load the saved `generated_choice_<exp_name>.pt` object for that dataset.
# NOTE(review): absolute, user-specific path — this cell only runs on the
# original author's machine; torch.load also unpickles, so only load trusted files.
choice = torch.load(
    f"/home/dwangyang/NIPS2023/IJCAI-AutoFS/data/history/{data_name}/generated_choice_{exp_name}.pt"
)

In [None]:
# Score every row of `choice` with a FeatureEvaluator (one evaluator per seed)
# and record (score, row, row index, seed) for later ranking.
results = []
for init_seed in range(1):
    fe = feature_env.FeatureEvaluator(data_name)
    for i, candidate in enumerate(choice):
        res = fe.report_performance(candidate, store=False, flag='train', init_seed=init_seed)
        results.append((res, candidate, i, init_seed))

INFO:  initialize the train and test dataset
INFO:  Pre on original is: 0.7304, Pre on generated is: 0.7343
INFO:  Rec on original is: 0.7312, Rec on generated is: 0.7438
INFO:  F-1 on original is: 0.6927, F-1 on generated is: 0.7352
INFO:  ROC/AUC on original is: 0.6229, ROC/AUC on generated is: 0.6846
INFO:  Pre on original is: 0.7304, Pre on generated is: 0.7072
INFO:  Rec on original is: 0.7312, Rec on generated is: 0.7188
INFO:  F-1 on original is: 0.6927, F-1 on generated is: 0.7093
INFO:  ROC/AUC on original is: 0.6229, ROC/AUC on generated is: 0.6564
INFO:  Pre on original is: 0.7304, Pre on generated is: 0.7072
INFO:  Rec on original is: 0.7312, Rec on generated is: 0.7188
INFO:  F-1 on original is: 0.6927, F-1 on generated is: 0.7093
INFO:  ROC/AUC on original is: 0.6229, ROC/AUC on generated is: 0.6564


In [None]:
# Rank the evaluated entries by their first field, highest first. The sort runs
# on a copy, so `results` keeps its original order; ties keep insertion order.
sorted_results = list(results)
sorted_results.sort(key=lambda entry: entry[0], reverse=True)
sorted_results

[(0.7351783625730995,
  0.6846235231881502,
  tensor([1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]),
  0,
  0),
 (0.7093421052631579,
  0.656409804267325,
  tensor([1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]),
  1,
  0),
 (0.7093421052631579,
  0.656409804267325,
  tensor([1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.]),
  2,
  0)]

In [None]:
# Write the train and test splits generated from best_res[1] into one HDF file,
# one key per split.
# NOTE(review): `best_res` is not assigned in any visible cell — presumably it is
# the top entry of sorted_results; confirm before relying on a fresh-kernel run.
for split in ('train', 'test'):
    fe.generate_data(best_res[1], split).to_hdf(f'../data/history/{fe.task_name}/best-ours-test.hdf', key=split)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib import cm
# Importing Axes3D registers the '3d' projection on older Matplotlib releases.
from mpl_toolkits.mplot3d import Axes3D, axes3d

# Decorative wireframe of Matplotlib's bundled 3-D test surface, rendered with
# no axes, ticks or labels.
fig = plt.figure(figsize=(10, 8), dpi=600)
# Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and later removed;
# add_subplot is the supported way to create 3-D axes.
ax = fig.add_subplot(111, projection='3d')

# Alternative surface, kept for reference (needs an X, Y meshgrid):
# Z = np.exp(-(X-1)**2/2 - (Y-1)**2/2) * (2 - X**2/2 - Y**2/4)
X, Y, Z = axes3d.get_test_data()

surf = ax.plot_wireframe(X, Y, Z, alpha=0.5)
# ax.contour(X, Y, Z, cmap=cm.Accent, linewidths=1.5)
surf.set_linewidth(1)

# Strip every axis decoration so only the wireframe remains.
ax.set_facecolor('white')
ax.set_axis_off()
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_zticklabels([])
ax.set_xlabel('')
ax.set_ylabel('')
ax.tick_params(colors='None')

# ax.view_init(30, 45)

plt.show()