# 1. Validation of the trained model (CASG)

In [1]:
import torch
import numpy as np
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
# from pytorch_lightning.metrics.functional import accuracy
from tqdm import tqdm
from Datasets import Task2Dataset
from Models.Task2 import Task2Net

# Run on the first CUDA device when one is visible, otherwise on the CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
batch_size = 2 ** 16          # DataLoader batch size (65536)
lr = 0.001                    # learning rate handed to the Adam optimizer
epoch = 3_000
scheduler_step = 300          # StepLR step_size
scheduler_decay = 0.9         # StepLR gamma

# Fraction of the dataset that is used as labels when resampling.
resample_rate = 0.35
resample_epoch = 10

save_folder = "./GCN_3L_CASG"
# ---------------------------------------------------------------------------

# Load the dataset, resample which items act as labels, and move both graphs
# onto the selected device.
dataset = Task2Dataset("Dataset/")
dataset.resample(resample_rate)
g = dataset.g.to(device)
train_g = dataset.train_g.to(device)

train_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
model = Task2Net(dataset.g.to(device)).to(device)

# create optimizer
# The optimizer/scheduler are rebuilt so that the optimizer state stored in
# the checkpoint can be restored alongside the model weights.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step, gamma=scheduler_decay)

# Our model
# map_location remaps the stored tensors onto `device`; without it a
# checkpoint saved from a CUDA run cannot be opened on a CPU-only machine,
# which defeats the CPU fallback chosen above.
checkpoint = torch.load('CASG_model_0.1437.tar', map_location=device)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
print(model)

ce_loss = nn.CrossEntropyLoss()

# Exhaust the training loader only so that `neg` (and the other unpacked
# names) remain bound to its final batch; `neg` is forwarded to the model
# below.
# NOTE(review): this is a full pass over the training data just to obtain one
# `neg` batch -- confirm whether the model really needs it in eval mode.
for itemset_id, query, pos, neg, query_items, lengths in train_dataloader:
    pass

max_query_len = 5   # query item lists are right-padded with 0 up to this length
top_k = 100         # number of candidates ranked per validation query

model.eval()
with torch.no_grad():
    valid_items = dataset.valid_itemset_items

    labels = []
    query_items = []
    lengths = []
    # One multi-hot row per validation itemset. Filling a preallocated 2-D
    # array avoids building a tensor from a Python list of ndarrays, which
    # PyTorch warns is extremely slow.
    batched_query = np.zeros((len(valid_items), dataset.n_items), dtype=np.float32)
    for row, (itemset_id, querys) in enumerate(valid_items.items()):
        batched_query[row, querys] = 1
        labels.append(dataset.valid_itemset_label[itemset_id])
        query_items.append(querys + [0] * (max_query_len - len(querys)))
        lengths.append(len(querys))

    # Built once and reused as both the model's positive input and the loss target.
    label_tensor = torch.tensor(labels).to(device)

    query_embeds, pos_embeds, _, logit = model(
        g,
        torch.from_numpy(batched_query).to(device),
        label_tensor,
        neg.to(device),
        torch.tensor(query_items).to(device),
        torch.tensor(lengths).to(device),
    )
    valid_loss = ce_loss(logit, label_tensor)

    # Indices of the top_k highest-scoring items for every query.
    scores = torch.topk(logit, top_k).indices.detach().cpu().numpy()

    submit = []          # "label,item_1,...,item_k" rows, only for queries that hit
    ranking_check = []   # 1-based rank of the label; top_k + 1 when it is missing
    for top100, label in zip(scores, labels):
        hit_positions = np.flatnonzero(top100 == label)
        if hit_positions.size == 0:
            ranking_check.append(top_k + 1)
            continue
        ranking_check.append(int(hit_positions[0]) + 1)
        submit.append(",".join(map(str, (label, *top100))))

    # A query counts as a hit exactly when its recorded rank is within top_k.
    acc = sum(rank <= top_k for rank in ranking_check) / len(scores)
    print(f"{acc:.4f}, {sum(ranking_check)/len(ranking_check):.4f}")

Task2Net(
  (feature_fc): Linear(in_features=11776, out_features=32, bias=True)
  (GConv1): GraphConv(in=32, out=32, normalization=both, activation=None)
  (GConv2): GraphConv(in=32, out=48, normalization=both, activation=None)
  (GConv3): GraphConv(in=48, out=64, normalization=both, activation=None)
  (dropout1): Dropout(p=0.6, inplace=False)
  (dropout2): Dropout(p=0.6, inplace=False)
  (dropout3): Dropout(p=0.6, inplace=False)
  (mix_lstm): LSTM(144, 144, proj_size=72, batch_first=True, bidirectional=True)
)


  query_embeds, pos_embeds, _, logit = model(g, torch.tensor(batched_query).to(device), torch.tensor(labels).to(device),


0.1437, 91.4207


# 2. Test Query - Answer Generation

In [None]:
import torch
import numpy as np
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
# from pytorch_lightning.metrics.functional import accuracy
from tqdm import tqdm
from Datasets import Task2Dataset
from Models.Task2 import Task2Net

# Run on the first CUDA device when one is visible, otherwise on the CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
batch_size = 2 ** 16          # DataLoader batch size (65536)
lr = 0.001                    # learning rate handed to the Adam optimizer
epoch = 3_000
scheduler_step = 300          # StepLR step_size
scheduler_decay = 0.9         # StepLR gamma

# Fraction of the dataset that is used as labels when resampling.
resample_rate = 0.35
resample_epoch = 10

save_folder = "./GCN_3L_CASG"
# ---------------------------------------------------------------------------

# Load the dataset, resample which items act as labels, and move both graphs
# onto the selected device.
dataset = Task2Dataset("Dataset/")
dataset.resample(resample_rate)
g = dataset.g.to(device)
train_g = dataset.train_g.to(device)

train_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
model = Task2Net(dataset.g.to(device)).to(device)

# create optimizer
# The optimizer/scheduler are rebuilt so that the optimizer state stored in
# the checkpoint can be restored alongside the model weights.
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step, gamma=scheduler_decay)

# Our model
# map_location remaps the stored tensors onto `device`; without it a
# checkpoint saved from a CUDA run cannot be opened on a CPU-only machine,
# which defeats the CPU fallback chosen above.
checkpoint = torch.load('CASG_model_0.1437.tar', map_location=device)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
print(model)

ce_loss = nn.CrossEntropyLoss()

# Exhaust the training loader only so that `neg` (and the other unpacked
# names) remain bound to its final batch; `neg` is forwarded to the model
# below.
# NOTE(review): this is a full pass over the training data just to obtain one
# `neg` batch -- confirm whether the model really needs it in eval mode.
for itemset_id, query, pos, neg, query_items, lengths in train_dataloader:
    pass

max_query_len = 5   # query item lists are right-padded with 0 up to this length
top_k = 100         # number of predicted items written per test itemset

model.eval()
with torch.no_grad():
    test_items = dataset.test_itemset_items

    # Test itemsets have no ground-truth label here; the itemset id is carried
    # in `labels` so it can be written as the first column of each output row.
    labels = list(test_items)
    query_items = []
    lengths = []
    # One multi-hot row per test itemset. Filling a preallocated 2-D array
    # avoids building a tensor from a Python list of ndarrays, which PyTorch
    # warns is extremely slow.
    batched_query = np.zeros((len(test_items), dataset.n_items), dtype=np.float32)
    for row, querys in enumerate(test_items.values()):
        batched_query[row, querys] = 1
        query_items.append(querys + [0] * (max_query_len - len(querys)))
        lengths.append(len(querys))

    # NOTE(review): the model's third argument receives itemset ids here, not
    # item labels -- presumably it does not influence `logit`; confirm in Task2Net.
    query_embeds, pos_embeds, _, logit = model(
        g,
        torch.from_numpy(batched_query).to(device),
        torch.tensor(labels).to(device),
        neg.to(device),
        torch.tensor(query_items).to(device),
        torch.tensor(lengths).to(device),
    )
    # No loss is computed for the test split: the only "labels" available are
    # itemset ids, which are not class targets, so a cross-entropy against
    # them is meaningless and can fail for ids that are not valid class indices.

    # Indices of the top_k highest-scoring items for every query.
    scores = torch.topk(logit, top_k).indices.detach().cpu().numpy()

    # One row per test itemset: "itemset_id,item_1,...,item_k".
    submit = [
        ",".join(map(str, (label, *top100)))
        for top100, label in zip(scores, labels)
    ]
    with open('./itemset_item_test_prediction.csv', 'w', encoding='utf-8') as f:
        f.writelines(f'{row}\n' for row in submit)