In [1]:
from csv import reader
import ast
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math, copy, time
from torch.autograd import Variable
import torch.optim as optim
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

# Setting random seed to facilitate reproduction
seed = 1234
torch.manual_seed(seed)
np.random.seed(seed)

In [2]:
# cuda environment is recommened
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
print(device)

cuda


# Model Structure

In [3]:
# LIB consists of three parts: (1) feature extractor, (2) Encoder and (3) Prediction model.
# This notebook shows the design of the encoder and the prediction model.

def clones(module, N):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

class LayerNorm(nn.Module):
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2
    
class SublayerConnection(nn.Module):
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        return x + self.dropout(sublayer(self.norm(x)))
    
class PositionwiseFeedForward(nn.Module):
    def __init__(self, d_model, d_ff, dropout=0.4):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.w_2(self.dropout(F.relu(self.w_1(x))))
    
def attention(query, key, value, mask=None, dropout=None):
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k)
    
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim = -1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    
    return torch.matmul(p_attn, value), p_attn

class MultiHeadedAttention(nn.Module):
    def __init__(self, h, d_model, dropout=0.4):
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
        
    def forward(self, query, key, value, mask=None):
        if mask is not None:
            mask = mask.unsqueeze(1).unsqueeze(1)
        nbatches = query.size(0)
        
        query, key, value = \
            [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
             for l, x in zip(self.linears, (query, key, value))]

        x, self.attn = attention(query, key, value, mask=mask, 
                                 dropout=self.dropout)
        
        x = x.transpose(1, 2).contiguous() \
             .view(nbatches, -1, self.h * self.d_k)
        
        return self.linears[-1](x)
    
class Encoder(nn.Module):
    def __init__(self, layer, N):
        super(Encoder, self).__init__()
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)
        
    def forward(self, y, x, mask):
        for layer in self.layers:
            x = layer(y, x, mask)
        return self.norm(x)
    
class EncoderLayer(nn.Module):
    def __init__(self, size, self_attn, feed_forward, dropout):
        super(EncoderLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer = clones(SublayerConnection(size, dropout), 2)
        self.size = size

    def forward(self, y, x, mask):
        x = self.sublayer[0](x, lambda x: self.self_attn(y, x, x, mask))
        
        return self.sublayer[1](x, self.feed_forward)
    
# Pooling by attention
class PoolingLayer(nn.Module):
    def __init__(self, size, self_attn, feed_forward, dropout):
        super(PoolingLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer = SublayerConnection(size, dropout)
        self.size = size

    def forward(self, y, x, mask):
        x = self.self_attn(y, x, x, mask)
        return self.sublayer(x, self.feed_forward)
    
def make_model(d_model, N, d_ff, h, dropout=0.4):
    
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model, dropout)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    
    model = Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N)
    pooling_model = Encoder(PoolingLayer(d_model, c(attn), c(ff), dropout), 1)
    
    return model, pooling_model

# Completed LIB
class self_attn_model(nn.Module):
    
    def __init__(self, encoder, pooling_model, ini_feats, encode_feats, hidden_dim):
        super(self_attn_model, self).__init__()
        
        self.encoder = encoder
        self.pma = pooling_model
        self.S = nn.Parameter(torch.Tensor(1,1,encode_feats))
        nn.init.xavier_uniform_(self.S)
        self.output1 = nn.Linear(encode_feats, hidden_dim, bias = True)
        self.output2 = nn.Linear(hidden_dim, 1, bias = True)
        self.linear1 = nn.Linear(ini_feats, encode_feats, bias = True)
        
    def forward(self, batch_samples, batch_mask):
        
        batch_samples = F.relu(self.linear1(batch_samples))
        attn_output = self.encoder(batch_samples, batch_samples, batch_mask)
        attn_output = self.pma(self.S.repeat(attn_output.size(0),1,1), attn_output, batch_mask)
        hidden_rep = F.relu(self.output1(attn_output))
        out = torch.sigmoid(self.output2(hidden_rep))
        
        return out

# Error Metric & Model Evaluation

In [4]:
# To output median, 90th quantile and 95th quantile errors
def find_median_90_95(actual, pred):
    
    qe =[]
    
    for i in range(0, len(pred)):
        
        actual_v = actual[i]
        pred_v = pred[i]
        qe.append(max((pred_v+1e-4)/(actual_v+1e-4),(actual_v+1e-4)/(pred_v+1e-4)))
        
    res = [np.median(qe), np.percentile(qe, 90), np.percentile(qe, 95)]
    
    return res

In [5]:
# Find Q-Error
def q_error(actual, pred):
    
    epsilon = 1e-4
    q_e = 0
    
    for i in range(0, len(pred)):
        q_e += max((actual[i]+epsilon)/(pred[i]+epsilon),(pred[i]+epsilon)/(actual[i]+epsilon))
        
        
    return q_e/len(pred)

In [6]:
# Model Evaluation on test data
def eval_on_test_set(model, bs):
    
    model.eval()
    
    running_error_qe = 0
    num_batches = 0
    _pred = []
    _label = []
    
    for i in range(0,len_test,bs):
        
        minibatch_data = test_data[i:i+bs].to(device)
        minibatch_label = test_label[i:i+bs].numpy()
        minibatch_mask = test_mask[i:i+bs].to(device)
        
        pred_rr = model(minibatch_data, minibatch_mask).detach().cpu().squeeze(dim=1).numpy()
        _pred.extend(pred_rr)
        _label.extend(minibatch_label)
        
        qe = q_error(minibatch_label, pred_rr)
        
        running_error_qe += qe
        
        num_batches += 1
    
    total_qe = running_error_qe/num_batches
    test_res_h = find_median_90_95(_label, _pred)
    
    print(f'Mean Test Q_error = {total_qe}\n')
    print('Median:')
    print(f'Median Test Q_error = {test_res_h[0]}\n')
    print('90 Percentile:')
    print(f'90th Test Q_error = {test_res_h[1]}\n')
    print('95 Percentile:')
    print(f'95th Test Q_error = {test_res_h[2]}\n')

# Load dataset

In [7]:
data_set_file_name = './data/TPC_DS_10_by_query.csv'
with open(data_set_file_name,'r') as ro:
    csv_reader = reader(ro)
    raw_data = list(csv_reader)

data_label = []
for i in range(0,len(raw_data[0])):
    lists = ast.literal_eval(raw_data[0][i])
    data_label.append(lists)

# Remove empty points
data = []
label = []
for i in range(0,len(data_label)):
    if len(data_label[i][0]) == 0:
        continue
    else:
        data.append(data_label[i][0])
        label.append(data_label[i][1])

# Find the maximum number of index optimizable operations
max_l = 0
min_l = 999
for i in range(0,len(data)):
    max_l = max(max_l, len(data[i]))
    min_l = min(min_l, len(data[i]))

# Pad data to faciliate batch training/testing
pad_data = []
mask = []
pad_element = [0 for i in range(0,12)]
for data_point in data:

    new_data = []
    point_mask = [0 for i in range(0,max_l)]

    for j in range(0,len(data_point)):
        new_data.append(data_point[j][:-1])
        point_mask[j] = 1

    if max_l - len(data_point) > 0:
        for k in range(0,max_l-len(data_point)):
            new_data.append(pad_element)

    pad_data.append(new_data)
    mask.append(point_mask)

pad_data = torch.tensor(pad_data)
mask = torch.tensor(mask)
label = torch.tensor(label)

test_data = pad_data
test_mask = mask
test_label = label

len_test = len(test_data)
print(f'Size of test dataset: {len_test}')

Size of test dataset: 5335


# Model Inference

In [8]:
dim1 = 32 # embedding size
dim2 = 64 # hidden dimension for prediction layer
dim3 = 128 # hidden dimension for FNN
n_encoder_layers = 6 # number of layer of attention encoder
n_heads = 8 # number of heads in attention
dropout_r = 0.2 # dropout ratio
bs = 20 # batch size

encoder_model, pooling_model = make_model(dim1, n_encoder_layers, dim3, n_heads, dropout=dropout_r)
model = self_attn_model(encoder_model, pooling_model, 12, dim1, dim2)
para_dict_loc = './model/LIB_query.pth'
model.load_state_dict(torch.load(para_dict_loc))
print(model)
print('')
model = model.to(device)

self_attn_model(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): EncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linears): ModuleList(
            (0): Linear(in_features=32, out_features=32, bias=True)
            (1): Linear(in_features=32, out_features=32, bias=True)
            (2): Linear(in_features=32, out_features=32, bias=True)
            (3): Linear(in_features=32, out_features=32, bias=True)
          )
          (dropout): Dropout(p=0.2, inplace=False)
        )
        (feed_forward): PositionwiseFeedForward(
          (w_1): Linear(in_features=32, out_features=128, bias=True)
          (w_2): Linear(in_features=128, out_features=32, bias=True)
          (dropout): Dropout(p=0.2, inplace=False)
        )
        (sublayer): ModuleList(
          (0): SublayerConnection(
            (norm): LayerNorm()
            (dropout): Dropout(p=0.2, inplace=False)
          )
          (1): SublayerConnection(
            (norm): LayerNorm()
          

In [9]:
eval_on_test_set(model, bs)

Mean Test Q_error = [2.1235878]

Median:
Median Test Q_error = 1.4134806394577026

90 Percentile:
90th Test Q_error = 3.5892429828643837

95 Percentile:
95th Test Q_error = 6.016079044342042

