In [1]:
import torch
import torch.nn as nn

In [2]:
class CBOW(torch.nn.Module):
    def __init__(self, vocab_size, embedding_dim, literal_to_ix):
        super(CBOW, self).__init__()
        # out: 1 x embedding_dim
        self.vocab_size = vocab_size
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.literal_to_ix = literal_to_ix
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.activation_function1 = nn.ReLU()
        
        # out: 1 x vocab_size
        self.linear2 = nn.Linear(128, vocab_size)
        self.activation_function2 = nn.LogSoftmax(dim=-1)
        
    def forward(self, inputs):
        embeds = sum(self.embeddings(inputs)).view(1, -1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out
    
    def get_literal_embedding(self, literal):
        ix = torch.tensor([self.literal_to_ix[literal]])
        return self.embeddings(ix)

    def get_embeddings(self):
        ix = torch.tensor([i for i in range(self.vocab_size)])
        return self.embeddings(ix)

In [3]:
# utils 
def make_context_vector(context, literal_to_idx):
    if 0 in context:
        print(context)
    idxs = [literal_to_idx[l] for l in context]
    return torch.tensor(idxs, dtype=torch.long)


def read_sat(sat_path):
    with open(sat_path) as f:
        sat_lines = f.readlines()
        header = sat_lines[0]
        header_info = header.replace("\n", "").split(" ")
        num_vars = int(header_info[-2])
        num_clauses = int(header_info[-1])

        sat = [[int(x) for x in line.replace(' 0\n', '').split(' ')]
               for line in sat_lines[1:]]

        return sat, num_vars, num_clauses

In [4]:
# data preprocessing

name = 'bmc-ibm-2.processed.cnf'

sat_path = f'./dataset/formulas/{name}'
sat_instance, num_vars, num_clauses = read_sat(sat_path)
vocab_size = num_vars * 2

data = []
for clause in sat_instance:
    clause_len = len(clause)
    for i in range(clause_len):
        context = [clause[x] for x in range(clause_len) if x != i]
        target = clause[i]
        data.append((context, target))

print(f'data size: {len(data)}')

data size: 1887


In [5]:
# model setting

EMDEDDING_DIM = 50

literal_to_ix = {}
for i in range(1, num_vars + 1):
    literal_to_ix[i] = 2 * i - 2
    literal_to_ix[-i] = 2 * i - 1

model = CBOW(vocab_size, EMDEDDING_DIM, literal_to_ix)
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# training
for epoch in range(1):
    total_loss = 0
    for context, target in data:
        print(context, target)
        context_vector = make_context_vector(context, literal_to_ix)
        log_probs = model(context_vector)
        total_loss += loss_function(log_probs, torch.tensor([literal_to_ix[target]]))
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(epoch, total_loss.item())

[-2] -1
[-1] -2
[-3] -1
[-1] -3
[-4] -1
[-1] -4
[-5] -1
[-1] -5
[-6] -1
[-1] -6
[-8] 7
[7] -8
[-9] 7
[7] -9
[-10] 7
[7] -10
[-11] 7
[7] -11
[12] 7
[7] 12
[-12, 9, 8, 10, 11] -7
[-7, 9, 8, 10, 11] -12
[-7, -12, 8, 10, 11] 9
[-7, -12, 9, 10, 11] 8
[-7, -12, 9, 8, 11] 10
[-7, -12, 9, 8, 10] 11
[3, 4, 5, 6, 9] 2
[2, 4, 5, 6, 9] 3
[2, 3, 5, 6, 9] 4
[2, 3, 4, 6, 9] 5
[2, 3, 4, 5, 9] 6
[2, 3, 4, 5, 6] 9
[-9] -6
[-6] -9
[-9] -5
[-5] -9
[-9] -4
[-4] -9
[-9] -3
[-3] -9
[-9] -2
[-2] -9
[-8] 13
[13] -8
[-9] 13
[13] -9
[12] 13
[13] 12
[15] 14
[14] 15
[15] -11
[-11] 15
[-11] 2
[2] -11
[16] -3
[-3] 16
[18] 17
[17] 18
[19] -4
[-4] 19
[18] -10
[-10] 18
[-10] 6
[6] -10
[-10] 20
[20] -10
[-11] 20
[20] -11
[10, 11] -20
[-20, 11] 10
[-20, 10] 11
[21] 2
[2] 21
[-22] 17
[17] -22
[-24] 23
[23] -24
[-24] 25
[25] -24
[-22] 25
[25] -22
[25] 6
[6] 25
[-27] -26
[-26] -27
[-29] -28
[-28] -29
[30] -29
[-29] 30
[30] -27
[-27] 30
[-30] 31
[31] -30
[30] -31
[-31] 30
[4, 5, 6, 32] 3
[3, 5, 6, 32] 4
[3, 4, 6, 32] 5
[3, 4

In [6]:
# test the embedding
print(model.get_literal_embedding(91))
embeddings = model.get_embeddings()
torch.save(embeddings, f'./model/embeddings/{name}.pt')

tensor([[-0.9869, -0.0265, -0.2298, -0.6615,  0.7601, -0.0852,  0.3917, -0.0690,
         -2.2308, -0.6483, -0.3985,  0.4776,  0.1620,  0.5829, -0.9747,  1.2276,
          0.5807, -0.4721,  0.9364,  0.4538, -1.5305, -0.5079,  0.3985, -0.2100,
         -0.4051, -0.2574,  0.0306, -2.0609, -1.6685, -1.6322, -1.4425, -0.7904,
         -0.2016,  0.9793, -2.5519,  0.8600, -0.3290, -0.6161, -1.7797,  1.4119,
         -1.2137, -1.2722,  0.5384, -0.5872,  0.3280, -0.4759,  0.3233,  0.7303,
          0.0391, -0.4218]], grad_fn=<EmbeddingBackward0>)


In [7]:
embeddings[180]

tensor([-0.9869, -0.0265, -0.2298, -0.6615,  0.7601, -0.0852,  0.3917, -0.0690,
        -2.2308, -0.6483, -0.3985,  0.4776,  0.1620,  0.5829, -0.9747,  1.2276,
         0.5807, -0.4721,  0.9364,  0.4538, -1.5305, -0.5079,  0.3985, -0.2100,
        -0.4051, -0.2574,  0.0306, -2.0609, -1.6685, -1.6322, -1.4425, -0.7904,
        -0.2016,  0.9793, -2.5519,  0.8600, -0.3290, -0.6161, -1.7797,  1.4119,
        -1.2137, -1.2722,  0.5384, -0.5872,  0.3280, -0.4759,  0.3233,  0.7303,
         0.0391, -0.4218], grad_fn=<SelectBackward0>)