In [13]:
import torch
import torch.nn as nn

In [14]:
class CBOW(torch.nn.Module):
    def __init__(self, vocab_size, embedding_dim, literal_to_ix):
        super(CBOW, self).__init__()
        # out: 1 x embedding_dim
        self.vocab_size = vocab_size
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.literal_to_ix = literal_to_ix
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.activation_function1 = nn.ReLU()
        
        # out: 1 x vocab_size
        self.linear2 = nn.Linear(128, vocab_size)
        self.activation_function2 = nn.LogSoftmax(dim=-1)
        
    def forward(self, inputs):
        embeds = sum(self.embeddings(inputs)).view(1, -1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out
    
    def get_literal_embedding(self, literal):
        ix = torch.tensor([self.literal_to_ix[literal]])
        return self.embeddings(ix)

    def get_embeddings(self):
        ix = torch.tensor([i for i in range(self.vocab_size)])
        return self.embeddings(ix)

In [15]:
# utils 
def make_context_vector(context, literal_to_idx):
    if 0 in context:
        print(context)
    idxs = [literal_to_idx[l] for l in context]
    return torch.tensor(idxs, dtype=torch.long)


def read_sat(sat_path):
    with open(sat_path) as f:
        sat_lines = f.readlines()
        header = sat_lines[0]
        header_info = header.replace("\n", "").split(" ")
        num_vars = int(header_info[-2])
        num_clauses = int(header_info[-1])

        sat = [[int(x) for x in line.replace(' 0\n', '').split(' ')]
               for line in sat_lines[1:]]

        return sat, num_vars, num_clauses

In [16]:
# data preprocessing

name = 'bmc-ibm-7.processed.cnf'

sat_path = f'./dataset/train_formulas/{name}'
sat_instance, num_vars, num_clauses = read_sat(sat_path)
vocab_size = num_vars * 2

data = []
for clause in sat_instance:
    clause_len = len(clause)
    for i in range(clause_len):
        context = [clause[x] for x in range(clause_len) if x != i]
        target = clause[i]
        data.append((context, target))

print(f'data size: {len(data)}')

data size: 14634


In [17]:
# model setting

EMDEDDING_DIM = 50

literal_to_ix = {}
for i in range(1, num_vars + 1):
    literal_to_ix[i] = 2 * i - 2
    literal_to_ix[-i] = 2 * i - 1

model = CBOW(vocab_size, EMDEDDING_DIM, literal_to_ix)
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# training
for epoch in range(1):
    total_loss = 0
    for context, target in data:
        print(context, target)
        context_vector = make_context_vector(context, literal_to_ix)
        log_probs = model(context_vector)
        total_loss += loss_function(log_probs, torch.tensor([literal_to_ix[target]]))
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(epoch, total_loss.item())

[2] 1
[1] 2
[4] 3
[3] 4
[-5] 3
[3] -5
[-5] 1
[1] -5
[-5] 6
[6] -5
[-5] 7
[7] -5
[-5] 8
[8] -5
[-5] 9
[9] -5
[-5] 10
[10] -5
[-5] 11
[11] -5
[-5] 12
[12] -5
[-5] 13
[13] -5
[-5] 14
[14] -5
[-5] 15
[15] -5
[-5] 16
[16] -5
[-5] 17
[17] -5
[18] 17
[17] 18
[19] 16
[16] 19
[20] 16
[16] 20
[-19, -20] -16
[-16, -20] -19
[-16, -19] -20
[21] 15
[15] 21
[22] 14
[14] 22
[23] 13
[13] 23
[24] 12
[12] 24
[25] 11
[11] 25
[26] 10
[10] 26
[27, 28, 29, 30] 24
[24, 28, 29, 30] 27
[24, 27, 29, 30] 28
[24, 27, 28, 30] 29
[24, 27, 28, 29] 30
[-30] -29
[-29] -30
[-30] -28
[-28] -30
[-30] -27
[-27] -30
[-30] -24
[-24] -30
[-30] 9
[9] -30
[31] 8
[8] 31
[32] 7
[7] 32
[33] 6
[6] 33
[35] -34
[-34] 35
[35] 36
[36] 35
[-36, -35] 34
[34, -35] -36
[34, -36] -35
[-38] 37
[37] -38
[-40] 39
[39] -40
[41] 39
[39] 41
[40, -41] -39
[-39, -41] 40
[-39, 40] -41
[-43] 42
[42] -43
[44] 42
[42] 44
[43, -44] -42
[-42, -44] 43
[-42, 43] -44
[-45] 18
[18] -45
[-20, 46] -19
[-19, 46] -20
[-19, -20] 46
[-46] 19
[19] -46
[-46] 20
[20]

In [18]:
# test the embedding
print(model.get_literal_embedding(91))
embeddings = model.get_embeddings()
torch.save(embeddings, f'./model/embeddings/{name}.pt')

tensor([[-2.2007,  1.2286, -3.2122,  1.7929,  0.2690, -0.6359,  0.2694,  0.3291,
         -1.6257,  0.3501, -0.3194,  1.2274,  0.0521, -0.2357, -1.6397,  1.1724,
         -1.3313,  1.7269, -0.4163, -0.3453, -0.2904, -0.1629,  1.6783, -1.2710,
          0.9893, -0.0763,  0.6037, -0.5358,  2.0970,  0.6396, -0.0522,  0.9078,
          0.8367,  2.0011, -0.0907,  0.7182, -0.4358, -0.6224, -0.2792,  0.5658,
         -0.0684,  0.8266, -0.0469, -1.6900,  1.9694, -0.2297, -0.2086, -0.2432,
         -0.9304,  0.0292]], grad_fn=<EmbeddingBackward0>)


In [19]:
embeddings[180]

tensor([-2.2007,  1.2286, -3.2122,  1.7929,  0.2690, -0.6359,  0.2694,  0.3291,
        -1.6257,  0.3501, -0.3194,  1.2274,  0.0521, -0.2357, -1.6397,  1.1724,
        -1.3313,  1.7269, -0.4163, -0.3453, -0.2904, -0.1629,  1.6783, -1.2710,
         0.9893, -0.0763,  0.6037, -0.5358,  2.0970,  0.6396, -0.0522,  0.9078,
         0.8367,  2.0011, -0.0907,  0.7182, -0.4358, -0.6224, -0.2792,  0.5658,
        -0.0684,  0.8266, -0.0469, -1.6900,  1.9694, -0.2297, -0.2086, -0.2432,
        -0.9304,  0.0292], grad_fn=<SelectBackward0>)