In [14]:
%cd "/home/kera/workspace/Transformer-GB"

/data/kera/workspace/Transformer-GB


In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
import torch
import yaml 
import os 
os.environ["CUDA_VISIBLE_DEVICES"] = '3'
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import argparse
import re 
from transformers import AutoModelForSeq2SeqLM, AutoConfig

from transformers import PreTrainedTokenizerFast
from src.loader.data import load_data
from src.loader.checkpoint import load_trained_bag
from src.evalution.evaluators import eval_prediction

load('src/data/symbolic_utils.sage')

In [17]:
def get_random_MQsystem(n, m, ring=None):
    """Sample a random degree-2 polynomial system that vanishes at a random point.

    Args:
        n: number of variables (length of the planted point).
        m: number of polynomials to sample.
        ring: polynomial ring to sample from. When ``None`` a ring over
            GF(2) in ``n`` variables named ``x`` with lex order is created,
            which is the ring this function previously always used.

    Returns:
        ``(F, x)`` where ``F`` is ``ideal(polys).basis`` for the shifted
        polynomials and ``x`` is the list of ``n`` base-ring elements at
        which every polynomial of ``F`` evaluates to zero.

    Raises:
        ValueError: if ``ring`` is given but does not have ``n`` generators.
    """
    if ring is None:
        ring = PolynomialRing(GF(2), 'x', n, order='lex')
    elif ring.ngens() != n:
        # The planted point has n coordinates, so evaluating f(x) below would
        # fail in a confusing way if the ring disagreed with n.
        raise ValueError(f'ring has {ring.ngens()} generators but n = {n}')

    F = [ring.random_element(degree=2, terms=Infinity) for _ in range(m)]

    # Plant a root: draw a random point, then subtract each polynomial's value
    # at that point so that the shifted polynomial vanishes there.
    x = [ring.base_ring().random_element() for _ in range(n)]
    eps = [f(x) for f in F]
    F = [ring(f - e) for f, e in zip(F, eps)]
    F = ideal(F).basis

    assert all(f(x) == 0 for f in F)

    return F, x

def sol_to_gb(ring, sol):
    """Return ``ideal([x_i - s_i, ...]).basis`` for the generators of ``ring``.

    Each generator ``x_i`` of ``ring`` is paired with the corresponding entry
    ``s_i`` of ``sol`` and the difference is converted into ``ring``.
    """
    linear_polys = []
    for gen, value in zip(ring.gens(), sol):
        linear_polys.append(ring(gen - value))
    return ideal(linear_polys).basis


In [18]:
def generate_MQ_dataset(n, m, num_samples=1000):
    """Generate random degree-2 systems over GF(2) together with their targets.

    Args:
        n: number of variables of the polynomial ring.
        m: number of polynomials per system.
        num_samples: number of samples to generate.

    Returns:
        A list of ``(F, G, x)`` tuples: ``F`` is the system returned by
        ``get_random_MQsystem``, ``x`` the point it was built to vanish at,
        and ``G`` the result of ``sol_to_gb(ring, x)``.
    """
    # One shared ring (GF(2), lex order) for every sample.
    ring = PolynomialRing(GF(2), 'x', n, order='lex')

    dataset = []
    for _ in range(num_samples):
        F, x = get_random_MQsystem(n, m, ring)
        G = sol_to_gb(ring, x)
        dataset.append((F, G, x))

    return dataset


def experiement(field, n, load_dir, num_beams=1):
    """Run the trained model on the system from ``load_katsura`` and print the result.

    Args:
        field: passed through to ``load_katsura``.
        n: passed through to ``load_katsura``.
        load_dir: directory handed to ``load_trained_bag`` (checkpoint mode).
        num_beams: beam width forwarded to ``model.generate``.

    Prints, in this order, the input system ``F``, the reference ``G`` and the
    polynomials parsed from the model output. Returns nothing.
    """
    separator = ' [SEP] '

    trained = load_trained_bag(load_dir, from_checkpoint=True)
    net = trained['model']
    tok = trained['tokenizer']
    _ = trained['params']  # looked up as before; not used any further here

    F, G = load_katsura(field, n)
    source_text = separator.join(map(poly_to_prefix, F))
    # The reference text is built but never given to the model.
    target_text = separator.join(map(poly_to_prefix, G))

    encoded = tok(source_text, return_tensors='pt')
    input_ids = encoded['input_ids'].cuda()
    generated = net.generate(input_ids, max_length=1000, num_beams=num_beams, do_sample=False)
    decoded = tok.batch_decode(generated, skip_special_tokens=True)

    # Only the first decoded sequence is parsed, one polynomial per '[SEP]' chunk.
    G_pred = []
    for chunk in decoded[0].split('[SEP]'):
        G_pred.append(prefix_to_poly(chunk, F.ring))

    for item in (F, G, G_pred):
        print(item)

In [59]:
# Choose which trained model to load; `field` and `n` only enter through the
# directory name built below.
field = 'QQ'
n = 5
print(f' field = {field}, n = {n}')
load_dir = f'results/shape_gb_lex/gb_dataset_n={n}_field={field}'

# Load the checkpoint bundle and expose its parts as notebook globals that the
# evaluation cells further down read (`model`, `tokenizer`).
bag = load_trained_bag(load_dir, from_checkpoint=True)
model = bag['model'] 
tokenizer = bag['tokenizer']
params = bag['params']

 field = QQ, n = 5


The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.


In [60]:
# Sample 1000 systems with twice as many polynomials (m) as variables (n).
m = 2*n
dataset = generate_MQ_dataset(n, m, num_samples=1000)

In [61]:
# Unpack the first sample: input system F, target G, and the point `sol`
# that F was constructed to vanish at.
F, G, sol = dataset[0]

In [62]:
def sol_listform(sol):
    """Return the values of the mapping ``sol`` as a list, in reversed iteration order."""
    values = list(sol.values())
    values.reverse()
    return values

def print_solution(sols, ring):
    """Print every solution in ``sols`` as a list ordered by ``ring.gens()``.

    Each element of ``sols`` is indexed by the generators of ``ring``; one
    line is printed per solution.
    """
    variables = ring.gens()
    for assignment in sols:
        row = []
        for var in variables:
            row.append(assignment[var])
        print(row)

In [63]:
import numpy as np
from tqdm import tqdm

In [64]:
# `ring` (over GF(2)) is where predicted bases are solved in the cells below;
# `qring` (over QQ) is the ring the model output is parsed into.
# NOTE(review): no `order=` is given here, whereas generate_MQ_dataset builds
# its ring with order='lex'; confirm the difference is intended.
ring = PolynomialRing(GF(2), 'x', n)
qring = PolynomialRing(QQ, 'x', n)

# QQ: evaluation with the model loaded from the field=QQ results directory

In [74]:
# Score the loaded model on `dataset`: parse the generated text into
# polynomials over `qring`, move them to `ring`, solve, and compare the first
# solution found with the sample's planted solution.
num_beams = 1

bitwise_acc = 0      # coordinates of the prediction that agree with `sol`
rev_bitwise_acc = 0  # coordinates of the prediction that disagree with `sol`
full_acc = 0         # samples whose whole predicted solution equals `sol`
# Take the size from the data itself so that the slice below and the
# denominators of the reported rates always describe the same samples.
num_samples = len(dataset)
no_solution = 0      # samples whose predicted system has an empty variety

num_test_samples = min(1000, num_samples)
assert num_test_samples > 0, 'dataset is empty; nothing to evaluate'

for F, G, sol in tqdm(dataset[:num_test_samples]):
    # Serialise the input system (and the reference, which is not used below).
    F_prefix = [poly_to_prefix(f) for f in F]
    G_prefix = [poly_to_prefix(g) for g in G]
    x_text = ' [SEP] '.join(F_prefix)
    y_text = ' [SEP] '.join(G_prefix)

    x = tokenizer(x_text, return_tensors='pt')['input_ids'].cuda()
    output_ids = model.generate(x, max_length=1000, num_beams=num_beams, do_sample=False)
    z_text = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    # Only the first decoded sequence is parsed, one polynomial per '[SEP]' chunk.
    G_pred = [prefix_to_poly(zt, qring) for zt in z_text[0].split('[SEP]')]

    G_pred = ideal(G_pred).change_ring(ring).basis
    pred_solutions = ideal(G_pred).variety()

    if pred_solutions:
        # When several solutions exist only the first one is scored.
        pred_sol = sol_listform(pred_solutions[0])

        bitwise_acc += np.sum(np.array(pred_sol) == np.array(sol))
        rev_bitwise_acc += np.sum(np.array(pred_sol) != np.array(sol))
        full_hit = np.all(np.array(pred_sol) == np.array(sol))
        full_acc    += full_hit

    else:
        no_solution += 1

# Rates are taken over ALL evaluated samples, so samples without a solution
# count towards neither bitwise rate and the two need not sum to 1.
bitwise_acc = float(bitwise_acc / (num_test_samples * n))
rev_bitwise_acc = float(rev_bitwise_acc / (num_test_samples * n))
full_acc    = float(full_acc / num_test_samples)

print('----------------------')
print(f'bitwise acc    : {bitwise_acc:.2f}')
print(f'rev_bitwise acc: {rev_bitwise_acc:.2f}')
print(f'full acc       : {full_acc:.2f}')
print(f'no solution    : {no_solution}/{num_test_samples}')


100%|██████████| 1000/1000 [02:39<00:00,  6.28it/s]

----------------------
bitwise acc    : 0.47
rev_bitwise acc: 0.44
full acc       : 0.06
no solution    : 94/1000





In [79]:
# Print the reference basis next to the model's prediction for the first
# few samples of `dataset`.
num_beams = 1

# NOTE(review): these counters are reset here but never updated or read in
# this cell; running it overwrites the values left by the evaluation cell.
bitwise_acc = 0
rev_bitwise_acc = 0
full_acc = 0
num_samples = 1000 # len(dataset)
no_solution = 0

num_test_samples = min(10, num_samples)
# disable=True switches the progress bar off so it does not mix with the prints.
for F, G, sol in tqdm(dataset[:num_test_samples], disable=True):
    # Serialise the input system (and the reference, which is not used below).
    F_prefix = [poly_to_prefix(f) for f in F]
    G_prefix = [poly_to_prefix(g) for g in G]
    x_text = ' [SEP] '.join(F_prefix)
    y_text = ' [SEP] '.join(G_prefix)

    x = tokenizer(x_text, return_tensors='pt')['input_ids'].cuda()
    # y = tokenizer(y_text, return_tensors='pt')['input_ids'].cuda()
    output_ids = model.generate(x, max_length=1000, num_beams=num_beams, do_sample=False)
    z_text = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    # Only the first decoded sequence is parsed, one polynomial per '[SEP]' chunk.
    G_pred = [prefix_to_poly(zt, qring) for zt in z_text[0].split('[SEP]')]
    # print(G_pred)

    # Move the parsed polynomials from `qring` into `ring` before printing.
    G_pred = ideal(G_pred).change_ring(ring).basis

    print('- G (answer) --------')
    for g in G: print(g)
    print('- G (Transformer) --------')
    for g in G_pred: print(g)

    print('')

- G (answer) --------
x0
x1
x2 + 1
x3
x4 + 1
- G (Transformer) --------
x0 + x4 + 1
x1 + x4 + 1
x2 + x4 + 1
x3 + x4 + 1
x4^2

- G (answer) --------
x0
x1
x2
x3 + 1
x4
- G (Transformer) --------
x0 + x4 + 1
x4^2 + x1 + 1
x4^2 + x2
x3 + x4 + 1
x4^3 + x4^2

- G (answer) --------
x0 + 1
x1
x2
x3
x4
- G (Transformer) --------
x0 + x4 + 1
x1 + x4 + 1
x2 + x4 + 1
x3 + 1
x4^2 + x4

- G (answer) --------
x0
x1
x2 + 1
x3
x4 + 1
- G (Transformer) --------
x0 + x4 + 1
x1 + x4 + 1
x2 + 1
x3 + x4 + 1
x4^2 + 1

- G (answer) --------
x0 + 1
x1 + 1
x2 + 1
x3
x4 + 1
- G (Transformer) --------
x0 + x4 + 1
x1 + x4 + 1
x2 + x4 + 1
x3 + x4
x4^2

- G (answer) --------
x0 + 1
x1 + 1
x2
x3
x4
- G (Transformer) --------
x0 + x4 + 1
x1 + x4
x2 + x4 + 1
x3 + 1
x4^2

- G (answer) --------
x0 + 1
x1 + 1
x2 + 1
x3
x4
- G (Transformer) --------
x0 + x4 + 1
x1 + x4
x2 + 1
x3 + x4
x4^2 + x4

- G (answer) --------
x0
x1 + 1
x2 + 1
x3
x4 + 1
- G (Transformer) --------
x0 + x4
x1 + x4 + 1
x2 + x4 + 1
x3 + x4
x4^2

- G (an

In [26]:
# Regenerate `dataset` with only one more polynomial than variables.
# NOTE(review): this cell's execution count (26) is out of sequence with its
# neighbours, so which `dataset` the surrounding cells actually saw depends on
# the order they were run in; confirm with Restart & Run All.
m = n+1
dataset = generate_MQ_dataset(n, m, num_samples=1000)

In [66]:
# Take the first sample, move it into `qring`, and replace its input system
# by U * F for a random matrix U produced by the dataset builder.
load('src/data/gbdataset.sage')

F, G, sol = dataset[0]
# Re-express both systems over `qring`, then stack each as a column matrix
# (one polynomial per row) so it can be multiplied by U below.
F, G = ideal(F).change_ring(qring).basis, ideal(G).change_ring(qring).basis
F, G = matrix(F).T, matrix(G).T

# Only `max_rand_coeff` and `with_permutation` of the builder are read below.
builder = GBDataset_Builder(qring, 
                            max_rand_coeff=3, 
                            max_coeff=-1,
                            max_size=6, 
                            max_degree=4, 
                            max_num_terms=None, 
                            min_num_terms=1, 
                            max_Gdegree=4, 
                            max_num_Gterms=None, 
                            num_duplicants=1, 
                            density=1.0, 
                            with_permutation=True)

# num_vars = F.nrows()
# m = num_vars
# NOTE: this rebinds the notebook-global `m` to the number of rows of F.
m = F.nrows()
max_degree = 3
density = 0.2
d = None
# m = randint(0, max_size-num_vars) + num_vars
# d = randint(min_num_terms, max_num_terms) if max_num_terms is not None else None 
# A = builder.random_umut_matrix(n, F.nrows(),  degree=max_degree, terms=d, density=density, num_bound=builder.max_rand_coeff)
# Random m x m matrix from the builder; presumably unimodular upper-triangular
# given the name. TODO confirm in gbdataset.sage.
U = builder.random_umut_matrix(m, m, degree=max_degree, terms=d, density=density, num_bound=builder.max_rand_coeff)
if builder.with_permutation:
    P = random_permutation_matrix(m) 
    U = U * P

# Mix the input system, then flatten both column matrices back to plain lists
# by taking the single entry of every row. `G` is rebound from matrix to list.
F_new = U * F
F_new = [f[0] for f in F_new]
G     = [g[0] for g in G]

-f


In [67]:
# Run the model once on the scrambled system `F_new` and compare its output
# with the reference `G` and the sample's solution `sol`.
num_beams = 1

# Serialise the input system (and the reference, which is not used below).
F_prefix = [poly_to_prefix(f) for f in F_new]
G_prefix = [poly_to_prefix(g) for g in G]
x_text = ' [SEP] '.join(F_prefix)
y_text = ' [SEP] '.join(G_prefix)

x = tokenizer(x_text, return_tensors='pt')['input_ids'].cuda()
# y = tokenizer(y_text, return_tensors='pt')['input_ids'].cuda()
output_ids = model.generate(x, max_length=1000, num_beams=num_beams, do_sample=False)
z_text = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
# Only the first decoded sequence is parsed, one polynomial per '[SEP]' chunk.
G_pred = [prefix_to_poly(zt, qring) for zt in z_text[0].split('[SEP]')]

print('-- Gröbner basis (answer) ------------')
for i, g in enumerate(G): print(f'g_{i} = {g}')
print('')

print('-- Gröbner basis (Transformer) -------')
for i, g in enumerate(G_pred): print(f'g_{i} = {g}')
print('')


# Solve the predicted system after moving it from `qring` into `ring`.
sols = ideal(G_pred).change_ring(ring).variety()
print(f' {len(sols)} solutions found.')
print_solution(sols, ring)

print(f'(True solution is {sol})')

-- Gröbner basis (answer) ------------
g_0 = x0
g_1 = x1 + 1
g_2 = x2 + 1
g_3 = x3
g_4 = x4 + 1

-- Gröbner basis (Transformer) -------
g_0 = x0 + x4 + 1
g_1 = x1 + x4
g_2 = x2 - 1/3*x4 + 1
g_3 = x3 + x4
g_4 = x4^2 - 2*x4

 1 solutions found.
[1, 0, 1, 0, 0]
(True solution is [0, 1, 1, 0, 1])


# F7: evaluation with the model loaded from the field=F7 results directory

In [68]:
# Rebind the rings for this section: `ring` stays over GF(2), while `qring`,
# despite its name, is now over GF(7) and is the ring model output is parsed into.
# NOTE(review): `n` is read here before the next cell reassigns it; the value
# is the same (5) in this notebook, but the order matters on a fresh kernel.
ring = PolynomialRing(GF(2), 'x', n)
qring = PolynomialRing(GF(7), 'x', n)

In [69]:
# Switch to the model stored under the field=F7 results directory; `field`
# and `n` only enter through the directory name built below.
field = 'F7'
n = 5
print(f' field = {field}, n = {n}')
load_dir = f'results/shape_gb_lex/gb_dataset_n={n}_field={field}'

# Rebinds the notebook globals `model` and `tokenizer` that the evaluation
# cell below reads.
bag = load_trained_bag(load_dir, from_checkpoint=True)
model = bag['model'] 
tokenizer = bag['tokenizer']
params = bag['params']

 field = F7, n = 5


The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.


In [72]:
# Draw a fresh dataset of 1000 systems with twice as many polynomials (m) as
# variables (n) for this section.
m = 2*n
dataset = generate_MQ_dataset(n, m, num_samples=1000)

In [73]:
# Score the loaded model on `dataset`: parse the generated text into
# polynomials over `qring`, move them to `ring`, solve, and compare the first
# solution found with the sample's planted solution.
num_beams = 1

bitwise_acc = 0      # coordinates of the prediction that agree with `sol`
rev_bitwise_acc = 0  # coordinates of the prediction that disagree with `sol`
full_acc = 0         # samples whose whole predicted solution equals `sol`
num_samples = len(dataset)
no_solution = 0      # samples whose predicted system has an empty variety

num_test_samples = min(1000, num_samples)
assert num_test_samples > 0, 'dataset is empty; nothing to evaluate'

for F, G, sol in tqdm(dataset[:num_test_samples]):
    # Serialise the input system (and the reference, which is not used below).
    F_prefix = [poly_to_prefix(f) for f in F]
    G_prefix = [poly_to_prefix(g) for g in G]
    x_text = ' [SEP] '.join(F_prefix)
    y_text = ' [SEP] '.join(G_prefix)

    x = tokenizer(x_text, return_tensors='pt')['input_ids'].cuda()
    output_ids = model.generate(x, max_length=1000, num_beams=num_beams, do_sample=False)
    z_text = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    # Only the first decoded sequence is parsed, one polynomial per '[SEP]' chunk.
    G_pred = [prefix_to_poly(zt, qring) for zt in z_text[0].split('[SEP]')]

    G_pred = ideal(G_pred).change_ring(ring).basis
    pred_solutions = ideal(G_pred).variety()

    if pred_solutions:
        # When several solutions exist only the first one is scored.
        pred_sol = sol_listform(pred_solutions[0])

        bitwise_acc += np.sum(np.array(pred_sol) == np.array(sol))
        rev_bitwise_acc += np.sum(np.array(pred_sol) != np.array(sol))

        full_hit = np.all(np.array(pred_sol) == np.array(sol))
        full_acc    += full_hit

        # Show every exact match as it is found.
        if full_hit:
            print(f'solution  : {sol}')
            print(f'prediction: {pred_sol}')

    else:
        no_solution += 1

# Rates are taken over ALL evaluated samples, so samples without a solution
# count towards neither bitwise rate and the two need not sum to 1.
bitwise_acc = float(bitwise_acc / (num_test_samples * n))
rev_bitwise_acc = float(rev_bitwise_acc / (num_test_samples * n))
full_acc    = float(full_acc / num_test_samples)

print('----------------------')
print(f'bitwise acc    : {bitwise_acc:.2f}')
# Previously computed but never shown; reported here to match the QQ section.
print(f'rev_bitwise acc: {rev_bitwise_acc:.2f}')
print(f'full acc       : {full_acc:.2f}')
print(f'no solution    : {no_solution}/{num_test_samples}')

  0%|          | 0/300 [00:00<?, ?it/s]

  1%|▏         | 4/300 [00:00<01:13,  4.04it/s]

solution  : [0, 0, 1, 0, 1]
prediction: [0, 0, 1, 0, 1]


  6%|▋         | 19/300 [00:04<01:04,  4.38it/s]

solution  : [0, 0, 0, 0, 1]
prediction: [0, 0, 0, 0, 1]


  9%|▉         | 28/300 [00:06<01:00,  4.49it/s]

solution  : [0, 0, 0, 0, 0]
prediction: [0, 0, 0, 0, 0]


 18%|█▊        | 53/300 [00:11<00:57,  4.29it/s]

solution  : [0, 1, 0, 1, 1]
prediction: [0, 1, 0, 1, 1]


 18%|█▊        | 55/300 [00:12<00:53,  4.54it/s]

solution  : [0, 0, 0, 0, 0]
prediction: [0, 0, 0, 0, 0]


 21%|██▏       | 64/300 [00:14<00:48,  4.87it/s]

solution  : [0, 0, 0, 0, 0]
prediction: [0, 0, 0, 0, 0]


 37%|███▋      | 112/300 [00:25<00:43,  4.31it/s]

solution  : [0, 0, 0, 0, 0]
prediction: [0, 0, 0, 0, 0]


 40%|████      | 121/300 [00:27<00:42,  4.20it/s]

solution  : [1, 1, 0, 0, 1]
prediction: [1, 1, 0, 0, 1]


 48%|████▊     | 145/300 [00:32<00:34,  4.56it/s]

solution  : [1, 0, 0, 0, 1]
prediction: [1, 0, 0, 0, 1]


 51%|█████▏    | 154/300 [00:34<00:37,  3.89it/s]

solution  : [0, 0, 0, 0, 0]
prediction: [0, 0, 0, 0, 0]


 60%|█████▉    | 179/300 [00:40<00:27,  4.36it/s]

solution  : [0, 0, 0, 0, 1]
prediction: [0, 0, 0, 0, 1]


 75%|███████▌  | 226/300 [00:50<00:14,  5.28it/s]

solution  : [0, 0, 0, 0, 1]
prediction: [0, 0, 0, 0, 1]


 77%|███████▋  | 232/300 [00:51<00:11,  5.86it/s]

solution  : [1, 0, 1, 0, 0]
prediction: [1, 0, 1, 0, 0]


 85%|████████▌ | 256/300 [00:55<00:07,  5.64it/s]

solution  : [1, 0, 0, 0, 0]
prediction: [1, 0, 0, 0, 0]


 87%|████████▋ | 260/300 [00:56<00:08,  4.84it/s]

solution  : [0, 0, 0, 0, 0]
prediction: [0, 0, 0, 0, 0]


 93%|█████████▎| 280/300 [01:01<00:04,  4.62it/s]

solution  : [0, 0, 0, 0, 0]
prediction: [0, 0, 0, 0, 0]


 96%|█████████▌| 288/300 [01:02<00:02,  4.47it/s]

solution  : [0, 0, 0, 0, 0]
prediction: [0, 0, 0, 0, 0]


 97%|█████████▋| 290/300 [01:03<00:02,  4.59it/s]

solution  : [0, 0, 0, 0, 1]
prediction: [0, 0, 0, 0, 1]


 99%|█████████▊| 296/300 [01:04<00:00,  4.35it/s]

solution  : [0, 0, 1, 1, 1]
prediction: [0, 0, 1, 1, 1]


100%|██████████| 300/300 [01:05<00:00,  4.58it/s]

----------------------
bitwise acc    : 0.47
full acc       : 0.06
no solution    : 24/300



