In [1]:
from diffusion_libs import get_network, DiffusionModel, scale_dataset
from samples_generators import fill_vocabulary_c_v2, convert_back_to_code_c_v2, vocabulary_c_v2
import tensorflow as tf
from tensorflow import keras
import numpy as np
import tensorflow as tf

In [2]:
# Run the vocabulary setup helper before anything reads vocabulary_c_v2
# (presumably this call is what fills it -- confirm in samples_generators).
fill_vocabulary_c_v2()

# Sampling: signal-rate limits handed to DiffusionModel.
min_signal_rate, max_signal_rate = 0.02, 0.95

# Architecture: embedding settings handed to get_network.
embedding_dims = 32
embedding_min_frequency, embedding_max_frequency = 1.0, 1000.0

# Optimization.
batch_size = 16
ema = 0.999
learning_rate = 0.001

# Dictionary / sequence sizing.
TOKENS_CAPACITY = 256
DICTIONARY_SIZE = len(vocabulary_c_v2)

# Network layout handed to get_network.
block_depth = 2
widths = [64, 64, 96, 128, 256]

# Checkpoint locations on disk.
# Raw strings keep every backslash literal, so these Windows paths no longer
# depend on unrecognized escape sequences (e.g. "\S", "\c"), which Python
# reports with a DeprecationWarning / SyntaxWarning, and no longer need the
# manual "\\f" escape to dodge the form-feed character.
# NOTE(review): this is an absolute, machine-specific path -- consider reading
# it from a configurable base directory.
lang_base = r"E:\Studies\master_thesis\diffusion_models_checpoints_savespace\final_checkpoints\simple_c_v3"
model_path = rf"{lang_base}\cp-0128\model"

In [3]:
# Build the network from the sizes and embedding settings configured above.
network = get_network(
    TOKENS_CAPACITY,
    embedding_min_frequency,
    embedding_max_frequency,
    embedding_dims,
    widths=widths,
    block_depth=block_depth,
    name="complicated",
)

# Wrap the network in the diffusion model.
# NOTE(review): the meaning of the trailing positional False is defined by
# DiffusionModel, which is not visible here -- pass it by keyword once confirmed.
model = DiffusionModel(
    TOKENS_CAPACITY,
    DICTIONARY_SIZE,
    network,
    batch_size,
    max_signal_rate,
    min_signal_rate,
    ema,
    False,
)

# Compile with Adam at the configured learning rate and a mean-absolute-error loss.
model.compile(
    optimizer=keras.optimizers.experimental.Adam(learning_rate=learning_rate),
    loss=keras.losses.mean_absolute_error,
)

# Normalizer: rebuild it from the statistics stored next to the checkpoints.
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects;
# only point this at files you created or otherwise trust.
n_w = np.load(f"{lang_base}\\normalizer_weights.npy", allow_pickle=True)
# n_w[0] is used as the mean and n_w[1] as the variance.
normalizer = keras.layers.Normalization(mean=n_w[0], variance=n_w[1])
# "(TOKENS_CAPACITY)" is only a parenthesized int; spell out the 1-tuple shape.
normalizer.build((TOKENS_CAPACITY,))
model.normalizer = normalizer
# Kept as the cell's last expression so the returned load status is displayed.
model.load_weights(model_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x23917ef6e50>

### First, generate some samples to check that the model still produces something that looks like code

In [8]:
# Generate samples, then print each decoded sample with a line break inserted
# after every ";", "{" and "}".
# NOTE(review): the meaning of the two positional arguments (20, 80) is defined
# by DiffusionModel.generate, which is not visible here.
raw, denormalized = model.generate(20, 80)
for sample in denormalized:
  scaled = scale_dataset(sample, DICTIONARY_SIZE)
  code_text = " ".join(convert_back_to_code_c_v2(scaled))
  # Same replacement order as before: ";" first, then "{", then "}".
  for delimiter in (";", "{", "}"):
    code_text = code_text.replace(delimiter, delimiter + "\n")
  print(code_text)
  print()

void ID1 = char ID2 }
 char ID3 }
 int ID3 ( ;
 char ID4 ) int ID5 ) char ID6 ) int ID7 ) int ID9 | ID5 / STRING + STRING ) char ID8 ) char ID9 ;
 return ( STRING }
 ID5 ( ) return = STRING }
 STRING ( ) return = STRING }
 ID3 ( ) ID7 | ID4 + ID5 + STRING ) return = STRING }
 ID6 ( ( ) ;
 ID0 ( int ID11 | NUM ) ID11 < NUM ) ID10 - + ) ;
 int ID12 ;
 int ID13 | ID7 + STRING ) printf = STRING }
 ID5 ( ) {
 if scanf = ID5 + ID8 ;
 | NUM & & ID9 < = NUM ) ;
 int ID10 ;
 return = STRING }
 STRING ) ;
 ID5 = ID6 + ID7 / ID5 + STRING ;
 int ID10 = NUM + ) {
 if ;
 int ID8 = NUM ) {
 > NUM ;
 {
 EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMPTY EMP

In [5]:
# Decode the third-from-last generated sample and print its tokens n per row.
# Negative indexing replaces "len(denormalized) - 3": with only two samples the
# old expression evaluated to -1 and silently picked a different sample instead
# of raising.
scaled = scale_dataset(denormalized[-3], DICTIONARY_SIZE)
convs = convert_back_to_code_c_v2(scaled)
n = 5  # tokens per printed row; the following cell reuses this value
chunked = [convs[i:i + n] for i in range(0, len(convs), n)]
for symbols in chunked:
  print(" ".join(symbols))

char ID1 = int ID2
( ; int ID2 =
NUM + } NUM ID4
| STRING + STRING )
printf = STRING } ID2
( ) ID2 | NUM
= ID2 | STRING )
ID2 = ID3 + STRING
) ID4 | STRING )
ID2 | STRING ) printf
= STRING } ID3 (
) printf = STRING }
ID2 ( ) if =
ID3 | | NUM =
, ) NUM & &
ID2 / ID3 ! NUM
( ; char ID4 |
STRING ) { > NUM
; { EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EM

In [6]:
# Print the values of the third-from-last generated sample, rounded to three
# decimals, n values per row (n is defined in the preceding cell).
# Negative indexing replaces "len(denormalized) - 3": with only two samples the
# old expression evaluated to -1 and silently picked a different sample instead
# of raising.
l = np.around(denormalized[-3], 3).tolist()
chunked = [l[i:i + n] for i in range(0, len(l), n)]
for symbols in chunked:
  # Each chunk is turned back into an array so it prints in NumPy's row format.
  print(np.around(np.asarray(symbols), 3))

[0.069 0.659 0.295 0.059 0.685]
[0.318 0.366 0.058 0.696 0.277]
[0.117 0.177 0.405 0.109 0.734]
[0.252 0.101 0.175 0.096 0.339]
[0.539 0.289 0.09  0.404 0.69 ]
[0.318 0.341 0.697 0.27  0.11 ]
[0.291 0.701 0.251 0.105 0.343]
[0.695 0.275 0.704 0.174 0.1  ]
[0.337 0.733 0.271 0.098 0.326]
[0.682 0.253 0.102 0.341 0.545]
[0.296 0.087 0.409 0.725 0.32 ]
[0.344 0.539 0.295 0.088 0.409]
[0.695 0.318 0.343 0.586 0.291]
[0.71  0.259 0.253 0.123 0.285]
[0.422 0.331 0.112 0.236 0.244]
[0.68  0.188 0.703 0.464 0.115]
[0.316 0.367 0.063 0.746 0.273]
[0.101 0.343 0.391 0.502 0.114]
[0.345 0.387 0.001 0.002 0.002]
[0.003 0.    0.    0.001 0.003]
[0.001 0.002 0.003 0.001 0.   ]
[0.    0.001 0.003 0.    0.001]
[0.    0.001 0.    0.001 0.001]
[0.001 0.    0.002 0.    0.001]
[0.001 0.001 0.    0.    0.001]
[0.    0.    0.    0.    0.001]
[0.001 0.    0.001 0.001 0.001]
[0.    0.001 0.    0.    0.001]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0.    0.    0.    0.001 0.   ]
[0. 0. 0. 0. 0.]
[0. 