In [1]:
from diffusion_libs import get_network, DiffusionModel, scale_dataset, vocabulary, fill_vocabulary, convert_back_to_code
import tensorflow as tf
from tensorflow import keras
import numpy as np
import tensorflow as tf

In [2]:
# Called before `vocabulary` is measured below; presumably this populates
# the shared `vocabulary` container imported from diffusion_libs
# (NOTE(review): confirm against diffusion_libs).
fill_vocabulary()

# sampling
min_signal_rate = 0.02
max_signal_rate = 0.95

# architecture
embedding_dims = 32
embedding_max_frequency = 1000.0
embedding_min_frequency = 1.0

# optimization
batch_size = 16
ema = 0.999
learning_rate = 1e-3

# dictionary related
DICTIONARY_SIZE = len(vocabulary)
TOKENS_CAPACITY = 2048

widths = [64, 64, 96, 96, 128]
block_depth = 2

# Checkpoint locations. Raw strings are used so that the Windows backslashes
# are taken literally: sequences such as "\S", "\m" and "\c" are invalid
# escapes in a normal (f-)string and trigger a warning on current Python
# versions. The resulting string values are unchanged.
# NOTE(review): these are absolute, machine-specific paths; adjust them when
# running the notebook elsewhere.
lang_base = r"E:\Studies\master_thesis\codebase\checkpoints\c_lang"
model_path = rf"{lang_base}\cp-0095\model"

In [3]:
# Build the network from the architecture settings of the configuration cell.
network = get_network(
    TOKENS_CAPACITY,
    embedding_min_frequency,
    embedding_max_frequency,
    embedding_dims,
    widths=widths,
    block_depth=block_depth,
    name="complicated",
)

# Wrap the network in the diffusion model.
# NOTE(review): the meaning of the trailing positional `False` is defined by
# DiffusionModel in diffusion_libs — confirm there.
model = DiffusionModel(
    TOKENS_CAPACITY,
    DICTIONARY_SIZE,
    network,
    batch_size,
    max_signal_rate,
    min_signal_rate,
    ema,
    False,
)

model.compile(
    optimizer=keras.optimizers.experimental.Adam(learning_rate=learning_rate),
    loss=keras.losses.mean_absolute_error,
)

# normalizer
# The saved array holds the normalization statistics: index 0 is used as the
# mean and index 1 as the variance.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects; only load this file from a trusted location.
n_w = np.load(f"{lang_base}\\normalizer_weights.npy", allow_pickle=True)
normalizer = keras.layers.Normalization(mean=n_w[0], variance=n_w[1])
# Pass the input shape as an explicit 1-tuple: `(TOKENS_CAPACITY)` is just a
# parenthesised int, not a tuple.
normalizer.build((TOKENS_CAPACITY,))
model.normalizer = normalizer

# Kept as the last expression so the checkpoint load status is displayed.
model.load_weights(model_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2088747e7f0>

### First, generate some samples to check that the model still produces something that looks like code

In [9]:
# Generate a batch of samples from the restored model.
# NOTE(review): the meaning of the two positional arguments (40, 50) is
# defined by DiffusionModel.generate in diffusion_libs — confirm there.
raw, denormalized = model.generate(40, 50)

# Substitutions applied, in this order, to the space-joined token string:
# a line break after ';', '{' and '}', and removal of the "EMPTY" token.
substitutions = ((";", ";\n"), ("{", "{\n"), ("}", "}\n"), ("EMPTY", ""))

for sample in denormalized:
  scaled = scale_dataset(sample, DICTIONARY_SIZE)
  code_text = " ".join(convert_back_to_code(scaled))
  for old, new in substitutions:
    code_text = code_text.replace(old, new)
  print(code_text)
  print()

float ID0 ) ) }
 else ' \ / > break + ID4 _ : true ) STR _ ID1 ( ID0 scanf | auto . | ID5 . }
 = return enum ID3 double NUM NUM NUM  NUM          NUM                          NUM                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      

In [5]:
# Index of the generated sample to inspect, and number of tokens per printed row.
no = 2
n = 5

# Convert the selected sample back to its token strings.
scaled = scale_dataset(denormalized[no], DICTIONARY_SIZE)
convs = convert_back_to_code(scaled)

# Split the token list into consecutive rows of at most `n` tokens.
chunked = []
for start in range(0, len(convs), n):
  chunked.append(convs[start:start + n])

# One row of space-separated tokens per output line.
for symbols in chunked:
  row_text = " ".join(symbols)
  print(row_text)

for ID0 ) ) ?
for ID3 - ` default
? default } { return
] printf . return =
| [ } ) .
auto ` < ID4 ,
ID3 | ID4 printf ]
const EMPTY ; ' return
EMPTY EMPTY NUM EMPTY NUM
EMPTY NUM NUM NUM NUM
NUM EMPTY NUM NUM EMPTY
NUM EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EMPTY EMPTY EMPTY EMPTY
EMPTY EM

In [6]:
# Numeric view of the same sample (`no` and `n` come from the previous cell):
# values rounded to 3 decimals and converted to a plain Python list.
l = np.around(denormalized[no], 3).tolist()

# Split the values into consecutive rows of at most `n` entries.
chunked = []
for start in range(0, len(l), n):
  chunked.append(l[start:start + n])

# Print each row as a NumPy array, rounded to 3 decimals.
for symbols in chunked:
  row = np.asarray(symbols)
  print(np.around(row, 3))

[0.07  0.288 0.201 0.199 0.254]
[0.071 0.297 0.238 0.235 0.043]
[0.251 0.04  0.262 0.258 0.013]
[0.272 0.157 0.215 0.011 0.242]
[0.226 0.268 0.263 0.2   0.217]
[0.015 0.232 0.243 0.3   0.211]
[0.299 0.223 0.301 0.155 0.273]
[0.034 0.001 0.28  0.265 0.012]
[0.002 0.    0.003 0.    0.002]
[0.001 0.004 0.002 0.002 0.004]
[0.003 0.002 0.002 0.002 0.001]
[0.003 0.001 0.001 0.002 0.001]
[0.001 0.002 0.002 0.001 0.001]
[0.001 0.001 0.001 0.001 0.001]
[0.    0.001 0.    0.    0.   ]
[0.001 0.    0.001 0.001 0.   ]
[0. 0. 0. 0. 0.]
[0.001 0.    0.    0.001 0.002]
[0.001 0.001 0.001 0.001 0.   ]
[0.001 0.    0.001 0.001 0.001]
[0.    0.    0.001 0.001 0.   ]
[0.    0.001 0.001 0.    0.   ]
[0.    0.    0.001 0.001 0.   ]
[0.001 0.001 0.    0.001 0.001]
[0.    0.001 0.001 0.    0.   ]
[0.001 0.002 0.001 0.001 0.   ]
[0.001 0.001 0.    0.001 0.   ]
[0.001 0.    0.    0.    0.   ]
[0.001 0.    0.    0.    0.   ]
[0.001 0.    0.    0.001 0.001]
[0.    0.    0.001 0.    0.   ]
[0. 0. 0. 0. 0.]
[0.   