## Overview
We will tune and pilot with the 500 most frequent monosyllabic words from TASA.

In [1]:
# python: base (3.11.4)
# updated cuda drivers for GPU implementation

import numpy as np
import json
import pandas as pd
import time
import tensorflow as tf

from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam

from src.learner import *
from utilities import *

# data
kidwords = pd.read_csv('data/kidwords/kidwords.csv', header=None)[0].tolist()

top_500 = pd.read_csv('data/top_500.csv')
train_word_indices = np.array([i for i, e in enumerate(kidwords) if e in top_500])

XX = np.genfromtxt('data/kidwords/orth.csv', delimiter=",")
YY = np.genfromtxt('data/kidwords/phon.csv', delimiter=",")

non_zero_a = np.any(XX != 0, axis=0)
X = XX[:, non_zero_a]

non_zero_b = np.any(YY != 0, axis=0)
Y = YY[:, non_zero_b]


2024-08-09 15:43:15.797802: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-09 15:43:15.807121: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-09 15:43:15.818023: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-09 15:43:15.821123: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-09 15:43:15.829169: I tensorflow/core/platform/cpu_feature_guar

GPU check

In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


I0000 00:00:1723236199.136306   54382 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1723236199.157634   54382 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1723236199.157775   54382 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355


# Tune round #2
We didn't get to the limit in tuning in round 1, so we are trying another round.

In [3]:
seed = 323



with open('outputs/tune_top_500_v2.csv', 'w') as f:
    f.write("{},{},{},{},{},{},{},{},{},{},{}\n".format(
                                            "hidden_units",
                                            "learning_rate",
                                             "batch_size",
                                             "epochs",
                                             "loss_train",
                                             "accuracy_train",
                                             "mse_train",
                                             "loss_test",
                                             "accuracy_test",
                                             "mse_test",
                                             "time"))
    for learning_rate in [.001, .005, None]: 
        for batch_size in [6, 8, 10]:
            for epochs in [60, 80, 100]:
                for hidden in [20, 25, 30, 40]:
                    
                    print("Configuration currently training:", learning_rate, batch_size, epochs, hidden)

                    if learning_rate is not None:
                        optimizer = Adam(learning_rate=learning_rate)
                    if learning_rate is None:
                        optimzer = None

                    model = learner(X, Y, seed, hidden, optimizer=None)
                    
                    start_time = time.time()

                    model.fit(X[train_word_indices], Y[train_word_indices], epochs=epochs, batch_size=batch_size, verbose=True)

                    end_time = time.time()
                    runtime = end_time - start_time

                    loss_train, accuracy_train, mse_train = model.evaluate(X[train_word_indices], Y[train_word_indices], verbose=0) 
                    loss_test, accuracy_test, mse_test = model.evaluate(X[~train_word_indices], Y[~train_word_indices], verbose=0) 

                    f.write("{},{},{},{},{},{},{},{},{},{},{}\n".format(
                                                    hidden,
                                                    learning_rate,
                                                    batch_size,
                                                    epochs,
                                                    loss_train,
                                                    accuracy_train,
                                                    mse_train,
                                                    loss_test,
                                                    accuracy_test,
                                                    mse_test,
                                                    runtime))
f.close()

Configuration currently training: 0.001 6 60 20
Epoch 1/60


I0000 00:00:1723233517.254928   39425 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1723233517.255118   39425 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1723233517.255185   39425 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1723233517.351348   39425 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

[1m266/479[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 570us/step - binary_accuracy: 0.7114 - loss: 0.5828 - mse: 0.1993

I0000 00:00:1723233518.561872   40117 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m479/479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - binary_accuracy: 0.7589 - loss: 0.5180 - mse: 0.1729 
Epoch 2/60
[1m479/479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 550us/step - binary_accuracy: 0.8696 - loss: 0.3044 - mse: 0.0927
Epoch 3/60
[1m479/479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 567us/step - binary_accuracy: 0.9021 - loss: 0.2441 - mse: 0.0724
Epoch 4/60
[1m479/479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 533us/step - binary_accuracy: 0.9206 - loss: 0.2010 - mse: 0.0587
Epoch 5/60
[1m479/479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 539us/step - binary_accuracy: 0.9346 - loss: 0.1689 - mse: 0.0486
Epoch 6/60
[1m479/479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583us/step - binary_accuracy: 0.9456 - loss: 0.1450 - mse: 0.0411
Epoch 7/60
[1m479/479[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 556us/step - binary_accuracy: 0.9529 - loss: 0.1277 - mse: 0.0358
Epoch 8/60


2024-08-09 14:58:56.385259: W tensorflow/core/framework/op_kernel.cc:1840] OP_REQUIRES failed at xla_ops.cc:577 : INVALID_ARGUMENT: Matrix size-incompatible: In[0]: [1,260], In[1]: [101,20]

Stack trace for op definition: 
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
File "/home/mcb/miniconda3/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start
File "/home/mcb/miniconda3/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start
File "/home/mcb/miniconda3/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
File "/home/mcb/miniconda3/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
File "/home/mcb/miniconda3/lib/python3.11/asyncio/events.py", line 

InvalidArgumentError: Graph execution error:

Detected at node sequential_1/dense_1/MatMul defined at (most recent call last):
<stack traces unavailable>
Matrix size-incompatible: In[0]: [1,260], In[1]: [101,20]

Stack trace for op definition: 
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
File "/home/mcb/miniconda3/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start
File "/home/mcb/miniconda3/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start
File "/home/mcb/miniconda3/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
File "/home/mcb/miniconda3/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
File "/home/mcb/miniconda3/lib/python3.11/asyncio/events.py", line 80, in _run
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 542, in dispatch_queue
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 531, in process_one
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 359, in execute_request
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 775, in execute_request
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 446, in do_execute
File "/home/mcb/miniconda3/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell
File "/home/mcb/miniconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3051, in run_cell
File "/home/mcb/miniconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3106, in _run_cell
File "/home/mcb/miniconda3/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
File "/home/mcb/miniconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3311, in run_cell_async
File "/home/mcb/miniconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3493, in run_ast_nodes
File "/home/mcb/miniconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
File "/tmp/ipykernel_39425/1703203400.py", line 40, in <module>
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 429, in evaluate
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 165, in one_step_on_iterator
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 154, in one_step_on_data
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 82, in test_step
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/layers/layer.py", line 882, in __call__
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/ops/operation.py", line 46, in __call__
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/models/sequential.py", line 209, in call
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/models/functional.py", line 175, in call
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/ops/function.py", line 171, in _run_through_graph
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/models/functional.py", line 556, in call
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/layers/layer.py", line 882, in __call__
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/ops/operation.py", line 46, in __call__
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/layers/core/dense.py", line 144, in call
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/ops/numpy.py", line 3445, in matmul
File "/home/mcb/miniconda3/lib/python3.11/site-packages/keras/src/backend/tensorflow/numpy.py", line 477, in matmul

	 [[{{node sequential_1/dense_1/MatMul}}]]
	tf2xla conversion failed while converting __inference_one_step_on_data_96371[]. Run with TF_DUMP_GRAPH_PREFIX=/path/to/dump/dir and --vmodule=xla_compiler=2 to obtain a dump of the compiled functions.
	 [[StatefulPartitionedCall]] [Op:__inference_one_step_on_iterator_96398]