The goal of this experiment is to try and teach non-linear functions (trigo functions, power, combinations and multiplication of these) to a DL model and see how well it fares.

For the Fae bot I basically want the model to learn how to handle projections and rotation matrices. It needs to be comfortable with things like 1/(sqrt(cos(a)*sin(-b)))

In [15]:
import warnings
warnings.filterwarnings('ignore')


In [10]:
import numpy as np
np.set_printoptions(suppress=True)
import tensorflow as tf

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.optimizers import Adam, SGD
from keras import backend as K
from keras.layers import LeakyReLU

In [11]:
from keras.callbacks import Callback

class NBatchLogger(Callback):
    """
    A Logger that log average performance per `display` steps.
    """
    def __init__(self, display):
        self.step = 0
        self.display = display
        self.metric_cache = {}

    def on_batch_end(self, batch, logs={}):
        self.step += 1
        for k in self.params['metrics']:
            if k in logs:
                self.metric_cache[k] = self.metric_cache.get(k, 0) + logs[k]
        if self.step % self.display == 0:
            metrics_log = ''
            for (k, v) in self.metric_cache.items():
                val = v / self.display
                if abs(val) > 1e-3:
                    metrics_log += ' - %s: %.4f' % (k, val)
                else:
                    metrics_log += ' - %s: %.4e' % (k, val)
            print('step: {}/{} ... {}'.format(self.step,
                                          self.params['epochs'],
                                          metrics_log))
            self.metric_cache.clear()

In [8]:
from math import *
# Function to be taught

def secret_func(ins):
    ca=100.0
    cb=-87.9
    #ca=0.0001
    #cb=-0.000879
    a,b,c,d,e,f=ins
    return e*cos(sin(f))*1.0/(1.0+(cos(ca*a*d)*sin(cb*b)**2)**c)

In [8]:
secret_func([-1,-1,-1,0.5,.5,.5])

0.001776538427897217

In [20]:
# Generate training dataset

from random import random

def generate_training(sz1, sz2):
    inputs = list()
    outputs = list()
    for k in range (sz2):
        inputs.append([random() for i in range(sz1)])
        outputs.append(secret_func(inputs[-1]))
    return inputs, outputs

In [10]:
inputs, outputs = generate_training(6,1000) 

In [11]:
outputs[:10]

[0.08890037936393602,
 0.35112704711024806,
 0.4710260300137159,
 (0.2685480957558578-0.03160998337198243j),
 (0.11420017574079881-0.006117668438792973j),
 (0.3615356936675377-0.04186293083985107j),
 (0.5026345875530822-0.21350488878318405j),
 (0.36801633283352836-0.3988537848368489j),
 0.21479619753117357,
 0.08261121127216428]

In [12]:

inputs = np.array(inputs)
outputs = np.array(outputs)

In [75]:
model = Sequential()
model.add(Dense(160, activation='tanh', input_shape=(6,), use_bias=True))
model.add(Dense(160, activation='tanh', use_bias=True))
model.add(Dense(1, activation='linear'))

model.summary()
#opt = SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True)
opt = Adam(lr=0.004, amsgrad=True)
model.compile(loss='mean_squared_error',
  optimizer=opt,
  metrics=['accuracy'])

model.fit(inputs, outputs,
    batch_size=10000,
    epochs=4000,
    verbose=0,
    shuffle=True,
    validation_split=0.0,
    callbacks=[NBatchLogger(display=100)])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_34 (Dense)             (None, 160)               1120      
_________________________________________________________________
dense_35 (Dense)             (None, 160)               25760     
_________________________________________________________________
dense_36 (Dense)             (None, 1)                 161       
Total params: 27,041
Trainable params: 27,041
Non-trainable params: 0
_________________________________________________________________
step: 100/4000 ...  - loss: 0.0964 - acc: 0.0000e+00
step: 200/4000 ...  - loss: 0.0828 - acc: 0.0000e+00
step: 300/4000 ...  - loss: 0.0826 - acc: 0.0000e+00
step: 400/4000 ...  - loss: 0.0819 - acc: 0.0000e+00
step: 500/4000 ...  - loss: 0.0811 - acc: 0.0000e+00
step: 600/4000 ...  - loss: 0.0793 - acc: 0.0000e+00
step: 700/4000 ...  - loss: 0.0781 - acc: 0.0000e+00
step: 800/4000 ...  - loss: 0.0

<keras.callbacks.History at 0x7f21e28b7f60>

In [13]:
ind=535
print(model.predict(np.array([inputs[ind]]))[0])
print(outputs[ind])


NameError: name 'model' is not defined

### Conclusion 1

The problem is not in the functions used as I first suspected. It seems to be from teh huge size of some constants (`ca` and `cb` in the `secret_func`). If I put them in the 0..1 range it works fine, but getting closer to 100 makes thr training much harder, or even impossible.

Same if the constants are too small (e.g. <0.0001)

### New problem

Let's isolate that problem and find a model that solves it well

In [26]:
from math import *
# Function to be taught

def secret_func(ins):
    mult = 100.0
    ca=1.1 * mult
    cb=-0.879 * mult
    a,b=ins
    return cos(ca*a+cb*b)

In [27]:
inputs, outputs = generate_training(2,1000) 
for i in range(len(inputs)):
    inputs[i].append(1.0)
    inputs[i].append(10)
    inputs[i].append(100)
inputs = np.array(inputs)
outputs = np.array(outputs)

In [28]:
input1 = keras.layers.Input(shape=(5,))
dense1_1 = keras.layers.Dense(160, use_bias=True, activation='relu')(input1)
dense1_2 = keras.layers.Dense(160, use_bias=True, activation='relu')(input1)
mult1 = keras.layers.Multiply()([dense1_1, dense1_2])
bn1 = keras.layers.BatchNormalization()(mult1)
    
dense2 = keras.layers.Dense(160, use_bias=True, activation='relu')(input1)
mult2 = keras.layers.Multiply()([dense2, mult1])
bn2 = keras.layers.BatchNormalization()(mult2)

dense3 = keras.layers.Dense(160, use_bias=True, activation='relu')(input1)
mult3 = keras.layers.Multiply()([dense3, mult2])
bn3 = keras.layers.BatchNormalization()(mult3)

dense4 = keras.layers.Dense(160, use_bias=True, activation='relu')(input1)
mult4 = keras.layers.Multiply()([dense4, mult3])
bn4 = keras.layers.BatchNormalization()(mult4)

merge_1 = keras.layers.Concatenate()([bn1, bn2, bn3, bn4, input1])

dense2 = keras.layers.Dense(160, use_bias=True, activation='relu')(merge_1)
bn5 = keras.layers.BatchNormalization()(dense2)
dense3 = keras.layers.Dense(160, use_bias=True, activation='relu')(bn5)
dense4 = keras.layers.Dense(160, use_bias=True, activation='relu')(dense3)
dense5 = keras.layers.Dense(160, use_bias=True, activation='relu')(dense4)
dense6 = keras.layers.Dense(160, use_bias=True, activation='relu')(dense5)

out = keras.layers.Dense(1, activation='linear')(dense5)
model = keras.models.Model(inputs=[input1], outputs=out)



#model = Sequential()
#model.add(Dense(160, activation='tanh', input_shape=(6,), use_bias=True))
#model.add(Dense(160, activation='tanh', use_bias=True))
#model.add(Dense(1, activation='linear'))

model.summary()
#opt = SGD(lr=0.08, decay=1e-6, momentum=0.9, nesterov=True)
opt = Adam(lr=0.004, amsgrad=True)
model.compile(loss='mean_squared_error',
  optimizer=opt,
  metrics=['accuracy'])


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 5)            0                                            
__________________________________________________________________________________________________
dense_34 (Dense)                (None, 160)          960         input_4[0][0]                    
__________________________________________________________________________________________________
dense_35 (Dense)                (None, 160)          960         input_4[0][0]                    
__________________________________________________________________________________________________
multiply_13 (Multiply)          (None, 160)          0           dense_34[0][0]                   
                                                                 dense_35[0][0]                   
__________

In [29]:
model.fit(inputs, outputs,
    batch_size=1000,
    epochs=10000,
    verbose=0,
    shuffle=True,
    validation_split=0.0,
    callbacks=[NBatchLogger(display=100)])

step: 100/10000 ...  - loss: 0.5203 - acc: 0.0000e+00
step: 200/10000 ...  - loss: 0.4665 - acc: 0.0000e+00
step: 300/10000 ...  - loss: 0.4545 - acc: 0.0000e+00
step: 400/10000 ...  - loss: 0.4219 - acc: 0.0000e+00
step: 500/10000 ...  - loss: 0.3970 - acc: 0.0000e+00
step: 600/10000 ...  - loss: 0.3810 - acc: 0.0000e+00
step: 700/10000 ...  - loss: 0.3540 - acc: 0.0000e+00
step: 800/10000 ...  - loss: 0.3532 - acc: 0.0000e+00
step: 900/10000 ...  - loss: 0.3287 - acc: 0.0000e+00
step: 1000/10000 ...  - loss: 0.3089 - acc: 0.0000e+00
step: 1100/10000 ...  - loss: 0.2870 - acc: 0.0000e+00
step: 1200/10000 ...  - loss: 0.2805 - acc: 0.0000e+00
step: 1300/10000 ...  - loss: 0.2679 - acc: 0.0000e+00
step: 1400/10000 ...  - loss: 0.2458 - acc: 0.0000e+00
step: 1500/10000 ...  - loss: 0.2273 - acc: 0.0000e+00
step: 1600/10000 ...  - loss: 0.2276 - acc: 0.0000e+00
step: 1700/10000 ...  - loss: 0.2177 - acc: 0.0000e+00
step: 1800/10000 ...  - loss: 0.2034 - acc: 0.0000e+00
step: 1900/10000 ..

<keras.callbacks.History at 0x7f8cd471bc50>

In [188]:
ins=inputs[0]
ins[0] = random.random()
ins[1] = random.random()
print(model.predict(np.array([ins]))[0])
print(secret_func(ins[:2]))

[0.3501999]
-0.3890039461518654


In [204]:
import random

ind=random.randint(0,1000)
ins=inputs[ind]
print(model.predict(np.array([ins]))[0])
print(secret_func(ins[:2]))

[-0.130393]
-0.6717672509600036
