In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D,Conv1D, MaxPooling1D,Reshape, LSTM, Dropout, TimeDistributed, Input
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import RMSprop, SGD
import numpy as np
import time

In [2]:
from fxpmath import Fxp
import numpy as np

class Converter():
    '''
    Converts numbers between Python/NumPy values and the raw integer bit pattern of a
    fixed-point (ap_fixed style) representation.

    The class keeps no state; both methods take the word length and the number of
    fractional bits as arguments (16 and 13 by default).
    '''

    def encode(self, input_data, signed=True, total_bits=16, fractional_bits=13):
        '''
        Converts input data from float/int python data types to ap_fixed with total_bits and
        fractional_bits and returns its uint32 equivalent.

        :param input_data: can be both a single int/float number or a numpy array
        :param signed: Boolean, if the input data is signed or not
        :param total_bits: number of total bits used to represent the input data
        :param fractional_bits: number of fractional bits used to represent the input data. Integer bits = total bits - fractional bits
        :return: input data converted to uint32 format. 0.5 can be represented with 4 bits as 0.100. This is converted into 0100 (fractional point removed) and then converted to int.
                 0.5 as input is converted to 4 as uint32.
        '''
        fixed_point_representation = Fxp(input_data, signed=signed, n_word=total_bits, n_frac=fractional_bits)
        # uraw() is the unsigned raw integer of the fixed-point word; store it as uint32.
        uint_converted = np.uint32(fixed_point_representation.uraw())
        return uint_converted

    def decode(self, input_data, total_bits=16, fractional_bits=13):
        '''
        Converts input data from its raw unsigned-integer format to float with total_bits and
        fractional_bits resolution. The raw word is interpreted as a signed two's-complement
        value of width total_bits.

        :param input_data: a single raw integer or a numpy array of raw integers (any integer dtype;
                           values are converted to uint32 by value)
        :param total_bits: number of total bits used to represent the input data
        :param fractional_bits: number of fractional bits used to represent the input data. Integer bits = total bits - fractional bits
        :return: numpy float array with the decoded values; a non-array input is wrapped first,
                 so a scalar yields an array of shape (1,)
        '''

        if type(input_data) is not np.ndarray:
            input_data = np.array([input_data])

        # Convert by VALUE to uint32. Re-interpreting the buffer with .view('u4') is only
        # correct for 4-byte elements: an int64 array (e.g. a wrapped Python int) would
        # double its element count and a uint16 array would halve it.
        raw = input_data.astype(np.uint32)

        #Function taken from here: https://discuss.pynq.io/t/how-to-use-ap-fixed-data-type-to-communicate-with-the-ip-made-by-the-vivado-hls/679/5
        # Words with the top bit (bit total_bits-1) set are negative: fill every bit above
        # the word with ones and reinterpret the 32-bit pattern as a signed integer.
        condition = 1 << (total_bits - 1)
        mask = np.uint32((~((1 << total_bits) - 1)) & 0xFFFFFFFF)
        sign_extended = (raw | mask).view('i4')
        # Dividing by 2**fractional_bits moves the binary point back into place.
        return np.where(raw < condition, raw, sign_extended) / (1 << fractional_bits)





In [3]:
# Fixed-point converter used below to decode stored weights and encode trained ones.
# Converter defines no instance state, so a single instance can be shared by every cell.
converter=Converter()

In [4]:
# Training inputs are read as stored; the labels are one-hot encoded right after loading.
x_train = np.load("x_train_plain.npy")
y_train = tf.keras.utils.to_categorical(np.load("y_train.npy"))


In [5]:
# Sanity check: shape of the inputs, then of the one-hot labels, one per line.
print(x_train.shape, y_train.shape, sep="\n")

(10000, 784)
(10000, 10)


In [None]:
# x_train=x_train.reshape((-1,784,1))/255.0
# x_test=x_test.reshape((-1,784,1))/255.0
# y_test = tf.keras.utils.to_categorical(y_test)

# x_train=x_train[idx]
# y_train=y_train[idx]

In [None]:
# x_train=x_train.reshape((-1,784))

In [6]:
# Two-layer perceptron: 784 inputs -> 64 ReLU units -> 10 linear outputs.
model = Sequential()

model.add(Input(shape=(784,)))
model.add(Dense(units=64, activation='relu'))
# No activation on the output layer, so the model emits raw scores (logits).
model.add(Dense(units=10))

# The string 'categorical_crossentropy' uses from_logits=False, i.e. it expects
# probabilities. Because the output layer is linear, the loss must be told it
# receives logits so that it applies the softmax itself.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
    optimizer=SGD(learning_rate=0.01),
)
model.summary()

# Separate optimizer for the manual GradientTape training step further down;
# model.fit uses the optimizer passed to compile() above.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)




Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                50240     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                650       
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________


2022-05-30 12:25:31.427942: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-05-30 12:25:31.428179: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-30 12:25:31.429082: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [None]:
# Read each stored parameter array and decode it from raw fixed-point words to floats
# (decode is called with its defaults: 16 total bits, 13 fractional bits).
w1 = converter.decode(np.load("w1.npy"))
w2 = converter.decode(np.load("w2.npy"))
b1 = converter.decode(np.load("b1.npy"))
b2 = converter.decode(np.load("b2.npy"))

# [kernel, bias] pairs, later passed to set_weights for model.layers[0] and model.layers[1].
weights1 = [w1, b1]
weights2 = [w2, b2]

In [None]:
# Rebind w1 to a (64, 784) view and show its first row.
# NOTE(review): reshape re-chunks the flat data, it does not transpose. If w1 is stored in
# the Keras kernel layout (784, 64), row 0 here is NOT the weight vector of unit 0 - confirm
# whether w1.T was intended.
w1 = np.reshape(w1, (64, 784))
print(w1[0])

In [None]:
# Install the decoded [kernel, bias] pairs into the two Dense layers.
first_dense, second_dense = model.layers[0], model.layers[1]
first_dense.set_weights(weights1)
second_dense.set_weights(weights2)

In [None]:
# Forward pass on the first training sample only (slice keeps the batch dimension).
first_sample = x_train[:1]
out = model.predict(first_sample)
print(out)

In [19]:
# One manual SGD step on a single sample, with a timestamp after every stage:
#   t0 -> t1 forward pass, t1 -> t2 loss, t2 -> t3 gradients, t3 -> t4 weight update.
x_batch = x_train[:1]
# The target must be sliced exactly like the input. Comparing the single prediction with the
# whole y_train would silently broadcast it against every label in the dataset.
y_batch = y_train[:1]

with tf.GradientTape() as tape:
    t0 = time.time()

    y_pred = model(x_batch)
    t1 = time.time()

    loss = tf.keras.losses.MeanSquaredError()(y_batch, y_pred)
    t2 = time.time()

grads = tape.gradient(loss, model.trainable_variables)
t3 = time.time()

optimizer.apply_gradients(zip(grads, model.trainable_variables))
t4 = time.time()


In [27]:
# Wall-clock seconds spent in optimizer.apply_gradients: t3 is taken just before that call
# and t4 just after it in the manual training-step cell.
print(t4-t3)

0.0012159347534179688


In [None]:
# Train with the loss/optimizer given to compile(): 10 passes over the data, 32 samples per batch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
)

In [None]:
# Pull the trained parameters out as NumPy arrays; each Dense layer yields [kernel, bias].
w1, b1 = model.layers[0].get_weights()
w2, b2 = model.layers[1].get_weights()

print(w1.shape)

In [None]:
# Re-creates the converter so the export cells below can run without the earlier cell.
# Converter defines no instance state, so this is equivalent to reusing an existing instance.
converter=Converter()


In [None]:
# Encode every trained parameter array with the converter defaults
# (signed, 16 total bits, 13 fractional bits), in the order w1, w2, b1, b2.
w1_encoded, w2_encoded, b1_encoded, b2_encoded = (
    converter.encode(params) for params in (w1, w2, b1, b2)
)

In [None]:
# Shape of the encoded first-layer kernel, for comparison with the w1.shape printed earlier.
print(w1_encoded.shape)

In [None]:
# Dump the float first-layer kernel as space-separated text, unit by unit: all input weights
# of output unit 0, then unit 1, and so on (i.e. the transposed kernel, flattened).
# Assumes w1 has the (784, 64) kernel shape of the first Dense layer; the dimensions are
# taken from the array instead of being hard-coded.
# The context manager guarantees the file is closed even if a write fails.
with open("w1.txt", "w") as f:
    f.writelines(f"{value} " for value in w1.T.ravel())
    

In [None]:
# Dump both float bias vectors as space-separated text, one file per layer.
# Each file is written inside a context manager so it is closed even if a write fails;
# the whole vector is written (64 values for b1 and 10 for b2 with the model defined above).
for bias_path, bias_values in (("b1.txt", b1), ("b2.txt", b2)):
    with open(bias_path, "w") as f:
        f.writelines(f"{value} " for value in bias_values)

In [None]:
# Dump the float second-layer kernel as space-separated text, unit by unit: all 64 input
# weights of output unit 0, then unit 1, and so on (i.e. the transposed kernel, flattened).
# Assumes w2 has the (64, 10) kernel shape of the second Dense layer.
# The context manager guarantees the file is closed even if a write fails.
with open("w2.txt", "w") as f:
    f.writelines(f"{value} " for value in w2.T.ravel())

In [None]:
# Persist the encoded parameters. np.save() has no `dtype` parameter (passing one raises
# TypeError), so the arrays are stored with the dtype they already have: the uint32 words
# returned by Converter.encode, which is also what the load cell hands to Converter.decode.
# If 16-bit storage is required, cast explicitly, e.g. w1_encoded.astype(np.uint16), and make
# sure the reader converts by value rather than reinterpreting the buffer.
np.save('w1.npy', w1_encoded)
np.save('w2.npy', w2_encoded)
np.save('b1.npy', b1_encoded)
np.save('b2.npy', b2_encoded)