### Trains a neural network to predict a 512-dimensional vector

In [1]:
import pandas as pd, os, numpy as np
import plotly.express as px
pd.options.display.max_columns = 50
import swifter, datetime, pickle as pkl
import tensorflow_hub as hub
from tqdm.notebook import tqdm

In [2]:
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import regularizers

In [3]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code -- only load files this project produced.
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# cemb_map supplies the model inputs and ctarget_map the targets, looked up by the same keys.
cemb_map = _load_pickle('./data/cemb_map.pkl')
ctarget_map = _load_pickle('./data/ctarget_map.pkl')

In [4]:
# Number of entries in the input-embedding map.
len(cemb_map)

89033

In [5]:
# Number of entries in the target map (may exceed the input map; only keys of cemb_map are used below).
len(ctarget_map)

97992

In [6]:
# Freeze the iteration order of the input keys so X and Y are built in the same order.
keys = list(cemb_map)

In [7]:
# Sanity check: list every input key that has no target. No output means full coverage.
missing_keys = [k for k in keys if k not in ctarget_map]
for k in missing_keys:
    print(k)

In [8]:
# Walk the keys once (single progress bar), pairing each input with its target,
# then split the pairs into two parallel lists.
pairs = [(cemb_map[key], ctarget_map[key]) for key in tqdm(keys, total=len(keys))]

X = [features for features, _ in pairs]
Y = [target for _, target in pairs]

  0%|          | 0/89033 [00:00<?, ?it/s]

In [9]:
# Stack the per-key vectors into 2-D arrays (one row per key).
X, Y = np.array(X), np.array(Y)

In [10]:
# (n_samples, n_input_features)
X.shape

(89033, 512)

In [11]:
# (n_samples, n_target_dims) -- the row count must match X.
Y.shape

(89033, 512)

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [14]:
from sklearn.preprocessing import StandardScaler

In [15]:
# Fit the scaling statistics on the training split only, so nothing from the test split leaks in.
scaler = StandardScaler().fit(X_train)
scaler

StandardScaler()

In [16]:
# Standardise both splits with the training-set statistics.
# NOTE: this overwrites X_train / X_test, so re-running the cell without first
# re-running the split would apply the scaling a second time.
X_train, X_test = (scaler.transform(split) for split in (X_train, X_test))

In [17]:
# Persist the fitted scaler so the same standardisation can be reapplied later.
with open('./data/scaler.pkl', mode='wb') as scaler_file:
    pkl.dump(scaler, scaler_file, protocol=3)

In [18]:
# Training batch size. With 1, every sample is its own optimizer step;
# the validation dataset uses a separate fixed batch size.
BATCH_SIZE = 1

In [19]:
# Loss histories; nothing in the visible cells appends to them yet.
fold_train_loss = []
fold_val_loss = []

# tf.data transformations return a NEW dataset, so every step must be chained or
# re-assigned; calling `.prefetch()` without using its result has no effect.
# Shuffling is done here, before batching, because Keras ignores `fit(shuffle=...)`
# when the input is a tf.data.Dataset. A buffer as large as the training set gives
# a full reshuffle every epoch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=len(X_train), seed=42)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation data is only evaluated, so it needs no shuffling.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

# Display the element spec as a quick shape/dtype sanity check.
val_dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 512), dtype=tf.float32, name=None), TensorSpec(shape=(None, 512), dtype=tf.float32, name=None))>

In [20]:
def build_and_compile_model(input_dim=None, output_dim=512,
                            hidden_units=(25, 12, 12, 12), learning_rate=0.01):
    """Build and compile a fully connected regression network.

    The network is a stack of ReLU ``Dense`` layers followed by a linear
    ``Dense`` output layer, compiled with mean-squared-error loss and plain SGD.

    Args:
        input_dim: Number of input features. When ``None`` (the default) it is
            taken from the notebook-level ``X_train.shape[1]``, which keeps
            ``build_and_compile_model()`` working exactly as before.
        output_dim: Size of the predicted vector.
        hidden_units: Width of each hidden layer, in order.
        learning_rate: Learning rate passed to the SGD optimizer.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    if input_dim is None:
        # Backward-compatible default: infer the feature count from the training data.
        input_dim = X_train.shape[1]

    model = keras.Sequential(
        [layers.InputLayer(input_shape=(input_dim,))]
        + [layers.Dense(units, activation='relu') for units in hidden_units]
        + [layers.Dense(output_dim, activation='linear')]
    )

    model.compile(loss='mean_squared_error',
                  optimizer=keras.optimizers.SGD(learning_rate=learning_rate))
    return model

In [21]:
# Reset Keras' global state before building, so re-running this cell starts from a clean slate.
keras.backend.clear_session()
dnn = build_and_compile_model()
# Print the per-layer output shapes and parameter counts.
dnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 25)                12825     
                                                                 
 dense_1 (Dense)             (None, 12)                312       
                                                                 
 dense_2 (Dense)             (None, 12)                156       
                                                                 
 dense_3 (Dense)             (None, 12)                156       
                                                                 
 dense_4 (Dense)             (None, 512)               6656      
                                                                 
Total params: 20,105
Trainable params: 20,105
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Train for two epochs, evaluating on the held-out split after each one.
# `shuffle` and `workers` are deliberately not passed: for tf.data.Dataset input
# Keras ignores `shuffle` (any shuffling has to happen in the dataset pipeline itself)
# and `workers` only applies to generator / Sequence input.
history = dnn.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=2,
    verbose=1,
)

Epoch 1/2
Epoch 2/2


In [23]:
# Save the trained model under ./models/model1.
dnn.save('./models/model1')

INFO:tensorflow:Assets written to: ./models/model1\assets


In [24]:
# Row of the test split that the cells below inspect by hand.
idx = 3

In [25]:
# The model expects a batch dimension, so wrap the single test row as a batch of one.
single_input = np.expand_dims(X_test[idx], axis=0)
pred = dnn.predict(single_input)

In [26]:
# Keras' CosineSimilarity is a loss: it returns the NEGATED cosine similarity,
# so values close to -1 mean the two vectors point in nearly the same direction.
L = tf.keras.losses.CosineSimilarity(axis=-1)

In [27]:
# Compare the true target (as a batch of one) with the prediction for the same row.
L(np.expand_dims(y_test[idx], axis=0), pred)

<tf.Tensor: shape=(), dtype=float32, numpy=-0.87329555>

In [28]:
# First ten components of the true target vector, for an eyeball comparison.
y_test[idx, :10]

array([-0.03160512,  0.07280831, -0.06266174, -0.05880021,  0.04643148,
        0.05251106,  0.06651811,  0.07934297,  0.08166718, -0.03080929],
      dtype=float32)

In [29]:
# First ten components of the predicted vector (row 0 of the one-row batch).
pred[0, :10]

array([-0.04759209,  0.04022962, -0.02809096, -0.00954398,  0.02968043,
        0.02860958,  0.03878016,  0.0705606 ,  0.06684647, -0.01625933],
      dtype=float32)