In [2]:
import os
import numpy as np
import pandas as pd

import sklearn
from sklearn.metrics import f1_score, r2_score, mean_squared_error

import tensorflow as tf
import tensorflow_addons as tfa

from metrics import score_regression
from preprocess import get_dataset, preprocess_all, dataset_to_X_y, SUBSET_FEATURES

In [3]:
# Load the dataset shuffled with a fixed seed and preprocess it in one step.
dataset = preprocess_all(get_dataset(shuffle=True, seed=42))

# Unpack the train / validation features and targets.
# NOTE(review): "all" presumably selects the full feature set -- confirm in preprocess.py.
(
    X_train,
    y_train,
    X_validation,
    y_validation,
) = dataset_to_X_y(dataset, "all")

In [4]:
def to_tf_dataset(X, y, batch_size, shuffle=False, seed=None):
    """Wrap features and targets in a batched, prefetching ``tf.data.Dataset``.

    Args:
        X: Feature container exposing ``astype`` (e.g. a NumPy array); it is
            converted to ``float32`` before being sliced along its first axis.
        y: Target container, converted to ``float32`` the same way.
        batch_size: Number of consecutive examples per batch.
        shuffle: When true, examples are reshuffled before batching at the
            start of every iteration over the dataset. Defaults to ``False``,
            which keeps the original fixed ordering.
        seed: Optional seed forwarded to the shuffle step.

    Returns:
        A dataset yielding ``(features, targets)`` batches.
    """
    features = X.astype(np.float32)
    targets = y.astype(np.float32)
    data = tf.data.Dataset.from_tensor_slices((features, targets))
    if shuffle:
        # A buffer as large as the data gives a full shuffle; the floor of 1
        # keeps an empty input from producing an invalid buffer size.
        data = data.shuffle(
            buffer_size=max(len(features), 1),
            seed=seed,
            reshuffle_each_iteration=True,
        )
    # AUTOTUNE is the named form of the former literal -1: tf.data picks the
    # prefetch depth at runtime.
    return data.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

batch_size = 512

# Build the training and validation pipelines with the same batch size.
train_dataset, validation_dataset = (
    to_tf_dataset(features, targets, batch_size)
    for features, targets in ((X_train, y_train), (X_validation, y_validation))
)


In [5]:
import tensorflow.keras.backend as K
class CustomLoss(tf.keras.losses.Loss):
    """Inverse mean F1 score over several "value exceeds threshold" tasks.

    For every threshold, targets and predictions are turned into indicators of
    ``value > threshold`` and an F1 score is computed from them. The loss is
    ``len(thresholds) / (sum of the F1 scores + epsilon)``: about 1 when every
    F1 is 1, and very large (``len(thresholds) / epsilon``) when every F1 is 0.

    Args:
        thresholds: Cut-off values to evaluate. Defaults to the previously
            hard-coded ``(500, 1400, 5000, 10000)``.
        temperature: ``None`` (default) thresholds predictions with a hard
            step, reproducing the original loss values. The hard step has a
            zero gradient with respect to ``y_pred``, so it cannot drive
            training. A positive number replaces the step on the predictions
            with ``sigmoid((y_pred - threshold) / temperature)``, which is
            differentiable; smaller values approximate the step more closely.
        **kwargs: Forwarded to ``tf.keras.losses.Loss`` (``reduction``, ``name``).

    Raises:
        ValueError: If ``thresholds`` is empty or ``temperature`` is not positive.
    """

    DEFAULT_THRESHOLDS = (500, 1400, 5000, 10000)

    def __init__(self, thresholds=DEFAULT_THRESHOLDS, temperature=None, **kwargs):
        super().__init__(**kwargs)
        thresholds = tuple(thresholds)
        if not thresholds:
            raise ValueError("thresholds must contain at least one value")
        if temperature is not None and temperature <= 0:
            raise ValueError("temperature must be a positive number or None")
        self.thresholds = thresholds
        self.temperature = temperature

    def thresh(self, tensor, th):
        """Return a tensor shaped like ``tensor`` holding 1 where it exceeds ``th``, else 0."""
        # Both branches are written in terms of `tensor` (as in the original)
        # so the result keeps the input's dtype and stays connected to it in
        # the graph; the gradient through `tensor - tensor` is zero everywhere.
        zeros = tensor - tensor
        return tf.where(tf.greater(tensor, th), zeros + 1, zeros)

    def soft_thresh(self, tensor, th):
        """Differentiable stand-in for ``thresh``: a sigmoid centred on ``th``."""
        return tf.sigmoid((tensor - th) / self.temperature)

    def f1_score(self, y_true, y_pred):
        """F1 score of indicator tensors (values in [0, 1]), epsilon-stabilised."""
        tp = tf.reduce_sum(y_true * y_pred)
        fp = tf.reduce_sum((1 - y_true) * y_pred)
        fn = tf.reduce_sum((1 - y_pred) * y_true)

        # Epsilon keeps every division finite when a count is zero.
        precision = tp / (tp + fp + K.epsilon())
        recall = tp / (tp + fn + K.epsilon())

        return 2 * precision * recall / (precision + recall + K.epsilon())

    def call(self, y_true, y_pred):
        """Compute the scalar loss for one batch of targets and predictions."""
        y_pred = tf.reshape(tf.convert_to_tensor(y_pred), (-1,))
        # Cast targets to the prediction dtype so the products in f1_score do
        # not fail on mixed dtypes (e.g. float64 targets, float32 outputs).
        y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), (-1,))

        score = 0.
        for th in self.thresholds:
            t_true = self.thresh(y_true, th)
            if self.temperature is None:
                t_pred = self.thresh(y_pred, th)
            else:
                t_pred = self.soft_thresh(y_pred, th)
            score += self.f1_score(t_true, t_pred)

        # The numerator follows the number of thresholds (4 by default, as before).
        return len(self.thresholds) / (score + K.epsilon())

    def get_config(self):
        """Return the constructor arguments so the loss can be re-created."""
        config = super().get_config()
        config.update({
            "thresholds": list(self.thresholds),
            "temperature": self.temperature,
        })
        return config


def build_and_compile_model(hidden_units=(64, 64), output_scale=250,
                            learning_rate=.001, loss='mae'):
    """Build and compile the fully connected regression network.

    The defaults reproduce the original hard-coded architecture: two
    ``Dense(64) -> ReLU -> BatchNormalization`` stages, a single-unit linear
    output, and a final multiplication of that output by 250.

    Args:
        hidden_units: Width of each hidden stage, one entry per stage.
        output_scale: Constant factor applied to the network output.
            NOTE(review): presumably chosen to match the magnitude of the
            target -- confirm against the data.
        learning_rate: Learning rate of the Adam optimizer.
        loss: Loss passed to ``model.compile`` (a Keras loss name or object).

    Returns:
        The compiled ``tf.keras.Sequential`` model. No input shape is given,
        so the weights are created when the model first sees data.
    """
    layers = []
    for units in hidden_units:
        layers.extend([
            tf.keras.layers.Dense(units),
            tf.keras.layers.ReLU(),
            tf.keras.layers.BatchNormalization(),
        ])
    layers.append(tf.keras.layers.Dense(1))
    layers.append(tf.keras.layers.Lambda(lambda x: output_scale * x))

    model = tf.keras.Sequential(layers)
    model.compile(loss=loss,
                  optimizer=tf.keras.optimizers.Adam(learning_rate))
    return model

# Instantiate the compiled network that the following cells train and evaluate.
model = build_and_compile_model()

In [6]:
# Keep the History object instead of echoing its repr, and evaluate the
# validation split after every epoch so its loss is logged next to the
# training loss.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f3ee0129700>

In [7]:
# Predict on both splits (validation first, then training).
y_val_hat = model.predict(X_validation.astype(np.float32))
y_train_hat = model.predict(X_train.astype(np.float32))

# Report the project metric first, then R2 under its own heading.
for heading, metric in ((None, score_regression), ("R2 score", r2_score)):
    if heading is not None:
        print(heading)
    print(f"Train score      : {metric(y_train, y_train_hat):.3f}")
    print(f"Validation score : {metric(y_validation, y_val_hat):.3f}")

Train score      : 0.501
Validation score : 0.417
R2 score
Train score      : 0.018
Validation score : -110577671567.003
