In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

In [2]:
# Load the cleaned car-listing data from a path relative to the notebook.
# NOTE(review): the preview below shows an 'Unnamed: 0' column that mirrors the
# row index -- presumably the CSV was written with its index; consider
# `index_col=0` if that column is not needed. TODO confirm.
df = pd.read_csv('../datasets/autoria/autoria_clean_data.csv')

In [3]:
# Preview the first rows to check the parsed columns and values.
df.head()

Unnamed: 0,id,brand,price_USD,mileage_kkm,fuel_type,transmission_type,pub_date,year_made,model,engine_size
0,0,Mercedes-Benz,8999,159,gas/petrol,automatic,2021-05-21,2007,E 280 4MATIC AT CLASSIC,3.0
1,1,Mercedes-Benz,79900,103,diesel,automatic,2021-05-15,2015,G 350 Designo,3.0
2,2,BMW,15999,145,petrol,automatic,2021-04-18,2010,550,4.4
3,3,MINI,10200,111,petrol,automatic,2021-05-21,2014,Countryman S,1.6
4,4,Nissan,13900,97,petrol,automatic,2021-05-19,2010,X-Trail,2.0


In [4]:
# Work on a copy so the frame loaded from disk is left untouched.
orig_df = df.copy()

# Labels: the listing price for the first 400 rows. `pop` also removes the
# 'price_USD' column from `orig_df`, so the frame no longer holds the target.
Y_labels = orig_df.pop('price_USD').iloc[:400]

# Features: three columns, restricted to the same first 400 rows.
X_features = orig_df.loc[:, ['mileage_kkm', 'year_made', 'engine_size']].iloc[:400]

In [5]:
# Create a symbolic input: a placeholder tensor with no concrete value yet.
# NOTE: the name `input` shadows Python's builtin `input()`; it is kept because
# the next cell refers to the tensor by this name.
input = tf.keras.Input(shape=(), dtype=tf.float32)

# Do a calculation using it
result = 2*input + 1

# The result is symbolic too, so displaying it shows a KerasTensor description
# rather than a number.
result

<KerasTensor: shape=(None,) dtype=float32 (created by layer 'tf.__operators__.add')>

In [6]:
# Wrap the symbolic expression in a model so it can be called on real values.
calc = tf.keras.Model(inputs=input, outputs=result)

In [7]:
# Evaluate the toy model (2*x + 1) on two concrete scalars.
for sample in (1, 2):
  print(calc(sample).numpy())

3.0
5.0


In [8]:
# Build one symbolic Keras input per feature column, keyed by column name.
inputs = {}

for name, column in X_features.items():
  # Object-dtype columns are fed as strings; every other column as float32.
  input_dtype = tf.string if column.dtype == object else tf.float32
  inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=input_dtype)

inputs

{'mileage_kkm': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'mileage_kkm')>,
 'year_made': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'year_made')>,
 'engine_size': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'engine_size')>}

In [9]:
# Keep only the float32 inputs; string inputs are encoded in a later cell.
numeric_inputs = {
    feature_name: feature_input
    for feature_name, feature_input in inputs.items()
    if feature_input.dtype == tf.float32
}

# Join the numeric inputs into a single tensor, one column per feature.
x = layers.Concatenate()(list(numeric_inputs.values()))

# Fit the normalization layer on the numeric columns, then apply it to the
# symbolic tensor.
# NOTE(review): `adapt` sees every row of `orig_df`, while the features used
# for training are only the first 400 rows -- confirm this is intended.
norm = preprocessing.Normalization()
norm.adapt(np.array(orig_df[list(numeric_inputs)]))
all_numeric_inputs = norm(x)

all_numeric_inputs

<KerasTensor: shape=(None, 3) dtype=float32 (created by layer 'normalization')>

In [10]:
# Collect the preprocessed tensors, starting with the normalized numeric block.
preprocessed_inputs = [all_numeric_inputs]

In [11]:
# One-hot encode every string-typed input and append it to the list of
# preprocessed tensors. Float inputs are skipped: they were already handled by
# the normalization step.
# The loop variable is deliberately NOT called `input`: a top-level loop target
# stays bound after the loop, which would overwrite the symbolic `input` tensor
# defined earlier in the notebook (and shadow the builtin).
for name, feature_input in inputs.items():
  if feature_input.dtype == tf.float32:
    continue

  # The vocabulary is the set of distinct values observed for this column.
  lookup = preprocessing.StringLookup(vocabulary=np.unique(X_features[name]))
  one_hot = preprocessing.CategoryEncoding(max_tokens=lookup.vocab_size())

  x = lookup(feature_input)
  x = one_hot(x)
  preprocessed_inputs.append(x)

In [12]:
# Wrap all preprocessing steps in one model: raw feature inputs in,
# preprocessed tensors out.
# NOTE: this rebinds `preprocessing`, which until now referred to the imported
# Keras preprocessing module. Earlier cells that call `preprocessing.<Layer>`
# cannot be re-run after this one without re-running the imports first.
preprocessing = tf.keras.Model(inputs, preprocessed_inputs)

# Draw the model graph left-to-right (requires pydot and graphviz).
tf.keras.utils.plot_model(
    model=preprocessing,
    rankdir="LR",
    dpi=72,
    show_shapes=True,
)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [13]:
# Convert each feature column to a NumPy array, keyed by column name so the
# keys line up with the names of the Keras inputs.
model_features_dict = {
    column_name: np.array(column_values)
    for column_name, column_values in X_features.items()
}

In [14]:
# Smoke-test the preprocessing model on the first row of every feature.
# NOTE(review): in the recorded output the third value is 3e7, which is not a
# plausible standardized value -- check the engine_size statistics learned by
# the normalization layer (e.g. missing values in that column). TODO confirm.
features_dict = {
    column_name: column_values[:1]
    for column_name, column_values in model_features_dict.items()
}
preprocessing(features_dict)

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[-2.2400647e-01, -1.7184177e-01,  3.0000000e+07]], dtype=float32)>

In [16]:
def prediction_model(preprocessing_head, inputs):
  """Build and compile a regression model on top of a preprocessing model.

  Args:
    preprocessing_head: Callable Keras model applied to `inputs` to produce
      the preprocessed feature tensor(s) fed to the dense layers.
    inputs: Symbolic Keras inputs accepted by the returned model.

  Returns:
    A compiled `tf.keras.Model` mapping `inputs` to one output value per row.
  """
  # No activations are set, so the two Dense layers compose to a linear map.
  body = tf.keras.Sequential([
    layers.Dense(64),
    layers.Dense(1)
  ])

  preprocessed_inputs = preprocessing_head(inputs)
  result = body(preprocessed_inputs)
  model = tf.keras.Model(inputs, result)

  # The label is a continuous price, so this is a regression problem: use a
  # squared-error loss instead of binary cross-entropy (which expects 0/1
  # labels), and report mean absolute error in the label's own units.
  model.compile(loss=tf.losses.MeanSquaredError(),
                optimizer=tf.optimizers.Adam(),
                metrics=[tf.metrics.MeanAbsoluteError()])
  return model

model = prediction_model(preprocessing, inputs)

In [17]:
# Train for 10 epochs on the feature dict (keys match the named model inputs)
# with the price column as the target.
model.fit(x=model_features_dict, y=Y_labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f73f84d7a90>