In [1]:
import pickle as pkl
from pathlib import Path

import bs4
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras as K
import tensorflow_datasets as tfds

tf.__version__

'2.3.2'

In [2]:
def retrieve_first_image(data_dir='/mnt/data/car_auction_data/'):
    """Yield one ``(image, price)`` pair per row that has at least one image.

    Every file matching ``backup*`` in ``data_dir`` is unpickled, in sorted
    filename order, and its rows are scanned. Rows whose ``'images'`` entry is
    empty are skipped. For the remaining rows only the first image is used.

    Args:
        data_dir: Directory containing the ``backup*`` pickle files. Defaults
            to the original hard-coded location so existing zero-argument
            callers (e.g. ``tf.data.Dataset.from_generator``) keep working.

    Yields:
        A tuple ``(im, price)`` where ``im`` is a float32 tensor of shape
        ``(128, 128, 3)`` and ``price`` is a float32 scalar tensor built from
        the row's ``'price_float'`` entry.
    """
    for file in sorted(Path(data_dir).glob("backup*")):
        # Progress trace: one line per pickle file as it is opened.
        print(str(file))
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # data_dir at files you produced yourself.
        with open(file, "rb") as fh:  # close the handle as soon as the load finishes
            data = pkl.load(fh)
        for row in data:
            if len(row['images']) > 0:
                # channels=3 forces RGB so grayscale JPEGs still produce the
                # (128, 128, 3) shape that downstream consumers declare.
                im = tf.image.decode_jpeg(row['images'][0], channels=3)
                im = tf.image.convert_image_dtype(im, tf.float32)
                im = tf.image.resize(im, [128, 128])
                # row['description'] is also available for later models that
                # want to process everything at once; it is not yielded here.
                yield im, tf.constant(row['price_float'], dtype=tf.float32)

            
# import itertools

# for image, label, desc in itertools.islice(retrieve_first_image(),3):
# #     image, label, desc = next(retrieve_first_image())
#     plt.figure()
#     plt.imshow(image.numpy())
#     plt.title(label.numpy())
#     print(desc.numpy(), image.shape, label.shape, desc.shape)

In [3]:
# Wrap the generator as a streaming dataset of (image, price) pairs:
# a float32 128x128x3 image tensor and a float32 scalar label.
images = tf.data.Dataset.from_generator(
    retrieve_first_image,
    output_types=(tf.float32, tf.float32),
    output_shapes=(tf.TensorShape([128, 128, 3]), tf.TensorShape([])),
)
# tfds.benchmark(images)

In [4]:
# for image, label in images.take(3):
#     plt.figure()
#     plt.imshow(image.numpy())
#     plt.title(label.numpy())

In [5]:
# Shuffle within a 1000-element buffer, then emit fixed-size batches of 50;
# a trailing partial batch is dropped.
# NOTE(review): this rebinds `images`, so running this cell twice batches an
# already-batched dataset. Re-run the dataset-construction cell first.
images = images.shuffle(buffer_size=1000).batch(batch_size=50, drop_remainder=True)

# Small convolutional regressor: two 3x3 conv layers (16 filters each, no
# activation) with batch norm in between, flattened into a dense head that
# outputs a single value per image.
model = K.Sequential([
    K.Input(shape=(128, 128, 3)),
    K.layers.Conv2D(filters=16, kernel_size=(3, 3), padding="SAME"),
    K.layers.BatchNormalization(),
    K.layers.Conv2D(filters=16, kernel_size=(3, 3), padding="SAME"),
    # Deeper variant, currently disabled:
    #     K.layers.BatchNormalization(),
    #     K.layers.MaxPool2D((2,2)),
    #     K.layers.Conv2D(16, (3,3), padding="valid"),
    #     K.layers.BatchNormalization(),
    #     K.layers.Conv2D(16, (3,3), padding="valid"),
    #     K.layers.BatchNormalization(),
    #     K.layers.MaxPool2D((2,2)),
    K.layers.Flatten(),
    K.layers.Dense(units=64, activation="relu"),
    K.layers.Dense(units=32, activation="relu"),
    K.layers.Dense(units=1),
])

# Mean squared error on the raw price; no optimizer is passed, so Keras uses
# its default.
model.compile(loss="mse")

In [6]:
# Smoke check: pull a single batch and run one evaluation step on it so that
# shape or dtype mismatches surface before the long training run.
first_batch = next(iter(images.take(1)))
model.test_step(first_batch)

/mnt/data/car_auction_data/backup_Convertible_0-1000_0_110.pickle
/mnt/data/car_auction_data/backup_Convertible_1000-2000_0_500.pickle
/mnt/data/car_auction_data/backup_Convertible_1000-2000_500_539.pickle
/mnt/data/car_auction_data/backup_Convertible_10000-11000_0_462.pickle


{'loss': <tf.Tensor: shape=(), dtype=float32, numpy=44806388.0>}

In [None]:
# Train on the batched dataset. The generator re-reads the pickle files on
# every pass, so each epoch prints the file names again.
model.fit(x=images, epochs=1000)

Epoch 1/1000
/mnt/data/car_auction_data/backup_Convertible_0-1000_0_110.pickle
/mnt/data/car_auction_data/backup_Convertible_1000-2000_0_500.pickle
/mnt/data/car_auction_data/backup_Convertible_1000-2000_500_539.pickle
/mnt/data/car_auction_data/backup_Convertible_10000-11000_0_462.pickle
      1/Unknown - 0s 4ms/step - loss: 40477008.0000/mnt/data/car_auction_data/backup_Convertible_11000-12000_0_485.pickle
     10/Unknown - 24s 2s/step - loss: 35368884.0000/mnt/data/car_auction_data/backup_Convertible_2000-3000_0_490.pickle


In [None]:
# Preview one batch: every image in a two-column grid, titled with its price.
# The grid is derived from the batch length rather than hard-coded to 25x2, so
# the cell keeps working if the batch size changes.
for batch in images.take(1):
    ims = batch[0].numpy()
    prices = batch[1].numpy()

    n_cols = 2
    n_rows = (len(ims) + n_cols - 1) // n_cols  # ceiling division
    # Keep the original aspect: 32 inches tall for 25 rows.
    fig = plt.figure(figsize=(8, 32 * n_rows / 25))
    # squeeze=False always returns a 2-D array, even for a single row.
    axs = fig.subplots(n_rows, n_cols, squeeze=False)

    for i, ax in enumerate(axs.flat):
        if i < len(ims):
            ax.imshow(ims[i])
            ax.set_title(prices[i])
        else:
            # Odd batch size: hide the unused last panel.
            ax.axis("off")
    fig.tight_layout(pad=1)

In [None]:
# Collect model outputs and ground-truth prices batch by batch. Both lists are
# filled inside the same iteration, so entries stay aligned even though the
# dataset is shuffled.
preds, prices = [], []

for image_batch, price_batch in images:
    batch_preds = model.predict(image_batch)
    preds.extend(batch_preds[:, 0])  # model output is (batch, 1); keep the single column
    prices.extend(price_batch.numpy())

In [None]:
# Predicted vs. true price. Points on the black diagonal are perfect predictions.
fig, ax = plt.subplots(1, 1)
ax.set_xlim(0, 1000)
ax.set_ylim(0, 1000)
# ax.hist2d(prices, preds, bins=20)
# Draw y = x across the whole visible window instead of through the data
# points, so the reference line is complete regardless of the price range.
ax.plot([0, 1000], [0, 1000], color="black")
ax.scatter(prices, preds)
ax.set_xlabel("True price")
ax.set_ylabel("Predicted price")
ax.set_title("Predicted vs. true price");