In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from functools import partial

# Loading data

use tf.keras.datasets

In [None]:
# Load the IMDB dataset that ships with Keras as (train, test) tuples of (data, labels).
# num_words=10_000 limits the vocabulary to the 10,000 most frequent words.
(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.imdb.load_data(num_words=10_000)

get file from url

In [7]:
# Placeholder: fill in the download URL before running this cell (an empty origin cannot be fetched).
url = ""
# Download (and unpack) the archive into the current directory.
# NOTE(review): the name `ds` is rebound to a tf.data.Dataset in a later cell.
ds = tf.keras.utils.get_file(origin=url, untar=True, extract=True, cache_dir=".")

In [None]:
# Read a downloaded CSV file into a pandas DataFrame.
# The path below is a placeholder; point it at the actual file location.
df = pd.read_csv("path/to/downloaded/file.csv")

In [10]:
# Read the downloaded text file in one go; the context manager closes the
# file handle automatically when the block exits.
with open("./test.txt", mode="r") as f:
    text = f.read()
text

'test\ndit\nis\neen\ntest'

In [16]:
# Read the file and split it into a list of lines without trailing newlines.
with open("./test.txt", mode="r") as f:
    text1 = f.read().splitlines()
text1

['test', 'dit', 'is', 'een', 'test']

In [12]:
# readlines() returns one list entry per line and keeps the newline characters.
with open("./test.txt", mode="r") as f:
    text2 = f.readlines()
text2

['test\n', 'dit\n', 'is\n', 'een\n', 'test']

In [13]:
# readline() returns only the first line of the file (newline included).
with open("./test.txt", mode="r") as f:
    text3 = f.readline()
text3

'test\n'

use tensorflow datasets

In [None]:
# Load caltech101 from TensorFlow Datasets as three (image, label) datasets.
train_set, valid_set, test_set = tfds.load(
    name="caltech101",
    split=["train", "test[50%:]", "test[:50%]"], # full training split; LAST 50% of the test split for validation; FIRST 50% of the test split for testing
    as_supervised=True # as_supervised=True yields (input, label) tuples instead of feature dicts
)

train_set.cardinality(), valid_set.cardinality(), test_set.cardinality() # number of elements in each dataset

use tf.data.Dataset

In [None]:
# Build a dataset from a range with stop value 100, analogous to Python's range(100).
tf.data.Dataset.range(100)

In [19]:
# Turn the list of lines (text1) into a dataset with one string element per line.
ds = tf.data.Dataset.from_tensor_slices(text1)

# Peek at the first element only.
for item in ds.take(1):
  print(item)

tf.Tensor(b'test', shape=(), dtype=string)


use timeseries_dataset_from_array

In [None]:
# Toy series 0..99 used to demonstrate windowing.
data = np.arange(0,100)

# Slice the series into windows of 10 steps that start 10 steps apart;
# the target of each window is the value that directly follows it.
ds1 = tf.keras.utils.timeseries_dataset_from_array(
    data=data,
    targets=data[10:], # usually data[sequence_length:], or None when no targets are needed
    sequence_length=10,
    sequence_stride=10,
    sampling_rate=1, # default
    batch_size=None
)

# Preprocessing data

#### reshape

In [30]:
# Random batch of 10 "images" of 28x28 pixels with 3 channels.
X = tf.random.uniform(shape=(10, 28, 28, 3))
# Three equivalent ways to flatten the two spatial axes into one.
X_1 = tf.reshape(X, [10, 28*28, 3])
X_2 = tf.keras.layers.Reshape([28*28, 3])(X) # the layer's target shape must NOT include the batch dimension
X_3 = np.reshape(X, [10, 28*28, 3])
print(f"Original: \n{X.shape}\nNew: \n{X_1.shape}\n{X_2.shape}\n{X_3.shape}")

Original: 
(10, 28, 28, 3)
New: 
(10, 784, 3)
(10, 784, 3)
(10, 784, 3)


#### resize

In [41]:
height, width = 200, 150
image = X[0]


def resize_img(image, height, width):
  """Resize `image` to (height, width) with a Keras Resizing layer.

  crop_to_aspect_ratio=True resizes without distorting the aspect ratio,
  cropping the image where needed.
  """
  resizer = tf.keras.layers.Resizing(height, width, crop_to_aspect_ratio=True)
  return resizer(image)


resize_img(image, height, width).shape

TensorShape([200, 150, 3])

#### scaling

In [33]:
# Arbitrary example values; the largest entry is 255.
A = np.array([20, 2, 29, 49, 68, 122, 34, 77, 167, 211, 234, 254, 1, 255])
# Max-scaling: divide every entry by the largest value so the maximum becomes 1.
A_scale = A / A.max()
A_scale

array([0.07843137, 0.00784314, 0.11372549, 0.19215686, 0.26666667,
       0.47843137, 0.13333333, 0.30196078, 0.65490196, 0.82745098,
       0.91764706, 0.99607843, 0.00392157, 1.        ])

#### normalization

In [39]:
# Normalization layer: adapt() learns the statistics from X,
# after which calling the layer standardizes its input with them.
normalize = tf.keras.layers.Normalization()
normalize.adapt(X)
X_normalized = normalize(X)
# Compare the first pixel (3 channel values) before and after normalization.
print(f"X: {X[0][0][0]}, Normalized: {X_normalized[0][0][0]}")

X: [0.02371061 0.21730173 0.5411055 ], Normalized: [-1.6512237  -0.9754501   0.13042599]


#### textvectorization

- `standardize` options
  - None: No standardization.
  - "lower_and_strip_punctuation": Text will be lowercased and all punctuation removed.
  - "lower": Text will be lowercased.
  - "strip_punctuation": All punctuation will be removed.

- `split` options
  - None: No splitting
  - "whitespace": Split on whitespace.
  - "character": Split on each unicode character.

- `output_mode` options
  - 'int'
  - 'tf_idf'
  - 'multi_hot'
  - 'count'

DEFAULT = standardize='lower_and_strip_punctuation', split='whitespace', output_mode='int'

In [None]:
# The arguments below are the layer's defaults, written out explicitly for reference.
# NOTE: the layer still needs a vocabulary (e.g. via adapt()) before it can encode text.
text_encoding = tf.keras.layers.TextVectorization(standardize="lower_and_strip_punctuation", split="whitespace", output_mode="int")

#### category encoding

In [None]:
# One-hot encode integer categories 0..3; "count" and "multi_hot" are also valid output_mode values.
onehot_layer = tf.keras.layers.CategoryEncoding(num_tokens=4, output_mode='one_hot')

In [44]:
# Integer class labels to one-hot rows.
y = [1, 2, 5, 2, 0]
one_hot = tf.keras.utils.to_categorical(y, num_classes=10) # classes not present in y become extra all-zero columns at the end
one_hot

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

#### numerical encoding

In [46]:
# Bucketize ages with two boundaries -> three bins: (-inf, 18), [18, 50), [50, +inf).
age = tf.constant([[11.], [22.], [33.], [92.], [18.], [50.]])
discretization = tf.keras.layers.Discretization(bin_boundaries=[18., 50.]) # each bin is left-inclusive, right-exclusive

age_categories = discretization(age)
age_categories

<tf.Tensor: shape=(6, 1), dtype=int64, numpy=
array([[0],
       [1],
       [1],
       [2],
       [1],
       [2]])>

In [49]:
# Three boundaries -> four bins: (-inf, 18), [18, 35), [35, 65), [65, +inf).
age = tf.constant([[10.], [88.], [77.], [16.], [45.], [25.], [35.], [18.], [65.]])
discretize_layer = tf.keras.layers.Discretization(bin_boundaries=[18., 35., 65.]) # a value equal to a boundary goes to the upper bin: 35 -> 2, 65 -> 3
age_categories = discretize_layer(age)
age_categories

<tf.Tensor: shape=(9, 1), dtype=int64, numpy=
array([[0],
       [3],
       [3],
       [0],
       [2],
       [1],
       [2],
       [1],
       [3]])>

#### text encoding

In [50]:
# Map strings to integer indices; the vocabulary is learned from `cities` via adapt().
cities = ["Brussel", "Aalst", "Gent", "Kortrijk", "Gent"]
str_lookup_layer = tf.keras.layers.StringLookup() # also accepts output_mode (e.g. "one_hot"); num_oov_indices=5 would reserve the first 5 indices for out-of-vocabulary tokens
str_lookup_layer.adapt(cities)
# "Brugge" is not in the vocabulary, so it maps to the out-of-vocabulary index.
print(str_lookup_layer([["Kortrijk"], ["Aalst"], ["Brussel"], ["Brugge"], ["Gent"]]))



tf.Tensor(
[[2]
 [4]
 [3]
 [0]
 [1]], shape=(5, 1), dtype=int64)


In [55]:
# With num_oov_indices=5 the first five indices (0-4) are reserved for unknown tokens,
# so the learned vocabulary starts at index 5.
str_lookup_layer = tf.keras.layers.StringLookup(num_oov_indices=5)
str_lookup_layer.adapt(cities)
# The last three cities (and "Brugge") are not in `cities` and fall into the reserved OOV indices.
print(str_lookup_layer([["Kortrijk"], ["Aalst"], ["Brussel"], ["Brugge"], ["Gent"], ["Beveren"], ["Oosterzele"], ["Waregem"]]))

tf.Tensor(
[[6]
 [8]
 [7]
 [1]
 [5]
 [1]
 [1]
 [1]], shape=(8, 1), dtype=int64)


#### built-in preprocessing function of the pretrained models (for transfer learning)

In [None]:
# Every model family in tf.keras.applications ships its own preprocess_input.
# MobileNetV2 is used here as a concrete, runnable example; replace the
# `mobilenet_v2` submodule with the one matching your pretrained model
# (for example resnet50 or xception).
preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

# Apply the model-specific preprocessing to the inputs only; labels pass through unchanged.
# NOTE(review): assumes x is already a float image tensor of the size the model expects - TODO confirm.
train_set = train_set.map(lambda x, y: (preprocess(x), y))

#### data augmentation

In [45]:
# Augmentation pipeline of random flip, rotation, contrast and zoom layers, all seeded with 42.
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.RandomFlip(mode="horizontal", seed=42),
  tf.keras.layers.RandomRotation(factor=0.2, seed=42, fill_mode="constant", fill_value=0), # factor is a fraction of a full turn (2*pi): for a rotation given in degrees, e.g. 20 degrees, use factor=20/360
  tf.keras.layers.RandomContrast(factor=0.2, seed=42),
  tf.keras.layers.RandomZoom(height_factor=0.15, seed=42, fill_mode="constant", fill_value=0) # fill_mode="constant" pads with fill_value (zeros here) where the transformed image has no pixels
])

# Build + compile model

### Sequential

#### DNN

In [None]:
# Small fully connected classifier for 10x10 inputs and 10 classes.
model_dnn = tf.keras.Sequential([
    # Dense expects flat feature vectors: (batch_size, 10, 10) -> (batch_size, 100)
    tf.keras.layers.Flatten(input_shape=(10, 10)),
    tf.keras.layers.Dense(units=16, activation="relu"),
    # For binary classification use units=1, activation="sigmoid" instead
    tf.keras.layers.Dense(units=10, activation="softmax"),
])

#### Conv

In [73]:
# Convolutional classifier for 32x32 RGB images with 10 output classes.
model_conv = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(32, 32, 3)),
    # Scale pixel values by 1/255 inside the model.
    tf.keras.layers.Rescaling(scale=1./255),

    # Block 1: two 3x3 convolutions (padding="same" keeps 32x32), then pooling halves it to 16x16.
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), padding="same", activation="relu"),
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), padding="same", activation="relu"),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # Block 2: same pattern with 64 filters; pooling halves 16x16 to 8x8.
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation="relu"),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation="relu"),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # Classification head: flatten the feature maps and feed them to dense layers.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation="relu"),
    tf.keras.layers.Dense(units=10, activation="softmax")
])
model_conv.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_3 (Rescaling)     (None, 32, 32, 3)         0         
                                                                 
 conv2d_16 (Conv2D)          (None, 32, 32, 32)        896       
                                                                 
 conv2d_17 (Conv2D)          (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 16, 16, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_18 (Conv2D)          (None, 16, 16, 64)        18496     
                                                                 
 conv2d_19 (Conv2D)          (None, 16, 16, 64)        36928     
                                                      

#### FCN

In [74]:
# Conv2D with the settings shared by every 3x3 convolution in this model;
# only the number of filters has to be passed per layer.
DefaultConv2D = partial(tf.keras.layers.Conv2D,
                        kernel_size=(3,3), padding="same",
                        activation="relu")

# Fully convolutional network: same feature extractor as a regular CNN, but the
# Flatten/Dense head is replaced by convolutions.
model_fcn = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(32, 32, 3)),
    tf.keras.layers.Rescaling(scale=1./255),

    # Block 1: 32x32 feature maps, pooled down to 16x16.
    DefaultConv2D(filters=32),
    DefaultConv2D(filters=32),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # Block 2: 16x16 feature maps, pooled down to 8x8.
    DefaultConv2D(filters=64),
    DefaultConv2D(filters=64),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

    # An 8x8 "valid" convolution over the 8x8 feature map yields a 1x1 map (replaces Flatten + Dense(128)).
    tf.keras.layers.Conv2D(filters=128, kernel_size=(8, 8), padding="valid", activation="relu", strides=(1,1)),
    # A 1x1 convolution with 10 filters and softmax replaces the Dense(10) output layer.
    tf.keras.layers.Conv2D(filters=10, kernel_size=(1, 1), padding="valid", activation="softmax", strides=(1,1))
])
model_fcn.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_4 (Rescaling)     (None, 32, 32, 3)         0         
                                                                 
 conv2d_20 (Conv2D)          (None, 32, 32, 32)        896       
                                                                 
 conv2d_21 (Conv2D)          (None, 32, 32, 32)        9248      
                                                                 
 max_pooling2d_8 (MaxPoolin  (None, 16, 16, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_22 (Conv2D)          (None, 16, 16, 64)        18496     
                                                                 
 conv2d_23 (Conv2D)          (None, 16, 16, 64)        36928     
                                                      

#### RNN

In [None]:
# Single-layer recurrent model with one output value per sequence.
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(16, input_shape=[None, 14]), # None allows variable sequence lengths; 14 = number of features per time step
    tf.keras.layers.Dense(1)
])

In [None]:
# Two stacked recurrent layers followed by a single-unit output layer.
model = tf.keras.Sequential([
    # return_sequences=True is required when another RNN layer follows:
    # the next layer needs the output of every time step, otherwise it raises an error.
    tf.keras.layers.SimpleRNN(16, input_shape=[None, 14], return_sequences=True),
    tf.keras.layers.SimpleRNN(8),
    tf.keras.layers.Dense(1),
])

optimizers

In [None]:
# Optimizer instances with an explicit learning rate; pass one of them to model.compile(optimizer=...).
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
adam_w = tf.keras.optimizers.AdamW(learning_rate=0.001)
sgd = tf.keras.optimizers.SGD(learning_rate=0.001)
rmsprop = tf.keras.optimizers.RMSprop(learning_rate=0.001)

top-k metrics

In [None]:
# Top-k accuracy for integer (sparse) labels: a prediction counts as correct
# when the true class is among the k highest-scoring classes.
top1 = tf.keras.metrics.SparseTopKCategoricalAccuracy(k=1, name="top_1_acc") # equivalent to metrics=["accuracy"]
top3 = tf.keras.metrics.SparseTopKCategoricalAccuracy(k=3, name="top_3_acc")
top5 = tf.keras.metrics.SparseTopKCategoricalAccuracy(k=5, name="top_5_acc")

In [None]:
# Compile with the Adam instance, a loss for integer class labels, and a mix of
# string-named metrics and the top-k metric objects.
model_dnn.compile(optimizer=adam, loss="sparse_categorical_crossentropy", metrics=["accuracy", "mse", "mae", "mape", top3, top5])
# optimizer="adam" (as a string) also works -> the optimizer is then created with its default settings
# use loss="binary_crossentropy" when the output layer has a sigmoid activation

callbacks

In [None]:
# Stop training once val_loss has not improved by at least min_delta for 5 epochs in a row,
# then restore the weights of the best epoch. Alternative: monitor='val_accuracy'.
early_stop = tf.keras.callbacks.EarlyStopping(patience=5, min_delta=0.001, monitor='val_loss', restore_best_weights=True) # monitor='val_accuracy'
# Save the model to "name.keras" only when val_loss improves.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint("name.keras", monitor='val_loss', save_best_only=True)