In [1]:
import os
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from pathlib import Path
import preprocessing


current_folder = Path()
dataset_folder = current_folder / "dataset"
images_folder = dataset_folder / "images"
models_folder = current_folder / "models"
logs_folder = current_folder / "logs"

X_train, X_test, y_train, y_test = preprocessing.get_dataset()
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

INFO: modified dataset already created


In [2]:
np.unique(y_train, return_counts=True)

(array(['crosswalk', 'speedlimit', 'stop', 'trafficlight'], dtype='<U12'),
 array([114, 554,  67,  91]))

In [3]:
#! pip install imblearn



In [4]:
from imblearn.over_sampling import SMOTE

X_flat = np.reshape(X_train, (X_train.shape[0], int(np.product(X_train.shape) / X_train.shape[0])))

sm = SMOTE(n_jobs=-1, random_state=42)
X_train_os, y_train_os = sm.fit_resample(X_flat, y_train)

X_train_os_rs = np.reshape(X_train_os, tuple([X_train_os.shape[0]]) + X_train.shape[1:])

X_train = X_train_os_rs
y_train = y_train_os



In [5]:
np.unique(y_train, return_counts=True)

(array(['crosswalk', 'speedlimit', 'stop', 'trafficlight'], dtype='<U12'),
 array([554, 554, 554, 554]))

In [6]:
# One hot encoding
label_enc = sklearn.preprocessing.LabelEncoder()
y_train = label_enc.fit_transform(y_train)
y_test = label_enc.transform(y_test)
one_hot = sklearn.preprocessing.OneHotEncoder(sparse=False)
y_train = one_hot.fit_transform(y_train.reshape(-1, 1))
y_test = one_hot.transform(y_test.reshape(-1, 1))

X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.25, random_state=54
)



In [7]:
print(np.sum(y_train, axis=0) / len(y_train))
print(np.sum(y_test, axis=0) / len(y_train))

[0.25391095 0.24488568 0.24909747 0.2521059 ]
[0.0198556  0.08303249 0.00962696 0.01203369]


In [11]:
pip install tensorflow

Collecting tensorflow
  Using cached tensorflow-2.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (588.3 MB)
Collecting grpcio<2.0,>=1.24.3
  Downloading grpcio-1.51.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m337.5 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting keras<2.12,>=2.11.0
  Downloading keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m981.3 kB/s[0m eta [36m0:00:00[0m:01[0m00:01[0m0m
[?25hCollecting absl-py>=1.0.0
  Downloading absl_py-1.4.0-py3-none-any.whl (126 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.5/126.5 kB[0m [31m593.0 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting tensorflow-estimator<2.12,>=2.11.0
  Using cached tensorflow_estimator-2.11.0-py2.py3-none-any.whl (439 kB)
Collecting h5py>=2.9.0
  Dow

In [12]:
import tensorflow as tf
from tensorflow import keras
from functools import partial

# Keras model
DefaultConv2D = partial(
    keras.layers.Conv2D, kernel_size=3, activation="relu", padding="SAME"
)

model = keras.Sequential(
    [
        DefaultConv2D(
            filters=64, kernel_size=7, input_shape=list(X_train[0].shape)
        ),  # was 28, 28, 1
        keras.layers.MaxPooling2D(pool_size=2),
        DefaultConv2D(filters=128),
        DefaultConv2D(filters=128),
        keras.layers.MaxPooling2D(pool_size=2),
        DefaultConv2D(filters=256),
        DefaultConv2D(filters=256),
        keras.layers.MaxPooling2D(pool_size=2),
        keras.layers.Flatten(),
        keras.layers.Dense(units=128, activation="relu"),
        keras.layers.Dropout(0.2),  # lower less regularization
        keras.layers.Dense(units=64, activation="relu"),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(units=4, activation="softmax"),
    ]
)

model.compile(
    loss="categorical_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)

# loss
# sparse_categorical_crossentropy used for sparse labels (target class index)
# categorial_cross_entropy would yield a one-hot vector (only one positive label)
# mean_squared_error for regression

tensorboard_cb = tf.keras.callbacks.TensorBoard(
    log_dir=logs_folder,
    histogram_freq=1,
)
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=False,
)

print("Start training")
history = model.fit(
    X_train,
    y_train,
    epochs=500,
    validation_data=(X_valid, y_valid),
    callbacks=[tensorboard_cb, early_stopping_cb],
)

# Save model
os.makedirs(models_folder, exist_ok=True)
model.save(models_folder / "model1.h5")

2023-01-22 03:11:29.626705: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-22 03:11:34.210074: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/page/anaconda3/envs/sds_python/lib/python3.10/site-packages/cv2/../../lib64:/usr/local/webots/lib/controller
2023-01-22 03:11:34.210141: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-01-22 03:14:35.410223: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could 

Start training


2023-01-22 03:18:20.525998: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 199440000 exceeds 10% of free system memory.


Epoch 1/500


2023-01-22 03:18:23.125681: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 18874368 exceeds 10% of free system memory.


Epoch 2/500
Epoch 3/500
Epoch 4/500

KeyboardInterrupt: 

In [13]:
# Evaluate per class
y_pred = model.predict(X_test)
Y_pred = np.argmax(y_pred, axis=1)  # one-hot to index
Y_test = np.argmax(y_test, axis=1)
print(
    sklearn.metrics.classification_report(
        Y_test, Y_pred, target_names=label_enc.classes_
    )
)

# Evaluate general
test_results = model.evaluate(X_test, y_test)  # loss and metrics
print(f"Test Data - Loss: {test_results[0]:.3f}, Metrics: {test_results[1:]}")

              precision    recall  f1-score   support

   crosswalk       0.97      1.00      0.99        33
  speedlimit       0.99      1.00      0.99       138
        stop       0.89      1.00      0.94        16
trafficlight       1.00      0.75      0.86        20

    accuracy                           0.98       207
   macro avg       0.96      0.94      0.94       207
weighted avg       0.98      0.98      0.97       207

Test Data - Loss: 0.062, Metrics: [0.9758453965187073]
