# Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from PIL import Image
import os
import keras
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from google.colab import drive
from matplotlib import style
import random
style.use('fivethirtyeight')



from tensorflow import keras
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score
np.random.seed(42)

# Data storage

In [None]:
# Mount Google Drive so the dataset stored there is reachable from the Colab runtime
drive.mount('/content/drive')
dataset_path = '/content/drive/MyDrive/TFG CODE/DATASET IDENTIFIER/'
# os.chdir has to be *called*: assigning to it would replace the function with a
# string and leave the working directory untouched. From here on, relative paths
# (e.g. the tuner's 'my_dir') resolve inside the dataset folder.
os.chdir(dataset_path)

Mounted at /content/drive


Useful paths for later on

In [None]:
# Dataset locations on the mounted Drive: the root folder and its Train/Test subfolders
data_dir = '/content/drive/MyDrive/TFG CODE/DATASET IDENTIFIER'
train_path = '/content/drive/MyDrive/TFG CODE/DATASET IDENTIFIER/Train'
test_path = '/content/drive/MyDrive/TFG CODE/DATASET IDENTIFIER/Test'

# Every image is resized to IMG_HEIGHT x IMG_WIDTH pixels with `channels` colour planes
IMG_HEIGHT, IMG_WIDTH = 30, 30
channels = 3

In [None]:
# Count the entries found directly under the training folder
train_entries = os.listdir(train_path)
NUM_CATEGORIES = len(train_entries)

In [None]:
# Accumulators filled by the loading loop: one pixel array and one class id per image
data, labels = [], []
# Binary problem: class 0 and class 1 (see the Train/0 and Train/1 folders)
classes = 2
# Working directory at the time this cell runs
cur_path = os.getcwd()

In [None]:
# Human-readable name for each class id (the stray ')' in the class-0 label was a typo)
class_names = {0: 'No signal', 1: 'Signal'}

# Data processing


This is a key step. The loop below walks through every class folder inside the Train directory, resizes each image to 30x30 pixels so that all inputs share the same shape for training, and stores the pixel array and its class label in the `data` and `labels` lists created above.

In [None]:
# Load the training images class by class: folder Train/<i> holds the samples of class i.
for i in range(classes):
    path = os.path.join(dataset_path, 'Train', str(i))
    # os.listdir returns entries in arbitrary order; sorting keeps the sample order
    # (and therefore the seeded shuffle/split further down) reproducible.
    images = sorted(os.listdir(path))
    for a in images:
        try:
            # The context manager closes the file handle once the pixels are copied out.
            with Image.open(os.path.join(path, a)) as img:
                # Force 3 channels so grayscale/RGBA files cannot yield a ragged array.
                # PIL's resize expects (width, height).
                image = np.array(img.convert('RGB').resize((IMG_WIDTH, IMG_HEIGHT)))
        except (OSError, ValueError) as e:
            # Unreadable or non-image files (e.g. a stray CSV) are reported and skipped.
            print(e)
            continue
        # Append both lists together so data and labels always stay aligned.
        data.append(image)
        labels.append(i)

cannot identify image file '/content/drive/MyDrive/TFG CODE/DATASET IDENTIFIER/Train/1/GT-final_test.csv'


In [None]:
# Convert the Python lists into numpy arrays
data, labels = np.array(data), np.array(labels)

In [None]:
# Sanity check: both arrays must have the same number of samples
print(*(arr.shape for arr in (data, labels)))

(24630, 30, 30, 3) (24630,)


In [None]:
# Hold out 20% of the samples; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, random_state=0, test_size=0.2)

In [None]:
# Show the shapes of the four arrays produced by the split
print(*(arr.shape for arr in (X_train, X_test, y_train, y_test)))

(19704, 30, 30, 3) (4926, 30, 30, 3) (19704,) (4926,)


In [None]:
# One-hot encode with the dataset's real class count (`classes`) instead of a
# hard-coded 43, which would produce 43-column targets for a 2-class problem.
y_train = to_categorical(y_train, classes)
y_test = to_categorical(y_test, classes)

Shuffling the data and splitting it into training and validation sets

In [None]:
# Draw one random permutation and apply it to both arrays so that every image
# keeps its own label
shuffle_indexes = np.arange(len(data))
np.random.shuffle(shuffle_indexes)
data, labels = data[shuffle_indexes], labels[shuffle_indexes]

In [None]:
# Single source of truth for the split seed: the value printed below is the one
# actually passed to train_test_split (previously a random number was printed
# while the split silently used 42).
random_state = 42
X_train, X_val, y_train, y_val = train_test_split(
    data, labels, test_size=0.3, random_state=random_state, shuffle=True)

# Scale pixel values by 1/255 (to [0, 1] assuming 8-bit images); float32 halves the
# memory footprint compared with the float64 arrays a plain division would create.
X_train = X_train.astype('float32') / 255.0
X_val = X_val.astype('float32') / 255.0

print("Random: ", random_state)
print("X_train.shape", X_train.shape)
print("X_valid.shape", X_val.shape)
print("y_train.shape", y_train.shape)
print("y_valid.shape", y_val.shape)

Random:  4281
X_train.shape (17241, 30, 30, 3)
X_valid.shape (7389, 30, 30, 3)
y_train.shape (17241,)
y_valid.shape (7389,)


One hot encoding the labels

In [None]:
# Turn the integer class ids into one-hot rows with `classes` columns
encode = keras.utils.to_categorical
y_train, y_val = encode(y_train, classes), encode(y_val, classes)

for encoded in (y_train, y_val):
    print(encoded.shape)

(17241, 2)
(7389, 2)


# Model


In [None]:
!pip install keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/129.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
# Mild geometric augmentation applied on the fly to the training batches.
# Flips are explicitly disabled.
augmentation_config = {
    "rotation_range": 10,
    "zoom_range": 0.15,
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "shear_range": 0.15,
    "horizontal_flip": False,
    "vertical_flip": False,
    "fill_mode": "nearest",
}
aug = ImageDataGenerator(**augmentation_config)



In [None]:
# Imported here rather than in the first cell because the package is installed by the
# pip cell above. `keras_tuner` is the current module name; `kerastuner` is only a
# deprecated legacy alias.
import keras_tuner as kt


def model_builder(hp):
    """Build and compile the CNN for one hyperparameter-search trial.

    Tuned hyperparameters:
      * ``filters1``      - filters of the 1st Conv2D layer (16..128, step 16)
      * ``filters2``      - filters of the 2nd Conv2D layer (32..256, step 32)
      * ``learning_rate`` - Adam learning rate (1e-2, 1e-3 or 1e-4)
      * ``units``         - width of the hidden Dense layer (256..1024, step 256)

    Args:
        hp: hyperparameter container supplied by the tuner.

    Returns:
        A compiled ``keras.Sequential`` model taking
        ``(IMG_HEIGHT, IMG_WIDTH, channels)`` inputs and emitting a softmax over
        ``classes`` outputs.
    """
    # Declare the whole search space up front, in the same order as before.
    hp_filters1 = hp.Int('filters1', min_value=16, max_value=128, step=16)
    hp_filters2 = hp.Int('filters2', min_value=32, max_value=256, step=32)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    hp_units = hp.Int('units', min_value=256, max_value=1024, step=256)

    model = keras.Sequential([
        # Block 1: two tunable convolutions, then down-sampling and normalisation
        keras.layers.Conv2D(filters=hp_filters1, kernel_size=(3, 3), activation='relu',
                            input_shape=(IMG_HEIGHT, IMG_WIDTH, channels)),
        keras.layers.Conv2D(filters=hp_filters2, kernel_size=(3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.BatchNormalization(axis=-1),

        # Block 2: fixed-size convolutions
        keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.BatchNormalization(axis=-1),
        keras.layers.Flatten(),

        # Classifier head
        keras.layers.Dense(units=hp_units, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(rate=0.5),
        # Output width follows `classes` so it always matches the one-hot labels.
        keras.layers.Dense(classes, activation='softmax'),
    ])

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model


# Hyperband search over the space declared in model_builder, ranked by validation accuracy.
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='traffic_sign_recognition')

# Abort a trial when the validation loss has not improved for 5 consecutive epochs.
stop_early = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# No `epochs` argument here: Hyperband assigns each trial its own epoch budget
# (capped by max_epochs above) and overrides any value passed to search(), so an
# explicit `epochs=30` would only be misleading.
tuner.search(aug.flow(X_train, y_train, batch_size=32),
             validation_data=(X_val, y_val),
             callbacks=[stop_early])

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"The optimal number of filters in the first Conv2D layer is {best_hps.get('filters1')}")
print(f"The optimal number of filters in the second Conv2D layer is {best_hps.get('filters2')}")
print(f"The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}")
print(f"The optimal number of units in the Dense layer is {best_hps.get('units')}")


Trial 30 Complete [00h 22m 40s]
val_accuracy: 0.9975639581680298

Best val_accuracy So Far: 0.9978346228599548
Total elapsed time: 06h 10m 17s
The optimal number of filters in the first Conv2D layer is 32
The optimal number of filters in the second Conv2D layer is 32
The optimal learning rate for the optimizer is 0.0001
The optimal number of units in the Dense layer is 768


In [None]:
# Rebuild the network from the best hyperparameters found by the tuner, then train it
# on augmented batches while tracking validation metrics after every epoch
model = tuner.hypermodel.build(best_hps)
train_batches = aug.flow(X_train, y_train, batch_size=32)
history = model.fit(train_batches, epochs=30, validation_data=(X_val, y_val))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Persist the trained model on Drive so it survives the end of the Colab session
save_path = '/content/drive/MyDrive/TFG CODE/model_identifier.h5'
model.save(filepath=save_path)

# Results


In [None]:
# Per-epoch metrics recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x-axis from the recorded history: no `epochs` variable exists in this
# notebook, and the history length is the number of epochs that actually ran.
epochs_range = range(len(acc))

plt.figure(figsize=(10, 10))

# Left panel: accuracy curves
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [None]:
# Evaluate the trained model on the images listed in Test.csv.
# Uses pd, np, Image and accuracy_score from the import cell at the top of the notebook,
# plus IMG_HEIGHT, IMG_WIDTH, data_dir and model defined in earlier cells.

# Load the test index: one row per image with its relative path and class id
test = pd.read_csv(data_dir + '/Test.csv')

imgs = test["path"].values

data = []
labels = []

for img, class_id in zip(imgs, test["classID"].values):
    try:
        # Decode with PIL exactly like the training loop. cv2.imread returns pixels in
        # BGR order, which would swap red and blue relative to the RGB arrays the
        # model was trained on.
        with Image.open(data_dir + '/Test/' + img) as image:
            # PIL's resize expects (width, height).
            resize_image = image.convert('RGB').resize((IMG_WIDTH, IMG_HEIGHT))
            pixels = np.array(resize_image)
    except (OSError, ValueError) as e:
        print("Error in " + img + ": " + str(e))
        continue
    # Keep the label only for images that were actually loaded, so that `labels`
    # stays aligned with the rows of X_test (and therefore with `pred`).
    data.append(pixels)
    labels.append(class_id)

labels = np.array(labels)

# Preprocess the data with the same 1/255 scaling used for training
X_test = np.array(data)
X_test = X_test / 255.0

# Make predictions: pick the most probable class for every sample
pred_probs = model.predict(X_test)
pred = np.argmax(pred_probs, axis=1)

# Calculate accuracy
print('Test Data accuracy: ', accuracy_score(labels, pred) * 100)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 computed from the test labels and predictions
report = classification_report(labels, pred)
print(report)

In [None]:
# Show a grid of randomly chosen test images annotated with actual vs. predicted class.
# Uses plt, random and np from the import cell at the top of the notebook.
plt.figure(figsize=(25, 25))

# Randomly select up to 25 indices (fewer if the test set is smaller than the grid)
random_indices = random.sample(range(len(X_test)), min(25, len(X_test)))

for i, idx in enumerate(random_indices):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    # `pred` already holds class indices (argmax was applied when predicting);
    # calling argmax again on a single scalar would always return 0.
    prediction = int(pred[idx])
    actual = int(labels[idx])

    # Green when the prediction matches the actual label, red otherwise
    col = 'g' if prediction == actual else 'r'

    # `classes` is the integer class count; the readable names live in `class_names`.
    # Fall back to the numeric id for any class without a registered name.
    actual_label = class_names.get(actual, str(actual))
    pred_label = class_names.get(prediction, str(prediction))
    plt.xlabel('Actual={}\n({})\nPred={}\n({})'.format(actual, actual_label, prediction, pred_label), color=col)
    plt.imshow(X_test[idx])

# Adjust the spacing between subplots
plt.subplots_adjust(hspace=0.5, wspace=0.3)

plt.show()
