Załączenie potrzebnych bibliotek

In [95]:
import numpy as np
import matplotlib as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from os import listdir
import os
import glob

import xml.etree.ElementTree as ET

Przygotowanie zbioru danych do uczenia
Wykorzystywany zbiór danych: https://www.kaggle.com/datasets/andrewmvd/road-sign-detection?resource=download

In [96]:
labels_path = 'dataset/annotations/'
img_path = 'dataset/images/'

label_files = sorted(listdir(labels_path))
img_files = sorted(listdir(img_path))

cathegories = ['trafficlight', 'stop', 'speedlimit', 'crosswalk']

assert len(label_files) == len(img_files) # sprawdzenie, czy ilość zdjęć pokrywa się z ilością labeli

for i in range(len(label_files)): # sprawdzenie, czy nazwy plików i zdjęć są w odpowiedniej kolejności
    assert(label_files[i][:-4] == img_files[i][:-4])

def get_class_vector(classes): 
    tmp = [0, 0, 0, 0]

    for i in range(4):
        if(cathegories[i] in classes):
            tmp[i] = 1
    
    return tmp


label_vectors = []

for i in range(len(label_files)):
    xml_data = open('dataset/annotations/' + label_files[i], 'r').read()
    root = ET.XML(xml_data)
    
    data = []
    
    for i in root.findall('object'):
        c = i.find('name').text
        data.append(c)
    label_vectors.append(get_class_vector(data))

img_vector = []

for i in range(len(img_files)):
    img_vector.append(np.array(
        keras.utils.load_img(
            img_path + img_files[i],
            color_mode = 'grayscale',
            target_size=(180, 180)
        )).astype("float32")
    )
# print(label_vectors)

In [97]:
print(img_vector)

[array([[22., 22., 22., ..., 22., 22., 22.],
       [22., 22., 22., ..., 22., 22., 22.],
       [22., 22., 22., ..., 23., 23., 23.],
       ...,
       [21., 21., 21., ..., 22., 21., 21.],
       [21., 21., 21., ..., 22., 21., 21.],
       [21., 21., 21., ..., 21., 21., 21.]], dtype=float32), array([[190., 191., 191., ..., 195., 195., 195.],
       [190., 191., 191., ..., 195., 195., 195.],
       [190., 191., 191., ..., 195., 195., 195.],
       ...,
       [ 13.,  77.,  53., ..., 136., 132., 109.],
       [ 13.,  61.,  48., ..., 134., 132., 107.],
       [ 15.,  38.,  25., ..., 120., 130., 105.]], dtype=float32), array([[122., 123., 123., ..., 111., 112., 111.],
       [122., 123., 123., ..., 112., 112., 111.],
       [123., 124., 123., ..., 112., 112., 111.],
       ...,
       [ 57.,  55.,  50., ...,  91.,  91.,  90.],
       [ 57.,  55.,  49., ...,  92.,  91.,  89.],
       [ 57.,  56.,  48., ...,  92.,  91.,  89.]], dtype=float32), array([[255., 255., 255., ...,  34.,  36.,  38.]

Opracowanie architektury sieci CNN

In [98]:
data_augmentation = keras.Sequential(
[
layers.RandomFlip("horizontal"),
layers.RandomRotation(0.1),
layers.RandomZoom(0.2),
]
)

model_input = keras.Input(shape=(180, 180, 1)) # obrazki o wymiarach 180x180 
x = data_augmentation(model_input) # "powiększenie" zbioru danych
x = layers.Rescaling(1.0/255.0)(model_input) # przeskalowanie pikseli do zakresu [0-1]
x = layers.Conv2D(32, 3, activation='relu')(x) # architektura sieci
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(128, 3, activation='relu')(x)
x = layers.Conv2D(128, 3, activation='relu')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(256, 3, activation='relu')(x)
x = layers.Conv2D(256, 3, activation='relu')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(512, 3, activation='relu')(x)
x = layers.Conv2D(512, 3, activation='relu')(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x) # losowe odrzucenie połowy predykcji
outputs = layers.Dense(4, activation='softmax')(x) # wyjścia sieci (4 klasy - 4 wyjścia, softmax bo na obrazku może znajdować się więcej niż jedna klasa jednocześnie)

model = keras.Model(inputs=model_input, outputs=outputs) # utworzenie modelu
model.summary() # podsumowanie modelu

model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_15 (InputLayer)       [(None, 180, 180, 1)]     0         
                                                                 
 rescaling_13 (Rescaling)    (None, 180, 180, 1)       0         
                                                                 
 conv2d_119 (Conv2D)         (None, 178, 178, 32)      320       
                                                                 
 conv2d_120 (Conv2D)         (None, 176, 176, 32)      9248      
                                                                 
 max_pooling2d_48 (MaxPoolin  (None, 88, 88, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_121 (Conv2D)         (None, 86, 86, 64)        18496     
                                                          

In [99]:
tensorboard = keras.callbacks.TensorBoard(
log_dir="logs1",
)

c = keras.callbacks.ModelCheckpoint(
filepath="model_huge.keras",
monitor="loss",
save_best_only=True,
)

model.fit(np.array(img_vector[:-200]), np.array(label_vectors[:-200]),
epochs=30,
callbacks=[tensorboard, c])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1bd4382a580>

In [100]:
data_augmentation = keras.Sequential(
[
layers.RandomFlip("horizontal"),
layers.RandomRotation(0.1),
layers.RandomZoom(0.2),
]
)

model_input = keras.Input(shape=(180, 180, 1)) # obrazki o wymiarach 180x180 
x = data_augmentation(model_input) # "powiększenie" zbioru danych
x = layers.Rescaling(1.0/255.0)(model_input) # przeskalowanie pikseli do zakresu [0-1]
x = layers.Conv2D(32, 3, activation='relu')(x) # architektura sieci
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(128, 3, activation='relu')(x)
x = layers.Conv2D(128, 3, activation='relu')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(256, 3, activation='relu')(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x) # losowe odrzucenie połowy predykcji
outputs = layers.Dense(4, activation='softmax')(x) # wyjścia sieci (4 klasy - 4 wyjścia, softmax bo na obrazku może znajdować się więcej niż jedna klasa jednocześnie)

model = keras.Model(inputs=model_input, outputs=outputs) # utworzenie modelu
model.summary() # podsumowanie modelu

model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

Model: "model_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_16 (InputLayer)       [(None, 180, 180, 1)]     0         
                                                                 
 rescaling_14 (Rescaling)    (None, 180, 180, 1)       0         
                                                                 
 conv2d_129 (Conv2D)         (None, 178, 178, 32)      320       
                                                                 
 conv2d_130 (Conv2D)         (None, 176, 176, 32)      9248      
                                                                 
 max_pooling2d_52 (MaxPoolin  (None, 88, 88, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_131 (Conv2D)         (None, 86, 86, 64)        18496     
                                                          

In [101]:
tensorboard = keras.callbacks.TensorBoard(
log_dir="logs2",
)

c = keras.callbacks.ModelCheckpoint(
filepath="model_medium.keras",
monitor="loss",
save_best_only=True,
)

model.fit(np.array(img_vector[:-200]), np.array(label_vectors[:-200]),
epochs=30,
callbacks=[tensorboard, c])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1bd3b2d4e50>

In [102]:
data_augmentation = keras.Sequential(
[
layers.RandomFlip("horizontal"),
layers.RandomRotation(0.1),
layers.RandomZoom(0.2),
]
)

model_input = keras.Input(shape=(180, 180, 1)) # obrazki o wymiarach 180x180 
x = data_augmentation(model_input) # "powiększenie" zbioru danych
x = layers.Rescaling(1.0/255.0)(model_input) # przeskalowanie pikseli do zakresu [0-1]
x = layers.Conv2D(32, 3, activation='relu')(x) # architektura sieci
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPool2D(2)(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x) # losowe odrzucenie połowy predykcji
outputs = layers.Dense(4, activation='softmax')(x) # wyjścia sieci (4 klasy - 4 wyjścia, softmax bo na obrazku może znajdować się więcej niż jedna klasa jednocześnie)

model = keras.Model(inputs=model_input, outputs=outputs) # utworzenie modelu
model.summary() # podsumowanie modelu

model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

Model: "model_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_17 (InputLayer)       [(None, 180, 180, 1)]     0         
                                                                 
 rescaling_15 (Rescaling)    (None, 180, 180, 1)       0         
                                                                 
 conv2d_136 (Conv2D)         (None, 178, 178, 32)      320       
                                                                 
 conv2d_137 (Conv2D)         (None, 176, 176, 32)      9248      
                                                                 
 max_pooling2d_55 (MaxPoolin  (None, 88, 88, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_138 (Conv2D)         (None, 86, 86, 64)        18496     
                                                          

In [103]:
tensorboard = keras.callbacks.TensorBoard(
log_dir="logs3",
)

c = keras.callbacks.ModelCheckpoint(
filepath="model_small.keras",
monitor="loss",
save_best_only=True,
)

model.fit(np.array(img_vector[:-200]), np.array(label_vectors[:-200]),
epochs=30,
callbacks=[tensorboard, c])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1bd4068c790>