In [1]:
import pandas as pd
import PIL
import os
import shutil
import numpy as np
from natsort import natsorted
import cv2

basedir = r'../../02_data_crop/'
dir_dict = {
    'train': os.path.join(basedir, 'train'),
    'test': os.path.join(basedir, 'test')
}
print(dir_dict['train'])

../../02_data_crop/train


In [2]:
def get_images(images_path):
    # speichert Bilder als numpy array

    array_images = []
    train_or_test_folder = os.listdir(images_path)

    for folder in natsorted(train_or_test_folder):
        single_folder = os.path.join(images_path, folder)

        for file in os.listdir(single_folder):
            filepath = os.path.join(single_folder, file)

            if filepath.lower().endswith(('.jpeg', '.jpg')):
                image = cv2.resize(cv2.imread(filepath), (64, 64))  # resize Größe bestimmt durch vortainiertes Netz
                array_images.append(image)

    images = np.array(array_images)
    
    return images

In [3]:
def get_label(images_path):
    # speichert Lables als numpy array

    array_label = []
    for folder in natsorted(os.listdir(images_path)):
        label = int(folder)

        for file in os.listdir(os.path.join(images_path, folder)):
            array_label.append(label)
    labels = np.array(array_label)
        
    return labels

In [4]:
from tabulate import tabulate
images = get_images(dir_dict['train']) # train images
labels = get_label(dir_dict['train'])  # train labels

X_eval = get_images(dir_dict['test'])  # Evaluierungs Bilder
y_eval = get_label(dir_dict['test'])   # Evaluierungs Labels

In [5]:
from tabulate import tabulate

tab = [['Anzahl Bilder im train Verzeichnis:', images.shape],
       ['Anzahl Lables im train Verzeichnis:', labels.shape],['Anzahl Validierungsbilder:', X_eval.shape],['Anzahl Validierungslabels:', y_eval.shape]]

print(tabulate(tab, headers=["Kenngröße", "Wert"]))

Kenngröße                            Wert
-----------------------------------  ------------------
Anzahl Bilder im train Verzeichnis:  (10368, 64, 64, 3)
Anzahl Lables im train Verzeichnis:  (10368,)
Anzahl Validierungsbilder:           (1448, 64, 64, 3)
Anzahl Validierungslabels:           (1448,)


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size = 0.2, stratify = labels)

In [7]:
from tabulate import tabulate

tab = [['Anzahl Trainingsbilder:', X_train.shape], ['Anzahl Trainingslabels:', y_train.shape],
       ['Anzahl Validierungsbilder:', X_eval.shape], ['Anzahl Validierungslabels:', y_eval.shape],
       ['Anzahl Testbilder:', X_test.shape], ['Anzahl Testlabels:', y_test.shape]]

print(tabulate(tab, headers=["Kenngröße", "Wert"]))

Kenngröße                   Wert
--------------------------  -----------------
Anzahl Trainingsbilder:     (8294, 64, 64, 3)
Anzahl Trainingslabels:     (8294,)
Anzahl Validierungsbilder:  (1448, 64, 64, 3)
Anzahl Validierungslabels:  (1448,)
Anzahl Testbilder:          (2074, 64, 64, 3)
Anzahl Testlabels:          (2074,)


In [8]:
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers

model = models.Sequential()
model.add(layers.Conv2D(32, (3,3), activation='relu', input_shape=(200,200,3)))
model.add(layers.MaxPooling2D((2,2)))

model.add(layers.Conv2D(64,(3,3), activation='relu'))
model.add(layers.MaxPooling2D((2,2)))

model.add(layers.Conv2D(128,(3,3), activation='relu'))
model.add(layers.MaxPooling2D((2,2)))

model.add(layers.Conv2D(128,(3,3), activation='relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(27, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

  super().__init__(


In [9]:
# one hot encoding der Labels
# Binarisierung, entsprechende Klasse mit 1 gekennzeichnet
import keras.utils
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)
y_eval = keras.utils.to_categorical(y_eval)

In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow(X_train, y_train, batch_size=20)

test_generator = test_datagen.flow(X_test, y_test, batch_size=20)

validation_generator = test_datagen.flow(X_eval, y_eval, batch_size=20)


In [11]:
print(len(train_generator)* 20)
print(type(train_generator))
tab = [['Anzahl Trainingsdatensatz:', len(train_generator)* 20], ['Datentyp:', type(train_generator) ],
       ['Anzahl Testdatensatz:', len(test_generator)* 20], ['Datentyp:', type(test_generator) ],
       ['Anzahl Validierungsdatensatz:', len(validation_generator)* 20], ['Datentyp:', type(validation_generator) ]]

print(tabulate(tab, headers=["Kenngröße", "Wert"]))

8300
<class 'keras.src.legacy.preprocessing.image.NumpyArrayIterator'>
Kenngröße                      Wert
-----------------------------  -----------------------------------------------------------------
Anzahl Trainingsdatensatz:     8300
Datentyp:                      <class 'keras.src.legacy.preprocessing.image.NumpyArrayIterator'>
Anzahl Testdatensatz:          2080
Datentyp:                      <class 'keras.src.legacy.preprocessing.image.NumpyArrayIterator'>
Anzahl Validierungsdatensatz:  1460
Datentyp:                      <class 'keras.src.legacy.preprocessing.image.NumpyArrayIterator'>


In [12]:
import os

# Pfad zum übergeordneten Ordner mit den Unterordnern
parent_folder = dir_dict['train']

# Initialisiere ein leeres Array, um die Anzahl der Bilder in jedem Unterordner zu speichern
num_images_per_folder = []

# Gehe durch jeden Unterordner im übergeordneten Ordner
for folder_name in natsorted(os.listdir(parent_folder)):
    folder_path = os.path.join(parent_folder, folder_name)

    # Zähle die Anzahl der Dateien (Bilder) im aktuellen Unterordner
    num_images = len([file for file in os.listdir(folder_path) if file.endswith('.jpg') or file.endswith('.png')])

    # Füge die Anzahl der Bilder dem Array hinzu
    num_images_per_folder.append(num_images)

print(num_images_per_folder)

[494, 444, 389, 393, 449, 390, 392, 395, 440, 321, 426, 430, 398, 428, 417, 396, 386, 398, 408, 409, 407, 425, 402, 381, 373, 177]


In [13]:
classes = len(y_train[0])
uniq_labels = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27]
print(classes)
print(uniq_labels)

27
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]


In [30]:
#from sklearn.utils import class_weight

#class_weights = class_weight.compute_class_weight(uniq_labels, y_train)

In [None]:
#weight = torch.tensor((0.15, 0.85))

In [14]:
from sklearn.utils import class_weight
class_weight = class_weight.compute_class_weight('balanced'
                                               ,np.unique(y_train)
                                               ,y_train)

TypeError: too many positional arguments

In [32]:
history = model.fit(train_generator, steps_per_epoch=100, epochs=5, validation_data=test_generator, validation_steps=50)

Epoch 1/5


ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 12800, but received input with shape (None, 512)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 64, 64, 3), dtype=float32)
  • training=True
  • mask=None

In [None]:
score = model.evaluate(test_generator, verbose = 0)
print('Accuracy for test images:', round(score[1]*100, 3), '%')
score = model.evaluate(validation_generator, verbose = 0)
print('Accuracy for evaluation images:', round(score[1]*100, 3), '%')