In [11]:
import requests
import zipfile
import numpy as np
import tensorflow as tf

tf.config.run_functions_eagerly(True)

In [12]:
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [13]:
# Getting the data 1
url = 'https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip'
dataset_path = '../datasets/'
response = requests.get(url)
with open(f'{dataset_path}data.zip', 'wb') as file:
    for chunk in response.iter_content(chunk_size=1024):
        file.write(chunk)

In [14]:
with zipfile.ZipFile(f'{dataset_path}data.zip') as zip_ref:
    zip_ref.extractall(f'{dataset_path}/hairs-dataset')

In [15]:
# Define CNN the model

# noinspection PyUnresolvedReferences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import SGD

model = Sequential()
model.add(Input(shape=(200, 200, 3)))
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
optimizer = SGD(learning_rate=0.002, momentum=0.8)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

In [16]:
# Create the data generator, load data

# noinspection PyUnresolvedReferences
from tensorflow.keras.preprocessing.image import ImageDataGenerator

gen = ImageDataGenerator(rescale=1. / 255)

train_dataset = gen.flow_from_directory(
    '../datasets/hairs-dataset/data/train',
    target_size=(200, 200),
    batch_size=20,
    shuffle=True,
    class_mode='binary'
)

test_dataset = gen.flow_from_directory(
    '../datasets/hairs-dataset/data/test',
    target_size=(200, 200),
    batch_size=20,
    shuffle=True,
    class_mode='binary'
)

Found 800 images belonging to 2 classes.
Found 201 images belonging to 2 classes.


In [17]:
# Train the model
history = model.fit(train_dataset, epochs=10, validation_data=test_dataset)

Epoch 1/10


  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 154ms/step - accuracy: 0.5835 - loss: 0.6810 - val_accuracy: 0.6468 - val_loss: 0.6256
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 151ms/step - accuracy: 0.6776 - loss: 0.5897 - val_accuracy: 0.6219 - val_loss: 0.6696
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 149ms/step - accuracy: 0.7206 - loss: 0.5495 - val_accuracy: 0.6716 - val_loss: 0.6153
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 152ms/step - accuracy: 0.7309 - loss: 0.5430 - val_accuracy: 0.6368 - val_loss: 0.6328
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 150ms/step - accuracy: 0.7642 - loss: 0.4959 - val_accuracy: 0.6667 - val_loss: 0.5889
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 148ms/step - accuracy: 0.7717 - loss: 0.4707 - val_accuracy: 0.6766 - val_loss: 0.5725
Epoch 7/10
[1m40/40[0m [32m━━━━━━━━━

In [18]:
train_accuracies = history.history['accuracy']
median_accuracy = np.median(train_accuracies)
median_accuracy

0.7587499916553497

In [19]:
train_losses = history.history['loss']
std_dev_loss = np.std(train_losses)
std_dev_loss

0.07466872032593092

In [22]:
gen_with_augmentation = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=50,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_dataset_with_augmentation = gen_with_augmentation.flow_from_directory(
    '../datasets/hairs-dataset/data/train',
    target_size=(200, 200),
    batch_size=20,
    shuffle=True,
    class_mode='binary'
)

test_dataset_with_augmentation = gen_with_augmentation.flow_from_directory(
    '../datasets/hairs-dataset/data/test',
    target_size=(200, 200),
    batch_size=20,
    shuffle=True,
    class_mode='binary'
)

Found 800 images belonging to 2 classes.
Found 201 images belonging to 2 classes.


In [23]:
history_with_augmentation = model.fit(train_dataset_with_augmentation, epochs=10, validation_data=test_dataset_with_augmentation)

Epoch 1/10




[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - accuracy: 0.7058 - loss: 0.5732

  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 180ms/step - accuracy: 0.7055 - loss: 0.5735 - val_accuracy: 0.5771 - val_loss: 0.7002
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 172ms/step - accuracy: 0.7003 - loss: 0.6013 - val_accuracy: 0.6716 - val_loss: 0.6201
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 172ms/step - accuracy: 0.7443 - loss: 0.5525 - val_accuracy: 0.6269 - val_loss: 0.6813
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 179ms/step - accuracy: 0.6884 - loss: 0.5966 - val_accuracy: 0.6667 - val_loss: 0.6057
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 173ms/step - accuracy: 0.7292 - loss: 0.5372 - val_accuracy: 0.6866 - val_loss: 0.5863
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 175ms/step - accuracy: 0.6867 - loss: 0.5676 - val_accuracy: 0.6965 - val_loss: 0.5775
Epoch 7/10
[1m40/40[0m [32m━━━━━━━━━

In [25]:
test_losses = history_with_augmentation.history['val_loss']
mean_test_loss = np.mean(test_losses)
mean_test_loss

0.604150938987732

In [26]:
test_accuracies = history_with_augmentation.history['val_accuracy'][-5:]
average_test_accuracy = np.mean(test_accuracies)
average_test_accuracy

0.7004975080490112