In [1]:
import requests
import zipfile
import numpy as np
import tensorflow as tf

tf.config.run_functions_eagerly(True)

In [2]:
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
# Getting the data 1
url = 'https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip'
dataset_path = '../homework8-deep-learning/datasets/'
response = requests.get(url)
with open(f'{dataset_path}data.zip', 'wb') as file:
    for chunk in response.iter_content(chunk_size=1024):
        file.write(chunk)

In [4]:
with zipfile.ZipFile(f'{dataset_path}data.zip') as zip_ref:
    zip_ref.extractall(f'{dataset_path}/hairs-dataset')

In [5]:
# Define CNN the model

# noinspection PyUnresolvedReferences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import SGD

model = Sequential()
model.add(Input(shape=(200, 200, 3)))
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
optimizer = SGD(learning_rate=0.002, momentum=0.8)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 198, 198, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 99, 99, 32)        0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 313632)            0         
                                                                 
 dense (Dense)               (None, 64)                20072512  
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 20073473 (76.57 MB)
Trainable params: 20073473 (76.57 MB)
Non-trainable params: 0 (0.00 Byte)
______________

In [6]:
# Create the data generator, load data

# noinspection PyUnresolvedReferences
from tensorflow.keras.preprocessing.image import ImageDataGenerator

gen = ImageDataGenerator(rescale=1. / 255)

train_dataset = gen.flow_from_directory(
    '../homework8-deep-learning/datasets/hairs-dataset/data/train',
    target_size=(200, 200),
    batch_size=20,
    shuffle=True,
    class_mode='binary'
)

test_dataset = gen.flow_from_directory(
    '../homework8-deep-learning/datasets/hairs-dataset/data/test',
    target_size=(200, 200),
    batch_size=20,
    shuffle=True,
    class_mode='binary'
)

Found 800 images belonging to 2 classes.
Found 201 images belonging to 2 classes.


In [7]:
# Train the model
history = model.fit(train_dataset, epochs=10, validation_data=test_dataset)

Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
train_accuracies = history.history['accuracy']
median_accuracy = np.median(train_accuracies)
median_accuracy

0.7268750071525574

In [9]:
train_losses = history.history['loss']
std_dev_loss = np.std(train_losses)
std_dev_loss

0.07614932526635858

In [10]:
gen_with_augmentation = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=50,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_dataset_with_augmentation = gen_with_augmentation.flow_from_directory(
    '../homework8-deep-learning/datasets/hairs-dataset/data/train',
    target_size=(200, 200),
    batch_size=20,
    shuffle=True,
    class_mode='binary'
)

test_dataset_with_augmentation = gen_with_augmentation.flow_from_directory(
    '../homework8-deep-learning/datasets/hairs-dataset/data/test',
    target_size=(200, 200),
    batch_size=20,
    shuffle=True,
    class_mode='binary'
)

Found 800 images belonging to 2 classes.
Found 201 images belonging to 2 classes.


In [11]:
history_with_augmentation = model.fit(train_dataset_with_augmentation, epochs=10, validation_data=test_dataset_with_augmentation)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
test_losses = history_with_augmentation.history['val_loss']
mean_test_loss = np.mean(test_losses)
mean_test_loss

0.6003234505653381

In [13]:
test_accuracies = history_with_augmentation.history['val_accuracy'][-5:]
average_test_accuracy = np.mean(test_accuracies)
average_test_accuracy

0.68855721950531