In [1]:
import os

import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.optimizers import SGD
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator

In [2]:
!wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/bee-wasp-data/data.zip
!unzip -q data.zip

--2023-11-12 11:27:12--  https://github.com/SVizor42/ML_Zoomcamp/releases/download/bee-wasp-data/data.zip
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/405934815/e6c56cb7-dce1-463f-865b-01e913c38485?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20231112%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20231112T112712Z&X-Amz-Expires=300&X-Amz-Signature=4a1f684f147f7c33b8c1881a4e6d1d4267364bb45e7e287b08accabe339f92cb&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=405934815&response-content-disposition=attachment%3B%20filename%3Ddata.zip&response-content-type=application%2Foctet-stream [following]
--2023-11-12 11:27:12--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/405934815/e6c56cb7-dce1-463f-865b-01e913c38485?X-Amz-

In [3]:
# Build a small CNN for binary classification of 150x150 RGB images.
model = Sequential()

# Convolution: 32 filters, 3x3 kernel, ReLU; this layer also fixes the input shape
model.add(Conv2D(32, (3, 3), input_shape=(150, 150, 3), activation='relu'))

# 2x2 max pooling
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the pooled feature maps into a vector for the dense layers
model.add(Flatten())

# Hidden dense layer: 64 units, ReLU
model.add(Dense(64, activation='relu'))

# Output layer: a single sigmoid unit, paired with binary cross-entropy below
model.add(Dense(1, activation='sigmoid'))

# SGD with momentum. The keyword is `learning_rate`; the old `lr` alias is
# deprecated and is not accepted by newer Keras releases.
sgd_optimizer = SGD(learning_rate=0.002, momentum=0.8)

# Compile with binary cross-entropy loss and track accuracy during training
model.compile(optimizer=sgd_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer summary (output shapes and parameter counts)
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 74, 74, 32)        0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 175232)            0         
                                                                 
 dense (Dense)               (None, 64)                11214912  
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 11215873 (42.79 MB)
Trainable params: 11215873 (42.79 MB)
Non-trainable params: 0 (0.00 Byte)
______________

## Question 1
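Response: binary crossentropy (`binary_crossentropy`) — the model ends in a single sigmoid unit and is compiled with this loss for the two-class problem.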

## Question 2
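Response: 896 — the `Param #` reported for the `conv2d` layer in the model summary above, i.e. 32 filters × (3 × 3 × 3 weights + 1 bias).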

## Generators and Training

In [4]:
# Plain (non-augmenting) image generators: the only transformation applied to
# either split is multiplying pixel values by 1/255.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
)
test_datagen = ImageDataGenerator(
    rescale=1. / 255,
)
# Image folders, given relative to the working directory in which data.zip is
# unpacked, so the notebook is not tied to Kaggle's /kaggle/working location.
train_dir = os.path.join('data', 'train')
test_dir = os.path.join('data', 'test')

In [5]:
# Settings shared by both directory iterators: resize to 150x150, batches of
# 20, binary labels, shuffled order.
_flow_kwargs = dict(
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
    shuffle=True,
)

# One iterator per split, each reading from its own directory
train_generator = train_datagen.flow_from_directory(train_dir, **_flow_kwargs)
test_generator = test_datagen.flow_from_directory(test_dir, **_flow_kwargs)

Found 3677 images belonging to 2 classes.
Found 918 images belonging to 2 classes.


In [6]:
# Fit on the non-augmented training iterator for 10 epochs, validating against
# the test iterator; the returned History object is kept for the questions below.
history = model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Question 3

In [7]:
# Per-epoch training accuracy recorded by the first (non-augmented) fit
training_accuracy = history.history['accuracy']

# Median across those epochs
median_training_accuracy = np.median(a=training_accuracy)

print(
    "Median Training Accuracy:",
    median_training_accuracy,
)

Median Training Accuracy: 0.8403589725494385


## Question 4

In [8]:
# Per-epoch training loss recorded by the first (non-augmented) fit
training_loss = history.history['loss']

# Population standard deviation (NumPy's default, ddof=0) across those epochs
std_dev_training_loss = np.asarray(training_loss).std()

print(
    "Standard Deviation of Training Loss:",
    std_dev_training_loss,
)

Standard Deviation of Training Loss: 0.20229444069756955


## Data Augmentation

In [9]:
# Random augmentations applied to training images on top of the 1/255 rescale
_augmentation_kwargs = dict(
    rotation_range=50,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen_augmented = ImageDataGenerator(rescale=1./255, **_augmentation_kwargs)

# Directory iterator over the training images using the augmenting generator
# (150x150 targets, batches of 32, binary labels)
train_generator_augmented = train_datagen_augmented.flow_from_directory(
    directory=train_dir,
    class_mode='binary',
    batch_size=32,
    target_size=(150, 150),
)

Found 3677 images belonging to 2 classes.


In [10]:
# Keep fitting the same `model` object, now on the augmented training iterator,
# for 20 epochs; validation still uses the non-augmented test iterator.
history_augmented = model.fit(
    x=train_generator_augmented,
    validation_data=test_generator,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Question 5

In [11]:
# Per-epoch validation loss from the augmented run, averaged over all epochs
val_loss_augmented = history_augmented.history['val_loss']
mean_test_loss_augmented = np.mean(val_loss_augmented)

print(
    "Mean Test Loss for All Epochs with Augmentations:",
    mean_test_loss_augmented,
)


Mean Test Loss for All Epochs with Augmentations: 0.47188584208488465


## Question 6

In [12]:
# Validation accuracy for the final five epochs of the augmented run
val_accuracy_last_5_augmented = history_augmented.history['val_accuracy'][-5:]

# Mean over those five values
average_test_accuracy_last_5_epochs_augmented = np.mean(val_accuracy_last_5_augmented)

print(
    "Average Test Accuracy for the Last 5 Epochs with Augmentations:",
    average_test_accuracy_last_5_epochs_augmented,
)


Average Test Accuracy for the Last 5 Epochs with Augmentations: 0.8061002135276795
