In [1]:
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import torch
from roboflow import Roboflow
from sklearn.metrics import confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Report which accelerator the training in this notebook will actually use.
# The model is built and trained with TensorFlow/Keras, so TensorFlow's own
# device list is the source of truth; PyTorch's CUDA status can differ
# (e.g. a CPU-only torch build installed next to a GPU-enabled TensorFlow).
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    # Device string usable with `tf.device(device)` for explicit placement.
    device = "/GPU:0"
    print("GPU is available. Using GPU.")
else:
    # No GPU is visible to TensorFlow: everything runs on the CPU.
    device = "/CPU:0"
    print("GPU is not available. Using CPU.")

GPU is not available. Using CPU.


### Download dataset

In [3]:
# Fetch the Roboflow export only when it is not already on disk, so that a
# fresh "Restart & Run All" works without re-downloading on every run.
# The API key is read from the ROBOFLOW_API_KEY environment variable and must
# never be pasted into the notebook; a key that has been committed to version
# control should be treated as exposed and rotated.
dataset_dir = "beekeeping_step2-4"  # default folder created by version 4 of the project
if not os.path.isdir(dataset_dir):
    api_key = os.environ.get("ROBOFLOW_API_KEY")
    if not api_key:
        # Fail early with an actionable message instead of a later
        # "directory not found" error from the image generators.
        raise RuntimeError(
            "Dataset folder 'beekeeping_step2-4' was not found and the "
            "ROBOFLOW_API_KEY environment variable is not set."
        )
    rf = Roboflow(api_key=api_key)
    project = rf.workspace("ai-clinic-beekeeping").project("beekeeping_step2")
    version = project.version(4)
    # "folder" export: one sub-directory per split, one sub-directory per class.
    version.download("folder")

loading Roboflow workspace...
loading Roboflow project...
Downloading Dataset Version Zip in beekeeping_step2-4 to folder: 100% [48984229 / 48984229] bytes


Extracting Dataset Version Zip to beekeeping_step2-4 in folder:: 100%|██████████| 3769/3769 [00:02<00:00, 1303.75it/s]


### Prepare dataset

In [5]:
# Root folder of the dataset; the split sub-folders are read from here.
dataset = "beekeeping_step2-4"

# Options shared by the training and validation iterators.
flow_options = dict(
    target_size=(410, 410),  # every image is resized to 410x410
    batch_size=32,
    class_mode='binary',     # one 0/1 label per image, as binary_crossentropy expects
)

# Random augmentation applied to training images only.
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Both generators multiply pixel values by 1/255; only training is augmented.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)
valid_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(dataset + '/train', **flow_options)
validation_generator = valid_datagen.flow_from_directory(dataset + '/valid', **flow_options)


Found 3206 images belonging to 2 classes.
Found 368 images belonging to 2 classes.


### Model Definition

In [6]:
# Small CNN for binary classification of 410x410 RGB images: three
# Conv2D + MaxPooling2D stages followed by a dense head with one sigmoid unit.
model = models.Sequential()

# First stage also declares the input shape (height, width, channels).
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(410, 410, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Remaining stages double the number of filters each time.
for n_filters in (64, 128):
    model.add(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

# Classifier head.
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# Precision and recall are tracked alongside accuracy.
tracked_metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()]
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)

In [7]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 408, 408, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 204, 204, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 202, 202, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 101, 101, 64)     0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 99, 99, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 49, 49, 128)      0

### Model Training

In [None]:

# Compute class weights: 'balanced' weights each class inversely to its
# frequency in the training labels.
y_train = train_generator.classes
classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
# Key the weights by the actual label values rather than by position, so the
# mapping stays correct even if the labels are not exactly 0..n-1.
class_weights_dict = {int(label): float(weight) for label, weight in zip(classes, class_weights)}

# Early Stopping Callback: stop once val_loss has not improved for 5 epochs
# and restore the weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)

# steps_per_epoch and validation_steps are intentionally not passed: both
# generators have a known length (ceil(samples / batch_size)), so Keras runs
# exactly one full pass over each per epoch. Hard-coding them is fragile: with
# batch_size=32, 368 validation images give only 12 batches, so a fixed
# validation_steps=50 requests more batches than exist, and 3206 training
# images give 101 batches, so a fixed steps_per_epoch=100 drops one per epoch.
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    class_weight=class_weights_dict,
    callbacks=[early_stopping]
)


### Save the Keras model

In [None]:
# Persist the trained model to the path 'model_saved'.
# NOTE(review): the path has no file extension, so the on-disk format is
# whatever the installed Keras version uses by default — confirm that this is
# the intended format before relying on it elsewhere.
model.save('model_saved')

## Evaluation
### Confusion Matrix

In [None]:
# Test images reuse the rescale-only generator; shuffle=False keeps the
# prediction order aligned with test_generator.classes.
test_generator = valid_datagen.flow_from_directory(
    dataset + '/test',
    shuffle=False,
    class_mode='binary',
    batch_size=32,
    target_size=(410, 410),
)

# Ground-truth labels, in the iterator's (unshuffled) file order.
y_true = test_generator.classes

# Model outputs are rounded to the nearest integer to obtain hard 0/1 labels,
# then flattened to a 1-D vector.
predictions = model.predict(test_generator)
rounded = np.round(predictions)
y_pred = rounded.astype(int).flatten()

In [None]:
# Function to plot the confusion matrix
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D array-like; rows are labelled 'True label' and columns
            'Predicted label'.
        classes: Sequence of tick labels, one per row/column.
        normalize: If True, divide every row by its sum before plotting, so
            both the colours and the cell text show per-row fractions.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the heat map.

    Returns:
        None. The matrix that is plotted is also printed to stdout.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the image and the cell annotations agree.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows with no samples stay at 0 instead of turning into NaN.
        cm = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Fractions are shown with two decimals; other values are shown as-is.
    is_float = np.issubdtype(cm.dtype, np.floating)
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        label = f"{value:.2f}" if is_float else str(value)
        plt.text(j, i, label, horizontalalignment="center", color="white" if value > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are included in it.
    plt.tight_layout()

In [None]:
# Build the confusion matrix from the test labels and predictions, then draw it.
# NOTE(review): the tick labels assume class index 0 is 'Non-Pollen' and index 1
# is 'Pollen' — confirm against test_generator.class_indices.
class_names = ['Non-Pollen', 'Pollen']
cm = confusion_matrix(y_true, y_pred)

plt.figure(figsize=(8, 8))
plot_confusion_matrix(cm, class_names, normalize=False, title='Confusion matrix')

### Visualize the training and validation loss and accuracy

# Plot the per-epoch training and validation curves recorded by model.fit,
# one figure per metric: accuracy first, then loss.
curves = [
    # (training key, validation key, figure title, y-axis label)
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
]

for train_key, val_key, figure_title, y_label in curves:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()