In [1]:
import tensorflow as tf
import keras
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
df = pd.read_csv('Data/pollen_data.csv').drop(columns='Unnamed: 0')

### Control Pannel

In [3]:
img_size = (300,300)

In [4]:
class acc_callback(tf.keras.callbacks.Callback):
    def on_epoch_end(self,epoch,logs={}):
        if (logs.get('val_acc')>=.95):
            print('Validation Accuracy has reached 95%')
            print('Ending epochs')
            self.model.stop_training=True

## Restructure Image Layout

For this project, I plan to use TensorFlow's Image Generator, and the images have to be seperated into a specific folder layout. 

This function Takes all of the images and puts them into the proper format

In [5]:
img_dir = 'Data/images'
def split_to_train_and_val(df,img_dir):
    train, test = train_test_split(df)

    train_pol = train[train['pollen_carrying'] == 1]
    test_pol = test[test['pollen_carrying'] == 1]

    train_no = train[train['pollen_carrying'] == 0]
    test_no = test[test['pollen_carrying'] == 0]


    groups = [
          {'data':train_pol,'dir':'Training/has_pollen'},
         {'data':test_pol,'dir':'Validation/has_pollen'},
          {'data':train_no,'dir':'Training/no_pollen'},
         {'data':test_no,'dir':'Validation/no_pollen'},
    ]


    for group in groups:
        os.system(f"mkdir -p {img_dir}/{group['dir']}")

        for file in group['data']['filename']:
            os.system(f"cp {img_dir}/{file} {img_dir}/{group['dir']}/{file}")

## Run this to create directories for Train and Val 

In [6]:
#split_to_train_and_val(df,img_dir)

# Modeling

### Build Image generators to read in Data from Folders

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1/255)
val_datagen = ImageDataGenerator(rescale=1/255)


train_generator = train_datagen.flow_from_directory(
        'Data/images/Training',
        target_size=img_size, #img_size is set in the Control Pannel at the top  
        batch_size=128,
        class_mode='binary')

val_generator = val_datagen.flow_from_directory(
        'Data/images/Validation',
        target_size=img_size,
        batch_size=28,
        class_mode='binary')


Found 535 images belonging to 2 classes.
Found 179 images belonging to 2 classes.


## Set up Sequential Conv NN 

In [8]:
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(300, 300, 3)))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

model.add(tf.keras.layers.Conv2D(32, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2)),

model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2)),

model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

model.add(tf.keras.layers.Flatten())

model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))


W0625 13:28:32.854012 140288735053440 deprecation.py:506] From /home/snorks/dev/ml/ds-projects/bees/venv/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


## Compile our model using binary crossentropy since we are dealing with has pollen or no pollen

In [9]:
from tensorflow.keras.optimizers import RMSprop

model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.001),
              metrics=['acc'])

W0625 13:28:33.026495 140288735053440 deprecation.py:323] From /home/snorks/dev/ml/ds-projects/bees/venv/lib/python3.7/site-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


## Initialize Callback object

We're going to use a call back that checks val_acc to end the training once we hit 99% acc
If we don't hit 99% on our val data, we will cycle through 200 epochs

In [10]:
cb_acc = acc_callback()

In [11]:
from PIL import Image

In [12]:
history = model.fit_generator(
      train_generator,
      steps_per_epoch=8,  
      epochs=200,
      verbose=1,
      validation_data = val_generator,
      validation_steps=8,
      callbacks=[cb_acc])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Ending epochs
