# Introduction

This notebook is meant to test that you have all the components you need to run
through the workshop notebooks. It is based on a Kaggle dataset and a Kaggle Kernel
found [here](https://www.kaggle.com/uysimty/keras-cnn-dog-or-cat-classification).

Create a Kaggle account if you don't already have one, then download your Kaggle API key (`kaggle.json`).
Instructions for obtaining the API key are [here](https://github.com/Kaggle/kaggle-api#api-credentials).
If you are running locally, place the `kaggle.json` file in the `~/.kaggle` directory.

Go to the Dogs vs. Cats competition and accept the rules [here](https://www.kaggle.com/c/dogs-vs-cats/rules); the download will fail until you do.

Run the next cell to initialize the Kaggle API.
If you are running on Google Colab, you will be presented
with a file-upload widget. Use it to browse to your `kaggle.json` file.
(Note: you might have to show hidden files/folders in the file browser.)

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/SachsLab/IntracranialNeurophysDL/blob/master/notebooks/01_00_tensorflow_test.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/SachsLab/IntracranialNeurophysDL/blob/master/notebooks/01_00_tensorflow_test.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

In [None]:
# This cell normalizes Google Colab and local environments.
from pathlib import Path
import os
try:
    # See if we are running on google.colab
    from google.colab import files
    os.chdir('..')
    if not (Path.cwd() / '.kaggle').is_dir():
        # Configure kaggle
        files.upload()  # Find the kaggle.json file in your ~/.kaggle directory.
        !pip install -q kaggle
        !mkdir -p ~/.kaggle
        !mv kaggle.json ~/.kaggle/
        !chmod 600 ~/.kaggle/kaggle.json
    if not (Path.cwd() / 'repo').is_dir():
        # Download the workshop repo and change to its directory
        # For now edit the username/password. This requirement will be removed when the repo is made public.
        !git clone https://github.com/SachsLab/IntracranialNeurophysDL.git
        os.chdir('IntracranialNeurophysDL')
    IN_COLAB = True
except ModuleNotFoundError:
    import sys
    if Path.cwd().stem == 'notebooks':
        os.chdir(Path.cwd().parent)
    # Make sure the kaggle executable is on the PATH
    os.environ['PATH'] = os.environ['PATH'] + ';' + str(Path(sys.executable).parent / 'Scripts')
    IN_COLAB = False
    
import matplotlib.pyplot as plt
plt.style.use(['dark_background', 'presentation'])

## Download Data

If you're running from PyCharm, you may prefer to download the data manually from a
terminal/shell/Anaconda prompt, because PyCharm gives no visual feedback while the download is in progress (this was expected to change in PyCharm 2019.2).

In [None]:
datadir = Path.cwd() / 'data' / 'dogscats'
if not (datadir / 'train').is_dir():
    !kaggle competitions download -c dogs-vs-cats
    print("Finished downloading data.")
    import zipfile
    for fn in ['train', 'test1']:
        fpath = Path(fn + '.zip')
        with zipfile.ZipFile(fpath, 'r') as zip_ref:
            zip_ref.extractall(Path.cwd() / 'data' / 'dogscats')
        fpath.unlink()
    print("Finished unzipping extracting data.")
else:
    print("Data directory exists. Skipping download.")

## Prepare Training Data

In [None]:
import pandas as pd


# Build a table with one row per training image. The label is the part of the
# file name before the first dot.
train_path = Path.cwd() / 'data' / 'dogscats' / 'train'
# os.listdir returns entries in arbitrary order, so sort them; keep only files
# whose prefix is one of the two expected labels so that stray files in the
# folder are not silently counted as class 0.
train_names = sorted(_ for _ in os.listdir(train_path) if _.split('.')[0] in ('cat', 'dog'))
labels = [_.split('.')[0] for _ in train_names]
classes = [1 if _ == 'dog' else 0 for _ in labels]
df = pd.DataFrame({'filename': train_names, 'label': labels, 'class': classes})
# Shuffle rows with a fixed seed so every run sees the same row order.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
df['label'].value_counts().plot.bar()  # Show item counts
plt.show()
df.head()  # Show a few lines of the table

In [None]:
# Display a sample image
import random
from tensorflow.keras.preprocessing.image import load_img


# Pick one training file at random (intentionally unseeded: a different image each run).
sample = random.choice(train_names)
image = load_img(train_path / sample)
plt.imshow(image)
plt.title(sample)  # The file name carries the label (text before the first dot).
plt.axis('off')    # Pixel coordinates add nothing to a photo preview.
plt.show()         # Render the figure instead of echoing the AxesImage repr.

In [None]:
# Create model
import tensorflow as tf


IMAGE_WIDTH = 128
IMAGE_HEIGHT = 128
# Keras expects (height, width) both for a generator's target_size and for the
# leading dimensions of a channels-last input_shape. The two values are equal
# here, so this ordering only matters if they are changed to differ.
IMAGE_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)
IMAGE_CHANNELS = 3 # RGB color

model = tf.keras.models.Sequential()

# Three convolutional stages with 32, 64 and 128 filters. Each stage is
# Conv2D -> BatchNormalization -> 2x2 MaxPooling -> Dropout(0.25).
for stage_idx, n_filters in enumerate((32, 64, 128)):
    # Only the first layer of a Sequential model declares the input shape.
    first_layer_kwargs = {'input_shape': IMAGE_SIZE + (IMAGE_CHANNELS,)} if stage_idx == 0 else {}
    model.add(tf.keras.layers.Conv2D(n_filters, (3, 3), activation='relu', **first_layer_kwargs))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Dropout(0.25))

# Classifier head: one hidden dense layer, then a single sigmoid unit for the
# two-class (cat vs dog) output.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

model.summary()

In [None]:
# Prepare callbacks

# Optional TensorBoard hook for Colab (left disabled):
# import tensorboardcolab as tbc
# tbc_init = tbc.TensorBoardColab()
# tbc_cb = tbc.TensorBoardColabCallback(tbc_init)

# Halt training once the validation loss (the Keras default monitor) has gone
# 10 epochs without improving.
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Halve the learning rate whenever validation accuracy has stalled for
# 2 epochs, but never go below 1e-5.
lr_reduction_kwargs = dict(
    monitor='val_accuracy',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)
learning_rate_reduction = tf.keras.callbacks.ReduceLROnPlateau(**lr_reduction_kwargs)

callbacks = [earlystop, learning_rate_reduction]

In [None]:
# Separate train and validation data (0.8, 0.2 respectively)
from sklearn.model_selection import train_test_split

P_VALID = 0.2  # Fraction of the rows held out for validation.

# Split with a fixed seed, then give each part a fresh 0..n-1 index.
train_df, valid_df = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=P_VALID, random_state=42)
)
n_train, n_validate = len(train_df), len(valid_df)

In [None]:
# Create data generators with augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator

BATCH_SIZE = 50

# Settings shared by both generators: both read the same image folder and
# differ only in the dataframe they draw from and in the augmentation applied.
# NOTE(review): the directory is passed as a plain string because the
# flow_from_dataframe documentation describes it as a string path; newer
# versions may accept a Path object as well - confirm if this matters.
flow_kwargs = dict(
    directory=str(train_path),
    x_col='filename',
    y_col='label',
    target_size=IMAGE_SIZE,
    class_mode='binary',
    batch_size=BATCH_SIZE,
)

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1
)

train_generator = train_datagen.flow_from_dataframe(dataframe=train_df, **flow_kwargs)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(dataframe=valid_df, **flow_kwargs)

In [None]:
# Fit model
# Model.fit accepts generators directly; fit_generator is deprecated in
# TensorFlow 2.x, so call fit instead.
history = model.fit(
    train_generator,
    epochs=5,  # increase to 50 to get some good results
    # Floor division gives 0 steps when a split is smaller than one batch,
    # which Keras rejects; always run at least one step.
    steps_per_epoch=max(1, n_train // BATCH_SIZE),
    validation_data=validation_generator,
    validation_steps=max(1, n_validate // BATCH_SIZE),
    callbacks=callbacks
)

In [None]:
# Visualize Training
# Left panel: loss per epoch; right panel: accuracy per epoch.
# No explicit figure facecolor is set: the setup cell requests the
# 'dark_background' style, under which tick and axis labels are white, so
# forcing a white figure background would hide them.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

panels = [
    (ax1, 'Loss', [('loss', 'b', "Training loss"),
                   ('val_loss', 'r', "validation loss")]),
    (ax2, 'Accuracy', [('accuracy', 'b', "Training accuracy"),
                       ('val_accuracy', 'r', "Validation accuracy")]),
]
for ax, ylabel, curves in panels:
    for key, color, label in curves:
        # history.history maps each metric name to its per-epoch values.
        ax.plot(history.history[key], color=color, label=label)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epochs')
    ax.legend(loc='best', shadow=True)

plt.tight_layout()
plt.show()