In [16]:
import os
import sys
import gzip
import code
import urllib
import importlib
import tensorflow.python.platform
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from tqdm import tqdm
from PIL import Image

In [17]:
COLAB = False
BRANCH = 'main'

In [18]:
if COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    drive_path = '/content/drive/Shareddrives/ML_Road_Segmentation/CS-433-project-2/project_road_segmentation'
    os.chdir(drive_path)
    from helpers.colab import mount_and_pull
    BRANCH_NAME = BRANCH
    mount_and_pull(BRANCH_NAME, drive, os)

In [19]:
%load_ext autoreload
%autoreload 2

from helpers.image_processing import *
from helpers.file_manipulation import *
from helpers.constants import *
from helpers.prediction_checking import *
from helpers.machine_learning import *
from helpers.loss_functions import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [29]:
RESTORE_MODEL = False  # If True, restore existing model instead of training a new one
SAVE_MODEL = False
GENERATE_PREDICTION = False  # If True, will generate a CSV to submit on AICrowd

MODEL_NAME = 'cnn_6conv'  # For now, cnn, unet-1, unet-2
SAVE_DIR = MODELS_SAVE_DIR + MODEL_NAME + '/'

NUM_EPOCHS = 250

In [34]:
mod = importlib.import_module('models.' + MODEL_NAME)
model_function = getattr(mod, MODEL_NAME)

In [46]:
from legacy.old_helpers import *
data_dir = 'data/training/'
train_data_filename = data_dir + 'images/'
train_labels_filename = data_dir + 'groundtruth/' 

# Extract pixel patches into numpy arrays. Dim: (nb patch, 16, 16, 3)

training_data = extract_data(train_data_filename, 100)
for transformation in ['mix', 'rotation', 'flip', 'shift']:
    training_data = np.vstack([training_data, extract_data(f'data/generated/{transformation}/images/', 100)])
training_labels = extract_labels(train_labels_filename, 100)
for transformation in ['mix', 'rotation', 'flip', 'shift']:
    training_labels = np.vstack([training_labels, extract_labels(f'data/generated/{transformation}/groundtruth/', 100)])

100%|██████████| 100/100 [00:00<00:00, 112.34it/s]
 10%|█         | 10/100 [00:00<00:00, 97.21it/s]

Loaded 100 training images


100%|██████████| 100/100 [00:00<00:00, 108.50it/s]
 11%|█         | 11/100 [00:00<00:00, 105.51it/s]

Loaded 100 training images


100%|██████████| 100/100 [00:00<00:00, 110.74it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

Loaded 100 training images


100%|██████████| 100/100 [00:01<00:00, 96.46it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

Loaded 100 training images


100%|██████████| 100/100 [00:01<00:00, 96.83it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

Loaded 100 training images


100%|██████████| 100/100 [00:00<00:00, 478.21it/s]


Loaded 100 groudtruth images


100%|██████████| 100/100 [00:00<00:00, 245.49it/s]


Loaded 100 groudtruth images


100%|██████████| 100/100 [00:00<00:00, 210.66it/s]


Loaded 100 groudtruth images


100%|██████████| 100/100 [00:00<00:00, 225.30it/s]


Loaded 100 groudtruth images


100%|██████████| 100/100 [00:00<00:00, 247.89it/s]


Loaded 100 groudtruth images


In [50]:
from sklearn.model_selection import train_test_split
#Create training sets and validation sets
X_train, X_test, y_train, y_test = train_test_split(training_data, training_labels,\
                                                    train_size= int(len(training_data) * 0.8), random_state=SEED)
print(X_train.shape)
print(y_train.shape)

(250000, 16, 16, 3)
(62000, 16, 16, 3)


In [None]:
# X_train, X_test, y_train, y_test = get_train_test(data_augmentation=True)

Loading data/training/images/: 100%|██████████| 100/100 [00:03<00:00, 29.57it/s]
Loading data/training/groundtruth/: 100%|██████████| 100/100 [00:00<00:00, 295.91it/s]
Loading data/generated/shift/images/: 100%|██████████| 100/100 [00:02<00:00, 43.98it/s]
Loading data/generated/shift/groundtruth/: 100%|██████████| 100/100 [00:00<00:00, 168.90it/s]
Loading data/generated/flip/images/: 100%|██████████| 100/100 [00:02<00:00, 38.74it/s]
Loading data/generated/flip/groundtruth/: 100%|██████████| 100/100 [00:00<00:00, 158.07it/s]
Loading data/generated/mix/images/: 100%|██████████| 100/100 [00:02<00:00, 44.04it/s]
Loading data/generated/mix/groundtruth/: 100%|██████████| 100/100 [00:01<00:00, 84.56it/s]
Loading data/generated/rotation/images/: 100%|██████████| 100/100 [00:02<00:00, 39.28it/s]
Loading data/generated/rotation/groundtruth/: 100%|██████████| 100/100 [00:00<00:00, 102.17it/s]


Training features shape :  (500, 400, 400, 3)
Training labels shape :  (500, 400, 400)


In [52]:
# TODO : Try to train unet-2 with 250 epoch and focal tversky loss and check for overfit
# Hyperparameters to tweak : Alpha and Gamma from focal tversky loss for accuracy, and dropout rate for overfit
metrics=[tf.keras.metrics.Precision(),
         tf.keras.metrics.Recall(),
         dice_coef
        ]
callbacks = [
             tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10, verbose=1),
            tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5)
            ]
if RESTORE_MODEL:
    model = tf.keras.models.load_model(SAVE_DIR
    , custom_objects={'dice_coef_loss': dice_coef_loss, 'dice_coef': dice_coef})
else:  
    model = model_function()
    model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=metrics)
    history = model.fit(X_train, y_train, 
                        epochs = NUM_EPOCHS,
                        validation_split=VALIDATION_SIZE,
                        callbacks=callbacks)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 00011: early stopping


In [53]:
if True:
    model.save(SAVE_DIR)

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: model_save/cnn_6conv/assets


In [55]:
with tf.device('/cpu:0'):
    train_predictions = model.predict(X_train).squeeze()
    test_predictions = model.predict(X_test).squeeze()

In [57]:
visualize_random_predictions(X_train, y_train, train_predictions, size=10)

ValueError: ignored

<Figure size 432x288 with 0 Axes>

In [None]:
visualize_random_predictions(X_test, y_test, test_predictions, size=10)

In [58]:
if True:
    from helpers.colab import download_model
    from google.colab import files
    download_model(MODEL_NAME, SAVE_DIR, files)

  adding: model_save/cnn_6conv/ (stored 0%)
  adding: model_save/cnn_6conv/variables/ (stored 0%)
  adding: model_save/cnn_6conv/variables/variables.data-00000-of-00001 (deflated 52%)
  adding: model_save/cnn_6conv/variables/variables.index (deflated 71%)
  adding: model_save/cnn_6conv/assets/ (stored 0%)
  adding: model_save/cnn_6conv/saved_model.pb (deflated 90%)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>