In [1]:
# Importing libraries
import os
import shutil
import random
from tqdm import tqdm

import numpy as np
import pandas as pd

import PIL
import rasterio
import seaborn as sns
import matplotlib.pyplot as plt

In [8]:
# Root folder of the source images; each immediate subfolder is used as one class label.
dataset = "./tif"
# os.listdir returns entries in arbitrary order and also lists stray files
# (e.g. .DS_Store, Thumbs.db). Keep only subdirectories and sort them so the
# label list -- and everything derived from it -- is stable across machines.
labels = sorted(
    entry
    for entry in os.listdir(dataset)
    if os.path.isdir(os.path.join(dataset, entry))
)
labels

['AnnualCrop',
 'Forest',
 'HerbaceousVegetation',
 'Highway',
 'Industrial',
 'Pasture',
 'PermanentCrop',
 'Residential',
 'River',
 'SeaLake']

# Data Preprocessing

In [3]:
# importing libraries
import re
from sklearn.model_selection import StratifiedShuffleSplit
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [9]:
# Destination roots for the training / testing copies of the dataset.
TRAIN_DIR, TEST_DIR = './usage/training', './usage/testing'

# One class per entry in `labels`.
NUM_CLASSES = len(labels)
CLASS_MODE = 'categorical'

# NOTE(review): presumably the model's batch size and (height, width, bands)
# input shape -- confirm against the cells that consume them.
BATCH_SIZE = 64
INPUT_SHAPE = (64, 64, 13)

In [13]:
# Create the training and testing root directories.
# os.makedirs(..., exist_ok=True) also creates the missing './usage' parent
# (os.mkdir raises FileNotFoundError when the parent does not exist) and is a
# no-op when the directory is already there, so the cell can be re-run safely.
for path in (TRAIN_DIR, TEST_DIR):
    os.makedirs(path, exist_ok=True)

In [14]:
# Create one subdirectory per class label under both the training and testing roots.
# makedirs with exist_ok=True creates any missing parent as well and does
# nothing when the directory already exists, so no separate existence check
# is needed and the cell is safe to re-run.
for label in labels:
    for root in (TRAIN_DIR, TEST_DIR):
        os.makedirs(os.path.join(root, label), exist_ok=True)

In [15]:
# Map each image path to its class label in 'data'.
# Labels and file names are sorted because os.listdir returns entries in an
# arbitrary, filesystem-dependent order; without a stable order the seeded
# split below would pick different files on different machines.
data = {
    os.path.join(dataset, label, img): label
    for label in sorted(labels)
    for img in sorted(os.listdir(os.path.join(dataset, label)))
}

# X: image paths, y: one-hot encoded class labels (one column per class).
# Both dict views are materialised as lists before building the Series.
X = pd.Series(list(data.keys()))
y = pd.get_dummies(pd.Series(list(data.values())))

# A single stratified split holding out 20% of the paths, with a fixed seed.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=69)

In [16]:
def _copy_missing(path_map, own_root, other_root):
    """Copy each (source, destination) pair unless the file is already placed.

    A pair is skipped when the destination already exists, or when the same
    relative path already exists under ``other_root`` -- so a file never ends
    up in both the training and the testing tree.

    path_map   -- iterable of (source_path, destination_path) tuples
    own_root   -- root directory the destinations live under
    other_root -- root directory of the opposite split
    """
    for src, dst in tqdm(path_map):
        twin = os.path.join(other_root, os.path.relpath(dst, own_root))
        if os.path.exists(dst) or os.path.exists(twin):
            continue
        shutil.copy(src, dst)


# split the list of image paths
for train_idx, test_idx in split.split(X, y):

    # the splitter yields positional indices, so select by position
    train_paths = X.iloc[train_idx]
    test_paths = X.iloc[test_idx]

    # define a new path for each image depending on training or testing:
    # keep the path relative to the dataset root and re-root it, instead of
    # regex-substituting a substring that could also match elsewhere in the path
    new_train_paths = [os.path.join(TRAIN_DIR, os.path.relpath(p, dataset)) for p in train_paths]
    new_test_paths = [os.path.join(TEST_DIR, os.path.relpath(p, dataset)) for p in test_paths]

    train_path_map = list(zip(train_paths, new_train_paths))
    test_path_map = list(zip(test_paths, new_test_paths))

    # copy the files (the originals stay in place; the printed wording is kept
    # as-is so it matches previously recorded output)
    print("moving training files..")
    _copy_missing(train_path_map, TRAIN_DIR, TEST_DIR)

    print("moving testing files..")
    # the opposite tree for a testing file is the training tree
    _copy_missing(test_path_map, TEST_DIR, TRAIN_DIR)

  0%|                                                                                        | 0/21600 [00:00<?, ?it/s]

moving training files..


100%|████████████████████████████████████████████████████████████████████████████| 21600/21600 [13:36<00:00, 26.46it/s]
  0%|                                                                                 | 4/5400 [00:00<02:39, 33.80it/s]

moving testing files..


100%|██████████████████████████████████████████████████████████████████████████████| 5400/5400 [03:01<00:00, 29.75it/s]


In [17]:
# Create ImageDataGenerator instances that apply data augmentation to the training images

def my_image_mask_generator(image_data_generator, mask_data_generator):
    """Yield ``(img, mask)`` pairs drawn in lockstep from the two iterables.

    Each yielded item is a 2-tuple holding the next element of
    ``image_data_generator`` and the next element of ``mask_data_generator``.
    Iteration ends as soon as either input is exhausted.
    """
    yield from zip(image_data_generator, mask_data_generator)

SEED = 100


def _augmented_train_flow():
    """Return a directory iterator over TRAIN_DIR with random shift/rotate/zoom augmentation."""
    augmenter = ImageDataGenerator(
        rotation_range=10,
        zoom_range=0.1,
        width_shift_range=0.1,
        height_shift_range=0.1,
    )
    return augmenter.flow_from_directory(
        directory=TRAIN_DIR,
        target_size=(150, 150),
        batch_size=16,
        seed=SEED,
    )


# Two separately constructed iterators with identical settings and seed.
# NOTE(review): both read TRAIN_DIR, and batch_size / target_size here differ
# from the BATCH_SIZE and INPUT_SHAPE constants defined earlier -- confirm
# this is intended.
train_image_data_generator = _augmented_train_flow()
train_mask_data_generator = _augmented_train_flow()

Found 21600 images belonging to 10 classes.
Found 21600 images belonging to 10 classes.


In [18]:
# Show the class-name -> index mapping used by the training iterator.
class_indices = train_image_data_generator.class_indices
print(class_indices)
# np.save appends '.npy' to the name; a dict is stored as a pickled object
# array, so reading it back needs np.load(..., allow_pickle=True).item().
np.save('class_indices', class_indices)

{'AnnualCrop': 0, 'Forest': 1, 'HerbaceousVegetation': 2, 'Highway': 3, 'Industrial': 4, 'Pasture': 5, 'PermanentCrop': 6, 'Residential': 7, 'River': 8, 'SeaLake': 9}


In [19]:
from keras.optimizers import Adam

# Pair the two training iterators into a single generator of (image batch, mask batch) tuples
my_generator = my_image_mask_generator(
    image_data_generator=train_image_data_generator,
    mask_data_generator=train_mask_data_generator,
)