# Init

In [None]:
%cd /kaggle/input/leaves
!ls

In [None]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
from PIL import Image



from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2 as cv

# Short aliases for the Keras namespaces used in the notebook
tfk = tf.keras
tfkl = tfk.layers

# Record the TensorFlow version the notebook was run with
print(tf.__version__)

In [None]:
# One seed shared by every random number generator, for reproducibility
seed = 42

os.environ['PYTHONHASHSEED'] = str(seed)

# Python, NumPy and TensorFlow (both the v2 and the compat.v1 entry points)
for set_seed in (random.seed,
                 np.random.seed,
                 tf.random.set_seed,
                 tf.compat.v1.set_random_seed):
    set_seed(seed)

In [None]:
# Dataset folders:
#   dataset_dir  - root of the Kaggle input dataset
#   training_dir - its 'training' sub-folder; the generators below read it
#                  with flow_from_directory and the visualisation cells look
#                  up one sub-folder per class name inside it
dataset_dir = '/kaggle/input/leaves'
training_dir = os.path.join(dataset_dir, 'training')

## Data Augmentation

In [None]:
# Training generator: random geometric/photometric augmentation followed by
# rescaling of the pixel values from [0, 255] to [0, 1].
aug_data_gen = ImageDataGenerator(rotation_range=30,
                                  zoom_range=0.3,
                                  horizontal_flip = True,
                                  vertical_flip = True,
                                  brightness_range = (0.7, 1.3),
                                  fill_mode='reflect',
                                  # Shifts are left out on purpose: per the original note,
                                  # GAP makes the model invariant to shift.
                                  #width_shift_range = 30,
                                  #height_shift_range = 30,
                                  validation_split=0.2,
                                  rescale=1/255.)

# Validation generator: no augmentation, but the SAME rescaling and the same
# validation_split as the training generator. Without `rescale` the validation
# images would be fed in [0, 255] while the training images are in [0, 1].
valid_data_gen = ImageDataGenerator(validation_split=0.2,
                                    rescale=1/255.)

# Augmented training subset (80% of the files of each class)
train_gen = aug_data_gen.flow_from_directory(directory = training_dir,
                                                 subset = 'training',
                                                 target_size=(256,256),
                                                 color_mode='rgb',
                                                 classes=None,
                                                 class_mode='categorical',
                                                 batch_size=8,
                                                 shuffle=True,
                                                 seed=seed)

# Non-augmented validation subset (remaining 20%), kept in a fixed order
valid_gen = valid_data_gen.flow_from_directory(directory = training_dir,
                                                 subset = 'validation',
                                                 target_size=(256,256),
                                                 color_mode='rgb',
                                                 classes=None,
                                                 class_mode='categorical',
                                                 batch_size=8,
                                                 shuffle=False,
                                                 seed=seed)


In [None]:
# class name -> class index, as assigned by the directory iterator
labels = train_gen.class_indices

# Reverse lookup: class index -> class name
inv_labels = dict(zip(labels.values(), labels.keys()))
print(inv_labels)

In [None]:
# Sanity check of the reverse lookup: name of the class stored at index 3
print(inv_labels[3])

## Data Visualization __from folder__

In [None]:
# Show the first file listed in each class folder on a num_row x num_col grid,
# one class per cell, titled with the class name.
num_row = 2
num_col = 7
# squeeze=False keeps `axes` two-dimensional even for a single row or column
fig, axes = plt.subplots(num_row, num_col, figsize=(8*num_row,num_col), squeeze=False)
for i, ax in enumerate(axes.flat):   # row-major order, same as [i//num_col, i%num_col]
    if i >= len(inv_labels):
        # More grid cells than classes: leave the remaining cells empty
        ax.axis('off')
        continue

    class_dir = os.path.join(training_dir, inv_labels[i])
    print(class_dir)
    class_imgs = next(os.walk(class_dir))[2]   # file names directly inside class_dir
    class_img = class_imgs[0]
    img_path = os.path.join(class_dir, class_img)
    print(img_path)

    # Convert to an array inside the `with` so the file handle is released
    with Image.open(img_path) as img:
        ax.imshow(np.array(img))

    ax.set_title('{}'.format(inv_labels[i]))
plt.tight_layout()
plt.show()

## Thresholding

In [None]:
def preprocess(im):
    """Write an inverted adaptive-threshold mask into channel 0 of an image.

    Used as ``preprocessing_function`` of an ``ImageDataGenerator`` whose
    iterators load images with ``color_mode='rgb'``.

    Args:
        im: rank-3 image array (height, width, channels) in RGB order.
            Values are cast to uint8, so they are assumed to lie in
            [0, 255] -- TODO confirm for any other caller.

    Returns:
        A new float64 array of the same shape, divided by 255. Channel 0
        holds the inverted binary mask (0.0 or 1.0); the other channels hold
        the original values scaled to [0, 1]. The input array is not modified.
    """
    # astype() returns a copy, so the caller's array is left untouched;
    # OpenCV thresholding needs 8-bit input.
    im = im.astype('uint8')

    # The image arrives in RGB order, so the grayscale conversion must use the
    # RGB code; the BGR code would swap the red and blue luminance weights.
    im_thresh = cv.cvtColor(im, cv.COLOR_RGB2GRAY)
    # Mean adaptive threshold over 11x11 neighbourhoods, constant C = 2
    im_thresh = cv.adaptiveThreshold(im_thresh, 255, cv.ADAPTIVE_THRESH_MEAN_C,
                                     cv.THRESH_BINARY, 11, 2)

    # Invert the binary mask: 0 <-> 255
    im_thresh = 255 - im_thresh

    # Replace the first channel with the mask, keep the other two
    im[:, :, 0] = im_thresh

    # Scale to [0, 1] here, since the generator applies no `rescale`
    res = im.astype('float64')
    res /= 255
    return res

In [None]:
# Generator that passes every image through `preprocess` (inverted adaptive
# threshold written into channel 0, then division by 255).
tresh_data_gen = ImageDataGenerator(validation_split=0.2,
                                    preprocessing_function = preprocess,
                                    # No `rescale`: preprocess already divides by 255.
                                    # The batch dtype must be floating point: with
                                    # dtype='uint8' the [0, 1] values returned by
                                    # preprocess are truncated to 0/1 when they are
                                    # stored in the batch array.
                                    dtype='float32')

# Thresholded training subset
train_gen2 = tresh_data_gen.flow_from_directory(directory = training_dir,
                                                 subset = 'training',
                                                 target_size=(256,256),
                                                 color_mode='rgb',
                                                 classes=None,
                                                 class_mode='categorical',
                                                 batch_size=8,
                                                 shuffle=True,
                                                 seed=seed)

# Validation subset from the SAME generator, so validation images get exactly
# the preprocessing (threshold channel + [0, 1] scaling) of the training images.
valid_gen = tresh_data_gen.flow_from_directory(directory = training_dir,
                                                 subset = 'validation',
                                                 target_size=(256,256),
                                                 color_mode='rgb',
                                                 classes=None,
                                                 class_mode='categorical',
                                                 batch_size=8,
                                                 shuffle=False,
                                                 seed=seed)


In [None]:
# Draw one batch from the thresholding generator and show its first image,
# together with the names of the files the batch was built from.
generated = next(train_gen2)

# batch_index already points at the NEXT batch (and wraps to 0 at the end of
# an epoch), so step back by one modulo the number of batches per epoch.
last_batch = (train_gen2.batch_index - 1) % len(train_gen2)
idx = last_batch * train_gen2.batch_size

# With shuffle=True the batch is NOT a contiguous slice of `filenames`:
# positions must be mapped through the (shuffled) index_array first.
batch_file_ids = train_gen2.index_array[idx : idx + train_gen2.batch_size]
print([train_gen2.filenames[j] for j in batch_file_ids])

print('-----------')
print(generated[0].shape)   #  (batch, 256, 256, 3), batch <= batch_size

img, label = generated

print(img.shape)   #  same array as generated[0]
plt.imshow(img[0], 'gray')
plt.show()

## Data Visualization **from generator!!!**

In [None]:
# Show the first image of num_row*num_col successive batches drawn from the
# thresholding generator, each titled with ITS OWN class name and shape.
num_row = 2
num_col = 7
# squeeze=False keeps `axes` two-dimensional even for a single row or column
fig, axes = plt.subplots(num_row, num_col, figsize=(8*num_row,num_col), squeeze=False)

for ax in axes.flat:
    img_batch, label = next(train_gen2)
    img = img_batch[0]

    # class_mode='categorical' yields one-hot labels: the position of the
    # maximum is the class index. The loop counter is unrelated to the class
    # of a randomly drawn image, so it must not be used for the title.
    class_name = inv_labels[int(np.argmax(label[0]))]

    ax.imshow(np.array(img))
    ax.set_title('{}--{}'.format(class_name, img.shape))
plt.tight_layout()
plt.show()

## *examples of adaptive thresholding from files*

In [None]:
# Grid of adaptive-threshold results: the first file listed in each class
# folder, read as grayscale and binarised with a mean adaptive threshold.
num_row, num_col = 2, 7
fig, axes = plt.subplots(num_row, num_col, figsize=(8*num_row,num_col))
for i in range(num_row*num_col):
    class_name = inv_labels[i]
    class_dir = os.path.join(training_dir, class_name)
    class_imgs = next(os.walk(class_dir))[2]
    class_img = class_imgs[0]

    # Flag 0 -> load as a single-channel grayscale image
    img = cv.imread(os.path.join(class_dir, class_img), 0)
    img = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_MEAN_C,
                               cv.THRESH_BINARY, 11, 2)

    row, col = divmod(i, num_col)
    ax = axes[row, col]
    ax.imshow(np.array(img), 'gray')
    ax.set_title('{}'.format(class_name))
plt.tight_layout()
plt.show()

### *example for thresholding*

In [None]:
# Example image for the thresholding demos: first file listed in the folder
# of the class with index 3
class_dir = os.path.join(training_dir, inv_labels[3])
class_imgs = next(os.walk(class_dir))[2]
class_img = class_imgs[0]
example_path = os.path.join(class_dir, class_img)
image = Image.open(example_path)

# Display it as 8-bit data
plt.imshow(np.uint8(image))

In [None]:
    
# Compare the five global threshold modes of OpenCV on the example image,
# read as grayscale (flag 0), with threshold 127 and maximum value 255.
img = cv.imread(os.path.join(class_dir, class_img), 0)
ret, thresh1 = cv.threshold(img, 127, 255, cv.THRESH_BINARY)
ret, thresh2 = cv.threshold(img, 127, 255, cv.THRESH_BINARY_INV)
ret, thresh3 = cv.threshold(img, 127, 255, cv.THRESH_TRUNC)
ret, thresh4 = cv.threshold(img, 127, 255, cv.THRESH_TOZERO)
ret, thresh5 = cv.threshold(img, 127, 255, cv.THRESH_TOZERO_INV)

titles = ['Original Image','BINARY','BINARY_INV','TRUNC','TOZERO','TOZERO_INV']
images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]

# 2 x 3 grid, one panel per image, without axis ticks
for position, (title, picture) in enumerate(zip(titles, images), start=1):
    plt.subplot(2, 3, position)
    plt.imshow(picture, 'gray', vmin=0, vmax=255)
    plt.title(title)
    plt.xticks([])
    plt.yticks([])
plt.show()

In [None]:
# Global vs. adaptive thresholding on the median-blurred grayscale example
# image (`img` comes from the previous cell).
img = cv.medianBlur(img, 5)

ret, th1 = cv.threshold(img, 127, 255, cv.THRESH_BINARY)
th2 = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_MEAN_C,
                           cv.THRESH_BINARY, 11, 2)
th3 = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C,
                           cv.THRESH_BINARY, 11, 2)

titles = ['Original Image', 'Global Thresholding (v = 127)',
            'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding']
images = [img, th1, th2, th3]

# 2 x 2 grid, one panel per image, without axis ticks
for position, (title, picture) in enumerate(zip(titles, images), start=1):
    plt.subplot(2, 2, position)
    plt.imshow(picture, 'gray')
    plt.title(title)
    plt.xticks([])
    plt.yticks([])
plt.show()