# Imports

In [1]:
from PIL import Image
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization
import pandas as pd
import pandas.util
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import math
import glob

# Hyperparameters

In [2]:
IMAGE_FOLDER = 'images_classification/'

TEST_FOLDER = 'classification_grey/test/'
TRAIN_FOLDER = 'classification_grey/train/'
VAL_FOLDER = 'classification_grey/val/'

NUM_TRANSLATIONS = 9 # Number of random tranlations that are applied to each segment
NUM_SEGMENTS = 3     # An odd-numbered number of segments that determines which parts are being cut.
                     # The segments overlie one another to 50%. 
        
VAL_TRAIN_RATIO = 1/10 #ratio of images in the validation dataset to images in the train dataset

## Load coordinate information of landmarks

In [3]:
image_paths = glob.glob(IMAGE_FOLDER + '*.jpg')

# get all unique filenames
unique_files = list({key[:-len("_s0_t0.jpg")] for key in image_paths})
random.shuffle(unique_files) #randomly shuffle these filenames

# write picture information in file

In [4]:
filelist = os.listdir(IMAGE_FOLDER)

f= open("images.txt","w+")
for i in range(len(filelist)):
    f.write(f"{filelist[i]};{filelist[i][3:6]};{IMAGE_FOLDER}{filelist[i]}\n")
f.close()

# Create panda dataframe from textfile

In [5]:
df = pd.read_csv("images.txt", sep = ';', names = ['filename','classname', 'image_path'], encoding = 'unicode_escape')
df = df.set_index('filename')

print(df.head())
print("shape of data frame: ", df.shape)

                                    classname  \
filename                                        
DIPiss13_d_1_s1_t0.jpg                    iss   
DIPtriAufschlogersoge_v_4_s0_t7.jpg       tri   
DIPtriAufschlogersoge_v_4_s1_t0.jpg       tri   
DIPzei3_d_4_s1_t3.jpg                     zei   
DIPcom2_v_2_s0_t7.jpg                     com   

                                                                            image_path  
filename                                                                                
DIPiss13_d_1_s1_t0.jpg                    images_classification/DIPiss13_d_1_s1_t0.jpg  
DIPtriAufschlogersoge_v_4_s0_t7.jpg  images_classification/DIPtriAufschlogersoge_v_...  
DIPtriAufschlogersoge_v_4_s1_t0.jpg  images_classification/DIPtriAufschlogersoge_v_...  
DIPzei3_d_4_s1_t3.jpg                      images_classification/DIPzei3_d_4_s1_t3.jpg  
DIPcom2_v_2_s0_t7.jpg                      images_classification/DIPcom2_v_2_s0_t7.jpg  
shape of data frame:  (19980, 2)


# determine test images

In [6]:
images = np.array([key[len(IMAGE_FOLDER):-len("_0_s0_t0.jpg")] for key in image_paths]) #get all filenames without additional information
genet, num_genet = np.unique(images, return_counts=True) #get all clones of one genet
num_genet = num_genet // (NUM_SEGMENTS * (NUM_TRANSLATIONS+1)) #update number by taking the segments and translations applied to each image into account
lcm_count = np.lcm.reduce(num_genet) #reduce to lowest common multiple
test_idx = np.random.randint(low=1, high=lcm_count, size=genet.shape) #randomly determine indices for the test dataset
test_idx = (test_idx % num_genet) + 1

#get test images
test_images = [f"{genet[i]}_{test_idx[i]}_s{s}_t{t}.jpg" for i in range(test_idx.shape[0]) for s in range(NUM_SEGMENTS) for t in range(NUM_TRANSLATIONS+1)]
print(len(test_images))

4080


# determine train and validation images and create training, validation and test dataframes

In [7]:
# split data further into training, validation data frames

all_filenames = [image_path[len(IMAGE_FOLDER):] for image_path in image_paths] #get all filenames
non_test_filenames = [image for image in all_filenames if image not in test_images] #get all filenames apart from the ones that belong to test images
random.shuffle(non_test_filenames) #shuffle these filenames randomly

num_val = int(VAL_TRAIN_RATIO * len(non_test_filenames)) #set the number of images in the validation data set

val_images = non_test_filenames[:num_val] #get validation images
train_images = non_test_filenames[num_val:] #get training images

# construct training, validation and testing data frames 
test_df = df.loc[test_images]
val_df = df.loc[val_images]
train_df = df.loc[train_images]

# Function for image augmentation

In [8]:
def augmentation (filename, path, outfile):
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels = 3)
    
    img_brightness = tf.image.random_brightness(img, max_delta = 0.2)
    output_image = tf.image.encode_png(img_brightness)
    tf.io.write_file(tf.constant(outfile+filename[0:-4]+"brightness1.jpg"), output_image)
    
    img_brightness = tf.image.random_brightness(img, max_delta = 0.2)
    output_image = tf.image.encode_png(img_brightness)
    tf.io.write_file(tf.constant(outfile+filename[0:-4]+"brightness2.jpg"), output_image)
    
    img_contrast = tf.image.adjust_contrast(img,0.6)
    output_image = tf.image.encode_png(img_contrast)
    tf.io.write_file(tf.constant(outfile+filename[0:-4]+"contrast.jpg"), output_image)
                     
    img = tf.image.encode_png(img)
    tf.io.write_file(tf.constant(outfile+filename), img) #copy image to same folder

# augmentation of images 
## (only training images)

In [9]:
#transform test images to greyscale
for i in range(0,len(test_images)):
    filename = test_images[i]
    path = IMAGE_FOLDER+filename
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels = 3)
    img_gray = tf.image.rgb_to_grayscale(img)
    output_image = tf.image.encode_png(img_gray)
    tf.io.write_file(tf.constant(TEST_FOLDER+filename), output_image)

#transform validation images to greyscale
for i in range(0,len(val_images)):
    filename = val_images[i]
    path = IMAGE_FOLDER+filename
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels = 3)
    img_gray = tf.image.rgb_to_grayscale(img)
    output_image = tf.image.encode_png(img_gray)
    tf.io.write_file(tf.constant(VAL_FOLDER+filename), output_image)
    
#transform training images to greyscale
for i in range(0,len(train_images)):
    filename = train_images[i]
    path = IMAGE_FOLDER+filename
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels = 3)
    img_gray = tf.image.rgb_to_grayscale(img)
    output_image = tf.image.encode_png(img_gray)
    tf.io.write_file(tf.constant(TRAIN_FOLDER+filename), output_image)

#augment training images
train_files = os.listdir(TRAIN_FOLDER)
for i in range(len(train_files)):
    filename = train_files[i]
    augmentation(filename, TRAIN_FOLDER+filename, TRAIN_FOLDER)

## write new images into text file and read into panda dataframe

In [10]:
test_files = os.listdir(TEST_FOLDER)
val_files = os.listdir(VAL_FOLDER)
train_files = os.listdir(TRAIN_FOLDER)

random.shuffle(train_files)

f= open("test.txt","w+")
for i in range(len(test_files)):
    f.write(f"{test_files[i]};{test_files[i][3:6]};{TEST_FOLDER}{test_files[i]}\n")
f.close()

f= open("val.txt","w+")
for i in range(len(val_files)):
    f.write(f"{val_files[i]};{val_files[i][3:6]};{VAL_FOLDER}{val_files[i]}\n")
f.close()

f= open("train.txt","w+")
for i in range(len(train_files)):
    f.write(f"{train_files[i]};{train_files[i][3:6]};{TRAIN_FOLDER}{train_files[i]}\n")
f.close()

# Create a pandas dataframe from a tab separated file 
test_df = pd.read_csv("test.txt", sep = ';', names = ['filename','classname','image_path'], encoding = 'unicode_escape')
test_df = test_df.set_index('filename')

val_df = pd.read_csv("val.txt", sep = ';', names = ['filename','classname','image_path'], encoding = 'unicode_escape')
val_df = val_df.set_index('filename')

train_df = pd.read_csv("train.txt", sep = ';', names = ['filename','classname', 'image_path'], encoding = 'unicode_escape')
train_df = train_df.set_index('filename')

## add 'class' column

In [12]:
# associate class names with a class (0 .. K-1)
classnames = train_df['classname'].unique() # all 6 species names
K = classnames.size  # 6
name2class = dict(zip(classnames, range(K))) # dictionary that maps a name to its index in classnames array
print("names and classes:", name2class)

# Add a column 'class' to data frame  with the number representing the species name
test_df['class'] = test_df['classname'].map(name2class) # new column class with number representing plant name

# Add a column 'class' to data frame  with the number representing the species name
val_df['class'] = val_df['classname'].map(name2class) # new column class with number representing plant name
# Add a column 'class' to data frame  with the number representing the species name
train_df['class'] = train_df['classname'].map(name2class) # new column class with number representing plant name

names and classes: {'alp': 0, 'iss': 1, 'zei': 2, 'oel': 3, 'com': 4, 'tri': 5}


# save dataframes for further use

In [13]:
test_df.to_pickle('grey_classification_test.pkl')
val_df.to_pickle('grey_classification_val.pkl')
train_df.to_pickle('grey_classification_train.pkl')