# Imports

In [1]:
from PIL import Image
import numpy as np
import os
import random
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization
import pandas as pd
import pandas.util
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import math

## Load coordinate information of landmarks

In [2]:
# Fixed seed so that the train/validation/test split derived from
# `unique_files` is the same after every kernel restart.
SPLIT_SEED = 42

# NOTE(security): allow_pickle=True unpickles arbitrary Python objects; only
# load landmark files that come from a trusted source.
# Indexing the loaded array with [()] unwraps the stored object (used as a
# dict below); .copy() gives us our own dict to extend later.
landmarks = np.load('landmarks_easy.npy', allow_pickle = True)[()].copy()

# All base file names, i.e. the landmark keys without their "_t<k>.jpg"
# translation suffix (independent of the translation).
# Sort first: iteration order of a set of strings is not stable between
# interpreter runs, so seeding the shuffle alone would not be reproducible.
unique_files = sorted({key[:-len("_t0.jpg")] for key in landmarks.keys()})
random.Random(SPLIT_SEED).shuffle(unique_files)

# Build a data frame with one row per landmark key and keep the key as a
# regular 'filename' column as well.
df_landmarks = pd.DataFrame.from_dict(landmarks, orient = 'index')
df_landmarks['filename'] = df_landmarks.index

# Write image information to a text file

In [3]:
filelist = os.listdir("out_all/smallbox2/")

# One "filename;classname;image_path" record per image. The class name is
# taken from characters 3..5 of the file name (e.g. "alp" in "DIPalp14_...").
# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open("images.txt", "w") as f:
    for name in filelist:
        f.write(f"{name};{name[3:6]};out_all/smallbox2/{name}\n")

# Create a pandas dataframe from the text file

In [4]:
# Load the semicolon-separated image list and index it by file name.
df = pd.read_csv(
    "images.txt",
    sep = ';',
    names = ['filename','classname', 'image_path'],
    encoding = 'unicode_escape',
).set_index('filename')

print(df.head())
print("shape of data frame: ", df.shape)

                    classname                             image_path
filename                                                            
DIPalp14_d_1_t0.jpg       alp  out_all/smallbox2/DIPalp14_d_1_t0.jpg
DIPalp14_d_1_t1.jpg       alp  out_all/smallbox2/DIPalp14_d_1_t1.jpg
DIPalp14_d_1_t2.jpg       alp  out_all/smallbox2/DIPalp14_d_1_t2.jpg
DIPalp14_d_1_t3.jpg       alp  out_all/smallbox2/DIPalp14_d_1_t3.jpg
DIPalp14_d_1_t4.jpg       alp  out_all/smallbox2/DIPalp14_d_1_t4.jpg
shape of data frame:  (3996, 2)


In [5]:
# Put image metadata and landmark data side by side, aligned on the index.
frames = [df, df_landmarks]
df_all = pd.concat(objs = frames, axis = 'columns', sort = False)

# create training, validation and test dataframes

In [6]:
# Three-way split into test, validation and training data frames.
# The split is made over the (already shuffled) base file names, so every
# translation "_t0" .. "_t5" of one base image ends up in the same subset.
num_imgs  = len(unique_files)               # number of base images
num_test  = 100                             # base images held out for the final test
num_val   = 100                             # base images used to monitor training
num_train = num_imgs - num_test - num_val   # everything else is training data

assert num_train > 0, "Error: examples consumed by test and validation sets alone"


NUM_TRANSLATIONS = 6
all_filenames = [
    f"{filename}_t{i}.jpg"
    for filename in unique_files
    for i in range(NUM_TRANSLATIONS)
]

# Slice boundaries inside all_filenames (NUM_TRANSLATIONS entries per base image).
test_end = NUM_TRANSLATIONS * num_test
val_end = NUM_TRANSLATIONS * (num_test + num_val)

test_indices = all_filenames[:test_end]
val_indicies = all_filenames[test_end:val_end]
train_indicies = all_filenames[val_end:]

# Select the matching rows of the combined frame for each subset.
test1_df = df_all.loc[test_indices]
val1_df = df_all.loc[val_indicies]
train1_df = df_all.loc[train_indicies]

# (A former size check / size printout was disabled here; it referred to a
# `train_df` name that this cell does not define.)

In [7]:
# Show the training split as a quick visual check.
# The reset_index variants below are disabled; they refer to names
# (test_df, train_df, val_df) that are not defined in the cells shown here.
#test1_df = test_df.reset_index(drop=False)
# train1_df = train_df.reset_index(drop=False)
# val1_df = val_df.reset_index(drop=False)
train1_df


Unnamed: 0_level_0,classname,image_path,landmarks,filename
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DIPalp17_v_5_t0.jpg,alp,out_all/smallbox2/DIPalp17_v_5_t0.jpg,"[[56.66049251704129, 39.49755078780956], [56.9...",DIPalp17_v_5_t0.jpg
DIPalp17_v_5_t1.jpg,alp,out_all/smallbox2/DIPalp17_v_5_t1.jpg,"[[59.092826244645536, 46.7945519706223], [59.4...",DIPalp17_v_5_t1.jpg
DIPalp17_v_5_t2.jpg,alp,out_all/smallbox2/DIPalp17_v_5_t2.jpg,"[[62.3359378814512, 37.06521706020531], [62.65...",DIPalp17_v_5_t2.jpg
DIPalp17_v_5_t3.jpg,alp,out_all/smallbox2/DIPalp17_v_5_t3.jpg,"[[62.60619718451834, 40.84884730314525], [62.9...",DIPalp17_v_5_t3.jpg
DIPalp17_v_5_t4.jpg,alp,out_all/smallbox2/DIPalp17_v_5_t4.jpg,"[[56.119973910907014, 43.551440333816636], [56...",DIPalp17_v_5_t4.jpg
...,...,...,...,...
DIPzei4_d_4_t1.jpg,zei,out_all/smallbox2/DIPzei4_d_4_t1.jpg,"[[35.91328857364223, 40.89679298158239], [36.1...",DIPzei4_d_4_t1.jpg
DIPzei4_d_4_t2.jpg,zei,out_all/smallbox2/DIPzei4_d_4_t2.jpg,"[[36.722648939684625, 48.38337636747458], [36....",DIPzei4_d_4_t2.jpg
DIPzei4_d_4_t3.jpg,zei,out_all/smallbox2/DIPzei4_d_4_t3.jpg,"[[36.722648939684625, 40.08743261553999], [36....",DIPzei4_d_4_t3.jpg
DIPzei4_d_4_t4.jpg,zei,out_all/smallbox2/DIPzei4_d_4_t4.jpg,"[[37.53200930572702, 38.671051974965785], [37....",DIPzei4_d_4_t4.jpg


In [8]:
def augmentation(filename, path, outfile):
    """Write colour-augmented copies of one image and register their landmarks.

    Eight variants are produced (two random brightness, two random
    saturation, two random hue, one grayscale, one reduced contrast). None of
    them moves pixels, so each variant reuses the landmark entry of the
    source image. Finally the unmodified image is written as well.

    Parameters
    ----------
    filename : str
        Key of the source image in the module-level ``landmarks`` dict. The
        last four characters (the extension) are replaced by
        ``<variant>.jpg`` to form the new keys.
    path : str
        Path of the source image. It is also appended to ``outfile`` to build
        the output paths, so any directories contained in ``path`` are
        reproduced below ``outfile``.
    outfile : str
        Output prefix (used as a directory prefix such as ``"train/easy2/"``
        by the callers in this notebook).

    Side effects
    ------------
    Writes nine files and adds eight entries to ``landmarks``; every new
    entry refers to the same object as ``landmarks[filename]``.

    Notes
    -----
    All outputs are PNG-encoded although their names end in ``.jpg``.
    NOTE(review): the input is read with ``decode_png`` while the callers
    pass ``.jpg`` paths - confirm that this is intended.
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels = 3)

    # (name suffix, transform) in the order in which the variants are
    # generated; the order fixes both the sequence of random draws and the
    # insertion order of the new landmark keys.
    variants = [
        ("brightness1", lambda x: tf.image.random_brightness(x, max_delta = 0.2)),
        ("brightness2", lambda x: tf.image.random_brightness(x, max_delta = 0.2)),
        ("saturation1", lambda x: tf.image.random_saturation(x, lower = 0.95, upper = 1.05)),
        ("saturation2", lambda x: tf.image.random_saturation(x, lower = 0.95, upper = 1.05)),
        ("hue1", lambda x: tf.image.random_hue(x, max_delta = 0.2)),
        ("hue2", lambda x: tf.image.random_hue(x, max_delta = 0.2)),
        ("gray", tf.image.rgb_to_grayscale),
        ("contrast", lambda x: tf.image.adjust_contrast(x, 0.6)),
    ]

    out_stem = outfile + path[0:-4]   # output path without the 4-character extension
    key_stem = filename[0:-4]         # landmark key without the 4-character extension

    for suffix, transform in variants:
        output_image = tf.image.encode_png(transform(img))
        tf.io.write_file(tf.constant(out_stem + suffix + ".jpg"), output_image)
        landmarks[key_stem + suffix + ".jpg"] = landmarks[filename]

    # Also store the unmodified image next to its augmented variants.
    img = tf.image.encode_png(img)
    tf.io.write_file(tf.constant(outfile+path), img)

# augmentation of images 
## (except test images)

In [11]:
# Test images do not need to be augmented: copy their bytes unchanged.
# The columns are iterated by value instead of using `series[i]`, because an
# integer key on a string-indexed Series relies on pandas' deprecated
# positional fallback.
for filename, path in zip(test1_df['filename'], test1_df['image_path']):
    img = tf.io.read_file(path)
    tf.io.write_file(tf.constant("test/easy2/"+filename), img)

# Validation and training images are each written with their augmented variants.
for filename, path in zip(val1_df["filename"], val1_df['image_path']):
    augmentation(filename, path, "val/easy2/")

for filename, path in zip(train1_df['filename'], train1_df['image_path']):
    augmentation(filename, path, "train/easy2/")

In [12]:
# Rebuild the landmark frame from the dict, which now also holds the keys
# added by augmentation(); transposing turns the dict keys into the row index.
landmarks_by_column = pd.DataFrame.from_dict(landmarks)
df_landmarks_augmented = landmarks_by_column.T
df_landmarks_augmented

Unnamed: 0,landmarks
DIPalp14_d_1_t0.jpg,"[[47.43548267200521, 42.204294495544985], [48...."
DIPalp14_d_1_t1.jpg,"[[44.126195737738406, 38.52730901302632], [45...."
DIPalp14_d_1_t2.jpg,"[[42.655401544730935, 37.97576119064852], [44...."
DIPalp14_d_1_t3.jpg,"[[44.31004501186434, 47.53592344519706], [45.8..."
DIPalp14_d_1_t4.jpg,"[[51.847865251027606, 40.73350030253752], [53...."
...,...
DIPalp95_d_2_t5saturation2.jpg,"[[64.5462962865733, 39.53415605982711], [62.28..."
DIPalp95_d_2_t5hue1.jpg,"[[64.5462962865733, 39.53415605982711], [62.28..."
DIPalp95_d_2_t5hue2.jpg,"[[64.5462962865733, 39.53415605982711], [62.28..."
DIPalp95_d_2_t5gray.jpg,"[[64.5462962865733, 39.53415605982711], [62.28..."


## Write the new images into text files and read them into pandas dataframes

In [31]:
def _write_image_list(directory, list_path):
    """Write one "filename;classname;image_path" line per file in `directory`.

    `directory` must end with a path separator because it is prepended
    verbatim to each file name to form the image path. The class name is
    taken from characters 3..5 of the file name. Returns the directory
    listing that was written.
    """
    files = os.listdir(directory)
    # Context manager: the file is closed even if a write raises.
    with open(list_path, "w") as f:
        for name in files:
            f.write(f"{name};{name[3:6]};{directory}{name}\n")
    return files


def _read_image_list(list_path):
    """Read a semicolon-separated image list into a frame indexed by file name."""
    frame = pd.read_csv(list_path, sep = ';', names = ['filename','classname','image_path'], encoding = 'unicode_escape')
    return frame.set_index('filename')


# augmentation() appends the full source path to its output prefix, which is
# why the validation/training images live below ".../out_all/smallbox2/".
test_files = _write_image_list("test/easy2/", "test.txt")
val_files = _write_image_list("val/easy2/out_all/smallbox2/", "val.txt")
train_files = _write_image_list("train/easy2/out_all/smallbox2/", "train.txt")

# Create one pandas data frame per semicolon-separated list file.
test2_df = _read_image_list("test.txt")
val2_df = _read_image_list("val.txt")
train2_df = _read_image_list("train.txt")

# From here on the counters hold the number of image files per subset
# (they previously held the number of base images).
num_test = test2_df.shape[0]
num_train = train2_df.shape[0]
num_val = val2_df.shape[0]

In [32]:
# test1_df carries 'filename' both as the index name and as a column, which
# makes `on='filename'` ambiguous for pandas. Dropping the index leaves the
# column as the only candidate; test2_df contributes 'filename' via its index.
# After the merge only the path of the copied test image (from test2_df) is kept.
df_test = (
    pd.merge(
        test1_df.reset_index(drop=True),
        test2_df,
        how = 'right',
        on = ['filename', 'classname'],
    )
    .drop(columns='image_path_x')
    .rename(columns={'image_path_y': 'image_path'})
)

# Validation/training: attach the landmarks (original and augmented keys) to
# every listed image by index, then expose the index as a 'filename' column.
df_val = pd.merge(df_landmarks_augmented, val2_df, how = 'right', left_index=True, right_index=True)
df_val['filename'] = df_val.index

df_train = pd.merge(df_landmarks_augmented, train2_df, how = 'right', left_index=True, right_index=True)
df_train['filename'] = df_train.index

## add 'class' column

In [35]:
# Display the merged training frame for a visual check.
df_train

Unnamed: 0_level_0,landmarks,classname,image_path,filename
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DIPalp14_d_1_t0.jpg,"[[47.43548267200521, 42.204294495544985], [48....",alp,train/easy2/out_all/smallbox2/DIPalp14_d_1_t0.jpg,DIPalp14_d_1_t0.jpg
DIPalp14_d_1_t0brightness1.jpg,"[[47.43548267200521, 42.204294495544985], [48....",alp,train/easy2/out_all/smallbox2/DIPalp14_d_1_t0b...,DIPalp14_d_1_t0brightness1.jpg
DIPalp14_d_1_t0brightness2.jpg,"[[47.43548267200521, 42.204294495544985], [48....",alp,train/easy2/out_all/smallbox2/DIPalp14_d_1_t0b...,DIPalp14_d_1_t0brightness2.jpg
DIPalp14_d_1_t0contrast.jpg,"[[47.43548267200521, 42.204294495544985], [48....",alp,train/easy2/out_all/smallbox2/DIPalp14_d_1_t0c...,DIPalp14_d_1_t0contrast.jpg
DIPalp14_d_1_t0gray.jpg,"[[47.43548267200521, 42.204294495544985], [48....",alp,train/easy2/out_all/smallbox2/DIPalp14_d_1_t0g...,DIPalp14_d_1_t0gray.jpg
...,...,...,...,...
DIPzeiFilzwald_v_5_t5gray.jpg,"[[43.16347787429154, 47.08235735693495], [42.6...",zei,train/easy2/out_all/smallbox2/DIPzeiFilzwald_v...,DIPzeiFilzwald_v_5_t5gray.jpg
DIPzeiFilzwald_v_5_t5hue1.jpg,"[[43.16347787429154, 47.08235735693495], [42.6...",zei,train/easy2/out_all/smallbox2/DIPzeiFilzwald_v...,DIPzeiFilzwald_v_5_t5hue1.jpg
DIPzeiFilzwald_v_5_t5hue2.jpg,"[[43.16347787429154, 47.08235735693495], [42.6...",zei,train/easy2/out_all/smallbox2/DIPzeiFilzwald_v...,DIPzeiFilzwald_v_5_t5hue2.jpg
DIPzeiFilzwald_v_5_t5saturation1.jpg,"[[43.16347787429154, 47.08235735693495], [42.6...",zei,train/easy2/out_all/smallbox2/DIPzeiFilzwald_v...,DIPzeiFilzwald_v_5_t5saturation1.jpg


In [36]:
# Map every class name to an integer class 0 .. K-1.
# The encoding is derived from the training frame, in order of first
# appearance there; a class name that is absent from df_train has no entry
# in the mapping.
classnames = df_train['classname'].unique()   # distinct species names (6 in the recorded run)
K = classnames.size
name2class = {name: idx for idx, name in enumerate(classnames)}
print("names and classes:", name2class)

# Add the numeric 'class' column to all three frames using the same mapping.
for frame in (df_test, df_val, df_train):
    frame['class'] = frame['classname'].map(name2class)

names and classes: {'alp': 0, 'com': 1, 'iss': 2, 'oel': 3, 'tri': 4, 'zei': 5}


# save dataframes for further use

In [40]:
# Persist the three prepared frames for later use.
for frame, target in (
    (df_test, 'test_easy2_df.pkl'),
    (df_val, 'val_easy2_df.pkl'),
    (df_train, 'train_easy2_df.pkl'),
):
    frame.to_pickle(target)