In [None]:
import numpy as np 

import pandas as pd 
import matplotlib.pyplot as plt 
import shutil 
from os import listdir, makedirs, getcwd, remove 
from os.path import isfile, join, abspath, exists, isdir, expanduser 
import matplotlib.image as mimg

import tensorflow as tf 
from keras import layers 
from keras import models 
from keras import optimizers 
from sklearn.preprocessing import LabelEncoder 
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array 


import import_ipynb
import define_svm


In [None]:
def plot_and_save_image(running_corrects,val_running_corrects, running_loss,val_running_loss, title, direc):
    """Plot accuracy and loss curves side by side and save the figure.

    Parameters
    ----------
    running_corrects, val_running_corrects : sequence
        Per-epoch accuracy values; drawn in that order on the left panel.
    running_loss, val_running_loss : sequence
        Per-epoch loss values; drawn in that order on the right panel.
    title : str
        Figure-level title.
    direc : str
        Destination handed to ``plt.savefig``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    fig.suptitle(title)

    # One entry per panel: (first curve, second curve, panel title, y label).
    panels = [
        (running_corrects, val_running_corrects, 'model accuracy', 'accuracy'),
        (running_loss, val_running_loss, 'model loss', 'loss'),
    ]
    for ax, (first_curve, second_curve, panel_title, y_label) in zip(axes, panels):
        ax.plot(first_curve)
        ax.plot(second_curve)
        ax.set_title(panel_title)
        # Legend entries follow the plotting order of the two curves.
        ax.legend(['train', 'test'], loc='upper left')
        ax.set(xlabel='epochs', ylabel=y_label)
        ax.grid()

    plt.savefig(direc)

# COLLECTING DATA 

In [None]:
# Root folder holding one sub-directory of image patches per species.
species_pth = '../../../patches/train'
# Keep only sub-directories: a stray file in the root would otherwise be treated
# as a species and make the later listdir() on it fail. Sorting makes the
# order independent of the file system.
species = sorted(
    entry for entry in listdir(species_pth)
    if isdir(join(species_pth, entry))
)

In [None]:
# Collect one (species, image path) record for every patch file found in each
# species directory.
lichens = [
    (sp, join(species_pth, sp, patch_name))
    for sp in species
    for patch_name in listdir(join(species_pth, sp))
]

# Two-column table: 'category' is the species name, 'image' is the patch path.
lichens_dataframe = pd.DataFrame(lichens, columns=['category', 'image'])

In [None]:
# Number of patches per species.
fl_count = lichens_dataframe['category'].value_counts()

print("Total number of lichens patch in the dataset: ", len(lichens))
print("lichens patch in each category: ")
print(fl_count)

In [None]:
# Visualize a few randomly chosen patches from each category.

# Paths of the randomly drawn samples, up to 4 per category.
random_samples = []

for category in fl_count.index:
    category_images = lichens_dataframe.loc[lichens_dataframe['category'] == category, 'image']
    # Never ask for more samples than the category holds; sample(4) raises otherwise.
    n_pick = min(4, len(category_images))
    random_samples.extend(category_images.sample(n_pick).values)
print(len(random_samples))

# Show the first 16 samples on a 4x4 grid.
f, ax = plt.subplots(4,4, figsize=(15,10))
for axis in ax.flat:
    # Hide ticks on every panel, including ones left empty when fewer than 16 samples exist.
    axis.axis('off')
for axis, sample in zip(ax.flat, random_samples):
    axis.imshow(mimg.imread(sample))
plt.show()

In [None]:
# Build data/train/<species> and data/valid/<species> under the working directory.
current_dir_train = join(getcwd(),'data/train')
current_dir_val = join(getcwd(),'data/valid')

for split_dir in (current_dir_train, current_dir_val):
    for sp in species:
        # makedirs creates the missing parents too; exist_ok lets the cell be re-run.
        makedirs(join(split_dir, sp), exist_ok=True)

# Copy 75% of each category's images into the training directory and the
# remainder into the validation directory.
# A fixed seed plus sorted inputs keeps the split identical across re-runs, so
# re-running the cell cannot leak an image into both folders.
# NOTE(review): files left over from a run on a different dataset are not removed.
split_rng = np.random.RandomState(42)
for category in sorted(fl_count.index):
    samples = sorted(lichens_dataframe['image'][lichens_dataframe['category'] == category].values)
    tr_num = len(samples)*75//100
    perm = split_rng.permutation(samples)
    for i, src in enumerate(perm):
        src = str(src)
        target_root = current_dir_train if i < tr_num else current_dir_val
        name = src.split('/')[-1]
        shutil.copyfile(src, join(target_root, str(category), name))
    # One summary line per category instead of one line per copied file.
    print('{}: {} train / {} valid'.format(category, tr_num, len(samples) - tr_num))
    

# USING PRE-TRAINED CONVNET

In [None]:
from keras.applications import VGG16

# VGG16 with ImageNet weights, built without its top layers, for 100x100 RGB inputs.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(100, 100, 3),
)

In [None]:
# Print the layer-by-layer summary of the convolutional base.
conv_base.summary()

# FEATURE EXTRACTION 

We can use a pretrained model to extract features from our image dataset; this is called feature extraction. There are two ways to do it: the first does not support data augmentation, whereas the second can be used with data augmentation.

In [None]:
# Image generator that only rescales pixel values by 1/255 (no augmentation).
# NOTE(review): `datagen` is not referenced again in the visible cells — confirm it is still needed.
datagen = ImageDataGenerator(rescale=1./255)
# Batch size read by the feature-extraction loop and the directory generators.
batch_size = 16

## NO DATA AUGMENTATION

In [None]:
# Extract features with the pretrained convolutional base, then train an SVM
# or another neural network on them.

# Train / validation image folders, resolved against the current working directory.
current_dir_train = join(getcwd(),'data/train')
current_dir_val = join(getcwd(),'data/valid')
def feature_extraction(directory, sample_count):
    """Run images from ``directory`` through ``conv_base`` and collect the outputs.

    Images are read with ``flow_from_directory`` (pixel values rescaled by
    1/255, resized to 100x100, ``class_mode='categorical'``) in batches of the
    module-level ``batch_size``, and each batch is passed to
    ``conv_base.predict``.

    Parameters
    ----------
    directory : str
        Root folder containing one sub-folder per class.
    sample_count : int
        Number of rows to collect. The generator cycles indefinitely, so if the
        directory holds fewer images than ``sample_count`` some images are
        collected more than once.

    Returns
    -------
    features : np.ndarray
        Shape ``(sample_count,) + conv_base.output_shape[1:]``.
    labels_dummy : np.ndarray
        Labels as yielded by the generator, shape ``(sample_count, n_classes)``
        where ``n_classes`` is the number of class sub-folders found.

    Raises
    ------
    ValueError
        If no image is found under ``directory``.
    """
    generator = ImageDataGenerator(rescale = 1./255).flow_from_directory(
        directory, target_size = (100, 100),
        batch_size = batch_size, class_mode = 'categorical')

    if generator.samples == 0:
        raise ValueError('no images found under {!r}'.format(directory))

    # Pre-allocated outputs, sized from the model and the generator rather than
    # hard-coded so a different class count or input size does not break the copy.
    features = np.zeros(shape = (sample_count,) + tuple(conv_base.output_shape[1:]))
    labels_dummy = np.zeros(shape = (sample_count, generator.num_classes))

    print('enter in loop')

    filled = 0
    if sample_count > 0:
        for input_batch, labels_batch in generator:
            # The last batch of a pass can be shorter than batch_size, and the
            # final write must not run past sample_count.
            take = min(len(input_batch), sample_count - filled)
            features_batch = conv_base.predict(input_batch)
            features[filled : filled + take] = features_batch[:take]
            labels_dummy[filled : filled + take] = labels_batch[:take]
            filled += take

            # The generator never stops on its own; leave once enough rows are stored.
            if filled >= sample_count:
                break

    return features, labels_dummy
    
    
    
# Extract 1200 training samples and 400 validation samples.
train_features, train_labels_dummy = feature_extraction(
    directory=current_dir_train, sample_count=1200)

validation_features, validation_labels_dummy = feature_extraction(
    directory=current_dir_val, sample_count=400)



    
    

In [None]:
# create labels vector 
def from_dummy_to_labels(dummy_vec):
    """Convert rows of dummy (one-hot) labels into a vector of class indices.

    Parameters
    ----------
    dummy_vec : array-like, shape (n_samples, n_classes)
        Each row must contain the value 1 at least once.

    Returns
    -------
    np.ndarray, shape (n_samples,), dtype float
        For each row, the position of its first entry equal to 1.

    Raises
    ------
    ValueError
        If some row contains no entry equal to 1 (for example an all-zero row).
    """
    dummy_vec = np.asarray(dummy_vec)
    if dummy_vec.shape[0] == 0:
        return np.zeros(0)

    is_one = dummy_vec == 1
    rows_without_one = np.flatnonzero(~is_one.any(axis=1))
    if rows_without_one.size:
        raise ValueError(
            'row {} of dummy_vec contains no 1'.format(rows_without_one[0]))

    # argmax over booleans returns the first True position in each row.
    return is_one.argmax(axis=1).astype(float)

In [None]:
# Integer class labels recovered from the one-hot label matrices.
training_labels = from_dummy_to_labels(train_labels_dummy)
validation_labels = from_dummy_to_labels(validation_labels_dummy)

# Flatten every feature map into a single row per sample. The row count comes
# from the arrays themselves, so it stays in step with the sample counts used
# during extraction instead of repeating them here.
train_features = np.reshape(train_features, (len(train_features), -1))
validation_features = np.reshape(validation_features, (len(validation_features), -1))

## TRAIN AN SVM MODEL WITH EXTRACTED FEATURES!

In [None]:
# Train through the project-local define_svm helper with the 'linear' option.
# Only the first returned value is kept; presumably it is the fitted
# classifier, since .score() is called on it next — verify against define_svm.
# NOTE(review): assigning to `_` shadows IPython's last-output variable.
svc, _ , _ = define_svm.define_and_train_svm(train_features, training_labels, 'linear')
# Score on the validation features, shown as the cell output.
svc.score(validation_features,validation_labels)

In [None]:
from sklearn.metrics.pairwise import chi2_kernel
# Chi-squared kernel of the training features against themselves.
# NOTE(review): chi2_kernel expects non-negative inputs — presumably true for
# the extracted features; confirm.
c = chi2_kernel(train_features)

In [None]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC, SVC

# One-vs-rest SVM fitted on the precomputed kernel matrix `c`, using all cores.
svc = OneVsRestClassifier(
    estimator=SVC(kernel='precomputed'),
    n_jobs=-1,
).fit(c, training_labels)




In [None]:
# Train through the project-local define_svm helper with the 'precomputed'
# option and its chisquared_distance function.
# NOTE(review): this rebinds `svc`, replacing the classifier fitted in the
# previous cell; the meaning of `mean` and `gram` is defined in define_svm —
# verify there.
svc, mean , gram = define_svm.define_and_train_svm(train_features, training_labels, 'precomputed',distance = define_svm.chisquared_distance)

In [None]:
# doing some test on svm 
def test_accuracy(test_features,features,gram, test_label,svc):
    num_objects = features.shape[0]
    res = []
    prediction = []
    for i,ft in enumerate(test_features):
        pred = svc.predict(np.array([np.dot(gram, ft)]))
        prediction.append(pred[0])
        #print(i,": ",pred," : ",test_label[i])
        if(pred==test_label[i]):
            res.append(1)
        else:
            res.append(0)

    res = np.array(res).reshape(-1)
    return np.sum(res)/res.shape, np.array(prediction)


In [None]:
# Evaluate on the validation features, passing the kernel matrix `c` as `gram`.
# NOTE(review): `c` is chi2_kernel(train_features), i.e. train-vs-train, while
# test_accuracy computes np.dot(gram, ft) with each validation feature vector;
# those shapes look incompatible unless the number of training samples equals
# the feature length. `svc` was also rebound by the define_svm cell. Confirm
# which kernel/classifier pair is intended.
res, pred = test_accuracy(validation_features ,train_features,c, validation_labels, svc)

print(res)



In [None]:
# Save the trained SVM to disk.
import pickle
filename = 'svm_with_pre_trained_net.sav'
# The context manager closes the file even if pickling fails.
with open(filename, 'wb') as model_file:
    pickle.dump(svc, model_file)

### TRAIN A NEW NEURAL NETWORK (NOT DEEP) TO SEE WHAT HAPPENS

In [None]:
# Small fully connected classifier trained on the flattened conv_base features.
model = models.Sequential()
# Only the first layer declares the input size (3 * 3 * 512 flattened features);
# later layers take their input size from the layer before them.
model.add(layers.Dense(2048, activation='relu', input_dim=3 * 3 * 512))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
# 20-way softmax output, one unit per class.
model.add(layers.Dense(20, activation='softmax'))

In [None]:
# Multi-class setup: categorical cross-entropy loss, accuracy reported as 'acc'.
model.compile(
    optimizer=optimizers.RMSprop(lr=1e-4),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train the dense classifier on the extracted features, validating on the held-out features.
history = model.fit(
    x=train_features,
    y=train_labels_dummy,
    validation_data=(validation_features, validation_labels_dummy),
    batch_size=32,
    epochs = 100,
)

In [None]:
# Pull the four per-epoch curves out of the training history.
acc, val_acc, loss, val_loss = (
    history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)

# Draw both panels and save the figure.
plot_and_save_image(
    acc, val_acc, loss, val_loss,
    title='accuracy and loss for model',
    direc='acc_loss_pretrained_cnn',
)

In [None]:
# Lowest validation loss reached during training (shown as the cell output).
np.asarray(val_loss).min()

## TRAIN A NEURAL NET WITH DATA AUGMENTATION 

In [None]:
# conv_base followed by a dense classification head, assembled as one model
# so that augmented images can be fed end to end.
model2 = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(20, activation='softmax'),
])



In [None]:
# Freeze the first layer of model2 (the conv_base added first) so that only the
# dense head is trained; this runs before model2 is compiled in a later cell.
model2.layers[0].trainable = False

In [None]:
# Print the layer summary of model2.
model2.summary()

In [None]:
# Training images are rescaled and randomly augmented; validation images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both directory iterators.
flow_options = dict(
    target_size=(100, 100),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory('data/train', **flow_options)

# shuffle=False keeps the validation samples in a fixed order.
validation_generator = test_datagen.flow_from_directory(
    'data/valid', shuffle=False, **flow_options)

In [None]:
# Same multi-class loss and metric as the dense model, with Adam at a small learning rate.
model2.compile(
    optimizer=optimizers.Adam(lr=2e-5),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train model2 from the augmented generator; `history` is rebound to this run.
history = model2.fit_generator(
    generator=train_generator,
    validation_data=validation_generator,
    epochs=50,
)

## 

## FINE-TUNING

In [None]:
# Generators for the fine-tuning run, built with the same settings as in the
# data-augmentation section.
augmentation = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    directory='data/train',
    class_mode='categorical',  # more than two classes
    batch_size=batch_size,
    target_size=(100, 100))  # all images will be resized to 100x100

validation_generator = test_datagen.flow_from_directory(
    directory='data/valid',
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(100, 100),
    shuffle=False)  # keep the validation order fixed

In [None]:
# Print the conv_base layers again; the layer names are used below to choose what to unfreeze.
conv_base.summary()

In [None]:
conv_base.trainable = True

# Walk the layers in order: every layer before 'block5_conv1' is frozen, and
# 'block5_conv1' together with all layers after it is made trainable.
set_trainable = False
for layer in conv_base.layers:
    set_trainable = set_trainable or layer.name == 'block5_conv1'
    layer.trainable = set_trainable

In [None]:
# Fine-tuning model: the partially unfrozen conv_base followed by the same dense
# head as model2. Without conv_base as the first layer, the head would be
# trained on raw flattened pixels and nothing would be fine-tuned.
model3 = models.Sequential()
model3.add(conv_base)
model3.add(layers.Flatten())
model3.add(layers.Dense(512, activation='relu'))
model3.add(layers.Dropout(0.5))
model3.add(layers.Dense(128, activation='relu'))
model3.add(layers.Dropout(0.3))
# 20-way softmax output, one unit per class.
model3.add(layers.Dense(20, activation='softmax'))

In [None]:
# The output is a 20-way softmax fed with one-hot labels, so the loss must be
# categorical cross-entropy, as for the other models. With a binary loss the
# 'acc' metric would score each output unit separately and look inflated.
model3.compile(loss='categorical_crossentropy',
               optimizer=optimizers.Adam(lr=2e-5),
               metrics=['acc'])

In [None]:
# Print the layer summary of model3.
model3.summary()

In [None]:
# Train model3 for 6 epochs of 100 training batches, validating on 50 batches per epoch.
# The result is stored as `istory` (sic); the plotting cell reads that name.
istory = model3.fit_generator(
    generator=train_generator,
    validation_data=validation_generator,
    epochs=6,
    steps_per_epoch=100,
    validation_steps=50,
)

In [None]:
# Per-epoch curves of the fine-tuning run.
acc = istory.history['acc']
val_acc = istory.history['val_acc']
loss = istory.history['loss']
val_loss = istory.history['val_loss']
epochs = range(1, len(acc) + 1)

f, axes = plt.subplots(1,2,figsize=(14,4))

# Left panel: accuracy.
axes[0].plot(epochs, acc, 'bo', label='Training acc')
axes[0].plot(epochs, val_acc, 'b', label='Validation acc')
axes[0].set(title='Accuracy', xlabel='epoch', ylabel='accuracy')
axes[0].legend()

# Right panel: loss, with its y label on the right-hand side.
axes[1].plot(epochs, loss, 'bo', label='Training loss')
axes[1].plot(epochs, val_loss, 'b', label='Validation loss')
axes[1].yaxis.set_label_position("right")
axes[1].set(title='Loss', xlabel='epoch', ylabel='loss')
axes[1].legend()

plt.show()

In [None]:
# Confusion matrix of the fine-tuned model on the validation images.
validation_generator.reset()
# Predict with model3, which takes images. `model` is the dense network trained
# on extracted feature vectors and cannot consume the image generator.
# NOTE(review): switch to model2 if that is the model to be evaluated.
y_pred = model3.predict_generator(validation_generator)
y_pred = y_pred.argmax(-1)
# The validation generator was created with shuffle=False, so the order of
# `classes` matches the order of the predictions.
con_mat = tf.math.confusion_matrix(validation_generator.classes, y_pred)
con_mat = np.array(con_mat)
#plot_confusion_matrix(cm = con_mat, classes = validation_generator.class_indices.keys(), normalize = False)