In [8]:
import operator
import cv2
import numpy as np
import os
from random import shuffle
from tqdm import tqdm

# --- Configuration -----------------------------------------------------------
TRAIN_DIR = '../res/Proper'
# NOTE(review): this is an absolute path at the filesystem root, while the
# model.save(MODEL_NAME) calls in this notebook write to the working directory.
MODELS_DIR = '/Models'
IMG_SIZE_X = 160
IMG_SIZE_Y = 120
LR = 1e-3

# Every sub-directory of TRAIN_DIR is one class. os.listdir() returns entries in
# arbitrary order, so the list is sorted (case-insensitively, with the raw name
# as tie-breaker) to keep the label -> index mapping stable across runs and
# machines. Plain files are skipped so they are not mistaken for classes.
labels = sorted(
    (entry for entry in os.listdir(TRAIN_DIR)
     if os.path.isdir(os.path.join(TRAIN_DIR, entry))),
    key=lambda name: (name.lower(), name),
)
LABELS_COUNT = len(labels)
MODEL_NAME = 'cactusifier-{}-{}-{}-10conv2filter.model'.format(LR, '2conv-basic','treshold_15')

# Number of files found in each class directory.
imagesPerLabel = {}
for directory in labels:
    files = os.listdir(os.path.join(TRAIN_DIR, directory))
    imagesPerLabel[directory] = len(files)

print(sum(imagesPerLabel.values()), 'images')
# (label, count) pairs, largest class first; displayed as the cell result.
labelCounts = sorted(imagesPerLabel.items(), key=operator.itemgetter(1), reverse = True)
labelCounts

48640 images


[('A Retusus', 3136),
 ('E Horiz', 2560),
 ('Th Bicolor', 2560),
 ('A Kotschoubeyanus', 2112),
 ('L Williamsii', 1792),
 ('S Disciformis', 1728),
 ('R Beguinii', 1664),
 ('A Myriostigma', 1664),
 ('A valdezii', 1536),
 ('N Conoidea', 1472),
 ('Th Heterochromus', 1472),
 ('A Trigonus', 1408),
 ('Ef ochoterenaus', 1344),
 ('E Horizontalonius', 1280),
 ('Lobivia pampana', 1280),
 ('H Texensis', 1216),
 ('Ep Greggii', 1216),
 ('A lloydii', 1216),
 ('C Echinus', 1216),
 ('T nikolae', 1152),
 ('Thelocephala challensis', 1152),
 ('Th Multicephalus', 1152),
 ('N senilis', 1088),
 ('A ritteri', 1088),
 ('T Valdezianus', 1088),
 ('N subgibbosa', 1088),
 ('M Candida', 1088),
 ('A Fissuratus', 1024),
 ('Th Hex', 1024),
 ('Th lloydii', 1024),
 ('A scaphar', 960),
 ('A Coahuilense', 960),
 ('Ec Pectinatus', 960),
 ('Thelocephala glabrescens', 960),
 ('M lasiacantha', 960)]

In [2]:
def get_hot_one_encoded_label(label):
    """Return the one-hot vector for `label`.

    The vector has LABELS_COUNT entries, all zero except for a 1 at the
    position of `label` inside the module-level `labels` list.
    Raises ValueError (from list.index) when `label` is not in `labels`.
    """
    encoded = np.zeros(LABELS_COUNT)
    hot_position = labels.index(label)
    encoded[hot_position] = 1
    return encoded

In [3]:
def get_label_from_h1e(h1e):
    """Return the label whose position matches the first entry of `h1e` equal to 1.

    Raises IndexError when no entry of `h1e` equals 1.
    """
    # np.nonzero on the boolean mask yields one index array per dimension;
    # the first array holds the positions along the first axis.
    hot_positions = np.nonzero(h1e == 1)[0]
    first_hot = hot_positions[0]
    return labels[first_hot]

In [4]:
def get_label_from_index(index):
    """Return the entry of the module-level `labels` list at position `index`."""
    label_name = labels[index]
    return label_name

In [5]:
def create_train_data():
    """Load every image below TRAIN_DIR and pair it with its one-hot label.

    For each directory name in `labels`, every file in that directory is read
    with OpenCV in colour mode and resized with dsize (IMG_SIZE_X, IMG_SIZE_Y).
    Files that cannot be read or resized are reported (path and error message)
    and skipped, so they never end up in the result.

    Returns:
        A shuffled list of [image_array, one_hot_label_array] pairs.
    """
    training_data = []
    for counter, directory in enumerate(labels):
        print(counter, directory)
        labelDirectory = os.path.join(TRAIN_DIR, directory)
        # The label depends only on the directory, so encode it once per class.
        label = get_hot_one_encoded_label(directory)
        for file_name in os.listdir(labelDirectory):
            path = os.path.join(labelDirectory, file_name)
            try:
                image = cv2.resize(cv2.imread(path, cv2.IMREAD_COLOR), (IMG_SIZE_X, IMG_SIZE_Y))
            except Exception as e:
                print(path)
                print(str(e))
                # Skip the unreadable file; otherwise its file name would be
                # stored as if it were image data.
                continue
            # np.array(label) copies, so every sample owns its own label array.
            training_data.append([np.array(image), np.array(label)])
    print('Shuffling')
    shuffle(training_data)
    print('Saving')
    #np.save('train_data.npy', training_data)
    print('Done!')
    return training_data

In [6]:
train_data = create_train_data()

0 A Coahuilense
1 A Fissuratus
2 A Kotschoubeyanus
3 A lloydii
4 A Myriostigma
5 A Retusus
6 A ritteri
7 A scaphar
8 A Trigonus
9 A valdezii
10 C Echinus
11 E Horiz
12 E Horizontalonius
13 Ec Pectinatus
14 Ef ochoterenaus
15 Ep Greggii
16 H Texensis
17 L Williamsii
18 Lobivia pampana
19 M Candida
20 M lasiacantha
21 N Conoidea
22 N senilis
23 N subgibbosa
24 R Beguinii
25 S Disciformis
26 T nikolae
27 T Valdezianus
28 Th Bicolor
29 Th Heterochromus
30 Th Hex
31 Th lloydii
32 Th Multicephalus
33 Thelocephala challensis
34 Thelocephala glabrescens
Shuffling
Saving
Done!


In [7]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

import tensorflow as tf
# Reset the default TensorFlow graph before the network is (re)built.
tf.reset_default_graph()

convnet = input_data(shape=[None, IMG_SIZE_X, IMG_SIZE_Y, 3], name='input')

# Ten conv_2d (size 2, ReLU) + max_pool_2d (size 2) stages; the first conv
# argument alternates 32, 64, 32, 64, ...
# NOTE(review): ten 2x poolings on a 160x120 input presumably shrink the
# feature map to 1x1 well before the last stages - confirm this is intended.
for filter_count in (32, 64) * 5:
    convnet = conv_2d(convnet, filter_count, 2, activation='relu')
    convnet = max_pool_2d(convnet, 2)

# Classifier head: one hidden dense layer, dropout, then one softmax unit per label.
convnet = fully_connected(convnet, 1024, activation='relu')
# NOTE(review): in TFLearn the second dropout argument is presumably the keep
# probability (0.8 kept, 0.2 dropped) - confirm.
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, LABELS_COUNT, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet, tensorboard_dir='log')


curses is not supported on this machine (please install/reinstall curses for an optimal experience)


In [None]:
# Resume from an existing checkpoint, if there is one. The checkpoint is looked
# up and loaded under MODEL_NAME in the working directory, which is where the
# model.save(MODEL_NAME) calls in this notebook write it. Previously the check
# used the working directory but the load read from MODELS_DIR, so a saved
# model could pass the check and still fail to load.
if os.path.exists('{}.meta'.format(MODEL_NAME)):
    model.load(MODEL_NAME)
    print("Model", MODEL_NAME, "loaded!")

In [13]:
# Hold out the leading part of the (already shuffled) samples for validation
# and train on the trailing 80 %.
trainCountRatio = 0.8
trainSize = int(round(len(train_data)*trainCountRatio))
testSize = len(train_data) - trainSize
test, train = train_data[:testSize], train_data[testSize:]

In [14]:
# Each sample is an [image, one_hot_label] pair: stack the images into one
# array per split and keep the labels as plain lists.
# NOTE(review): cv2.resize takes dsize as (width, height), so every image is
# presumably shaped (IMG_SIZE_Y, IMG_SIZE_X, 3); reshaping to
# (-1, IMG_SIZE_X, IMG_SIZE_Y, 3) then reinterprets the pixel buffer instead of
# transposing it. Training and prediction do it consistently - confirm intent.
inputShape = (-1, IMG_SIZE_X, IMG_SIZE_Y, 3)

X = np.array([sample[0] for sample in train]).reshape(inputShape)
Y = [sample[1] for sample in train]

test_x = np.array([sample[0] for sample in test]).reshape(inputShape)
test_y = [sample[1] for sample in test]

In [16]:
# Train in 100 rounds of 5 epochs each and write a checkpoint after every
# round, so stopping the run loses at most the round in progress.
for training_round in range(100):
    fit_inputs = {'input': X}
    fit_targets = {'targets': Y}
    validation_pair = ({'input': test_x}, {'targets': test_y})
    model.fit(
        fit_inputs,
        fit_targets,
        n_epoch=5,
        validation_set=validation_pair,
        snapshot_step=500,
        show_metric=True,
        run_id=MODEL_NAME,
    )
    model.save(MODEL_NAME)

Training Step: 152875  | total loss: 0.13165 | time: 15.845s
| Adam | epoch: 252 | loss: 0.13165 - acc: 0.9540 -- iter: 17088/38912


KeyboardInterrupt: 

In [17]:
# Write the current model state to MODEL_NAME in the working directory
# (useful when the training loop was stopped before its own save call ran).
model.save(MODEL_NAME)

In [28]:
# Classify one sample image and print the five highest-scoring labels,
# lowest of the five first and the best match last.
TEST_PATH = '../res/Test'
TEST_SAMPLE_PATH = os.path.join(TEST_PATH, 'retusus.jpg')

# Same preprocessing as the training images: colour read, resize, reshape.
rawImage = cv2.imread(TEST_SAMPLE_PATH, cv2.IMREAD_COLOR)
resizedImage = cv2.resize(rawImage, (IMG_SIZE_X, IMG_SIZE_Y))
testImage = np.array(resizedImage).reshape(-1, IMG_SIZE_X, IMG_SIZE_Y, 3)

result = model.predict(testImage)
# (label index, score) pairs in ascending score order.
ranked = sorted(enumerate(result[0]), key=lambda pair: pair[1])
bestMatches = ranked[-5:]
for labelIndex, score in bestMatches:
    print(get_label_from_index(labelIndex), str(round(score*100, 2)) + '%')

R Beguinii 0.0%
Th Hex 0.0%
A ritteri 0.0%
Ep Greggii 0.0%
A Retusus 100.0%


In [None]:
# To inspect the training logs, run this in a shell (not in the notebook):
#   tensorboard --logdir=foo:E:\Programming\Python\Cactusifier\app\log