In [40]:
import operator
import cv2
import numpy as np
import os
from random import shuffle
from tqdm import tqdm

# --- Configuration ---------------------------------------------------------
# TRAIN_DIR holds one sub-directory per class; the directory name is the label.
TRAIN_DIR = '../res/Proper'
IMG_SIZE_X = 160
IMG_SIZE_Y = 120
LR = 1e-3

# os.listdir() returns entries in arbitrary order, but the position of a name
# in `labels` IS the class index used by the one-hot encoding and by the saved
# model. Sort explicitly (case-insensitively, which matches the ordering seen in
# this notebook's recorded outputs) so the mapping is stable across sessions and
# platforms. Non-directory entries are skipped because they cannot hold images.
labels = sorted(
    (entry for entry in os.listdir(TRAIN_DIR)
     if os.path.isdir(os.path.join(TRAIN_DIR, entry))),
    key=lambda name: (name.lower(), name),
)
LABELS_COUNT = len(labels)
MODEL_NAME = 'cactusifier-{}-{}-8conv2filter.model'.format(LR, '2conv-basic')

# Number of files found in each class directory.
imagesPerLabel = {
    directory: len(os.listdir(os.path.join(TRAIN_DIR, directory)))
    for directory in labels
}

print(sum(imagesPerLabel.values()), 'images')
# (label, count) pairs, largest class first; displayed as the cell output.
labelCounts = sorted(imagesPerLabel.items(), key=operator.itemgetter(1), reverse=True)
labelCounts

80448 images


[('A Retusus', 3136),
 ('E Horiz', 2560),
 ('Th Bicolor', 2560),
 ('L Williamsii', 1792),
 ('A Kotsch', 1728),
 ('S Disciformis', 1728),
 ('R Beguinii', 1664),
 ('A Myriostigma', 1664),
 ('A valdezii', 1536),
 ('Th Heterochromus', 1472),
 ('N Conoidea', 1472),
 ('A Trigonus', 1408),
 ('Ef ochoterenaus', 1344),
 ('Lobivia pampana', 1280),
 ('E Horizontalonius', 1280),
 ('H Texensis', 1216),
 ('A lloydii', 1216),
 ('C Echinus', 1216),
 ('Thelocephala challensis', 1152),
 ('Th Multicephalus', 1152),
 ('Ep Greggii', 1152),
 ('T nikolae', 1152),
 ('N subgibbosa', 1088),
 ('M Candida', 1088),
 ('N senilis', 1088),
 ('A ritteri', 1088),
 ('T Valdezianus', 1088),
 ('Th Hex', 1024),
 ('A Fissuratus', 1024),
 ('Th lloydii', 1024),
 ('Thelocephala glabrescens', 960),
 ('A Coahuilense', 960),
 ('M lasiacantha', 960),
 ('Ec Pectinatus', 960),
 ('A scaphar', 960),
 ('M Formosa', 896),
 ('Ec Knippelianus', 896),
 ('H kunzei', 896),
 ('Ef Zacatecasensis', 896),
 ('Th Conothelos', 896),
 ('T Pseudopect

In [2]:
def get_hot_one_encoded_label(label):
    """Return the one-hot vector for ``label``.

    The result is a 1-D NumPy array of length ``LABELS_COUNT`` filled with
    zeros except for a single 1 at the position of ``label`` in ``labels``.

    Raises:
        ValueError: if ``label`` is not present in ``labels``.
    """
    position = labels.index(label)
    encoded = np.zeros(LABELS_COUNT)
    encoded[position] = 1
    return encoded

In [3]:
def get_label_from_h1e(h1e):
    """Return the label name encoded by the one-hot vector ``h1e``.

    The first position whose value equals exactly 1 is used as the index into
    ``labels``.

    Raises:
        IndexError: if no element of ``h1e`` equals 1.
    """
    hot_positions = np.nonzero(h1e == 1)[0]
    return labels[hot_positions[0]]

In [4]:
def get_label_from_index(index):
    """Return the label name stored at position ``index`` of ``labels``."""
    class_name = labels[index]
    return class_name

In [41]:
def create_train_data():
    """Load every image under ``TRAIN_DIR`` into a shuffled list of samples.

    Each class directory listed in ``labels`` is scanned; every file in it is
    read with OpenCV as a colour image and resized with
    ``cv2.resize(..., (IMG_SIZE_X, IMG_SIZE_Y))``. OpenCV's ``dsize`` is
    ``(width, height)``, so each stored image array has shape
    ``(IMG_SIZE_Y, IMG_SIZE_X, 3)``.

    Files that cannot be read or resized are reported (path and error message
    are printed) and skipped, so they never end up in the returned data.

    Returns:
        list: ``[image_array, one_hot_label_array]`` pairs in random order.
    """
    training_data = []
    for counter, directory in enumerate(labels, start=1):
        print(counter, directory)
        labelDirectory = os.path.join(TRAIN_DIR, directory)
        # The encoding depends only on the directory, so compute it once per
        # class instead of once per file.
        label = get_hot_one_encoded_label(directory)
        for fileName in os.listdir(labelDirectory):
            path = os.path.join(labelDirectory, fileName)
            try:
                image = cv2.imread(path, cv2.IMREAD_COLOR)
                # imread signals failure by returning None rather than raising.
                if image is None:
                    raise ValueError('cv2.imread could not read the file')
                image = cv2.resize(image, (IMG_SIZE_X, IMG_SIZE_Y))
            except Exception as e:
                print(path)
                print(str(e))
                # Skip the broken file; appending it would put a non-image
                # entry into the training set.
                continue
            # np.array(label) copies, so samples do not share one label buffer.
            training_data.append([np.array(image), np.array(label)])
    print('Shuffling')
    shuffle(training_data)
    print('Saving')
    # Saving to disk is currently disabled:
    #np.save('train_data.npy', training_data)
    print('Done!')
    return training_data

In [6]:
# Build the in-memory dataset; prints one "<counter> <directory>" line per class as it goes.
train_data = create_train_data()

1 A Asterias
2 A Coahuilense
3 A Fissuratus
4 A Kotsch
5 A lloydii
6 A Myriostigma
7 A Ornatum
8 A Retusus
9 A ritteri
10 A scaphar
11 A Trigonus
12 A valdezii
13 C columna-alba
14 C Echinus
15 C recurvata
16 Cylindropuntia
17 E Horiz
18 E Horizontalonius
19 E ingens
20 Ec Knippelianus
21 Ec Pectinatus
22 Ef Coptonogonus
23 Ef Multicostatus
24 Ef ochoterenaus
25 Ef spinosus
26 Ef Zacatecasensis
27 Em mariposensis
28 Em Unguispinus
29 Ep Greggii
30 Ep micromeris
31 Gl Uncinatus
32 H heinrichianus
33 H kunzei
34 H simulans
35 H Texensis
36 Horridocactus confinis
37 L alberto-vojtechii
38 L Principis
39 L Williamsii
40 Lobivia pampana
41 M andersoniana
42 M Candida
43 M densispina
44 M Formosa
45 M Gummifera
46 M hermosana
47 M jaliscana
48 M lasiacantha
49 M luethyi
50 M perezdelarosae
51 M Picta
52 M Theresae
53 M wagneriana
54 N Conoidea
55 N senilis
56 N subgibbosa
57 Neoporteria chilensis
58 R Beguinii
59 R mandragora
60 S Disciformis
61 T graminispinus
62 T krainzianus
63 T nikolae


In [42]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

import tensorflow as tf
# Clear the default TensorFlow graph before building the network.
tf.reset_default_graph()

convnet = input_data(shape=[None, IMG_SIZE_X, IMG_SIZE_Y, 3], name='input')

# Eight convolution + max-pool stages. Filter counts alternate 32, 64, ...;
# every convolution uses filter size 2 with ReLU, every pooling uses size 2.
for filterCount in (32, 64) * 4:
    convnet = conv_2d(convnet, filterCount, 2, activation='relu')
    convnet = max_pool_2d(convnet, 2)

# Dense head. NOTE(review): tflearn's dropout argument is documented as the
# keep probability, so 0.8 would keep 80% of activations - confirm intended.
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

# One softmax output per class, trained with Adam on categorical cross-entropy.
convnet = fully_connected(convnet, LABELS_COUNT, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet, tensorboard_dir='log')


In [None]:
# Resume from a previously saved model when its ".meta" file is present.
checkpointMetaPath = MODEL_NAME + '.meta'
if os.path.exists(checkpointMetaPath):
    model.load(MODEL_NAME)
    print("Model", MODEL_NAME, "loaded!")

In [8]:
# Fraction of the (already shuffled) samples used for training; the rest is
# held out for validation.
trainCountRatio = 0.8
trainSize = int(round(len(train_data)*trainCountRatio))

# Split at an explicit non-negative index. Slicing with -trainSize breaks when
# trainSize is 0, because [:-0] is empty and [-0:] is the whole list, which
# would silently swap the two sets.
splitIndex = len(train_data) - trainSize
test = train_data[:splitIndex]
train = train_data[splitIndex:]

In [9]:
# Separate each [image, label] sample into network inputs and targets.
# NOTE(review): cv2.resize(img, (IMG_SIZE_X, IMG_SIZE_Y)) yields arrays of shape
# (IMG_SIZE_Y, IMG_SIZE_X, 3); the reshape below reinterprets that buffer as
# (IMG_SIZE_X, IMG_SIZE_Y, 3) without transposing - confirm this is intended.
inputShape = (-1, IMG_SIZE_X, IMG_SIZE_Y, 3)

trainImages = [sample[0] for sample in train]
X = np.array(trainImages).reshape(*inputShape)
Y = [sample[1] for sample in train]

testImages = [sample[0] for sample in test]
test_x = np.array(testImages).reshape(*inputShape)
test_y = [sample[1] for sample in test]

In [43]:
# Train in up to 100 rounds of 5 epochs each and save the model after every
# round, so progress is written to disk between rounds.
for i in range(100):
    trainInputs = {'input': X}
    trainTargets = {'targets': Y}
    validationSet = ({'input': test_x}, {'targets': test_y})
    model.fit(
        trainInputs,
        trainTargets,
        n_epoch=5,
        validation_set=validationSet,
        snapshot_step=500,
        show_metric=True,
        run_id=MODEL_NAME,
    )
    model.save(MODEL_NAME)

Training Step: 352099  | total loss: 0.45939 | time: 72.080s
| Adam | epoch: 350 | loss: 0.45939 - acc: 0.9061 -- iter: 64320/64358
Training Step: 352100  | total loss: 0.43384 | time: 80.223s
| Adam | epoch: 350 | loss: 0.43384 - acc: 0.9092 | val_loss: 0.15320 - val_acc: 0.9509 -- iter: 64358/64358
--
---------------------------------
Run id: cactusifier-0.001-2conv-basic-8conv2filter.model
Log directory: log/


KeyboardInterrupt: 

In [44]:
# Save the model in its current state under MODEL_NAME (the training loop above
# only saves at the end of each completed round).
model.save(MODEL_NAME)

In [67]:
TEST_PATH = '../res/Test'
TEST_SAMPLE_PATH = os.path.join(TEST_PATH, 'sdisciformis.jpg')

# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded; fail with a clear message instead of an opaque resize error.
rawTestImage = cv2.imread(TEST_SAMPLE_PATH, cv2.IMREAD_COLOR)
if rawTestImage is None:
    raise FileNotFoundError('Could not read test image: {}'.format(TEST_SAMPLE_PATH))

# Same preprocessing as the training samples: resize, then reshape into a
# batch of one.
testImage = np.array(cv2.resize(rawTestImage, (IMG_SIZE_X, IMG_SIZE_Y)))
testImage = testImage.reshape(-1, IMG_SIZE_X, IMG_SIZE_Y, 3)
result = model.predict(testImage)

# Rank (class index, score) pairs by ascending score and keep the five highest;
# the best match is therefore the LAST entry of bestMatches.
s = sorted(enumerate(result[0]), key=lambda x: x[1])
bestMatches = s[-5:]
for classIndex, score in bestMatches:
    print(get_label_from_index(classIndex), str(round(score*100, 2)) + '%')

L Williamsii 0.0%
T Pseudopectinatus 0.0%
H Texensis 0.0%
A Retusus 0.01%
A Myriostigma 99.99%


In [69]:
# bestMatches is sorted by ascending score, so entry 4 (the last of the five) is
# the top prediction; [0] is its index into `labels`.
bestMatches[4][0]

5

In [70]:
# Display the class names; a name's list position is the class index used by the
# encoding helpers.
labels

['A Asterias',
 'A Coahuilense',
 'A Fissuratus',
 'A Kotsch',
 'A lloydii',
 'A Myriostigma',
 'A Ornatum',
 'A Retusus',
 'A ritteri',
 'A scaphar',
 'A Trigonus',
 'A valdezii',
 'C columna-alba',
 'C Echinus',
 'C recurvata',
 'Cylindropuntia',
 'E Horiz',
 'E Horizontalonius',
 'E ingens',
 'Ec Knippelianus',
 'Ec Pectinatus',
 'Ef Coptonogonus',
 'Ef Multicostatus',
 'Ef ochoterenaus',
 'Ef spinosus',
 'Ef Zacatecasensis',
 'Em mariposensis',
 'Em Unguispinus',
 'Ep Greggii',
 'Ep micromeris',
 'Gl Uncinatus',
 'H heinrichianus',
 'H kunzei',
 'H simulans',
 'H Texensis',
 'Horridocactus confinis',
 'L alberto-vojtechii',
 'L Principis',
 'L Williamsii',
 'Lobivia pampana',
 'M andersoniana',
 'M Candida',
 'M densispina',
 'M Formosa',
 'M Gummifera',
 'M hermosana',
 'M jaliscana',
 'M lasiacantha',
 'M luethyi',
 'M perezdelarosae',
 'M Picta',
 'M Theresae',
 'M wagneriana',
 'N Conoidea',
 'N senilis',
 'N subgibbosa',
 'Neoporteria chilensis',
 'R Beguinii',
 'R mandragora'

In [None]:
# To inspect the training logs, run this from a shell (not Python):
#   tensorboard --logdir=foo:E:\Programming\Python\Cactusifier\app\log