In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import math

In [2]:
import os
import plaidml.keras
plaidml.keras.install_backend()
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

In [3]:
from keras import backend as K
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from keras.callbacks import ModelCheckpoint

In [4]:
K

<module 'plaidml.keras.backend' from 'C:\\Users\\rhemo\\Anaconda3\\lib\\site-packages\\plaidml\\keras\\backend.py'>

# Reading Data

In [61]:
all_categories = os.listdir("../data")
np.random.shuffle(all_categories)

In [62]:
# categories = all_categories[:100]
# os.mkdir("../dataset")
# np.savez("../dataset/categories.npz", categories=categories)

categories = np.load("../dataset/categories.npz")['categories']

In [63]:
categories

array(['pillow.npy', 'violin.npy', 'jail.npy', 'computer.npy',
       'rifle.npy', 'full_numpy_bitmap_angel.npy', 'hockey stick.npy',
       'television.npy', 'snowman.npy', 'lantern.npy', 'smiley face.npy',
       'bathtub.npy', 'suitcase.npy', 'pizza.npy', 'helmet.npy',
       'streetlight.npy', 'triangle.npy', 'submarine.npy', 'camera.npy',
       'toilet.npy', 'mouse.npy', 'cello.npy', 'clarinet.npy',
       'couch.npy', 'asparagus.npy', 'purse.npy', 'lighthouse.npy',
       'foot.npy', 'microwave.npy', 'blackberry.npy', 'eraser.npy',
       'axe.npy', 'eyeglasses.npy', 'umbrella.npy', 'cat.npy',
       'spider.npy', 'apple.npy', 'whale.npy', 'broccoli.npy',
       'calculator.npy', 'spoon.npy', 'traffic light.npy', 'peas.npy',
       'zebra.npy', 'matches.npy', 'moon.npy', 'syringe.npy',
       'blueberry.npy', 'bush.npy', 'fence.npy', 'flamingo.npy',
       'saw.npy', 'harp.npy', 'bee.npy', 'crab.npy', 'penguin.npy',
       'snake.npy', 'clock.npy', 'cake.npy', 'hedgehog.npy',
  

In [64]:
# We take 10_000 images of each category for training, so alltogether 1M images which is not possible to load into memory
# so we will make a set of training files, each with 100_000 images, where 1000 from each category
# and save that inside dataset
 
def save_mini_dataset(i, data_for="training"):
    dataset = np.zeros((100_000, (28*28)+100))
    for j in range(len(categories)):
        image_set=np.load("../data/"+categories[j])
        dataset[j*1000:(j+1)*1000, :784] = image_set[i*1000:(i+1)*1000]
        del image_set
        y_ = np.zeros((1000, 100))
        y_[j] = 1
        dataset[j*1000:(j+1)*1000, 784:] = y_
        del y_
    np.savez("../dataset/"+data_for+"_set_"+str(i)+".npz", dataset=dataset)
    del dataset # instantly removing from memory to prevent overloading

# uncomment to save files again:
    
for i in range(0, 10_000*len(categories)//100_000):
    save_mini_dataset(i)
    
for i in range(10_000*len(categories)//100_000, 10_000*len(categories)//100_000 + 2_000*len(categories)//100_000):
    save_mini_dataset(i, data_for="testing")

# then when we train we loop over all of the files one at a time, feed in model for training over 100 times 



In [5]:
train_files = []
test_files = []
for each in os.listdir("../dataset"):
    if "testing" in each:
        test_files.append(each)
    elif "training" in each:
        train_files.append(each)

In [6]:
model_names = ["modelv1.h5", "modelv2-largerconvlayers.h5"]
selected_model = model_names[1]

In [7]:
class Classifier:
    
    def __init__(self, model):
        if model=="" or type(model) != str:
            return "Model should have a name"
        try:
            self.model = load_model(model)
            print("Loaded old model")
        except:
            self.model = Classifier.create_model()
            print("New model created")
        self.model_name = model
        self.batch_size = 512
        
    def get_model(self):
        return self.model
    
    def create_model():
        model = Sequential()
        model.add(Dropout(0.2, input_shape=(28,28,1)))
        model.add(Conv2D(32,kernel_size=7,padding='same',activation='relu'))
        model.add(MaxPool2D())
        model.add(Conv2D(64,kernel_size=5,padding='same',activation='relu'))
        model.add(MaxPool2D())
        model.add(Conv2D(128,kernel_size=3,padding='same',activation='relu'))
        model.add(MaxPool2D())
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        model.add(Dense(100, activation='softmax'))
        model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
        return model
    
    def XY_Gen(self, files):
        while True:
            np.random.shuffle(files)
            for each in files:
                dataset = np.load("../dataset/"+each)['dataset']
                np.random.shuffle(dataset)
                X = dataset[:, :784]
                Y = dataset[:, 784:]
                X = X.reshape((X.shape[0], 28, 28, 1))
                for i in range(0, X.shape[0], self.batch_size):
                    yield(X[i:min(i+self.batch_size, X.shape[0])], Y[i:min(i+self.batch_size, X.shape[0])])
                del X, Y, dataset
    
    def fit_model(self,train_files, epochs=10):
        print("Starting model training")
        self.model.fit_generator(self.XY_Gen(train_files), epochs=epochs, steps_per_epoch=math.ceil((1000_000)//self.batch_size), verbose=1, callbacks=[ModelCheckpoint(self.model_name, monitor='acc', save_best_only=True)])
    
        

In [8]:
classifier = Classifier(selected_model)

INFO:plaidml:Opening device "opencl_amd_gfx902.0"


New model created


In [9]:
classifier.fit_model(train_files)

Starting model training
Epoch 1/10


INFO:plaidml:Analyzing Ops: 67 of 257 operations complete


 195/1953 [=>............................] - ETA: 13:40 - loss: 0.8134 - acc: 0.0140

INFO:plaidml:Analyzing Ops: 66 of 257 operations complete




KeyboardInterrupt: 