In [None]:
import argparse

import azureml.core
from azureml.core import Experiment,Workspace, Run
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.dnn import TensorFlow
from azureml.train.estimator import Estimator
from azureml.widgets import RunDetails
from collections import defaultdict
import cv2
import glob
import json
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from numpy import genfromtxt
import os
import pandas as pd
import pickle
from PIL import Image, ImageFilter
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Activation, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD
from skimage import io as io
from skimage import exposure
from skimage.transform import resize
from skimage.color import rgb2gray
from skimage.feature import blob_dog, blob_log, blob_doh, corner_harris, corner_subpix, corner_peaks, daisy, hog

from sklearn.externals import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.svm import SVC
import shutil
import tensorflow as tf
from time import time
import traceback
import timeit
import urllib.request

In [None]:
# Connect to the Azure ML workspace and make sure a CPU and a GPU cluster exist.
print(azureml.core.VERSION)

# Connection settings: each value is read from the environment, falling back to
# the literal default given here.
# NOTE(review): the subscription id default is hard-coded in the notebook;
# consider supplying it only through the SUBSCRIPTION_ID environment variable.
subscription_id = os.getenv("SUBSCRIPTION_ID", default="9bce0414-e6b9-4c79-b146-74018a4b09ac")
resource_group = os.getenv("RESOURCE_GROUP", default="Thesis")
workspace_name = os.getenv("WORKSPACE_NAME", default="Multi-label_classification")
workspace_region = os.getenv("WORKSPACE_REGION", default="eastus2")
# NOTE(review): when the lookup below fails the exception is only printed, so
# `ws` stays unbound and the ComputeTarget calls further down in this cell
# raise NameError.
try:
    ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name)
    # write the details of the workspace to a configuration file to the notebook library
    ws.write_config()
    print("Workspace configuration succeeded. Skip the workspace creation steps below")
except Exception as e:
    print(e)
    print("Workspace not accessible. Change your parameters or create a new workspace below")
    
cpu_cluster_name = "cpucluster"

# Verify that cluster does not exist already
# (a ComputeTargetException from the lookup is taken to mean "not found")
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print("Found existing cpucluster")
except ComputeTargetException:
    print("Creating new cpucluster")
    
    # Specify the configuration for the new cluster
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
                                                           min_nodes=0,
                                                           max_nodes=4)

    # Create the cluster with the specified name and configuration
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
    
    # Wait for the cluster to complete, show the output log
    cpu_cluster.wait_for_completion(show_output=True)

gpu_cluster_name = "gpucluster"

# Verify that cluster does not exist already
# (same get-or-create pattern as the CPU cluster, with a different VM size)
try:
    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)
    print("Found existing gpu cluster")
except ComputeTargetException:
    print("Creating new gpucluster")
    
    # Specify the configuration for the new cluster
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_NC6",
                                                           min_nodes=0,
                                                           max_nodes=4)
    # Create the cluster with the specified name and configuration
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)

    # Wait for the cluster to complete, show the output log
    gpu_cluster.wait_for_completion(show_output=True)


In [None]:
from azureml.core import Workspace

# Build the workspace from the connection settings chosen earlier in the
# notebook; the resource group is created on demand and exist_ok=True is passed
# so that an already existing workspace is accepted.
workspace_settings = dict(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
    location=workspace_region,
    create_resource_group=True,
    exist_ok=True,
)
ws = Workspace.create(**workspace_settings)
ws.get_details()

# Persist the workspace details in a local configuration file.
ws.write_config()

In [None]:
# Experiment object used to group the runs submitted from this notebook.
experiment_name = 'multi-class_classification'
exp = Experiment(name=experiment_name, workspace=ws)

In [None]:
# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
# Environment variables are always strings, so cast explicitly: the
# provisioning call then receives integer node counts whether the values come
# from the environment or from the defaults.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    # Reuse the compute target that is already registered under this name.
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print('found compute target. just use it. ' + compute_name)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

In [None]:
# Look up the workspace's default datastore and show its type, account and container.
ds = ws.get_default_datastore()
datastore_summary = (ds.datastore_type, ds.account_name, ds.container_name)
print(*datastore_summary)
#ds.upload(src_dir='./data', target_path='data', overwrite=True, show_progress=True)

In [None]:
# Local folder that receives the generated training script; create it if missing.
script_folder = './coco-multi-label'
if not os.path.isdir(script_folder):
    os.makedirs(script_folder)

In [None]:
# Parse the COCO 2014 training instance annotations into the `train` dictionary.
with open("data/annotations/instances_train2014.json") as read_file:
    train = json.loads(read_file.read())
'''
with open("data/annotations/instances_val2014.json") as read_file:
    val = json.load(read_file)
with open("data/annotations/instances_train2014.json") as read_file:
    instances = json.load(read_file)
with open("data/annotations/person_keypoints_train2014.json") as read_file:
    keypoints = json.load(read_file)
'''

In [None]:
# Summary statistics for the 2014 training annotations: image/annotation
# counts, annotations per category, and the label set of every image.
# (The same statistics for the validation split are currently disabled.)
categories = train["categories"]
train_images = train["images"]
train_annotations = train["annotations"]
print("Number of images in 2014 train: "+str(len(train_images)))
print("Number of annotations in 2014 train: "+str(len(train_annotations)))
print("Number of annotations/image in 2014 train: "+str(len(train_annotations)/len(train_images)))

# category_counts: category_id -> number of annotations.
# itemsets: image_id -> list of the category ids annotated on that image.
category_counts = defaultdict(int)
itemsets = defaultdict(list)
for annotation in train_annotations:
    category_counts[annotation['category_id']] += 1
    itemsets[annotation['image_id']].append(annotation['category_id'])

# Dictionary keys are already unique, so this is simply the list of image ids
# that carry at least one annotation (the former membership-test loop produced
# the same list in quadratic time).
# NOTE(review): if the goal was the number of distinct label combinations,
# this should count unique values of `itemsets` instead - confirm intent.
unique_itemsets = list(itemsets)
print(len(unique_itemsets))

print("Label density in 2014 train: "+str(len(train_annotations)/len(category_counts)/len(train_images)))
print(train.keys())
print("\n")

# Pair every category name with its count by category id rather than by list
# position, so the pairing stays correct even if a category has no annotations
# or the category list is not ordered by id. Categories without annotations
# are skipped.
train_categories = []
for category_entry in categories:
    count = category_counts.get(category_entry["id"], 0)
    if count == 0:
        continue
    print(str(category_entry["id"]) + category_entry["name"] + ": " + str(count))
    train_categories.append((category_entry["name"], count))

plt.figure(1)
plt.subplot(211)
plt.plot([entry[1] for entry in train_categories])
plt.show()

# Sort ascending by count so the rarest categories come first.
train_categories = sorted(train_categories, key=lambda tup: tup[1], reverse=False)
print("\n10 least used features \n")
# Slice end is exclusive: [0:10] yields the ten entries the heading announces.
print(train_categories[0:10])
print("\n")
print("\n10 most used features \n")
print(train_categories[-10:])
print("\n")

In [None]:
import sys
ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']
# Rank the notebook's own variables by sys.getsizeof, largest first. Private
# names, imported modules and IPython bookkeeping names are left out.
_candidates = [x for x in dir() if not x.startswith('_') and x not in sys.modules and x not in ipython_vars]
sorted(((name, sys.getsizeof(globals().get(name))) for name in _candidates), key=lambda pair: pair[1], reverse=True)

In [None]:
image_id = 49 #9,25,30,34,36,49
def findAnnotations(id, property, annotations=None):
    """Return every annotation whose `property` field equals `id`.

    Parameters:
        id: value to look for.
        property: key of the annotation dictionaries to compare against `id`.
        annotations: iterable of annotation dictionaries to search. Optional;
            when omitted the notebook-level `train_annotations` list is used,
            so existing two-argument calls keep working while three-argument
            calls (the form used by the rest of the notebook) are accepted too.

    Returns:
        A list with the matching annotation dictionaries, in input order.
    """
    if annotations is None:
        annotations = train_annotations
    return [annotation for annotation in annotations if annotation[property] == id]
# All annotations attached to the selected image.
items = findAnnotations(id=image_id, property='image_id')

In [None]:
def findValueDictionary(dict,key,value, returnkey):
    """Return `entry[returnkey]` of the first entry whose `entry[key]` equals `value`.

    `dict` is iterated as a sequence of mappings. None is returned when no
    entry matches.
    """
    matches = (entry[returnkey] for entry in dict if entry[key] == value)
    return next(matches, None)

# Draw the selected training image with one rectangle per annotation.
image_id_string = str(image_id).zfill(12)
# Open through a context manager so the file handle is released as soon as the
# pixels have been copied into the array.
with Image.open('data/train2014/COCO_train2014_' + image_id_string + '.jpg') as image_file:
    image = np.array(image_file, dtype=np.uint8)
fig,ax = plt.subplots(1)
ax.imshow(image)
# One random RGB colour per category id (with some spare rows).
colors = np.random.random((len(categories)+10, 3))
bbox_list = []
item_list = []
for item in items:
    label = findValueDictionary(categories, 'id', item['category_id'], 'name')
    # The bbox values are passed to Rectangle as (x, y), width, height.
    box = item['bbox']
    rect = patches.Rectangle((box[0], box[1]), box[2], box[3],
                             linewidth=1,
                             edgecolor=colors[item['category_id']-1],
                             facecolor='none',
                             label=label)
    ax.add_patch(rect)
    bbox_list.append(rect)
    item_list.append(item['category_id'])
#print(items)
# NOTE(review): plt.figure() starts a new figure, so the legend is rendered in
# its own figure rather than on top of the image - confirm this is intended.
plt.figure(figsize=(3,4))
plt.legend(handles=bbox_list)
plt.show()

In [None]:
import argparse

import azureml.core
from azureml.core import Run
import cv2
import numpy as np
from numpy import genfromtxt
import os
from PIL import Image, ImageFilter
from skimage import io as io
from skimage import exposure
from skimage.transform import resize
from skimage.color import rgb2gray
from skimage.feature import blob_dog, blob_log, blob_doh, corner_harris, corner_subpix, corner_peaks, daisy, hog

from sklearn.externals import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from time import time

def loadAnnotations(fileLocation):
    """Read the JSON file at `fileLocation` and return its parsed content."""
    with open(fileLocation) as annotation_file:
        return json.loads(annotation_file.read())
    
def getBoundingBox(x,y,w,h,img):
    """Crop `img` to rows y..y+h and columns x..x+w (end-exclusive)."""
    row_span = slice(y, y + h)
    col_span = slice(x, x + w)
    return img[row_span, col_span]

def findAnnotations(id, property, annotations):
    """Return the annotations whose `property` field equals `id`, in input order."""
    return [candidate for candidate in annotations if candidate[property] == id]

def zca_whitening_matrix(X):
    """Return the ZCA whitening matrix U * diag(1/sqrt(S + eps)) * U' for X.

    The covariance is computed with rowvar=True, i.e. each row of X is treated
    as a variable, giving an [M x M] matrix for M rows. U and S come from the
    SVD of that covariance matrix; eps (1e-5) prevents division by zero.
    """
    whitening_eps = 1e-5
    covariance = np.cov(X, rowvar=True)  # [M x M]
    eigvecs, eigvals, _ = np.linalg.svd(covariance)
    inverse_sqrt = np.diag(1.0 / np.sqrt(eigvals + whitening_eps))
    return np.dot(eigvecs, np.dot(inverse_sqrt, eigvecs.T))  # [M x M]

def preProcessImage(img):
    """Convert `img` to an array and return its grayscale version via rgb2gray."""
    pixels = np.asarray(img)
    gray = rgb2gray(pixels)
    # Optional cv2 smoothing steps (GaussianBlur, medianBlur, bilateralFilter,
    # blur, filter2D with a 5x5 mean kernel) are currently disabled.
    return gray

def getBoundingBoxesPictures(annotations, path):
    """Cut every annotated bounding box out of its image as a 64x64 grayscale patch.

    Parameters:
        annotations: iterable of annotation dictionaries providing 'image_id',
            'bbox' (x, y, width, height) and 'category_id'.
        path: file-name prefix; the image id zero-padded to 12 digits and
            '.jpg' are appended to it.

    Returns:
        (patches, category_ids) as two NumPy arrays of equal length.

    An annotation that cannot be processed (unreadable image, empty crop, ...)
    is reported with print and skipped. Previously the first failure ended the
    whole loop and silently dropped every remaining annotation.
    """
    bounded_images = []
    bounded_annotations = []
    for annotation in annotations:
        try:
            image_id_string = str(annotation['image_id']).zfill(12)
            # The context manager closes the file once the pixels are converted.
            with Image.open(path + image_id_string + '.jpg') as image_file:
                image = preProcessImage(image_file)
            x, y, w, h = (int(value) for value in annotation['bbox'])
            crop = getBoundingBox(x, y, w, h, image)
            image_resized = resize(crop, (64, 64), anti_aliasing=True)
            category_id = annotation['category_id']
        except Exception as ex:
            print(ex)
            continue
        # Append both lists together so images and labels always stay aligned.
        bounded_images.append(image_resized)
        bounded_annotations.append(category_id)
    bounded_images = np.asarray(bounded_images)
    bounded_annotations = np.asarray(bounded_annotations)
    return bounded_images,bounded_annotations

def calculateHogFeatures(gray_image, o, pixels, cells):
    """Return the HOG descriptor of `gray_image`.

    `o` is the number of orientations, `pixels` the side of a square cell and
    `cells` the side of a square block (in cells).
    """
    hog_options = {
        'orientations': o,
        'pixels_per_cell': (pixels, pixels),
        'cells_per_block': (cells, cells),
        'transform_sqrt': True,
        'visualize': False,
        'block_norm': "L2-Hys",
    }
    return hog(gray_image, **hog_options)
def calculateDaisyFeatures(gray_image):
    """Return the DAISY descriptors of `gray_image` flattened into one Python list."""
    descs = daisy(gray_image, step=180, radius=15, rings=3, histograms=6,
                  orientations=8, visualize=False)
    # The unused descriptor count was dropped; the whole array is flattened.
    return descs.reshape(descs.size).tolist()

def calculateDoG(gray_image):
    """Detect blobs in `gray_image` with blob_dog.

    Returns the array produced by blob_dog with its third column multiplied by
    sqrt(2).
    """
    # Use the actual parameter (the body referenced an undefined `gray_img`)
    # and np.sqrt (a bare `sqrt` was never imported).
    blobs_dog = blob_dog(gray_image, max_sigma=30, threshold=.1)
    blobs_dog[:, 2] = blobs_dog[:, 2] * np.sqrt(2)
    return blobs_dog

def calculateSIFT(sift, gray_image):
    """Return what `sift.detect` yields for `gray_image`; None is passed as the second argument."""
    return sift.detect(gray_image, None)

def calculateFeatures(imgs):
    """Compute HOG and DAISY descriptors for every image in `imgs`.

    Returns:
        (hog_features, daisy_features): two lists with one entry per image,
        in input order.
    """
    hog_features = []
    daisy_features = []
    for img in imgs:
        # HOG: 8 orientations, 16x16-pixel cells, 1x1-cell blocks.
        hog_features.append(calculateHogFeatures(img,8,16,1))
        daisy_features.append(calculateDaisyFeatures(img))
        # DoG and SIFT extraction are currently disabled:
        #dog_features.append(calculateDoG(img))
        #sift_features.append(calculateSIFT(sift, img))
    return hog_features, daisy_features

def svmFit(x_train, y_train):
    """Grid-search an RBF SVM over C and gamma (5-fold CV) and return the fitted search object."""
    print("Fitting the classifier to train")
    started = time()
    search_space = {
        'C': [1e3, 5e3, 1e4, 5e4, 1e5],
        'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
    }
    base_svm = SVC(kernel='rbf', class_weight='balanced')
    clf = GridSearchCV(base_svm, search_space, cv=5).fit(x_train, y_train)
    print("Completed in %0.3fs" % (time() - started))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)
    return clf

def saveModel(model, filename):
    """Serialize `model` to `filename` with joblib."""
    print("Saving file...")
    # The context manager flushes and closes the handle; it was previously
    # opened inline and never closed.
    with open(filename, 'wb') as model_file:
        joblib.dump(model, model_file)
    print("File saved")
    
def loadModel(filename):
    """Load and return the model stored in `filename` with joblib.

    NOTE(review): joblib files are pickle-based; only load files from trusted
    sources.
    """
    print("loading file...")
    # Keep the loaded object so it can be handed back to the caller; the
    # result used to be discarded.
    model = joblib.load(filename)
    print("Model loaded")
    return model
    
def svmPredict(x_test, y_test, model):
    """Predict `x_test` with `model`, print the evaluation reports and return the accuracy.

    Parameters:
        x_test: feature vectors to classify.
        y_test: true labels for `x_test`.
        model: fitted estimator exposing `predict`.

    Returns:
        The accuracy score as a float. Callers store the result of this
        function, which used to be None.
    """
    print("Predicting the test set")
    t0 = time()
    y_pred = model.predict(x_test)
    print("Completed in %0.3fs" % (time() - t0))
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
    return accuracy_score(y_test, y_pred)
    
def randomForestFit(x_train, y_train, estimators):
    """Fit a RandomForestClassifier with `estimators` trees and return it."""
    print("Fitting the classifier to train")
    started = time()
    rf = RandomForestClassifier(n_estimators=estimators).fit(x_train, y_train)
    print("Completed in %0.3fs" % (time() - started))
    return rf

def rfPredict(x_test, y_test, model):
    """Predict `x_test` with `model`, print the accuracy and return it.

    Returns:
        The accuracy score as a float. Callers store the result of this
        function, which used to be None.
    """
    print("Predicting the test set")
    t0 = time()
    y_pred = model.predict(x_test)
    print("Completed in %0.3fs" % (time() - t0))
    # The unused absolute difference between class ids was removed; it has no
    # meaning for categorical labels.
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    return accuracy
  
def constructCNN(conv_layer, layer_size, dense_layer, kernel_size, dropout, num_classes, input_shape=None):
    """Build and compile a sequential CNN classifier.

    Parameters:
        conv_layer: number of Conv2D/ReLU/MaxPooling stages (at least one is built).
        layer_size: number of filters per convolution and units per dense layer.
        dense_layer: number of hidden Dense/ReLU layers after flattening.
        kernel_size: side length of the square convolution kernel.
        dropout: when truthy, a Dropout(0.25) layer follows the convolution
            stages and another follows the dense layers.
        num_classes: number of units of the softmax output layer.
        input_shape: shape of one input sample. Optional; when omitted the
            shape is taken from the notebook-level `trainset` array, which is
            the previous behaviour.

    Returns:
        The model compiled with categorical cross-entropy, the adam optimizer
        and the accuracy metric.

    The EarlyStopping and TensorBoard objects that used to be created here
    were never attached to the model or returned, so they were removed;
    callers have to create the callbacks they pass to `fit`.
    """
    NAME = "{}-conv-{}-nodes-{}-dense-{}-kernel-{}".format(conv_layer, layer_size, dense_layer, kernel_size, int(time()))
    print(NAME)
    if input_shape is None:
        # Backward-compatible fallback to the global training array.
        input_shape = trainset.shape[1:]

    model = Sequential()

    # First convolution stage carries the input shape.
    model.add(Conv2D(layer_size, (kernel_size, kernel_size), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    for _ in range(conv_layer-1):
        model.add(Conv2D(layer_size, (kernel_size, kernel_size)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
    if(dropout):
        model.add(Dropout(0.25))
    model.add(Flatten())

    for _ in range(dense_layer):
        model.add(Dense(layer_size))
        model.add(Activation('relu'))
    if(dropout):
        model.add(Dropout(0.25))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])

    return model
    
# Generator "chunks": split the sequence l into consecutive pieces of n items.
def chunks(l, n):
    """Yield consecutive slices of `l` with at most `n` items each."""
    yield from (l[start:start + n] for start in range(0, len(l), n))

def splitPreprocessing(splits, data_folder, path):
    """Extract labels and HOG/DAISY features split by split.

    Parameters:
        splits: iterable of annotation lists (e.g. produced by `chunks`).
        data_folder: root folder of the data set.
        path: image file-name prefix relative to `data_folder`.

    Returns:
        (labels, features) where `labels` is a flat list of category ids and
        `features` is `[hog_features, daisy_features]`, two lists aligned with
        `labels`.

    The features are accumulated in plain lists. Stacking the HOG and DAISY
    lists into one NumPy array requires a ragged array, because the two
    descriptors have different lengths, and current NumPy refuses to build one.
    """
    labels = []
    hog_features = []
    daisy_features = []
    image_prefix = os.path.join(data_folder, path)
    for split in splits:
        split_images, split_labels = getBoundingBoxesPictures(split, image_prefix)
        split_hog, split_daisy = calculateFeatures(split_images)
        hog_features.extend(split_hog)
        daisy_features.extend(split_daisy)
        labels.extend(split_labels)
    return labels, [hog_features, daisy_features]

# Root folder of the local data set and the annotation key used for filtering.
data_folder, category = 'data', "category_id"

# File-name prefixes of the training and validation images.
path = "train2014/COCO_train2014_"
valpath = "val2014/val2014/COCO_val2014_"

In [None]:
'''
train_annotations = loadAnnotations(os.path.join(data_folder, 'annotations/instances_train2014.json'))
train_annotations = train_annotations["annotations"]

val_annotations = loadAnnotations(os.path.join(data_folder, 'annotations/instances_val2014.json'))
val_annotations = val_annotations["annotations"]
'''
'''
#testsplits = list(chunks(dummy_annotations, 50))
#labels, features = splitPreprocessing(testsplits, data_folder, path)

'''
# Collect the bounding-box patches of the selected categories, one category at
# a time, and stack them into `imgs` / `labels`.
categories_subset = [2,4]
image_prefix = os.path.join(data_folder, path)
bbox = getBoundingBoxesPictures(findAnnotations(categories_subset.pop(0), category, train_annotations), image_prefix)
imgs, labels = bbox
for category_id in categories_subset:
    bbox = getBoundingBoxesPictures(findAnnotations(category_id, category, train_annotations), image_prefix)
    imgs = np.concatenate((imgs, bbox[0]))
    labels = np.concatenate((labels, bbox[1]))
features = calculateFeatures(imgs)

# Both experiments use the same 75/25 split settings.
split_options = dict(test_size=0.25, random_state=42)

# SVM on the HOG descriptors (features[0]).
x_train, x_test, y_train, y_test = train_test_split(features[0], labels, **split_options)
svmHOG = svmFit(x_train, y_train)
svmHOGResults = svmPredict(x_test, y_test, svmHOG)

# SVM on the DAISY descriptors (features[1]).
x_train, x_test, y_train, y_test = train_test_split(features[1], labels, **split_options)
svmDaisy = svmFit(x_train, y_train)
svmDaisyResults = svmPredict(x_test, y_test, svmDaisy)
#saveModel(svm, 'SVM_DAISY.sav')
os.makedirs('outputs', exist_ok=True)


In [None]:
'''
categories_subset = [2,4]#[2,4,5,6,7]
bbox = getBoundingBoxesPictures(findAnnotations(categories_subset[0], category, train_annotations),  os.path.join(data_folder, path))
imgs = bbox[0]
categories_subset.pop(0)
labels = bbox[1]
for category_id in categories_subset:
    bbox = getBoundingBoxesPictures(findAnnotations(category_id, category, train_annotations),  os.path.join(data_folder, path))
    imgs = np.concatenate((imgs,bbox[0]))
    labels = np.concatenate((labels,bbox[1]))
features = calculateFeatures(imgs)
'''
# Random forests (100 trees) on the `features` / `labels` computed earlier,
# using a 75/25 split with a fixed seed.
split_options = dict(test_size=0.25, random_state=42)

# HOG descriptors (features[0]).
x_train, x_test, y_train, y_test = train_test_split(features[0], labels, **split_options)
rfHOG = randomForestFit(x_train, y_train, 100)
rfHOG_results = rfPredict(x_test, y_test, rfHOG)

# DAISY descriptors (features[1]).
x_train, x_test, y_train, y_test = train_test_split(features[1], labels, **split_options)
rfDaisy = randomForestFit(x_train, y_train, 100)
rfDaisy_results = rfPredict(x_test, y_test, rfDaisy)

In [None]:
# width 578.2023976316078
# height 483.5494085111049
# NOTE(review): presumably the mean image width/height of the training set,
# which would explain the resize target used below - TODO confirm.

# NOTE(review): this rebinds the notebook-level `path`; cells that build their
# prefix with os.path.join(data_folder, path) get a doubled data folder when
# they are re-run after this cell.
path = "data/train2014/COCO_train2014_"
def getPictures(itemsets, path, instance):
    """Load, grayscale and resize every image whose label list contains `instance`.

    Parameters:
        itemsets: mapping image_id -> list of category ids on that image.
        path: file-name prefix; the image id zero-padded to 12 digits and
            '.jpg' are appended to it.
        instance: category id an image must contain to be selected.

    Returns:
        (image_ids, images, image_tags): three lists of equal length holding
        the image id as a string, the resized grayscale image and the image's
        label list.

    An image that cannot be processed is reported with print and skipped.
    Previously the first failure ended the whole loop and could leave the
    three lists with different lengths. The two debug prints per image were
    removed because they flooded the cell output.
    """
    images = []
    image_ids = []
    image_tags = []
    for itemset in itemsets:
        tags = itemsets[itemset]
        if instance not in tags:
            continue
        try:
            # The context manager closes the file once the pixels are converted.
            with Image.open(path + str(itemset).zfill(12) + '.jpg') as image:
                processed_img = preProcessImage(image)
            # NOTE(review): resize takes (rows, cols); (350, 292) yields images
            # that are taller than wide - confirm this is intended.
            resized_image = resize(processed_img, (350, 292),anti_aliasing=True)
        except Exception as ex:
            print(ex)
            continue
        # Append to all three lists together so they stay aligned.
        image_ids.append(str(itemset))
        images.append(resized_image)
        image_tags.append(tags)
    return image_ids,images,image_tags

pictureData = getPictures(itemsets, path, 18)
#print(pictureData)

In [None]:
# CNN scratch cell. Most of it is disabled by wrapping it in triple-quoted
# strings; only the statements outside the quotes are executed.
'''
dummy_annotations = np.concatenate((findAnnotations(87, category, train_annotations),findAnnotations(89, category, train_annotations)))
bbox = getBoundingBoxesPictures(dummy_annotations, os.path.join(data_folder, path))

x_train, x_test, y_train, y_test = train_test_split(
    bbox[0], bbox[1], test_size=0.25, random_state=42)
x_train, x_test = x_train / 255.0, x_test / 255.0

from keras.datasets import mnist
(mnist_train, y_minst_train), (mnist_test, y_minst_test) = mnist.load_data()

categories_subset = [2,4,5,6]
bbox = getBoundingBoxesPictures(findAnnotations(categories_subset[0], category, train_annotations),  os.path.join(data_folder, path))
imgs = bbox[0]
categories_subset.pop(0)
labels = bbox[1]
for category_id in categories_subset:
    bbox = getBoundingBoxesPictures(findAnnotations(category_id, category, train_annotations),  os.path.join(data_folder, path))
    imgs = np.concatenate((imgs,bbox[0]))
    labels = np.concatenate((labels,bbox[1]))

#images = np.asarray(pictureData[1])
x_train, x_test, y_train, y_test = train_test_split(
    imgs, labels, test_size=0.25, random_state=42)
x_train = x_train/255
x_test = x_test/255

for (i,value) in enumerate(y_train):
    if value == 2:
        y_train[i] = 0
    if value == 4:
        y_train[i] = 1
    if value == 5:
        y_train[i] = 2
    if value == 6:
        y_train[i] = 3
for (i,value) in enumerate(y_test):
    if value == 2:
         y_test[i] = 0
    if value == 4:
         y_test[i] = 1
    if value == 5:
         y_test[i] = 2
    if value == 6:
         y_test[i] = 2

trainset = np.expand_dims(x_train, axis=len(x_train.shape))

whitened_images = []
for image in x_train:
    whitened_images.append(zca_whitening_matrix(image))
whitened_images = np.asarray(whitened_images)
whitened_images = np.expand_dims(whitened_images, axis=3)


datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    zca_whitening=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)
#datagen.fit(trainset)
'''
# NOTE(review): in the quoted-out block above, the y_test remapping sends label
# 6 to 2 while the y_train remapping sends it to 3 - fix before re-enabling.
# NOTE(review): `sess` is not used by any live statement in this cell.
sess = tf.Session()
'''
dense_layers = [0, 1, 2]
layer_sizes = [32, 64, 128]
conv_layers = [1, 2, 3]
'''
# Hyper-parameter grid actually used: 1 dense layer, 64 units, 3 conv stages,
# kernel sizes 3 and 4.
dense_layers = [1]
layer_sizes = [64]
conv_layers = [3]
kernel_sizes = [3,4]
# NOTE(review): y_train here is whatever the last executed split left behind;
# the label remapping to 0..3 exists only in the quoted-out block above, so the
# one-hot width may not match the 4 output classes requested below - verify.
y_categorical = to_categorical(y_train)

# Train one model per grid combination.
# NOTE(review): `trainset` and `tensorboard` are not assigned by any live
# notebook-level statement visible here (`trainset` only inside the quoted-out
# block, `tensorboard` only as a local inside constructCNN), so this loop
# raises NameError on a fresh kernel unless they are defined elsewhere.
for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            for kernel_size in kernel_sizes:
                model = constructCNN(conv_layer, layer_size, dense_layer, kernel_size, True, 4)
                model.fit(trainset.astype(np.float32), y_categorical, batch_size=70, epochs=20, validation_split=0.1, callbacks = [tensorboard])
'''


print(trainset.shape)
print(y_train.shape)
mnist_train = np.expand_dims(mnist_train, axis=len(mnist_train.shape))
ylabels_train = to_categorical(y_train)
#hot_encoded = tf.one_hot(y_train, 4)
#model.fit_generator(datagen.flow(trainset, y_train, batch_size=70), epochs=15)
model.fit(trainset, y_train, batch_size=70, epochs=5, validation_split=0.1, callbacks = [tensorboard])
'''
#model.evaluate(augmented_x_train, y_test)


In [None]:
# Scratch cell: show the first labels, one training sample and a few array shapes.
#print(x_train[0])
print(y_train[0:30])
fig,ax = plt.subplots(1)
# NOTE(review): x_train holds the output of the most recently executed
# train_test_split; verify it contains image arrays before displaying sample 14.
ax.imshow(x_train[14], cmap='gray')
# NOTE(review): augmented_x_train, mnist_train and trainset are not assigned by
# any live statement visible in this notebook (they appear only in quoted-out
# code or comments), so these prints raise NameError on a fresh kernel unless
# they are defined elsewhere - confirm or remove.
print(augmented_x_train.shape)
print(mnist_train.shape)
print(trainset.shape)

In [None]:
%%writefile $script_folder/train.py

import argparse
import json
import os
import time

import azureml.core
from azureml.core import Run

import numpy as np
from numpy import genfromtxt
from PIL import Image, ImageFilter
from skimage.transform import resize
from skimage.color import rgb2gray
from skimage.feature import blob_dog, blob_log, blob_doh, corner_harris, corner_subpix, corner_peaks, daisy, hog

from sklearn.externals import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC

def loadAnnotations(fileLocation):
    """Read the JSON file at `fileLocation` and return its parsed content.

    Uses the `json` module, which the script's import block must provide;
    the script previously never imported it.
    """
    with open(fileLocation) as annotation_file:
        return json.load(annotation_file)
    
def getBoundingBox(x,y,w,h,img):
    """Return the sub-array of `img` covering rows y..y+h and columns x..x+w (end-exclusive)."""
    bottom = y + h
    right = x + w
    return img[y:bottom, x:right]

def findAnnotations(id, property, annotations):
    """Return the annotations whose `property` field equals `id`, in input order."""
    return list(filter(lambda candidate: candidate[property] == id, annotations))

def zca_whitening_matrix(X):
    """Return the ZCA whitening matrix U * diag(1/sqrt(S + eps)) * U' for X.

    The covariance is computed with rowvar=True, i.e. each row of X is treated
    as a variable, giving an [M x M] matrix for M rows. U and S come from the
    SVD of that covariance matrix; eps (1e-5) prevents division by zero.
    """
    whitening_eps = 1e-5
    covariance = np.cov(X, rowvar=True)  # [M x M]
    eigvecs, eigvals, _ = np.linalg.svd(covariance)
    inverse_sqrt = np.diag(1.0 / np.sqrt(eigvals + whitening_eps))
    return np.dot(eigvecs, np.dot(inverse_sqrt, eigvecs.T))  # [M x M]

def preProcessImage(img):
    """Convert `img` to an array and return its grayscale version via rgb2gray."""
    # Optional cv2 smoothing steps (GaussianBlur, medianBlur, bilateralFilter,
    # blur, filter2D with a 5x5 mean kernel) are currently disabled.
    return rgb2gray(np.asarray(img))

def getBoundingBoxesPictures(annotations, path):
    """Cut every annotated bounding box out of its image as a 64x64 grayscale patch.

    Parameters:
        annotations: iterable of annotation dictionaries providing 'image_id',
            'bbox' (x, y, width, height) and 'category_id'.
        path: file-name prefix; the image id zero-padded to 12 digits and
            '.jpg' are appended to it.

    Returns:
        (patches, category_ids) as two NumPy arrays of equal length.

    An annotation that cannot be processed (unreadable image, empty crop, ...)
    is reported with print and skipped. Previously the first failure ended the
    whole loop and silently dropped every remaining annotation.
    """
    bounded_images = []
    bounded_annotations = []
    for annotation in annotations:
        try:
            image_id_string = str(annotation['image_id']).zfill(12)
            # The context manager closes the file once the pixels are converted.
            with Image.open(path + image_id_string + '.jpg') as image_file:
                image = preProcessImage(image_file)
            x, y, w, h = (int(value) for value in annotation['bbox'])
            crop = getBoundingBox(x, y, w, h, image)
            image_resized = resize(crop, (64, 64), anti_aliasing=True)
            category_id = annotation['category_id']
        except Exception as ex:
            print(ex)
            continue
        # Append both lists together so images and labels always stay aligned.
        bounded_images.append(image_resized)
        bounded_annotations.append(category_id)
    bounded_images = np.asarray(bounded_images)
    bounded_annotations = np.asarray(bounded_annotations)
    return bounded_images,bounded_annotations

def calculateHogFeatures(gray_image, o, pixels, cells):
    """Return the HOG descriptor of `gray_image`.

    `o` is the number of orientations, `pixels` the side of a square cell and
    `cells` the side of a square block (in cells).
    """
    cell_shape = (pixels, pixels)
    block_shape = (cells, cells)
    return hog(gray_image,
               orientations=o,
               pixels_per_cell=cell_shape,
               cells_per_block=block_shape,
               transform_sqrt=True,
               visualize=False,
               block_norm="L2-Hys")
def calculateDaisyFeatures(gray_image):
    """Return the DAISY descriptors of `gray_image` flattened into one Python list."""
    descs = daisy(gray_image, step=180, radius=15, rings=3, histograms=6,
                  orientations=8, visualize=False)
    # The unused descriptor count was dropped; the whole array is flattened.
    return descs.reshape(descs.size).tolist()

def calculateDoG(gray_image):
    """Detect blobs in `gray_image` with blob_dog.

    Returns the array produced by blob_dog with its third column multiplied by
    sqrt(2).
    """
    # Use the actual parameter (the body referenced an undefined `gray_img`)
    # and np.sqrt (a bare `sqrt` was never imported).
    blobs_dog = blob_dog(gray_image, max_sigma=30, threshold=.1)
    blobs_dog[:, 2] = blobs_dog[:, 2] * np.sqrt(2)
    return blobs_dog

def calculateSIFT(sift, gray_image):
    """Return what `sift.detect` yields for `gray_image`; None is passed as the second argument."""
    keypoints = sift.detect(gray_image, None)
    return keypoints

def calculateFeatures(imgs):
    """Compute HOG and DAISY descriptors for every image in `imgs`.

    Returns:
        (hog_features, daisy_features): two lists with one entry per image,
        in input order.
    """
    hog_features = []
    daisy_features = []
    for img in imgs:
        # HOG: 8 orientations, 16x16-pixel cells, 1x1-cell blocks.
        hog_features.append(calculateHogFeatures(img,8,16,1))
        daisy_features.append(calculateDaisyFeatures(img))
        # DoG and SIFT extraction are currently disabled:
        #dog_features.append(calculateDoG(img))
        #sift_features.append(calculateSIFT(sift, img))
    return hog_features, daisy_features

def svmFit(x_train, y_train):
    """Grid-search an RBF SVM over C and gamma (5-fold CV) and return the fitted search object."""
    print("Fitting the classifier to train")
    started = time.time()
    search_space = {
        'C': [1e3, 5e3, 1e4, 5e4, 1e5],
        'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
    }
    base_svm = SVC(kernel='rbf', class_weight='balanced')
    clf = GridSearchCV(base_svm, search_space, cv=5).fit(x_train, y_train)
    print("Completed in %0.3fs" % (time.time() - started))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)
    return clf

def saveModel(model, filename):
    """Serialize `model` to `filename` with joblib."""
    print("Saving file...")
    # The context manager flushes and closes the handle; it was previously
    # opened inline and never closed.
    with open(filename, 'wb') as model_file:
        joblib.dump(model, model_file)
    print("File saved")
    
def loadModel(filename):
    """Load and return the model stored in `filename` with joblib.

    NOTE(review): joblib files are pickle-based; only load files from trusted
    sources.
    """
    print("loading file...")
    # Keep the loaded object so it can be handed back to the caller; the
    # result used to be discarded.
    model = joblib.load(filename)
    print("Model loaded")
    return model
    
def svmPredict(x_test, y_test, model):
    """Predict `x_test` with `model`, print the evaluation reports and return the accuracy.

    Parameters:
        x_test: feature vectors to classify.
        y_test: true labels for `x_test`.
        model: fitted estimator exposing `predict`.

    Returns:
        The accuracy score as a float, so the caller can log it. The function
        used to return None.

    Requires `accuracy_score` from sklearn.metrics in the script's imports;
    the name was used here but not imported.
    """
    print("Predicting the test set")
    t0 = time.time()
    y_pred = model.predict(x_test)
    print("Completed in %0.3fs" % (time.time() - t0))
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
    accuracy = accuracy_score(y_test, y_pred)
    print(accuracy)
    return accuracy
    
def randomForestFit(x_train, y_train, estimators, random_state):
    """Fit a random forest classifier and return it.

    Parameters:
        x_train: training feature vectors.
        y_train: training labels (category ids).
        estimators: number of trees.
        random_state: seed passed to the forest.

    The labels are category ids, so a classifier is used. A regressor would
    treat the ids as a continuous quantity and predict values between classes.
    """
    print("Fitting the classifier to train")
    t0 = time.time()
    rf = RandomForestClassifier(n_estimators = estimators, random_state = random_state)
    rf.fit(x_train, y_train)
    print("Completed in %0.3fs" % (time.time() - t0))
    return rf

def rfPredict(x_test, y_test, model):
    """Predict `x_test` with `model`, print the accuracy in percent and return it.

    Returns:
        The accuracy score as a float in [0, 1].

    The mean-absolute-error / percentage-error report was replaced by the
    accuracy score: it measured numeric distance between category ids and
    divided by the label value, which fails for a label of 0.
    """
    print("Predicting the test set")
    t0 = time.time()
    y_pred = model.predict(x_test)
    print("Completed in %0.3fs" % (time.time() - t0))
    accuracy = accuracy_score(y_test, y_pred)
    print('Accuracy:', round(100 * accuracy, 2), '%.')
    return accuracy
    
    
# Generator "chunks": split the sequence l into consecutive pieces of n items.
def chunks(l, n):
    """Yield consecutive slices of `l` with at most `n` items each."""
    yield from (l[start:start + n] for start in range(0, len(l), n))

def splitPreprocessing(splits, data_folder, path):
    """Extract crops and features chunk by chunk and accumulate the results.

    Args:
        splits: Iterable of annotation chunks.
        data_folder: Root folder of the dataset.
        path: Image path prefix relative to ``data_folder``.

    Returns:
        Tuple ``(labels, features)``: ``labels`` is a flat list of the labels
        returned for every chunk, ``features`` is the accumulated feature array
        converted to nested lists.
    """
    labels = []
    # Two empty rows so per-chunk feature blocks can be appended along axis 1.
    features = np.asarray([[], []])
    for annotation_chunk in splits:
        bboxes = getBoundingBoxesPictures(annotation_chunk,
                                          os.path.join(data_folder, path))
        chunk_images, chunk_labels = bboxes[0], bboxes[1]
        features = np.append(features, calculateFeatures(chunk_images), axis=1)
        labels.extend(chunk_labels)
    return labels, features.tolist()

# ---- Script entry point: parse arguments and attach to the Azure ML run ----
parser = argparse.ArgumentParser()
parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
args = parser.parse_args()
# The dataset lives in the 'data' sub-folder of the folder passed on the command line.
data_folder = os.path.join(args.data_folder, 'data')
print('Data folder:', data_folder)
run = Run.get_context()

# Image path prefix (the zero-padded image id and '.jpg' are appended by the
# bounding-box helper) and the annotation key used to filter by category.
path = "train2014/COCO_train2014_"
category = "category_id"
train_annotations = loadAnnotations(os.path.join(data_folder, 'annotations/instances_train2014.json'))
# Keep only the list stored under the "annotations" key of the loaded JSON.
train_annotations = train_annotations["annotations"]
# Disabled experiment kept as an inert string literal (categories 87 and 89).
'''
dummy_annotations = np.concatenate((findAnnotations(87, category, train_annotations),findAnnotations(89, category, train_annotations)))
bbox_train = getBoundingBoxesPictures(dummy_annotations, os.path.join(data_folder, path))

'''
# Build the image/label arrays one category at a time: the first category seeds
# the arrays, then it is popped so the loop below only visits the remaining ones.
categories_subset = [2,4,5,6]
bbox = getBoundingBoxesPictures(findAnnotations(categories_subset[0], category, train_annotations),  os.path.join(data_folder, path))
imgs = bbox[0]
categories_subset.pop(0)
labels = bbox[1]
for category_id in categories_subset:
    bbox = getBoundingBoxesPictures(findAnnotations(category_id, category, train_annotations),  os.path.join(data_folder, path))
    imgs = np.concatenate((imgs,bbox[0]))
    labels = np.concatenate((labels,bbox[1]))
# features is indexed below as features[0] and features[1]
# (logged as "HOG" and "DAISY" respectively).
features = calculateFeatures(imgs)

#splits = list(chunks(train_annotations, 20000))
#labels, features = splitPreprocessing(splits, data_folder, path)
print("[2,4,5,6]")
# ---- First experiment: SVM on features[0] ----
x_train, x_test, y_train, y_test = train_test_split(
    features[0], labels, test_size=0.25, random_state=42)
print("HOG")
svm = svmFit(x_train, y_train)
#saveModel(svm, 'SVM_DAISY.sav')
# Logs whatever svmPredict returns under the metric name 'prediction'.
run.log('prediction', svmPredict(x_test, y_test, svm))
# Persist the first SVM and the full feature structure under ./outputs.
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=svm, filename='outputs/SVM.pkl')
# NOTE: written with joblib despite the .npy extension, so it must be read back with joblib.load.
joblib.dump(value=features, filename='outputs/features.npy')

# ---- Second experiment: SVM on features[1]; this model is not saved ----
x_train, x_test, y_train, y_test = train_test_split(
    features[1], labels, test_size=0.25, random_state=42)
print("DAISY")
svm = svmFit(x_train, y_train)
#saveModel(svm, 'SVM_DAISY.sav')
run.log('prediction', svmPredict(x_test, y_test, svm))

In [None]:
%%writefile $script_folder/neural_network.py

import argparse

import azureml.core
from azureml.core import Run

import numpy as np
from numpy import genfromtxt
import os
from PIL import Image, ImageFilter
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense
from keras.callbacks import EarlyStopping
from skimage.transform import resize
from skimage.color import rgb2gray
from skimage.feature import blob_dog, blob_log, blob_doh, corner_harris, corner_subpix, corner_peaks, daisy, hog

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
import time

def loadAnnotations(fileLocation):
    """Read a JSON annotation file and return the decoded object.

    Args:
        fileLocation: Path of the JSON file.

    Returns:
        Whatever ``json.load`` decodes from the file (a dict for the COCO
        instance files this script indexes with ``["annotations"]``).
    """
    # Imported locally because this script's import block does not import json,
    # which made the original raise NameError on the first call.
    import json

    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(fileLocation, encoding='utf-8') as read_file:
        return json.load(read_file)
    
def getBoundingBox(x,y,w,h,img):
    """Return the crop of ``img`` covering rows ``y..y+h`` and columns ``x..x+w``."""
    row_span = slice(y, y + h)
    col_span = slice(x, x + w)
    return img[row_span, col_span]

def findAnnotations(id, property, annotations):
    """Return, in order, every annotation whose ``property`` entry equals ``id``."""
    return [entry for entry in annotations if entry[property] == id]

def zca_whitening_matrix(X):
    """Compute the ZCA whitening matrix of ``X``.

    The covariance is taken with ``rowvar=True``, i.e. every row of ``X`` is a
    variable and every column an observation, so the result is ``[M x M]`` with
    ``M = X.shape[0]``.

    Returns:
        ``U * diag(1 / sqrt(S + epsilon)) * U'`` where ``U`` and ``S`` come from
        the SVD of the covariance matrix.
    """
    covariance = np.cov(X, rowvar=True)  # [M x M]
    # SVD of the covariance: left singular vectors and singular values.
    left_vectors, singular_values, _ = np.linalg.svd(covariance)
    # Small constant that keeps the inverse square root finite for tiny values.
    epsilon = 1e-5
    inverse_sqrt = np.diag(1.0 / np.sqrt(singular_values + epsilon))
    return left_vectors.dot(inverse_sqrt.dot(left_vectors.T))  # [M x M]

def preProcessImage(img):
    """Convert ``img`` to a NumPy array and pass it through ``rgb2gray``.

    No blurring or filtering is applied.
    """
    pixels = np.asarray(img)
    return rgb2gray(pixels)

def getBoundingBoxesPictures(annotations, path):
    """Crop, grayscale and resize the bounding box of every annotation.

    Args:
        annotations: Iterable of dicts with ``'image_id'``, ``'bbox'``
            (x, y, width, height) and ``'category_id'`` entries.
        path: Prefix of the image files; the 12-digit zero-padded image id and
            ``'.jpg'`` are appended to it.

    Returns:
        Tuple ``(bounded_images, bounded_annotations)`` of NumPy arrays: the
        64x64 crops and, at the same index, their category ids.

    An annotation that cannot be processed (missing file, missing key, crop
    that cannot be resized, ...) is reported with ``print`` and skipped. The
    original wrapped the whole loop in one ``try``, so the first failure
    silently dropped every remaining annotation.
    """
    bounded_images = []
    bounded_annotations = []
    for annotation in annotations:
        try:
            image_id_string = str(annotation['image_id']).zfill(12)
            # Close the image file as soon as the pixels have been read.
            with Image.open(path + image_id_string + '.jpg') as image_file:
                image = preProcessImage(image_file)
            x, y, w, h = (int(value) for value in annotation['bbox'][:4])
            image_resized = resize(getBoundingBox(x, y, w, h, image),
                                   (64, 64),
                                   anti_aliasing=True)
            category_id = annotation['category_id']
        except Exception as ex:
            print(ex)
            continue
        # Append both values only after everything succeeded so the image and
        # label lists can never get out of step.
        bounded_images.append(image_resized)
        bounded_annotations.append(category_id)
    bounded_images = np.asarray(bounded_images)
    bounded_annotations = np.asarray(bounded_annotations)
    return bounded_images,bounded_annotations

def saveModel(model, filename):
    """Serialize ``model`` to ``filename`` with joblib.

    Args:
        model: Any object joblib can pickle.
        filename: Path of the file to create or overwrite.
    """
    # This script's import block does not import joblib, so the original raised
    # NameError when called. Import it here through the same path the notebook's
    # first cell uses.
    from sklearn.externals import joblib

    print("Saving file...")
    # Context manager guarantees the handle is flushed and closed.
    with open(filename, 'wb') as model_file:
        joblib.dump(model, model_file)
    print("File saved")
    
def loadModel(filename):
    """Load a joblib-serialized object from ``filename`` and return it.

    Args:
        filename: Path of a file previously written with joblib.dump.

    Returns:
        The deserialized object. (The original discarded it and returned None.)
    """
    # This script's import block does not import joblib, so the original raised
    # NameError when called. Import it here through the same path the notebook's
    # first cell uses.
    from sklearn.externals import joblib

    print("loading file...")
    # NOTE: joblib files are pickle-based; only load files from trusted sources.
    model = joblib.load(filename)
    print("Model loaded")
    return model
    
def cnn_model_fn(features, labels, mode):
    """Model function for a two-convolution CNN used with tf.estimator.Estimator.

    Args:
        features: Dict whose "x" entry holds the input images; it is reshaped
            to [-1, 64, 64, 1] (single-channel 64x64 images).
        labels: Integer class ids, consumed by the sparse softmax loss and the
            accuracy metric; not used in PREDICT mode.
        mode: One of the tf.estimator.ModeKeys values.

    Returns:
        A tf.estimator.EstimatorSpec: predictions only for PREDICT, loss and
        train op for TRAIN, loss and accuracy metric otherwise.
    """
    # Input Layer
    input_layer = tf.reshape(features["x"], [-1, 64, 64, 1])

    # Convolutional Layer #1: 32 filters, 5x5 kernel, "same" padding, ReLU
    conv1 = tf.layers.conv2d(
      inputs=input_layer,
      filters=32,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)

    # Pooling Layer #1: 2x2 max pooling with stride 2
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    # Convolutional Layer #2 (64 filters) and Pooling Layer #2
    conv2 = tf.layers.conv2d(
      inputs=pool1,
      filters=64,
      kernel_size=[5, 5],
      padding="same",
      activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # Dense Layer: flatten the 16x16x64 feature maps left after two 2x2 poolings
    pool2_flat = tf.reshape(pool2, [-1, 16 * 16 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    # Dropout (rate 0.4) is only active while training.
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)

    # Logits Layer: 90 output units, so label ids must be below 90.
    logits = tf.layers.dense(inputs=dropout, units=90)
    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Configure the Training Op (for TRAIN mode): plain SGD, learning rate 0.001
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
      "accuracy": tf.metrics.accuracy(
          labels=labels, predictions=predictions["classes"])
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
    
def chunks(l, n):
    """Generate successive pieces of ``l``, each containing up to ``n`` items."""
    for start in range(0, len(l), n):
        stop = start + n
        yield l[start:stop]

def splitPreprocessing(splits, data_folder, path):
    """Process annotation chunks one at a time and accumulate labels and features.

    NOTE(review): ``calculateFeatures`` is neither defined nor imported in the
    visible part of this script — confirm it is available before calling this.

    Args:
        splits: Iterable of annotation chunks.
        data_folder: Root folder of the dataset.
        path: Image path prefix relative to ``data_folder``.

    Returns:
        Tuple ``(labels, features)``: a flat list of labels and the accumulated
        feature array converted to nested lists.
    """
    labels = []
    # Two empty rows; every chunk's features are appended along axis 1.
    features = np.asarray([[], []])
    for annotation_chunk in splits:
        bboxes = getBoundingBoxesPictures(annotation_chunk,
                                          os.path.join(data_folder, path))
        chunk_features = calculateFeatures(bboxes[0])
        features = np.append(features, chunk_features, axis=1)
        labels.extend(bboxes[1])
    return labels, features.tolist()

# ---- Script entry point: parse arguments and attach to the Azure ML run ----
parser = argparse.ArgumentParser()
parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
args = parser.parse_args()
# The dataset lives in the 'data' sub-folder of the folder passed on the command line.
data_folder = os.path.join(args.data_folder, 'data')
print('Data folder:', data_folder)
run = Run.get_context()

# Image path prefix (image id and '.jpg' are appended by getBoundingBoxesPictures)
# and the annotation key used to filter by category.
path = "train2014/COCO_train2014_"
category = "category_id"
train_annotations = loadAnnotations(os.path.join(data_folder, 'annotations/instances_train2014.json'))
train_annotations = train_annotations["annotations"]

# Collect the crops and labels of every selected category, then concatenate
# them in category order (the list itself is no longer mutated with pop()).
categories_subset = [2,4,5,6,7]
image_prefix = os.path.join(data_folder, path)
image_batches = []
label_batches = []
for category_id in categories_subset:
    bbox = getBoundingBoxesPictures(findAnnotations(category_id, category, train_annotations), image_prefix)
    image_batches.append(bbox[0])
    label_batches.append(bbox[1])
imgs = np.concatenate(image_batches)
labels = np.concatenate(label_batches)

# The crops come out of rgb2gray + skimage.transform.resize, which already
# yield floats in [0, 1]. The original divided by 255 a second time, squeezing
# every input into [0, 1/255]; that extra scaling is removed.
x_train, x_test, y_train, y_test = train_test_split(
    imgs, labels, test_size=0.25, random_state=42)

coco_classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn, model_dir="/tmp/coco_convnet_model")
# Log the softmax output (tensor named "softmax_tensor" in cnn_model_fn).
tensors_to_log = {"probabilities": "softmax_tensor"}

logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log, every_n_iter=50)
# Train the model: batches of 200, shuffled, repeating until `steps` is reached
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": x_train},
    y=y_train,
    batch_size=200,
    num_epochs=None,
    shuffle=True)

# train one step and display the probabilities
coco_classifier.train(
    input_fn=train_input_fn,
    steps=1,
    hooks=[logging_hook])
coco_classifier.train(input_fn=train_input_fn, steps=20000)#1000 steps default
# Evaluate once over the held-out split, in order.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": x_test},
    y=y_test,
    num_epochs=1,
    shuffle=False)
eval_results = coco_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
# NOTE(review): eval_results is a dict of metrics; confirm run.log accepts a
# dict value, otherwise log each entry separately.
run.log('eval_results', eval_results)

In [None]:
import shutil
# Copy the helper module from the notebook's working directory into the script
# folder so it sits next to the scripts there. script_folder must already be
# defined by an earlier cell.
shutil.copy('utils.py', script_folder)

In [None]:
from azureml.train.estimator import Estimator


# Command-line arguments for the entry script: the datastore reference
# returned by ds.as_mount() is passed as --data-folder.
script_params = {
    '--data-folder': ds.as_mount(),
}


# Estimator that runs train.py from script_folder on the compute target.
est = Estimator(source_directory=script_folder,
                script_params=script_params,
                compute_target=compute_target,
                entry_script='train.py',
                conda_packages=['scikit-learn', 'scikit-image', 'pillow', 'opencv', 'numpy'])

run = exp.submit(config=est)
run
# Alternative submission of neural_network.py, disabled by keeping it in a
# string literal.
'''

keras_est = TensorFlow(source_directory=script_folder,
                       script_params=script_params,
                       compute_target=compute_target,
                       entry_script='neural_network.py',
                       pip_packages=['keras', 'scikit-learn', 'scikit-image', 'pillow'])

run = exp.submit(config=keras_est)
run
'''
from azureml.widgets import RunDetails
RunDetails(run).show()

run.wait_for_completion(show_output=True) # specify True for a verbose log

print(run.get_metrics())

print(run.get_file_names())

# register model
# model_path must name a file the run actually produced: the SVM training
# script dumps its model with joblib to 'outputs/SVM.pkl'. The previous path
# 'outputs/svm_hog.pkl' is never written by that script.
model = run.register_model(model_name='svm_hog', model_path='outputs/SVM.pkl')
print(model.name, model.id, model.version, sep = '\t')

In [None]:
# Statuses after which a run can no longer be cancelled. Azure ML reports a
# finished run as 'Completed' (the original compared against 'Complete', so
# every finished run was sent a cancel request).
TERMINAL_RUN_STATUSES = ('Completed', 'Failed', 'Canceled')

# Print every run of the experiment and cancel the ones still in flight.
r = None  # stays None when the experiment has no runs
for r in exp.get_runs():
    run_status = r.get_status()
    print(r.id, run_status)
    if run_status not in TERMINAL_RUN_STATUSES:
        r.cancel()

# if you know the run id, you can "rehydrate" the run
from azureml.core import get_run
#r = get_run(experiment=exp, run_id="multi-class_classification_1552234551_7c1e71ae", rehydrate=True)
# check the returned run type and status
# (with the line above commented out, r is the last run seen by the loop)
if r is not None:
    print(type(r), r.get_status())

# you can cancel a run if it hasn't completed or failed
#if r.get_status() not in TERMINAL_RUN_STATUSES:
    #r.cancel()

In [None]:
# Plot one 256-bin intensity histogram per colour channel of `image`.
# `image` must already exist in the notebook namespace.
# gray_img is not used in this cell; later (currently disabled) cells reference it.
gray_img = rgb2gray(image)
# Line colour for channel 0, 1 and 2 respectively.
# NOTE(review): this assumes `image` is in OpenCV's BGR channel order — confirm how it was loaded.
color = ('b','g','r')
plt.figure()
for i,col in enumerate(color):
    # Histogram of channel i over the whole image (no mask), values in [0, 256).
    histr = cv2.calcHist([image],[i],None,[256],[0,256])
    plt.plot(histr,color = col)
    plt.xlim([0,256])
plt.show()


In [None]:
#df.head()
#print(x_train[0:9].values)
#print(y_train[0:9])

In [None]:
#print(features[0][0])
#print(features[1])
'''
fig, ax = plt.subplots(1, figsize=(8, 4))

hog_image_rescaled = exposure.rescale_intensity(features[0][0][1], in_range=(0, 10))

ax.axis('off')
ax.imshow(hog_image_rescaled, cmap=plt.cm.gray)
ax.set_title('Histogram of Oriented Gradients')
plt.show()
'''

In [None]:
'''
blobs_log = blob_log(gray_img, max_sigma=30, num_sigma=10, threshold=.1)

blobs_log[:, 2] = blobs_log[:, 2] * sqrt(2)

blobs_dog = blob_dog(gray_img, max_sigma=30, threshold=.1)
blobs_dog[:, 2] = blobs_dog[:, 2] * sqrt(2)

blobs_doh = blob_doh(gray_img, max_sigma=30, threshold=.01)

blobs_list = [blobs_log, blobs_dog, blobs_doh]
colors = ['yellow', 'lime', 'red']
titles = ['Laplacian of Gaussian', 'Difference of Gaussian',
          'Determinant of Hessian']
sequence = zip(blobs_list, colors, titles)

fig, axes = plt.subplots(1, 3, figsize=(9, 3), sharex=True, sharey=True)
ax = axes.ravel()

for idx, (blobs, color, title) in enumerate(sequence):
    ax[idx].set_title(title)
    ax[idx].imshow(image, interpolation='nearest')
    for blob in blobs:
        y, x, r = blob
        c = plt.Circle((x, y), r, color=color, linewidth=2, fill=False)
        ax[idx].add_patch(c)
    ax[idx].set_axis_off()

plt.tight_layout()
plt.show()
'''

In [None]:
'''
fd, hog_image = hog(image, orientations=8, pixels_per_cell=(16, 16),
                    cells_per_block=(1, 1), visualize=True, multichannel=True)

fig, ax = plt.subplots(1, figsize=(8, 4))

hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

ax.axis('off')
ax.imshow(hog_image_rescaled, cmap=plt.cm.gray)
ax.set_title('Histogram of Oriented Gradients')
plt.show()
'''

In [None]:
'''
print(gray_img)
coords = corner_peaks(corner_harris(gray_img), min_distance=5)

fig, (ax) = plt.subplots(1, figsize=(8, 4))

ax.axis('off')
ax.imshow(image)
ax.plot(coords[:, 1], coords[:, 0], 'or', ms=4)
'''

In [None]:
'''
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
sift = cv2.xfeatures2d.SIFT_create()
kp = sift.detect(gray,None)

siftimg=cv2.drawKeypoints(gray,kp,img)
plt.figure()
plt.axis('off')
plt.imshow(siftimg)
plt.show()
'''