# Setup

In [None]:
# Common imports
import sys
import os
import random
import sklearn
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import PIL
from google.cloud import storage

# Make this notebook's output stable across runs. Python's own `random`
# module is seeded too: the dataset is shuffled with random.shuffle, which
# np.random.seed and tf.random.set_seed do not affect.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib.pyplot as plt

# Silence warnings whose message starts with "internal gelsd"
# (see SciPy issue #5998).
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

# Google Cloud Storage properties
bucket_name = 'iot-lego-sorter.appspot.com'  # bucket that is listed and downloaded from
folder = 'datasets'  # blob-name prefix used when listing the dataset files
# Client authenticated from a local service-account key file.
storage_client = storage.Client.from_service_account_json('service_account.json')
bucket = storage_client.bucket(bucket_name)

# Local directory that downloaded images are written below
localDir = 'images/lego/'

# Image dimensions; used to reshape the pixel arrays and as the model input shape
imgWidth = 64
imgHeight = 64

# List available buckets.

In [None]:
# Collect every bucket the storage client can list and print them.
buckets = list(storage_client.list_buckets())
print(buckets)

# Download files from Google Storage

Get available files in datasets.

In [None]:
# Group the blob names found under `folder` by brick id. The brick id is the
# first path component after the "<folder>/" prefix of the blob name.
blobs = storage_client.list_blobs(bucket_name, prefix=folder)
fileNameByBrickId = {}

for blob in blobs:
    # Strip the "<folder>/" prefix from the blob name.
    nameNoFolder = blob.name[len(folder) + 1:]
    parts = nameNoFolder.split('/')

    # Only names with at least one sub-folder level carry a brick id.
    if len(parts) > 1:
        # Called brickId rather than id so the builtin id() is not shadowed
        # for the rest of the notebook session.
        brickId = parts[0]
        fileNameByBrickId.setdefault(brickId, []).append(blob.name)
print('Retrieved all filenames')

In [None]:
# Download an image from the bucket unless it has already been downloaded.
def downloadFile(fileName):
    """Download blob `fileName` below `localDir` and return the local path.

    The leading "<folder>/" prefix is stripped from `fileName` to build the
    local path. A file that already exists locally is not downloaded again.

    The data is first written to a temporary "<path>.part" file and renamed
    only after the download has completed, so an interrupted download is
    never mistaken for a finished file on a later run.
    """
    nameNoFolder = fileName[len(folder) + 1:]
    localPath = localDir + nameNoFolder

    if os.path.isfile(localPath):
        return localPath

    targetDir = os.path.dirname(localPath)
    if targetDir:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(targetDir, exist_ok=True)

    partialPath = localPath + '.part'
    try:
        bucket.blob(fileName).download_to_filename(partialPath)
        os.replace(partialPath, localPath)
    finally:
        # The partial file is only still present if the download or the
        # rename failed; remove it so no truncated data is left behind.
        if os.path.exists(partialPath):
            os.remove(partialPath)

    return localPath

In [None]:

# Download every listed file (when needed) and record its local path per brick id.
localFileNameByBrickId = {}
for brickKey, remoteNames in fileNameByBrickId.items():
    print(brickKey)
    localPaths = localFileNameByBrickId[brickKey] = []
    total = str(len(remoteNames))
    # Trailing spaces wipe leftovers of a longer, previously printed progress line.
    blank = ' ' * 256
    for position, remoteName in enumerate(remoteNames, start=1):
        counter = str(position).zfill(len(total))
        print(counter + '/' + total + ' - ' + remoteName + blank, end='\r')
        localPaths.append(downloadFile(remoteName))
    print(total + '/' + total + ' - Finished downloading' + blank)
print('\nFinished all downloads')

# Make images into multidimensional arrays

In [None]:
# Convert an image file into a 2-D array of grayscale pixel values.
def convertImageToArray(fileName):
    """Load `fileName` as grayscale ('L' mode) and return it as a 2-D array.

    The flat pixel sequence is reshaped to (imgWidth, imgHeight); reshape
    raises ValueError if the image does not hold exactly
    imgWidth * imgHeight pixels.
    """
    # A bare `import PIL` does not load the PIL.Image submodule; import it
    # explicitly instead of relying on another library having done so.
    from PIL import Image

    # The context manager closes the underlying file once the pixels are read.
    with Image.open(fileName) as source:
        grayscale = source.convert('L')

    # NOTE(review): getdata() is flattened row by row, so this axis order is
    # only right for square images (imgWidth == imgHeight, as configured).
    # For non-square images it would have to be (imgHeight, imgWidth),
    # together with the model's input shape.
    return np.array(grayscale.getdata()).reshape(imgWidth, imgHeight)

In [None]:
IndexToBrickId = {}
brickIdImageKeyPairs = []
# Give every brick id a numeric class index and pair that index with each of its images
for classIndex, (brickKey, localPaths) in enumerate(localFileNameByBrickId.items()):
    print(brickKey)
    IndexToBrickId[classIndex] = brickKey
    total = str(len(localPaths))
    for position, localPath in enumerate(localPaths, start=1):
        counter = str(position).zfill(len(total))
        print(counter + '/' + total + ' - ' + localPath + (' ' * 256), end='\r')
        pixels = convertImageToArray(localPath)
        brickIdImageKeyPairs.append((classIndex, pixels))
    print(total + '/' + total + ' - Finished converting\n')
print('\nFinished converting all')

# Randomize order

In [None]:
# Shuffle the (class index, image) pairs, then split them into two parallel lists
random.shuffle(brickIdImageKeyPairs)
IndexesForBrickId = [classIndex for classIndex, _ in brickIdImageKeyPairs]
images = [pixels for _, pixels in brickIdImageKeyPairs]
    

# Splitting datasets and normalizing


In [None]:
# Use the first 80% of the shuffled dataset for training and the rest for testing.
sampleCount = len(IndexesForBrickId)
percentageToTakeTrainFull = int(sampleCount * 0.8)

# Scale the pixel intensities into the 0-1 range; dividing by 255. also converts to float
images = np.asarray(images) / 255.

print('Train ' + str(percentageToTakeTrainFull) + '/' + str(sampleCount))

trainPart = slice(None, percentageToTakeTrainFull)
testPart = slice(percentageToTakeTrainFull, None)

X_train_full = np.asarray(images[trainPart])
y_train_full = np.asarray(IndexesForBrickId[trainPart])

X_test = np.asarray(images[testPart])
y_test = np.asarray(IndexesForBrickId[testPart])


In [None]:
# Size and dimensions of the full training set
print(X_train_full.shape)
# Data type of the pixels; each intensity was scaled to a float between 0 and 1
print(X_train_full.dtype)

In [None]:
# Reserve the first 10% of the full training set for validation.
fullTrainCount = len(X_train_full)
percentageToTakeForValidation = int(fullTrainCount * 0.1)
print('Length of Validation set ' + str(percentageToTakeForValidation) + ' / ' + str(fullTrainCount))

# Split the full training set into a validation set and a slightly smaller training set
X_valid = X_train_full[:percentageToTakeForValidation]
X_train = X_train_full[percentageToTakeForValidation:]
y_valid = y_train_full[:percentageToTakeForValidation]
y_train = y_train_full[percentageToTakeForValidation:]

In [None]:
# Report the shapes of the final training, test and validation sets.
print("Training Set shape: {0}".format(X_train.shape))
print("Test Set shape: {0}".format(X_test.shape))
print("Valid Set Shape: {0}".format(X_valid.shape))


# Build a model using the Sequential API

### Method to create model with parameters

In [None]:
def build_model(var_activation='relu', var_optimizer='adam', var_neurons=50, var_hiddenL=2, var_lr=0.005, var_momentum=0.5, var_classes=3):
    """Build and compile a fully connected classifier for the brick images.

    Args:
        var_activation: Activation function of every hidden layer.
        var_optimizer: 'sgd' or 'adam' (case-insensitive) to create that
            optimizer with `var_lr`; any other string is handed to Keras
            unchanged, in which case `var_lr` and `var_momentum` are not used.
        var_neurons: Number of units in each hidden layer. Numeric strings
            are accepted and converted to int.
        var_hiddenL: Number of hidden Dense layers.
        var_lr: Learning rate for the 'sgd' and 'adam' optimizers.
        var_momentum: Momentum; only used by the 'sgd' optimizer.
        var_classes: Number of output classes (units of the softmax layer).

    Returns:
        The compiled keras Sequential model.
    """
    model = keras.models.Sequential()

    # Input layer: Flatten turns every input image into a 1-dimensional array.
    model.add(keras.layers.Flatten(input_shape=[imgWidth, imgHeight]))

    # Hidden layers.
    hidden_units = int(var_neurons)
    for _ in range(var_hiddenL):
        model.add(keras.layers.Dense(hidden_units, activation=var_activation))

    # Output layer: one neuron per class. Softmax is used because this is
    # multiclass classification; it keeps the estimated probabilities
    # between 0 and 1.
    model.add(keras.layers.Dense(var_classes, activation="softmax"))

    # Pick the optimizer. Both named optimizers take `learning_rate`; the
    # older `lr` keyword is deprecated.
    optimizer_name = var_optimizer.lower()
    if optimizer_name == 'sgd':
        optimizer = keras.optimizers.SGD(learning_rate=var_lr, momentum=var_momentum)
    elif optimizer_name == 'adam':
        optimizer = keras.optimizers.Adam(learning_rate=var_lr)
    else:
        # Unknown name: let Keras resolve the string with its own defaults.
        optimizer = var_optimizer

    # Labels are integer class indices, hence the sparse loss.
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=optimizer,
                  metrics=["accuracy"])
    return model


In [None]:
## RandomizedSearchCV: tries to find the best hyperparameters.

from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV


# Wrap the model factory for scikit-learn, passing epochs=30 to the wrapper.
kerasModel = KerasClassifier(build_model, epochs=30)

# Candidate values for each parameter that RandomizedSearchCV samples from
_optimizers = ['SGD', 'Adam']
_neurons = [60, 80, 100]
_batch_size = [16, 32, 64]
_activations = ['relu', 'selu']
_hiddenLayer = [2, 4]
_lr = [0.005, 0.010, 0.0005]
_momentum = [0.5, 0.7]
params = {
    'var_activation': _activations,
    'var_optimizer': _optimizers,
    'batch_size': _batch_size,
    'var_neurons': _neurons,
    'var_hiddenL': _hiddenLayer,
    'var_lr': _lr,
    'var_momentum': _momentum,
}

# 10 sampled parameter combinations, each evaluated with 3-fold cross-validation
rscv = RandomizedSearchCV(
    kerasModel,
    param_distributions=params,
    cv=3,
    n_iter=10,
)
rscv_results = rscv.fit(X_train, y_train, validation_data=(X_valid, y_valid))

# Print the best score and the parameters that produced it
bestScore = rscv_results.best_score_
bestParams = rscv_results.best_params_
print('Best score is: {} using {}'.format(bestScore, bestParams))


In [None]:
# Build the SGD model and look at the initial parameters of its first hidden layer.
model_sgd = build_model(
    var_activation='relu',
    var_optimizer='sgd',
    var_neurons=100,
    var_hiddenL=2,
    var_lr=0.005,
    var_momentum=0.5,
)
# layers[0] is the Flatten input layer, so index 1 is the first Dense layer.
hidden1_sgd = model_sgd.layers[1]

weights_sgd, biases_sgd = hidden1_sgd.get_weights()
weightsReport = "Weights:\n {0}\n\nShape of weights {1}\n".format(weights_sgd, weights_sgd.shape)
biasesReport = "Biases:\n {0}\n\nShape of biases {1}".format(biases_sgd, biases_sgd.shape)
print(weightsReport)
print(biasesReport)

## Train the model (SGD)

In [None]:
print(y_train)
# Train with early stopping; restore_best_weights rolls the model back to its best weights.
earlyStopping_cb = keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)
history = model_sgd.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    callbacks=[earlyStopping_cb],
    batch_size=32,
    epochs=30,
)

In [None]:
# Plot the metrics recorded during training, with a grid and the y axis fixed to 0-1.
historyAxes = pd.DataFrame(history.history).plot(figsize=(8, 5))
historyAxes.grid(True)
historyAxes.set_ylim(0, 1)
plt.show()

## Evaluate the SGD model.

In [None]:
# Evaluate the SGD model on the test set.
model_sgd.evaluate(X_test, y_test)

In [None]:
# Predict the class of the first 3 instances of the test set
X_new = X_test[:3]
classProbabilities = model_sgd.predict(X_new)
# The predicted class is the index of the highest probability.
y_pred = classProbabilities.argmax(axis=-1)
y_pred

In [None]:
# Show the true labels of the same 3 instances to check the predictions.
y_new = y_test[:3]
y_new

In [None]:
# Show each predicted image together with the brick id predicted for it.
# Class indices are mapped back to brick ids by explicit key lookup rather
# than by relying on the ordering of the dict's values.
newClassList = np.array([IndexToBrickId[int(classIndex)] for classIndex in y_pred])
# Iterate over the actual predictions instead of a hard-coded range(3), so
# the cell also works when fewer than three samples are available.
for image, brickId in zip(X_new, newClassList):
    plt.imshow(image, cmap='gray')
    plt.show()
    print(brickId)

# Training the Model (Adam) & evaluating it.

In [None]:
# Build the Adam model: 4 hidden layers of 60 relu units, learning rate 0.0005.
model_adam = build_model(
    var_activation='relu',
    var_optimizer='adam',
    var_neurons=60,
    var_hiddenL=4,
    var_lr=0.0005,
)


In [None]:
# Train the Adam model for 30 epochs with batch size 16, passing the
# validation set as validation data. This rebinds `history`, replacing the
# history object of the SGD run.
history = model_adam.fit(X_train, y_train, epochs=30, batch_size=16,
                    validation_data=(X_valid, y_valid))

In [None]:
# Plot the metrics recorded during training, with a grid and the y axis fixed to 0-1.
historyAxes = pd.DataFrame(history.history).plot(figsize=(8, 5))
historyAxes.grid(True)
historyAxes.set_ylim(0, 1)
plt.show()

In [None]:
# Evaluate the Adam model on the test set.
model_adam.evaluate(X_test, y_test)