In [3]:
'''
Utility function 

'''
import numpy as np

from matplotlib.pyplot import show, imshow


def to_one_hot_enc(arr):
    
    one_hot_enc = []

    for element in arr:
        
        sample = np.array([0, 0, 0, 0])
        sample[int(element)] = 1

        one_hot_enc.append(sample)
    
    return np.array(one_hot_enc)


def from_one_hot_enc(arr):

    lst = []
    
    for element in arr:

      idx = list(element).index(max(element))

      lst.append(idx)

    return np.array(lst)


def reshape_images(arr):

    img_list = []

    for img in arr:
        img = np.reshape(np.array(img),(50,50))
        img_list.append(img)

    return np.array(img_list)


def num_samples_in_classes(arr):

    samples_in_class = np.bincount(arr.astype(int))

    return samples_in_class[0], samples_in_class[1], samples_in_class[2], samples_in_class[3]


def show_img(img):
    img = np.reshape(np.array(img),(50,50))
    imshow(img,cmap='gray')
    show()


def get_indices(arr, val):
    idxs = np.where(arr == val)[0]
    return idxs


def balance_set(x_data, y_data):

  occurrence_lst = num_samples_in_classes(y_data)

  max_samples_of_class = max(occurrence_lst)

  y_data_balanced = []
  x_data_balanced = []

  for clas, occurrence in enumerate(occurrence_lst):
  
    sample_idxs = get_indices(y_data, clas)

    multiplier = int(max_samples_of_class/occurrence)
  
    for sample_idx in sample_idxs:

      temp_lst_x = [x_data[sample_idx]]*multiplier
      temp_lst_y = [y_data[sample_idx]]*multiplier

      x_data_balanced += temp_lst_x
      y_data_balanced += temp_lst_y

  x_data_balanced = np.array(x_data_balanced)
  y_data_balanced = np.array(y_data_balanced)
  
  return x_data_balanced, y_data_balanced


'''
Loading and reading data 

'''
# import numpy as np
# from os import getcwd

# x_data = np.load('Xtrain_Classification_Part1.npy') # x_data.shape: (7366, 2500)
# y_data = np.load('Ytrain_Classification_Part2.npy') # y_data.shape: (7366,)

# # Evaluate data

# class1, class2, class3, class4 = num_samples_in_classes(y_data)

# total = class1 + class2 + class3 + class4

# # The training set is unbalanced

# print('Share of Class 1: {:.2f}%'.format(100*class1/total))
# print('Share of Class 2: {:.2f}%'.format(100*class2/total))
# print('Share of Class 3: {:.2f}%'.format(100*class3/total))
# print('Share of Class 4: {:.2f}%'.format(100*class4/total))

# # Share of Class 1: 60.79%
# # Share of Class 2: 4.63%
# # Share of Class 3: 18.16%
# # Share of Class 4: 16.41%


'\nLoading and reading data \n\n'

In [None]:
pip install -U imbalanced-learn

In [None]:
import numpy as np
from os import getcwd

x_data = np.load(getcwd() + "/training_set/Xtrain_Classification_Part2.npy") # x_data.shape: (7366, 2500)
y_data = np.load(getcwd() + "/training_set/Ytrain_Classification_Part2.npy") # y_data.shape: (7366,)

'''
Prepare data

'''
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

# # Add copies of underrepresented classes - naive approach

x_training, x_test, y_training, y_test = train_test_split(x_data, y_data, test_size=0.10, stratify=y_data)

x_data_balanced, y_data_balanced = balance_set(x_training, y_training)

# # Add synthetic samples with SMOTE - not an popular in image processing

sm = SMOTE(random_state=42)

x_smote, y_smote = sm.fit_resample(x_training, y_training)

# #

x_training = reshape_images(x_training)
x_training = x_training.reshape(x_training.shape[0],50,50,1)

x_data_balanced = reshape_images(x_data_balanced)
x_data_balanced = x_data_balanced.reshape(x_data_balanced.shape[0],50,50,1)

x_smote = reshape_images(x_smote)
x_smote = x_smote.reshape(x_smote.shape[0],50,50,1)

y_training      = to_one_hot_enc(y_training)
y_data_balanced = to_one_hot_enc(y_data_balanced)
y_smote         = to_one_hot_enc(y_smote)

print(num_samples_in_classes(y_smote))

In [None]:
pip install keras-tuner --upgrade

In [7]:
from tensorflow import keras
from kerastuner.tuners import RandomSearch

'''
Tuning model

'''



def model_builder(hp):
  
  model = keras.Sequential()

  # First layer

  model.add(keras.layers.Conv2D(hp.Choice("First layer", [16, 32, 64]), 3, activation='relu', input_shape=(50,50,1)))

  # Tuning hidden layer

  for i in range(hp.Int("Conv Layers", min_value=0, max_value=3)):
    model.add(keras.layers.Conv2D(hp.Choice(f"layer_{i}_filters", [16,32,64]), 3, activation='relu'))

  # End part

  model.add(keras.layers.MaxPool2D(3,3))

  model.add(keras.layers.Flatten())

  model.add(keras.layers.Dense(hp.Choice("Dense layer", [64, 128, 256, 512, 1024]), activation='relu'),)

  model.add(keras.layers.Dense(4, activation='softmax'))
  
  model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['binary_accuracy'])
  

  return model

tuner = RandomSearch(
    model_builder,
    objective='binary_accuracy',
    max_trials=32,
    overwrite=True,
)

stop_early = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(x_smote, y_smote, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

Trial 32 Complete [00h 00m 29s]
binary_accuracy: 0.9972665905952454

Best binary_accuracy So Far: 0.999263346195221
Total elapsed time: 00h 18m 09s
INFO:tensorflow:Oracle triggered exit


In [9]:

'''
Build model

'''

model   = tuner.hypermodel.build(best_hps)
history = model.fit(x_smote, y_smote, epochs=50, validation_split=0.2)

binary_accuracy = history.history['binary_accuracy']
best_epoch = binary_accuracy.index(max(binary_accuracy)) + 1
print('Best epoch: %d' % (best_epoch,))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Best epoch: 48


In [37]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 48, 48, 32)        320       
                                                                 
 conv2d_3 (Conv2D)           (None, 46, 46, 16)        4624      
                                                                 
 conv2d_4 (Conv2D)           (None, 44, 44, 16)        2320      
                                                                 
 conv2d_5 (Conv2D)           (None, 42, 42, 16)        2320      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 14, 14, 16)       0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 3136)              0         
                                                      

In [10]:

hypermodel = tuner.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(x_smote, y_smote, epochs=best_epoch, validation_split=0.2)

Epoch 1/48
Epoch 2/48
Epoch 3/48
Epoch 4/48
Epoch 5/48
Epoch 6/48
Epoch 7/48
Epoch 8/48
Epoch 9/48
Epoch 10/48
Epoch 11/48
Epoch 12/48
Epoch 13/48
Epoch 14/48
Epoch 15/48
Epoch 16/48
Epoch 17/48
Epoch 18/48
Epoch 19/48
Epoch 20/48
Epoch 21/48
Epoch 22/48
Epoch 23/48
Epoch 24/48
Epoch 25/48
Epoch 26/48
Epoch 27/48
Epoch 28/48
Epoch 29/48
Epoch 30/48
Epoch 31/48
Epoch 32/48
Epoch 33/48
Epoch 34/48
Epoch 35/48
Epoch 36/48
Epoch 37/48
Epoch 38/48
Epoch 39/48
Epoch 40/48
Epoch 41/48
Epoch 42/48
Epoch 43/48
Epoch 44/48
Epoch 45/48
Epoch 46/48
Epoch 47/48
Epoch 48/48


<keras.callbacks.History at 0x7fd4cadc4dd0>

In [21]:
from sklearn.metrics import balanced_accuracy_score


x_test = reshape_images(x_test)
x_test = x_test.reshape(x_test.shape[0],50,50,1)

pred = hypermodel.predict(x_test)

pred = from_one_hot_enc(pred)

print(balanced_accuracy_score(pred, y_test))



0.7844673914373902


In [33]:
test = np.load('drive/MyDrive/Xtest_Classification_Part2.npy') 
test = reshape_images(test)
test = test.reshape(test.shape[0],50,50,1)

pred = hypermodel.predict(test)

pred = from_one_hot_enc(pred)

np.save("test_set_predictions.npy", pred)