In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import time 
import cv2
from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, balanced_accuracy_score

from google.colab import drive, files
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install wandb
!wandb login
import wandb
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.15.2-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.23.0-py2.py3-none-any.whl (205 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m205.1/205.1 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting pathtools (from wandb)
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting

In [3]:
def resize(input_image, height, width):
  input_image = tf.image.resize(input_image, [height, width],
                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
  return input_image

def soft_augmentation(input_image):
  # Resizing to 158x158
  input_image = resize(input_image, 158, 158)

  # Random cropping back to 128x128
  input_image = tf.image.random_crop(input_image, size=[128, 128, 3])

  if tf.random.uniform(()) > 0.5:
    # Random mirroring
    input_image = cv2.GaussianBlur(input_image.numpy(),(3,3),0) 
    input_image = tf.image.flip_left_right(input_image)

  return input_image

In [4]:
wandb.init(
    # set the wandb project where this run will be logged
    project="test",
    name = 'Pix',
    config={
        "model":'ResNet50',
        "include_top":False,
        "weights":None,
        "input_shape":(128,128,3),
        "pooling":"avg",
        "num_outputs": 2,
        "activation": "softmax",
        "optimizer": 'adam',
        "loss": "categorical_crossentropy",
        "metric": ["Accuracy"],
        "epochs": 50,
        "batch_size": 16,
        "num_models":10,
    }
)

config = wandb.config

[34m[1mwandb[0m: Currently logged in as: [33mmarekpasson[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
# Load base model for adaboost (ResNet50)
base_model = tf.keras.applications.resnet50.ResNet50(
    include_top=config.include_top, 
    weights=config.weights,
    input_shape=config.input_shape,
    pooling=config.pooling,
)

model_resnet = keras.Sequential()
model_resnet.add(base_model)
model_resnet.add(keras.layers.Dense(units=config.num_outputs, activation=config.activation))

# Data prepare

In [11]:
test_normal = tf.data.Dataset.load("/content/drive/MyDrive/Dane/IDA/test_normal_classification")
test_patology = tf.data.Dataset.load("/content/drive/MyDrive/Dane/IDA/test_patology_classification")

train_normal = tf.data.Dataset.load("/content/drive/MyDrive/Dane/IDA/image_normal_train")
train_patology = tf.data.Dataset.load("/content/drive/MyDrive/Dane/IDA/image_patology_train")

generated_pix = tf.data.Dataset.load("/content/drive/MyDrive/Dane/IDA/generated_pix2pix")
generated_paste = tf.data.Dataset.load("/content/drive/MyDrive/Dane/IDA/generated_paste")

In [12]:
def get_patology_label(x):
    return np.array([[0,1]])

def get_normal_label(x):
    return np.array([[1,0]])

test_normal = test_normal.map(lambda image: (image, tf.py_function(get_normal_label,[image],tf.int8)))
test_patology = test_patology.map(lambda image: (image, tf.py_function(get_patology_label,[image],tf.int8))).skip(50)

train_normal = train_normal.map(lambda image: (image, tf.py_function(get_normal_label,[image],tf.int8)))
train_patology = train_patology.map(lambda image: (image, tf.py_function(get_patology_label,[image],tf.int8)))

generated_pix = generated_pix.map(lambda image: (image, tf.py_function(get_patology_label,[image],tf.int8)))
generated_paste = generated_paste.map(lambda image: (image, tf.py_function(get_patology_label,[image],tf.int8)))

test_data = test_normal.concatenate(test_patology)

valid_data = train_normal.take(20).concatenate(train_patology.take(3))
train_data = train_normal.skip(20).concatenate(train_patology.skip(3)).shuffle(150)

merged_valid = train_normal.take(10).concatenate(train_patology)
merged_data_pix = train_normal.skip(10).concatenate(generated_pix).shuffle(170)
merged_data_paste = train_normal.skip(10).concatenate(generated_paste).shuffle(170)

#soft augmentation
train_patology_augmented = train_patology.repeat(8)
train_data_augmented = train_normal.skip(10).concatenate(train_patology_augmented).repeat(3).map(lambda image, label: (tf.py_function(soft_augmentation,[image],tf.float32),label)).shuffle(500)
valid_data_augmented = train_normal.take(10).concatenate(train_patology)

merged_data_pix_augmented = merged_data_pix.repeat(3).map(lambda image, label: (tf.py_function(soft_augmentation,[image],tf.float32),label))
merged_data_paste_augmented = merged_data_paste.repeat(3).map(lambda image, label: (tf.py_function(soft_augmentation,[image],tf.float32),label))

test_data = test_data.map(lambda image, label: (image, tf.reshape(label, (2,))))
valid_data = valid_data.map(lambda image, label: (image, tf.reshape(label, (2,))))
train_data = train_data.map(lambda image, label: (image, tf.reshape(label, (2,))))
train_data_augmented = train_data_augmented.map(lambda image, label: (image, tf.reshape(label, (2,))))
valid_data_augmented = valid_data_augmented.map(lambda image, label: (image, tf.reshape(label, (2,))))
merged_valid = merged_valid.map(lambda image, label: (image, tf.reshape(label, (2,))))
merged_data_pix = merged_data_pix.map(lambda image, label: (image, tf.reshape(label, (2,))))
merged_data_paste = merged_data_paste.map(lambda image, label: (image, tf.reshape(label, (2,))))
merged_data_pix_augmented = merged_data_pix_augmented.map(lambda image, label: (image, tf.reshape(label, (2,))))
merged_data_paste_augmented = merged_data_paste_augmented.map(lambda image, label: (image, tf.reshape(label, (2,))))

In [13]:
datasets = {
            'Base_data':[train_data, valid_data], 
            'BaseSoft_data':[train_data_augmented, valid_data_augmented], 
            'Paste_data':[merged_data_paste, merged_valid], 
            'PasteSoft_data':[merged_data_paste_augmented, merged_valid], 
            'Pix_data':[merged_data_pix, merged_valid], 
            'PixSoft_data':[merged_data_pix_augmented, merged_valid]
            }

# Adaboost


In [14]:
def compute_metrics(y_pred):
    y_pred_labels = np.argmax(y_pred, axis=1)

    # Convert class probabilities to class labels
    y_true_labels = [y.numpy().argmax(axis=0) for _, y in test_data]
    y_true = [y for _,y in test_data]
    # Evaluate boosted model on test set
    test_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred).numpy().mean()
    test_acc = np.mean(y_pred_labels == y_true_labels)

    precision = precision_score(y_true_labels, y_pred_labels)
    recall = recall_score(y_true_labels, y_pred_labels)
    balanced_acc = balanced_accuracy_score(y_true_labels, y_pred_labels)

    print(" Test")
    print(f' loss: {test_loss}, accuracy: {test_acc}')
    print(confusion_matrix(y_true_labels, y_pred_labels))
    print(f'Accuracy {test_acc}, Precision: {precision}, Recall: {recall}, Balanced accuracy: {balanced_acc}\n')
    return test_acc, precision, recall, balanced_acc

def base_model_builder():
    classifier = keras.models.clone_model(model_resnet)
    classifier.compile(optimizer=config.optimizer, 
                            loss=config.loss,
                            metrics=config.metric)
    return classifier

class Adaboost():
    def __init__(self, n_estimators, base_model_fn, epochs, batch_size, monitor=False):
        self.n_estimators = n_estimators
        self.base_model_fn = base_model_fn
        self.epochs = epochs
        self.batch_size = batch_size
        self.monitor = monitor
        self.models = []
        self.alphas = []
        
    def fit(self, train_data, valid_data):
        self.N_train = train_data.__len__().numpy()
        weights_train = np.ones(self.N_train) / self.N_train
        N_valid = valid_data.__len__().numpy()
        weights_valid = np.ones(N_valid) / N_valid

        X_train = train_data.map(lambda x,y: x).batch(1)
        y_train = [y for x,y in train_data]

        X_valid = valid_data.map(lambda x,y: x).batch(1)
        y_valid = [y for x,y in valid_data]

        for estimator in range(self.n_estimators):
            base_estimator = self.base_model_fn()
            base_estimator = self.train_estimator(base_estimator, train_data, valid_data, weights_train)

            y_pred_train = base_estimator.predict(X_train, verbose=0)
            errors_train = [int(y_pred_train[i].argmax(axis=0) != y_train[i].numpy().argmax(axis=0)) for i in range(self.N_train)]

            y_pred_valid = base_estimator.predict(X_valid, verbose=0)
            errors_valid = [int(y_pred_valid[i].argmax(axis=0) != y_valid[i].numpy().argmax(axis=0)) for i in range(N_valid)]
            
            # updating weights of model
            err_valid = np.dot(weights_valid, errors_valid)
            if err_valid==0:
                alpha = 1.5
                if len(self.alphas)>4:
                    self.alphas.append(alpha)
                    self.models.append(base_estimator)
                    break
            else:
                alpha = 0.5 * np.log((1 - err_valid) / err_valid)
            self.alphas.append(alpha)

            # updating the sample weights
            weights_train /= np.sum(weights_train)
            weights_valid /= np.sum(weights_train)
            self.models.append(base_estimator)

            # evaluating the model
            evaluate_train = base_estimator.evaluate(train_data.batch(1),verbose=0)
            evaluate_valid = base_estimator.evaluate(valid_data.batch(1),verbose=0)
            print(f"Model {estimator+1}/{self.n_estimators}")
            print(" Train")
            print(f' loss: {evaluate_train[0]}, accuracy: {evaluate_train[1]}')
            print(" Valid")
            print(f' loss: {evaluate_valid[0]}, accuracy: {evaluate_valid[1]}')
            evaluate_test = base_estimator.evaluate(test_data.batch(1),verbose=0)
            print(" Test")
            print(f' loss: {evaluate_test[0]}, accuracy: {evaluate_test[1]}')
            print(" Ensemble test")
            y_pred = self.predict(test_data)
            compute_metrics(y_pred)
            
            if self.monitor==True:
                y_pred_labels = np.argmax(y_pred, axis=1)
                y_true_labels = [y.numpy().argmax(axis=0) for _, y in test_data]
                y_true = [y for _,y in test_data]
                test_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred).numpy().mean()
                test_acc = np.mean(y_pred_labels == y_true_labels)
                wandb.log({'ensemble_loss': test_loss, 'ensemble_accuracy': test_acc})


    def train_estimator(self, model, train_data, valid_data, weights):
        indices = np.arange(self.N_train)
        for epoch in range(self.epochs):
            i=0
            for x,y in train_data.batch(self.batch_size):
                batch_indices = indices[i:i+self.batch_size]
                X_batch, y_batch, w_batch = x, y, weights[batch_indices]
                i+=self.batch_size
                model.train_on_batch(X_batch, y_batch, sample_weight=w_batch)
                
            if self.monitor==True:
                loss, acc = model.evaluate(train_data.batch(self.batch_size),verbose=0)
                wandb.log({'loss': loss, 'accuracy': acc})
                val_loss, val_acc = model.evaluate(valid_data.batch(self.batch_size),verbose=0)
                wandb.log({'val_loss': val_loss, 'val_accuracy': val_acc})
        return model

    def predict(self, data):
        N = data.__len__().numpy()
        X_data = data.map(lambda x,y: x).batch(1)
        y_pred = np.zeros((N, 2))

        for i, model in enumerate(self.models):
            y_pred += self.alphas[i] * model.predict(X_data, verbose=0)

        return y_pred


In [15]:
adaboost = Adaboost(10, base_model_builder, epochs=50, batch_size=config.batch_size, monitor=True)
data = datasets['PixSoft_data']
adaboost.fit(data[0], data[1])

Model 1/10
 Train
 loss: 0.04631476104259491, accuracy: 0.9745097756385803
 Valid
 loss: 0.7313240170478821, accuracy: 0.8500000238418579
 Test
 loss: 1.4020709991455078, accuracy: 0.8367347121238708
 Ensemble test
 Test
 loss: 1.3848306463158329, accuracy: 0.8367346938775511
[[ 7  1]
 [ 7 34]]
Accuracy 0.8367346938775511, Precision: 0.9714285714285714, Recall: 0.8292682926829268, Balanced accuracy: 0.8521341463414633

Model 2/10
 Train
 loss: 0.042452000081539154, accuracy: 0.9823529124259949
 Valid
 loss: 0.156570702791214, accuracy: 0.8999999761581421
 Test
 loss: 0.4029144048690796, accuracy: 0.8571428656578064
 Ensemble test
 Test
 loss: 0.4555032386901087, accuracy: 0.8571428571428571
[[ 7  1]
 [ 6 35]]
Accuracy 0.8571428571428571, Precision: 0.9722222222222222, Recall: 0.8536585365853658, Balanced accuracy: 0.8643292682926829

Model 3/10
 Train
 loss: 0.08119933307170868, accuracy: 0.9647058844566345
 Valid
 loss: 0.7740867137908936, accuracy: 0.75
 Test
 loss: 0.712430000305175

In [16]:
print('Final result:\n')
y_pred = adaboost.predict(test_data)
accuracy, precision, recall, balanced_acc = compute_metrics(y_pred)

Final result:

 Test
 loss: 0.314870844153297, accuracy: 0.8979591836734694
[[ 8  0]
 [ 5 36]]
Accuracy 0.8979591836734694, Precision: 1.0, Recall: 0.8780487804878049, Balanced accuracy: 0.9390243902439024



In [18]:
with open("results/results_pix.txt", "w") as f:
    f.write("{:.5f}, {:.5f}, {:.5f}, {:.5f}".format(accuracy, precision, recall, balanced_acc))

In [19]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁▂███▄█▆█▅███▅▂▇█▃▇█▁▄▅█▁▇▆█▁███▃▂██▄▆██
ensemble_accuracy,▁▃▃▃▃█████
ensemble_loss,█▂▂▂▁▁▁▁▁▁
loss,▃▃▁▁▁▂▁▁▁▂▁▁▁▃▅▂▁▂▁▁▂█▇▁▃▁▁▁▃▁▁▁▂▂▁▁▃▂▁▁
val_accuracy,▂▂█▇▃▂▇█▆▄▇▇▇▅▆▅▇▃▇▇▂▅▇▇▁▇▆▄▂██▇▂▂▅▄▄▇▁▃
val_loss,▂▂▁▁▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▂▁▁▁▂█▁▁▁▁▂▁

0,1
accuracy,0.94902
ensemble_accuracy,0.89796
ensemble_loss,0.31487
loss,0.15445
val_accuracy,0.6
val_loss,0.5005
