In [1]:
# Fix randomness and hide warnings
# One seed is shared by the Python, NumPy and TensorFlow RNGs seeded below.
seed = 42

import os
# These environment variables are set before `import tensorflow` further down.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): setting PYTHONHASHSEED here cannot change hash randomization of
# the already-running interpreter; presumably meant for child processes -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
# Matplotlib config directory is pointed at ./configs/ under the current working directory.
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# `Warning` is the base class of every warning category, so this filter
# silences all warnings (and makes the FutureWarning filter above redundant).
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
np.random.seed(seed)

import logging

import random
random.seed(seed)

# Tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
# NOTE(review): `tfkl` comes from the standalone `keras` package while `tfk`
# comes from `tensorflow.keras`; presumably the same implementation here -- confirm.
from keras import layers as tfkl
# Reduce TensorFlow log output to errors only.
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow through both the TF2 and the compat.v1 entry points.
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(tf.__version__)

# Some libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import seaborn as sns

# Load the dataset archive. The context manager closes the underlying file
# handle once both arrays have been read into memory (the original left the
# NpzFile open for the lifetime of the kernel).
# NOTE: allow_pickle=True can execute arbitrary code when given a malicious
# archive -- only load files from a trusted source.
with np.load('public_data.npz', allow_pickle=True) as input_file:
    data = input_file['data']
    labels = input_file['labels']

# Encode the string class names as integers; an unknown label raises KeyError.
label_dict = {'healthy': 0, 'unhealthy': 1}
labels = np.array([label_dict[label] for label in labels])


2.14.0


In [2]:
# Manual rescaling to [0, 1] is intentionally disabled:
#data = data/255.0

# NOTE(review): for EfficientNetV2 the Keras `preprocess_input` is presumably a
# pass-through (the model rescales internally) -- confirm for the installed version.
# `data` is rebound in place, so this cell is only safe to re-run if that holds.
from keras.applications.efficientnet_v2 import preprocess_input
data = preprocess_input(data)

In [3]:
# Drop every copy of two known outlier images from the dataset.
# The reference copies live at fixed positions in the raw `data` array.
SHREK_REF_INDEX = 506
TROLL_REF_INDEX = 338

# `labels` is overwritten at the end of this cell, so re-running it without
# reloading the data would pair images with the wrong labels. Fail loudly instead.
if len(labels) != len(data):
    raise ValueError(
        "labels and data are misaligned (%d vs %d); re-run the data loading cell first"
        % (len(labels), len(data))
    )

shrek_reference = data[SHREK_REF_INDEX]
troll_reference = data[TROLL_REF_INDEX]

shrek_indices = []
trol_indices = []
new_data = []
new_labels = []
for i, image in enumerate(data):
    # Exact element-wise comparison. Summing the difference (as before) can be
    # zero for different images when positive and negative deltas cancel, or
    # when unsigned integer subtraction wraps around.
    if np.array_equal(image, shrek_reference):
        shrek_indices.append(i)
    elif np.array_equal(image, troll_reference):
        trol_indices.append(i)
    else:
        new_data.append(image)
        new_labels.append(labels[i])

images = np.array(new_data)
labels = np.array(new_labels)

In [4]:
# Hold out 10% of the cleaned images for validation, stratified on the labels
# so both splits keep the same class proportions.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.1,
    random_state=seed,
    stratify=labels,
)

In [5]:
# Per-sample shape: every axis of X_train except the leading (sample) axis.
input_shape = X_train.shape[1:]  # Input shape for the model

In [6]:
# ImageNet-pretrained EfficientNetV2-M backbone without its classification head.
# The spatial shape is taken from `input_shape` (derived from X_train) instead of
# a second hard-coded copy, so the backbone always matches the Input layer.
# pooling='max' makes the backbone emit one feature vector per image.
learned_model = tfk.applications.EfficientNetV2M(
    input_shape=input_shape,
    include_top=False,
    weights="imagenet",
    pooling='max',
)

In [7]:
# Start with every backbone layer trainable; the next cell re-freezes the first N layers.
learned_model.trainable = True

In [8]:
# Freeze the first N backbone layers; everything after them stays trainable.
N = 85
for frozen_layer in learned_model.layers[:N]:
    frozen_layer.trainable = False

In [9]:
# Random augmentation pipeline, packaged as one Sequential block named 'preprocessing'.
# NOTE(review): GaussianNoise(0.3) is very small if pixels are still on a 0-255
# scale -- confirm the intended input range.
augmentation_layers = [
    tfkl.RandomFlip("horizontal_and_vertical"),
    tfkl.RandomRotation(0.9),
    tfkl.GaussianNoise(0.3),
    tfkl.RandomContrast(0.4),
    tfkl.RandomBrightness(0.4),
]
preprocessing = tf.keras.Sequential(augmentation_layers, name='preprocessing')

# Model entry point: raw images go through the augmentation block first.
input_layer = tfkl.Input(shape=input_shape)
x = preprocessing(input_layer)

In [10]:
# Feed the augmented tensor through the backbone.
# NOTE(review): `x` is rebound in place, so re-running only this cell would feed
# backbone features back into the backbone; re-run the augmentation cell first.
x = learned_model(x)

In [11]:
from keras import regularizers

# Classification head on top of the backbone features:
# Flatten -> 5 x (Dropout 0.2 -> regularized ReLU Dense) -> Dropout 0.2 -> sigmoid unit.
x = tfkl.Flatten(name='flattenLast')(x)
for block_idx, units in enumerate((512, 256, 128, 64, 16), start=1):
    x = tfkl.Dropout(0.2)(x)
    x = tfkl.Dense(
        units,
        activation='relu',
        name=f'Dense_{block_idx}',
        kernel_regularizer=regularizers.l1_l2(0.01),
    )(x)
x = tfkl.Dropout(0.2)(x)
output_layer = tfkl.Dense(1, activation='sigmoid', name='output')(x)

# End-to-end model: augmentation + backbone + head.
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

In [12]:
# Binary cross-entropy objective, AdamW at a small fine-tuning learning rate.
model.compile(
    loss=tfk.losses.BinaryCrossentropy(),
    optimizer=tfk.optimizers.AdamW(1e-5),
    metrics='accuracy',
)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 96, 96, 3)]       0         
                                                                 
 preprocessing (Sequential)  (None, 96, 96, 3)         0         
                                                                 
 efficientnetv2-m (Function  (None, 1280)              53150388  
 al)                                                             
                                                                 
 flattenLast (Flatten)       (None, 1280)              0         
                                                                 
 dropout (Dropout)           (None, 1280)              0         
                                                                 
 Dense_1 (Dense)             (None, 512)               655872    
                                                             

In [13]:
# Early-stopping callback on validation loss.
# NOTE(review): no `fit` call visible in this notebook passes this callback -- confirm
# whether it should be added to `callbacks`.
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=30,
    restore_best_weights=True,
    verbose=1,
)

In [14]:
# Keep only the checkpoint with the highest validation accuracy seen so far.
model_checkpoint = tfk.callbacks.ModelCheckpoint(
    'best_model_finetune.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)

In [15]:
from sklearn.utils.class_weight import compute_class_weight
# Fixed per-class loss weights (class 0 down-weighted, class 1 up-weighted).
# NOTE(review): presumably rounded output of
# compute_class_weight('balanced', classes=np.unique(y_train), y=y_train) -- confirm.
class_weights_dict = dict([(0, 0.81), (1, 1.29)])

print(f"Class weights: {class_weights_dict}")

Class weights: {0: 0.81, 1: 1.29}


In [16]:
# First training stage: 30 epochs at batch size 32, checkpointing the best
# validation accuracy and applying the class weights to the loss.
batch_size = 32
epochs = 30
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=[model_checkpoint],
    validation_data=(X_val, y_val),
    class_weight=class_weights_dict,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [23]:
# Second training stage: 10 more epochs at a smaller batch size (16).
# Note that `history` from the previous stage is overwritten.
batch_size = 16
epochs = 10
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    callbacks=[model_checkpoint],
    validation_data=(X_val, y_val),
    class_weight=class_weights_dict,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [26]:
# Reload the checkpointed weights and score them on the TRAINING split.
# (This was previously labelled "test", but the data passed in is X_train/y_train.)
model.load_weights("best_model_finetune.h5")
train_loss, train_accuracy = model.evaluate(X_train, y_train, verbose=1)

print("Train loss:", train_loss)
print("Train accuracy:", train_accuracy)

Test loss: 0.7730275988578796
Test accuracy: 0.9606928825378418


In [27]:
# Score the currently loaded weights on the VALIDATION split.
# (This was previously labelled "test"; no separate test set exists in this notebook.)
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=1)

print("Validation loss:", val_loss)
print("Validation accuracy:", val_accuracy)

Test loss: 0.8219051361083984
Test accuracy: 0.9241517186164856


In [23]:
# Score the currently loaded weights on the VALIDATION split.
# (This was previously labelled "test"; the data passed in is X_val/y_val.)
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=1)

print("Validation loss:", val_loss)
print("Validation accuracy:", val_accuracy)

Test loss: 0.3704891800880432
Test accuracy: 0.9241517186164856


In [28]:
# Persist the full model (architecture + weights) under the path 'saved_model'.
model.save('saved_model')

In [13]:
# NOTE(review): no cell shown in this notebook writes "best_model.h5" (the
# checkpoint callback writes "best_model_finetune.h5") -- confirm this file's origin.
model.load_weights("best_model.h5")

In [24]:
# Recompile with plain Adam at a very small learning rate and train for 10 epochs
# on the validation split, monitoring on the training split.
# NOTE(review): the train/validation roles are swapped here -- presumably a
# deliberate final pass to use the held-out data; any X_val metric computed
# afterwards is no longer an unbiased estimate. Confirm intent.
batch_size = 16
epochs = 10
model.compile(
    loss=tfk.losses.BinaryCrossentropy(),
    optimizer=tfk.optimizers.Adam(1e-6),
    metrics='accuracy',
)
history = model.fit(
    X_val,
    y_val,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_train, y_train),
    class_weight=class_weights_dict,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [33]:
# Snapshot only the weights of the current model to an HDF5 file.
model.save_weights("96-97_acc.h5")

In [38]:
# Save the full model again, overwriting whatever was previously at 'saved_model'.
model.save("saved_model")

In [33]:
from keras.models import Model

# Feature extractor: shares the classifier's input and stops at the
# 'flattenLast' layer, i.e. at the backbone's pooled feature vector.
intermediate_layer = model.get_layer('flattenLast')
new_model = Model(inputs=model.input, outputs=intermediate_layer.output)

# Smoke test: embed a single training image and check the output shape.
first_image_batch = X_train[:1]
embeddings = new_model(first_image_batch)

print("Embeddings shape:", embeddings.shape)

Embeddings shape: (1, 1280)


In [36]:
from sklearn.metrics.pairwise import cosine_similarity

# Compare the embeddings of the first three training images against the fourth.
query_embeddings = new_model(X_train[:3])
reference_embedding = new_model(X_train[3:4])
cosine_sim_matrix = cosine_similarity(query_embeddings, reference_embedding)
cosine_sim_matrix

array([[0.9210138 ],
       [0.9158238 ],
       [0.92049253]], dtype=float32)

In [37]:
# Embed the whole dataset in mini-batches. Calling `new_model(images)` directly
# pushes every image through the network in one forward pass, which can exhaust
# memory; `predict` batches the work and returns a NumPy array.
embeddings = new_model.predict(images, batch_size=32, verbose=0)

# Pairwise cosine similarity between all image embeddings (n_images x n_images).
similarity_matrix = cosine_similarity(embeddings)

array([[ 1.0000001 ,  0.884811  ,  0.84379697, ..., -0.5961801 ,
         0.79545754,  0.8370678 ],
       [ 0.884811  ,  1.0000004 ,  0.95030785, ..., -0.38202488,
         0.9380702 ,  0.92915016],
       [ 0.84379697,  0.95030785,  1.0000002 , ..., -0.28425545,
         0.9600454 ,  0.9284129 ],
       ...,
       [-0.5961801 , -0.38202488, -0.28425545, ...,  1.        ,
        -0.20630479, -0.33521122],
       [ 0.79545754,  0.9380702 ,  0.9600454 , ..., -0.20630479,
         1.0000001 ,  0.9270675 ],
       [ 0.8370678 ,  0.92915016,  0.9284129 , ..., -0.33521122,
         0.9270675 ,  0.9999999 ]], dtype=float32)

In [60]:
# Sigmoid output of the classifier for every cleaned image.
probs = model.predict(images)



In [65]:
# Sanity check: selecting column 0 yields one score per image.
probs[:,0].shape

(5004,)

In [71]:
# Image indices ordered from highest to lowest score (argsort of the negated scores).
sorted_indices = np.argsort(-probs[:,0])

In [73]:
# Scores of the ten highest-scoring images.
probs[sorted_indices[:10]]

array([[0.99034846],
       [0.9879616 ],
       [0.9867209 ],
       [0.98564816],
       [0.9852684 ],
       [0.98370355],
       [0.9834063 ],
       [0.9821669 ],
       [0.9819166 ],
       [0.98140407]], dtype=float32)

In [74]:
# Dataset indices of the ten highest-scoring images.
sorted_indices[:10]

array([ 804, 2291, 2940, 2739,  209,   96,  423, 3350, 3836, 2734])

In [None]:
# Threshold the sigmoid scores at 0.5 to get hard 0/1 class predictions.
# Derived from `probs` (the model output computed above) rather than from a
# `predictions` variable that no cell in this notebook defines.
predictions = (probs[:, 0] > 0.5).astype(int)

In [86]:
# Indices of images labelled 1 whose prediction disagrees with the label.
is_wrong = predictions != labels
is_positive = labels == 1
mislabeled_indices = np.where(is_wrong & is_positive)[0]

In [87]:
# Gather the misclassified label-1 images themselves.
mislabeled_imgs = images[mislabeled_indices]

In [88]:
# Despite the name, these are feature embeddings (output of `new_model`), not class predictions.
msl_predictions = new_model(mislabeled_imgs)

In [76]:
# Embeddings of the ten highest-scoring images, used as reference points.
dist_embeddings = new_model(images[sorted_indices[:10]])

In [89]:
# Cosine similarity of each misclassified image to each of the ten reference images.
sim_m = cosine_similarity(msl_predictions, dist_embeddings)

In [90]:
# Expect (number of misclassified images, 10).
sim_m.shape

(151, 10)

In [92]:
# For every misclassified image, print its highest similarity to any reference image.
for best_match in sim_m.max(axis=1):
    print(best_match)

0.9807346
0.96228737
0.9423652
0.99145913
0.9948994
0.99623567
0.9845739
0.94965637
0.98275465
0.97669923
0.93783194
0.9158164
0.91619456
0.928523
0.99519205
0.9948138
0.9923296
0.9918233
0.9895495
0.9444544
0.9237901
0.9669758
0.94057655
0.95045507
0.9883579
0.9959743
0.9915093
0.6564909
0.9800378
0.9554721
0.9706751
0.9870877
0.9940381
0.80235994
0.587065
0.3121146
0.9646051
0.7997391
0.99345064
0.9941846
0.923667
0.9605049
0.9534553
0.95862633
0.5778718
0.9774114
0.9311223
0.9336894
0.9886695
0.99656165
0.85372055
0.91917145
0.95877135
0.9660882
0.9931451
0.99248946
0.9962543
0.8905393
0.9918245
0.9936083
0.44616234
0.9437507
0.9798285
0.90647894
0.9614972
0.5294775
0.9914633
0.938882
0.94660854
0.89864135
0.99053776
0.5944795
0.95937335
0.94885933
0.9723991
0.9492033
0.97186923
0.9448581
0.9951218
0.9945717
0.991589
0.9543682
0.99386996
0.9954542
0.95208156
0.9624305
0.99160886
0.9704664
0.99054736
0.8164587
0.9673587
0.9612248
0.9912182
0.9114741
0.7568513
0.98157084
0.6950026
0.9