In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Fix randomness and hide warnings
# A single seed value is reused for every random number generator seeded in this cell.
seed = 42

import os
# These environment variables are set before TensorFlow is imported further down.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it
# here probably only affects child processes — confirm if hash determinism matters.
os.environ['PYTHONHASHSEED'] = str(seed)
# Point matplotlib's config directory at ./configs/ under the current working directory.
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# `Warning` is the base class of all warning categories, so this silences every warning
# (including the FutureWarning filter installed on the previous line).
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
# Seed NumPy's legacy global random generator.
np.random.seed(seed)

import logging

import random
# Seed Python's built-in random module.
random.seed(seed)

# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
# Reduce TensorFlow log output to errors only (autograph, TF logger and the v1 logger).
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow through both the current and the compat.v1 API.
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
# Print the TensorFlow version in use.
print(tf.__version__)

In [None]:
# Import other libraries
import cv2
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Open the cleaned dataset archive and extract the image and label arrays.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; only use it
# on archives from a trusted source.
data = np.load('/kaggle/input/cleaned-dataset/cleaned_dataset.npz', allow_pickle=True)
X, Y = data["data"], data["labels"]

# Display both array shapes as the cell output
X.shape, Y.shape

In [None]:
# Rebuild X and Y from the loaded archive instead of from their previous values, so that
# re-running this cell cannot divide the images by 255 twice or remap the already
# converted labels (which would turn every label into 1).

# Scale pixel values by 1/255 and store them as float32
# (assumes the stored images are in the 0-255 range — TODO confirm).
X = (data["data"] / 255).astype(np.float32)

# Binary targets: 0 for "healthy", 1 for every other label.
# A vectorised comparison replaces the per-element Python loop and yields an integer array.
Y = (data["labels"] != "healthy").astype(np.int64)

# Show a compact summary (shape, dtype, class counts) instead of echoing the full arrays
X.shape, X.dtype, np.unique(Y, return_counts=True)

In [None]:
# Number of images to display
num_img = 30

# Two rows; enough columns to hold num_img pictures even when num_img is odd
n_cols = (num_img + 1) // 2
fig, axes = plt.subplots(2, n_cols, figsize=(20, 9), squeeze=False)

# Hide the axes of every cell up front so unused cells stay blank
for ax in axes.flat:
    ax.axis('off')

# Fill the grid column by column: even indices go to the top row, odd ones to the bottom
for i in range(min(num_img, len(X))):
    ax = axes[i % 2, i // 2]
    # X was scaled by 1/255 in the previous cell, so the valid display range for these
    # float images is [0, 1] (not [0, 255])
    ax.imshow(np.clip(X[i], 0, 1))
plt.tight_layout()
plt.show()

In [None]:
# Build the train / validation / test partitions.
# Validation and test each receive a fixed 520 samples; the rest is used for training.

# One-hot encode the binary labels into two columns
y = tfk.utils.to_categorical(Y, 2)

# Hold out the test set first, stratifying on the class index
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=520,
    stratify=y.argmax(axis=1),
    random_state=seed,
)

# Then carve the validation set out of the remainder, again stratified
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=520,
    stratify=y_train_val.argmax(axis=1),
    random_state=seed,
)

# Report the resulting shapes
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

In [None]:
# Instantiate the ImageNet-pretrained ConvNeXtXLarge backbone without its classification
# head; pooling='avg' is requested so the backbone output is pooled.
# (The variable is called `mobile` although the network is a ConvNeXt.)
mobile = tfk.applications.ConvNeXtXLarge(
    weights="imagenet",
    include_top=False,
    pooling='avg',
    input_shape=(96, 96, 3),
)

# Print the backbone architecture
mobile.summary()

In [None]:
# Transfer-learning model: frozen ConvNeXt backbone + augmentation + a 2-way softmax head.

# Freeze every backbone weight so only the new classifier is trained
mobile.trainable = False

# Model input: 96x96 RGB images
inputs = tfk.Input(shape=(96, 96, 3))

# Random translation and flipping layers, grouped in a Sequential
# (the layer keeps the name 'preprocessing')
augmentation = tfk.Sequential(
    [
        tfkl.RandomTranslation(height_factor=(-0.2, 0.3), width_factor=(-0.2, 0.3)),
        tfkl.RandomFlip(mode="horizontal_and_vertical", seed=None),
    ],
    name='preprocessing',
)
augmented = augmentation(inputs)

# Feed the augmented images through the backbone
x = mobile(augmented)

# Classifier: one Dense layer with 2 units and softmax activation
outputs = tfkl.Dense(2, activation='softmax')(x)

# Assemble the end-to-end model
tl_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Categorical cross-entropy loss, Adam with default settings, accuracy as the metric
tl_model.compile(
    optimizer=tfk.optimizers.Adam(),
    loss=tfk.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Print the assembled architecture
tl_model.summary()

In [None]:
# Train the classifier head on top of the frozen backbone.

# Stop once validation accuracy has not improved for 20 epochs and restore the best weights
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=20,
    restore_best_weights=True,
)

# Images are rescaled from [0, 1] back to 0-255 and passed through `preprocess_input`,
# which this notebook imports from `efficientnet_v2` (not from a MobileNetV2 module).
# NOTE(review): per the Keras docs that function returns its input unchanged and ConvNeXt
# normalises its inputs internally by default — confirm for the installed version.
tl_history = tl_model.fit(
    preprocess_input(X_train*255),
    y_train,
    validation_data=(preprocess_input(X_val*255), y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stopping],
).history

In [None]:
# Evaluate the transfer-learning model on the held-out test set
from sklearn.metrics import precision_score, recall_score

# Rescale to 0-255 and preprocess the test images once; the result is reused by both
# evaluate() and predict() instead of being recomputed for each call
X_test_prep = preprocess_input(X_test*255)

# The model is compiled with a single metric, so evaluate() returns [loss, accuracy];
# unpacking fails loudly if that ever changes, unlike indexing with [-1]
test_loss, test_accuracy = tl_model.evaluate(X_test_prep, y_test, verbose=0)
print('Test set accuracy %.4f' % test_accuracy)

# Class indices as NumPy arrays on both sides, so scikit-learn receives matching types
y_pred = np.argmax(tl_model.predict(X_test_prep), axis=-1)
y_test_true = np.argmax(y_test, axis=-1)

# Calculate precision and recall (positive class is index 1)
precision = precision_score(y_test_true, y_pred)
recall = recall_score(y_test_true, y_pred)

# Print the precision and recall
print("Precision:", precision)
print("Recall:", recall)

In [None]:
# Save the best model
# Writes the transfer-learning model to 'Bestione_con_aug' (training used
# restore_best_weights=True, so these are the best-validation-accuracy weights).
tl_model.save('Bestione_con_aug')
# Drop the in-memory reference; the fine-tuning section reloads the model from disk.
del tl_model

**FINE TUNING**

In [None]:
# Re-load the model after transfer learning
# Reads back the model that was saved under 'Bestione_con_aug' and binds it to `ft_model`.
ft_model = tfk.models.load_model('Bestione_con_aug')
# Print the architecture of the reloaded model.
ft_model.summary()

In [None]:
# Build the fine-tuning model from the reloaded transfer-learning model.

# Pull the ConvNeXt backbone out of the reloaded model
layers = ft_model.get_layer('convnext_xlarge')

# Also keep the classifier head that was trained during transfer learning. The backbone
# was frozen in that stage, so the head holds everything that was learned; replacing it
# with a freshly initialised Dense layer would throw that training away and start
# fine-tuning from a random classifier.
trained_head = next(
    layer for layer in reversed(ft_model.layers) if isinstance(layer, tfkl.Dense)
)

# Unfreeze the backbone so its weights can be updated during fine-tuning
layers.trainable = True

# Create an input layer with shape (96, 96, 3)
# NOTE(review): the augmentation block of the reloaded model is not reused here, so
# fine-tuning runs without augmentation — confirm this is intended.
inputs = tfk.Input(shape=(96, 96, 3))

# Connect ConvNeXtXLarge to the input
x = layers(inputs)

# Dropout between the pooled features and the classifier
x = tfkl.Dropout(0.2)(x)

# Reuse the trained 2-unit softmax classifier on top of the dropout output
outputs = trained_head(x)

# Create a Model connecting input and output
ft_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Compile with Categorical Cross-Entropy loss and AdamW; the model is compiled again with
# a much smaller learning rate before the fine-tuning fit
ft_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.AdamW(), metrics=['accuracy'])

# Display model summary
ft_model.summary()

In [None]:
# Freeze the first N (= 250) layers of the backbone; the remaining ones stay trainable
N = 250
backbone = ft_model.get_layer('convnext_xlarge')
for frozen_layer in backbone.layers[:N]:
    frozen_layer.trainable = False

# List every backbone layer with its index, name and trainable flag
for idx, backbone_layer in enumerate(backbone.layers):
    print(idx, backbone_layer.name, backbone_layer.trainable)

# Print the model summary after the trainable flags were changed
ft_model.summary()

In [None]:
# Compile the model again now that the trainable flags have been changed, using Adam with
# a small learning rate (1e-5) for fine-tuning. `metrics` is passed as a list, matching
# the other compile calls in this notebook (a bare string is rejected by newer Keras).
ft_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(1e-5), metrics=['accuracy'])

In [None]:
# Fine-tune the partially unfrozen model.

# Stop once validation accuracy has not improved for 20 epochs and restore the best weights
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=20,
    restore_best_weights=True,
)

# Images are rescaled from [0, 1] back to 0-255 and passed through `preprocess_input`,
# which this notebook imports from `efficientnet_v2` (not from a MobileNetV2 module).
# NOTE(review): per the Keras docs that function returns its input unchanged and ConvNeXt
# normalises its inputs internally by default — confirm for the installed version.
ft_history = ft_model.fit(
    preprocess_input(X_train*255),
    y_train,
    validation_data=(preprocess_input(X_val*255), y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stopping],
).history

In [None]:
# Evaluate the fine-tuned model on the held-out test set
from sklearn.metrics import precision_score, recall_score

# Rescale to 0-255 and preprocess the test images once; the result is reused by both
# evaluate() and predict() instead of being recomputed for each call
X_test_prep = preprocess_input(X_test*255)

# The model is compiled with a single metric, so evaluate() returns [loss, accuracy];
# unpacking fails loudly if that ever changes, unlike indexing with [-1]
test_loss, test_accuracy = ft_model.evaluate(X_test_prep, y_test, verbose=0)
print('Test set accuracy %.4f' % test_accuracy)

# Class indices as NumPy arrays on both sides, so scikit-learn receives matching types
y_pred = np.argmax(ft_model.predict(X_test_prep), axis=-1)
y_test_true = np.argmax(y_test, axis=-1)

# Calculate precision and recall (positive class is index 1)
precision = precision_score(y_test_true, y_pred)
recall = recall_score(y_test_true, y_pred)

# Print the precision and recall
print("Precision:", precision)
print("Recall:", recall)

In [None]:
# Save the best model
# Writes the fine-tuned model to 'bestion_aug_finetuning_dropout' (training used
# restore_best_weights=True, so these are the best-validation-accuracy weights).
ft_model.save('bestion_aug_finetuning_dropout')
# Drop the in-memory reference to the fine-tuned model.
del ft_model