# Data Labelling

In [9]:
!pip install tensorflow


Collecting tensorflow
  Downloading tensorflow-2.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (588.3 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m588.3/588.3 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Collecting flatbuffers>=2.0
  Downloading flatbuffers-23.3.3-py2.py3-none-any.whl (26 kB)
Collecting astunparse>=1.6.0
  Using cached astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting absl-py>=1.0.0
  Using cached absl_py-1.4.0-py3-none-any.whl (126 kB)
Collecting keras<2.12,>=2.11.0
  Downloading keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25hCollecting gast<=0.4.0,>=0.2.1
  Using cached gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1
  Using cached tensorflow_io_gcs_filesystem-0.31.0

In [12]:
!pip install tqdm


Collecting tqdm
  Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.1/77.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.65.0


In [13]:
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from tensorflow import keras
from tensorflow.keras import Model, Sequential, layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from google.cloud import storage
# from google.oauth2 import service_account

# NOTE(review): star import kept as-is; confirm ml_logic.params does not
# redefine any of the names imported above.
from ml_logic.params import *

### Define the train, test and val directory paths that will be passed to `ImageDataGenerator.flow_from_directory` (through `load_images`)

In [14]:
# Root folder of the GI disease image dataset. Defaults to the original
# location; set the GI_DISEASE_DATA_DIR environment variable to relocate it
# without editing the notebook.
GI_DISEASE_DATA_DIR = os.environ.get(
    "GI_DISEASE_DATA_DIR",
    "/root/code/FlorenceBoutin/raw_data/Gi_disease_data",
)

# One sub-folder per data split.
train_directory = os.path.join(GI_DISEASE_DATA_DIR, "train")
test_directory = os.path.join(GI_DISEASE_DATA_DIR, "test")
val_directory = os.path.join(GI_DISEASE_DATA_DIR, "val")



In [15]:
# from keras.preprocessing.image import ImageDataGenerator
# train_normal_directory = ImageDataGenerator(rescale=1./255)

In [27]:
def load_images(path, target_size=(224, 224), batch_size=32, shuffle=True):
    """
    Create a batched image iterator for the directory at ``path``.

    Pixel values are rescaled by 1/255, images are loaded as RGB and labels
    are produced in "categorical" mode.

    Parameters
    ----------
    path : str
        Directory handed to ``ImageDataGenerator.flow_from_directory``.
    target_size : tuple of int, optional
        (height, width) every image is resized to. Defaults to (224, 224).
    batch_size : int, optional
        Number of images per batch. Defaults to 32.
    shuffle : bool, optional
        Whether the iterator shuffles the images. Defaults to True, which is
        the ``flow_from_directory`` default; pass False when predictions must
        stay aligned with the file order (e.g. for evaluation).

    Returns
    -------
    The iterator returned by ``flow_from_directory``.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)
    return datagen.flow_from_directory(
        path,
        target_size=target_size,
        color_mode="rgb",
        batch_size=batch_size,
        class_mode="categorical",
        shuffle=shuffle,
    )


In [28]:
# Build one image iterator per split (training, validation, then test).
train_dataset = load_images(path=train_directory)
val_dataset = load_images(path=val_directory)
test_dataset = load_images(path=test_directory)

In [29]:
# Display the object returned by load_images for the test split.
test_dataset

In [30]:
def initialize_baseline_model(input_shape=(224, 224, 3), num_classes=3) -> Model:
    """
    Initialize the baseline convolutional network with random weights.

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of one input image, passed to the first convolution layer.
        Defaults to (224, 224, 3).
    num_classes : int, optional
        Number of units in the softmax output layer. Defaults to 3, the
        value that was previously hard-coded.

    Returns
    -------
    Model
        The uncompiled Sequential model.

    Raises
    ------
    ValueError
        If ``num_classes`` is smaller than 2.
    """
    if num_classes < 2:
        raise ValueError(f"num_classes must be at least 2, got {num_classes}")

    # Create an instance of the model
    model = Sequential()

    # Convolution blocks; pooling and dropout layers are added to limit overfitting.
    model.add(layers.Conv2D(64, kernel_size=(3,3), input_shape=input_shape, activation='relu', padding='same'))
    model.add(layers.MaxPool2D(pool_size=(3,3)))
    model.add(layers.Dropout(0.3))
    model.add(layers.Conv2D(32, kernel_size=(2,2), activation='relu'))
    model.add(layers.MaxPool2D(pool_size=(2,2)))
    model.add(layers.Conv2D(16, kernel_size=(2,2), activation='relu'))
    model.add(layers.MaxPool2D(pool_size=(2,2)))

    # Flatten and Dense layers
    model.add(layers.Flatten())
    model.add(layers.Dense(15, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.Dense(5, activation='relu'))

    # Output layer: one softmax unit per class
    model.add(layers.Dense(num_classes, activation='softmax'))

    print("✅ baseline model initialized")

    return model

In [31]:
def compile_baseline_model(model: Model, learning_rate=0.001) -> Model:
    """
    Compile the Neural Network.

    Uses categorical cross-entropy as the loss, an Adam optimizer and tracks
    recall and accuracy.

    Parameters
    ----------
    model : Model
        The model to compile (compiled in place).
    learning_rate : float, optional
        Learning rate given to the Adam optimizer. Defaults to 0.001.

    Returns
    -------
    Model
        The same model object, now compiled.
    """
    # Give the metric an explicit name: without it Keras auto-numbers new
    # instances (recall, recall_1, ...) when this function runs again in the
    # same session, and the history keys 'recall' / 'val_recall' would change.
    recall = keras.metrics.Recall(name="recall")
    optimizer = optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy',
               optimizer=optimizer,
               metrics=[recall, 'accuracy'])

    print("✅ model compiled")
    return model

In [32]:
def train_baseline_model(model: Model,
                         train_data,
                         validation_data,
                         patience=5,
                         epochs=30) -> tuple:
    """
    Fit the model with early stopping and return the tuple (fitted_model, history).

    Parameters
    ----------
    model : Model
        A compiled model.
    train_data
        Training data passed straight to ``model.fit``.
    validation_data
        Validation data passed straight to ``model.fit``; early stopping
        monitors "val_loss".
    patience : int, optional
        Early-stopping patience. Defaults to 5.
    epochs : int, optional
        Maximum number of epochs. Defaults to 30, the value that was
        previously hard-coded.

    Returns
    -------
    tuple
        ``(model, history)`` where ``history`` is the object returned by
        ``model.fit``.
    """

    es = EarlyStopping(
        monitor="val_loss",
        patience=patience,
        restore_best_weights=True,
        verbose=0
    )

    history = model.fit(train_data,
        validation_data=validation_data,
        epochs=epochs,
        callbacks=[es],
        verbose=1)

    def _best(prefix):
        # Look the metric up by prefix: the recall key can carry a numeric
        # suffix (e.g. 'val_recall_1'), and a missing metric must not crash
        # the summary after training has already completed.
        key = next((k for k in history.history if k.startswith(prefix)), None)
        if key is None:
            return "n/a"
        return round(np.max(history.history[key]), 2)

    print(f"✅ model trained with accuracy of {_best('val_accuracy')} and a recall of {_best('val_recall')}.")

    return model, history

In [36]:
model = initialize_baseline_model(input_shape=(224, 224, 3))
# Print the layer-by-layer summary rather than the bare object repr,
# which only shows the class name and memory address.
model.summary()

In [34]:
# Compile the baseline model with the Adam learning rate set explicitly.
model = compile_baseline_model(model=model, learning_rate=0.001)

In [35]:
# train_baseline_model returns (model, history); unpack both so that `model`
# stays a model and `history` is defined for the cells that follow.
model, history = train_baseline_model(model,
                         train_dataset,
                         val_dataset,
                         patience=5)

In [24]:
# Display the object currently bound to `model`.
model

In [25]:
# Display the object currently bound to `history`.
# NOTE(review): requires `history` to have been bound by an earlier cell;
# train_baseline_model returns it as the second element of its result tuple.
history