# Colab Setup

In [1]:
!pip install -q --upgrade keras-cv

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m613.1/613.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): `google.colab` is presumably only importable on Colab —
# confirm before running this cell in a local environment.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Switch the working directory to the NoLaTeX folder under the mounted Drive.
%cd /content/drive/MyDrive/Colab Notebooks/NoLaTeX

/content/drive/MyDrive/Colab Notebooks/NoLaTeX


In [3]:
# Print the current working directory (sanity check before the relative install below).
!pwd

/home/chilleex/code/ChilleeX/NoLaTeX/notebooks


In [4]:
!pip install -e ..

Obtaining file:///home/chilleex/code/ChilleeX/NoLaTeX
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: nolatex
  Attempting uninstall: nolatex
    Found existing installation: nolatex 0.0.1
    Uninstalling nolatex-0.0.1:
      Successfully uninstalled nolatex-0.0.1
  Running setup.py develop for nolatex
Successfully installed nolatex-0.0.1


In [None]:
# Uncomment the lines below to have the execution time displayed for every cell

# !pip install ipython-autotime
# %load_ext autotime

# Imports

In [1]:
# Standard library
import math
import os

# Third-party packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#import tensorflow as tf
from keras.callbacks import Callback, ModelCheckpoint
from keras.models import load_model
from keras.optimizers import SGD
from keras.utils import load_img

from keras_cv import visualization
from keras_cv.callbacks import PyCOCOCallback
from keras_cv.layers import NonMaxSuppression, Resizing
from keras_cv.models import YOLOV8Detector

# Project-local helpers
from nolatex.ml_logic.utils import load_dataset

2024-03-15 14:17:46.680785: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-15 14:17:47.474517: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-15 14:17:52.210528: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


# Define Paths

In [2]:
# Annotation file handed to load_dataset as `json_path`.
json_path = "../raw_data/JSON Kaggle Dataset/kaggle_data_1.json"
# Image directory handed to load_dataset as `img_dir`.
img_dir = "../initial_test_data/batch_1_salmple100"

# Load Data into the right format

In [3]:
# Load the images and annotations; the helper returns the dataset, the
# class mapping and a third value kept here as `low_contrast_imgs`.
dataset, class_mapping, low_contrast_imgs = load_dataset(
    json_path=json_path,
    img_dir=img_dir,
)

# One detector output class per entry in the class mapping.
num_classes = len(class_mapping)

: 

In [None]:
# Size in bytes of the `dataset` object as reported by sys.getsizeof.
# NOTE(review): getsizeof is shallow — it does not count the objects a
# container refers to, so this likely understates the real memory footprint.
from sys import getsizeof
getsizeof(dataset)

# Image Resizing

In [None]:
# Resizing layer: 640x640 target, padding to keep the aspect ratio, with
# bounding boxes expressed in "xywh" format.
inference_resizing = Resizing(
    height=640,
    width=640,
    bounding_box_format="xywh",
    pad_to_aspect_ratio=True,
)

In [None]:
# Pass the loaded dataset through the `inference_resizing` layer.
dataset_resized = inference_resizing(dataset)

# Starting the model

In [None]:
# Output location of the saved model; with an empty model_dir, model_path is
# relative to the current working directory.
model_dir = ""
model_name = "name" + ".keras"
model_path = os.path.join(model_dir, model_name)

# Model parameters: preset name handed to YOLOV8Detector.from_preset.
backbone_model = "resnet50_imagenet"
# Optimizer parameters (consumed by the SGD optimizer).
base_lr = 0.005
momentum = 0.9
global_clipnorm = 10.0
# Loss parameters (consumed by model.compile).
classification_loss = "binary_crossentropy"
box_loss = "ciou"
# Training parameters (consumed by model.fit and the batch-count computation).
number_epochs = 1
batch_size = 32

In [None]:
# Build the detector from the configured preset, with one output class per
# entry of the class mapping and boxes in "xywh" format.
model = YOLOV8Detector.from_preset(
    backbone_model,
    num_classes=num_classes,
    bounding_box_format="xywh",
)

In [None]:
# SGD with momentum. Setting global_clipnorm is extremely important in
# object detection tasks.
optimizer = SGD(
    learning_rate=base_lr,
    momentum=momentum,
    global_clipnorm=global_clipnorm,
)

In [None]:
# Attach the optimizer and the two detection losses configured above.
model.compile(
    optimizer=optimizer,
    box_loss=box_loss,
    classification_loss=classification_loss,
)

In [None]:
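# TODO: needs to be tested. The snippet below refers to `resized_data`, which is not defined in the visible cells (the resized dataset is named `dataset_resized`).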
# coco_metrics_callback = PyCOCOCallback(
#     resized_data, bounding_box_format="xywh"
# )

In [None]:
# Checkpoint file-name template that includes the epoch number.
# `{epoch:04d}` is deliberately left as a literal placeholder in a plain
# string (not an f-string): ModelCheckpoint fills it in at save time and
# `str.format` is used below for the initial save. An f-string here would be
# evaluated immediately and raise NameError, since `epoch` is not defined.
# NOTE(review): Keras 3 requires weight files to end in `.weights.h5` —
# confirm the installed Keras version accepts the `.ckpt` suffix.
checkpoint_path = os.path.join(model_dir, model_name + "-cp-{epoch:04d}.ckpt")
checkpoint_dir = os.path.dirname(checkpoint_path)

# Number of batches per epoch, rounded up so a final partial batch counts.
n_batches = math.ceil(len(dataset['images']) / batch_size)

# Callback that saves only the model's weights at the end of every epoch.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq="epoch",
    # save_freq=5 * n_batches,  # alternative: save every 5 epochs
)

# Save the initial weights as "epoch 0" using the same file-name template.
model.save_weights(checkpoint_path.format(epoch=0))

In [None]:
# Notes from earlier runs: by just following the example notebook, one epoch
# took about 45 minutes; fitting with 16 images did not complete — Colab
# broke at around the 3-minute mark.
# Run for roughly 10-35 epochs to achieve good scores.
training_callbacks = [cp_callback]  # alternative: [coco_metrics_callback]
model.fit(
    dataset_resized,
    epochs=number_epochs,
    batch_size=batch_size,
    callbacks=training_callbacks,
)

# Persist the complete trained model.
model.save(model_path)

# Loading Models

In [None]:
# Restore previously saved weights into an existing model.
# Important: the model needs to be initialized before loading weights.
# NOTE(review): this hard-coded path does not follow the checkpoint naming
# used when saving weights in this notebook — confirm which file is intended.
weights_path = './checkpoints/my_checkpoint'
model.load_weights(weights_path)

In [None]:
# Load a complete model (architecture, weights and compile state).
# The file is read from `model_path`, i.e. the same location the trained
# model is saved to, instead of a hard-coded name this notebook never writes.
new_model = load_model(model_path)

# Plotting predictions

In [None]:
# Load a single example image, convert it to a NumPy array and pass it
# (wrapped in a list) through the same resizing layer used for the dataset.
test_image_path = "/content/drive/MyDrive/Pitch/test data/example_img.jpg"
image_test = np.array(load_img(test_image_path))
image_test_resized = inference_resizing([image_test])

In [None]:
# Simple visualization (already tested).
# Per the original note, y_pred is a bounding-box structure of the form
# {"classes": ..., "boxes": ...}.
y_pred = model.predict(image_test_resized)

# Plot the single test image together with its predicted boxes.
gallery_options = dict(
    value_range=(0, 255),
    bounding_box_format="xywh",
    class_mapping=class_mapping,
    y_pred=y_pred,
    rows=1,
    cols=1,
    scale=5,
    font_scale=0.7,
)
visualization.plot_bounding_box_gallery(image_test_resized, **gallery_options)

In [None]:
# Needs to be tested.
# Swap the model's prediction decoder for a non-max-suppression layer.
nms_decoder = NonMaxSuppression(
    iou_threshold=0.5,
    confidence_threshold=0.75,
    from_logits=True,
    bounding_box_format="xywh",
)
model.prediction_decoder = nms_decoder

# NOTE(review): `visualize_detections` and `visualization_ds` are not defined
# in the visible cells — they must exist before this call can run.
visualize_detections(model, bounding_box_format="xywh", dataset=visualization_ds)

# In the example notebook, visualization_ds is an evaluation set:
# visualization_ds = eval_ds.unbatch()
# visualization_ds = visualization_ds.ragged_batch(16)
# visualization_ds = visualization_ds.shuffle(8)

# Experimental Stuff


In [None]:
class VisualizeDetections(Callback):
    """Keras callback that calls `visualize_detections` after every epoch.

    Subclasses the imported `Callback` directly: the bare name `keras` is never
    bound by this notebook's `from keras... import ...` statements, so
    `keras.callbacks.Callback` would raise NameError.

    NOTE(review): relies on `visualize_detections` and `visualization_ds`,
    neither of which is defined in the visible cells — define them before
    using this callback.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Run the detection visualization with the model being trained.

        Args:
            epoch: Epoch index supplied by Keras (unused here).
            logs: Metrics dictionary supplied by Keras (unused here); defaults
                to None to match the standard Keras callback signature.
        """
        visualize_detections(
            self.model, bounding_box_format="xywh", dataset=visualization_ds
        )
