# Colab training routine

## Set Up the Environment

In [None]:
! git clone https://github.com/LeonardoDiCaterina/DL.git

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab filesystem at the path below.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

## Hardware Testing

In [None]:
from psutil import virtual_memory
import tensorflow as tf

# Show which TensorFlow version this runtime provides.
print('TensorFlow version:', tf.__version__)

# Total system memory reported by psutil, converted from bytes to gigabytes (1e9 bytes).
ram_gb = virtual_memory().total / 1e9
print(f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n')

# Runtimes with less than 20 GB are reported as not being high-RAM.
print('Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!')

In [None]:
# List the physical GPU devices TensorFlow can see (an empty list means none).
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPU Available:", gpu_devices)

In [None]:
# Report whether this TensorFlow build was compiled with CUDA support.
print("Built with CUDA:", tf.test.is_built_with_cuda())
# tf.test.is_gpu_available() is deprecated; TensorFlow recommends checking
# tf.config.list_physical_devices('GPU') instead. A non-empty device list
# means at least one GPU is visible to the runtime.
print("Is GPU available:", bool(tf.config.list_physical_devices('GPU')))

In [None]:
# Smoke test: build a tiny convolutional model and run a single random image
# through it to confirm that TensorFlow can execute a forward pass here.
image_shape = (256, 256, 3)
keras_layers = tf.keras.layers

dummy_layers = [
    keras_layers.Input(shape=image_shape),
    keras_layers.Conv2D(32, 3, activation='relu'),
    keras_layers.GlobalAveragePooling2D(),
    keras_layers.Dense(10),
]
dummy_model = tf.keras.Sequential(dummy_layers)

# One random sample (batch size 1) with the same shape as the model input.
dummy_input = tf.random.normal((1, *image_shape))
out = dummy_model(dummy_input)
print(out.shape)

In [None]:
%cd DL

## Prepare the directory for the dataset and preprocess the data

In [None]:
! mkdir /content/DL/data/downloaded_dataset

In [None]:
! gdown --id 1PyxqW_nsORX4PetkQo6OIL0mUL1pFsTD --output /content/DL/data/downloaded_dataset/rare_species.zip

In [None]:
! unzip data/downloaded_dataset/rare_species.zip -d data/downloaded_dataset

Copy the following settings into the `preprocessing_config.py` config file:
```python
DATA_DIR = 'data/downloaded_dataset'
DEST_DIR = 'data/rearranged'
CSV_PATH = f'{DATA_DIR}/metadata.csv'
N_SPLITS = 5 #it's a positive integer
TEST_SIZE = 0.2 # it's a ratio therefore has to be between 0 and 1
OVERSAMPLE = True
LOG_LEVEL = 'INFO'
LABEL_COL = 'family'
```


In [None]:
# Run the repository's preprocessing script as a separate Python process.
# NOTE(review): the config shown in this notebook uses paths relative to the
# working directory (e.g. 'data/downloaded_dataset'), so this presumably has to
# be run from the repository root — confirm against main.py.
! python /content/DL/data_preprocessing/main.py

## Load the dataset

In [None]:
# Import the project's data loader; the import resolves only when the
# repository root is importable (e.g. it is the current working directory).
from data_preprocessing.data_loading import load_data
# load_data() is unpacked into two values: `train_folds` is passed to the
# cross-validation call later in the notebook.
# NOTE(review): `test_ds` is presumably the held-out test dataset — confirm.
train_folds, test_ds = load_data()

## Run Grid Search

Copy the following settings into the `preprocessing_config.py` config file:
```python
DATA_DIR = 'data/downloaded_dataset'
DEST_DIR = 'data/rearranged'
CSV_PATH = f'{DATA_DIR}/metadata.csv'
N_SPLITS = 5 #it's a positive integer
TEST_SIZE = 0.2 # it's a ratio therefore has to be between 0 and 1
OVERSAMPLE = True
LOG_LEVEL = 'INFO'
LABEL_COL = 'family'
```


In [None]:
from training.model_selection_utilis import build_param_grid, evaluated_cross_val

In [None]:
# Search space for the grid search. Each list holds the candidate values for
# one hyper-parameter and is passed positionally to build_param_grid.

# Only ResNet50 is searched for now; an earlier variant of this cell also
# listed 'MobileNetV2' (i.e. ['MobileNetV2', 'ResNet50']).
modelnames = ['ResNet50']

# NOTE(review): presumably the number of base-model layers to freeze — confirm.
freeze_options = [50, 100, 150]

# NOTE(review): presumably the unit counts of the dense layers in the head — confirm.
dense_options = [[512], [512, 256]]

learning_rates = [1e-5, 1e-4]

param_grid = build_param_grid(
    modelnames,
    freeze_options,
    dense_options,
    learning_rates,
)


In [None]:
# Evaluate the parameter grid over the training folds. The keyword settings are
# gathered in one place so they are easy to read and adjust.
cv_settings = dict(
    input_shape=(256, 256, 3),  # same image shape as the dummy model above
    num_classes=202,  # NOTE(review): presumably the number of distinct labels — confirm
    param_grid=param_grid,
    epochs=5,
)
models_list, results_df = evaluated_cross_val(train_folds, **cv_settings)
