# Colab training routine

## Set Up the Environment

In [1]:
! git clone https://github.com/LeonardoDiCaterina/DL.git

Cloning into 'DL'...
remote: Enumerating objects: 456, done.[K
remote: Counting objects: 100% (39/39), done.[K
remote: Compressing objects: 100% (30/30), done.[K
remote: Total 456 (delta 10), reused 33 (delta 9), pack-reused 417 (from 1)[K
Receiving objects: 100% (456/456), 40.65 MiB | 14.95 MiB/s, done.
Resolving deltas: 100% (186/186), done.


In [None]:
from google.colab import drive
drive.mount('/content/drive')

## Hardware Testing

In [2]:
from psutil import virtual_memory
import tensorflow as tf
print('TensorFlow version:', tf.__version__)

ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

TensorFlow version: 2.18.0
Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime


In [3]:
print("GPU Available:", tf.config.list_physical_devices('GPU'))

GPU Available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("Is GPU available:", tf.test.is_gpu_available())

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


Built with CUDA: True
Is GPU available: True


In [5]:
dummy_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(256, 256, 3)),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10)
])


dummy_input = tf.random.normal((1, 256, 256, 3))
out = dummy_model(dummy_input)
print(out.shape)

(1, 10)


In [6]:
%cd DL

/content/DL


## prepare the direcory for the dataset and preprcess the data

In [7]:
! mkdir /content/DL/data/downloaded_dataset

In [8]:
! gdown --id 1PyxqW_nsORX4PetkQo6OIL0mUL1pFsTD --output /content/DL/data/downloaded_dataset/rare_species.zip

Downloading...
From (original): https://drive.google.com/uc?id=1PyxqW_nsORX4PetkQo6OIL0mUL1pFsTD
From (redirected): https://drive.google.com/uc?id=1PyxqW_nsORX4PetkQo6OIL0mUL1pFsTD&confirm=t&uuid=dbc0521a-ee53-4509-9cbb-5cf599388930
To: /content/DL/data/downloaded_dataset/rare_species.zip
100% 4.26G/4.26G [01:24<00:00, 50.5MB/s]


In [9]:
! unzip data/downloaded_dataset/rare_species.zip -d data/downloaded_dataset

[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
  inflating: data/downloaded_dataset/chordata_procellariidae/13599615_45511297_eol-full-size-copy.jpg  
  inflating: data/downloaded_dataset/chordata_procellariidae/28929661_1049466_eol-full-size-copy.jpg  
  inflating: data/downloaded_dataset/chordata_procellariidae/20683888_1049466_eol-full-size-copy.jpg  
  inflating: data/downloaded_dataset/chordata_procellariidae/22222316_45511297_eol-full-size-copy.jpg  
   creating: data/downloaded_dataset/arthropoda_theraphosidae/
  inflating: data/downloaded_dataset/arthropoda_theraphosidae/21669750_1181772_eol-full-size-copy.jpg  
  inflating: data/downloaded_dataset/arthropoda_theraphosidae/10785719_1181772_eol-full-size-copy.jpg  
  inflating: data/downloaded_dataset/arthropoda_theraphosidae/21669743_1181772_eol-full-size-copy.jpg  
  inflating: data/downloaded_dataset/arthropoda_theraphosidae/28994684_1181772_eol-full-size-copy.jpg  
  inflating: data/downloaded_dataset/arthro

copy this in the config file to the `preprocessing_config.py` file
```python
DATA_DIR = 'data/downloaded_dataset'
DEST_DIR = 'data/rearranged'
CSV_PATH = f'{DATA_DIR}/metadata.csv'
N_SPLITS = 5 #it's a positive integer
TEST_SIZE = 0.2 # it's a ratio therefore has to be between 0 and 1
OVERSAMPLE = True
LOG_LEVEL = 'INFO'
LABEL_COL = 'family'
```


In [11]:
! python /content/DL/data_preprocessing/main.py

Traceback (most recent call last):
  File "/content/DL/data_preprocessing/main.py", line 2, in <module>
    from preprocessing_config.py import CSV_PATH, DATA_DIR, DEST_DIR, N_SPLITS, TEST_SIZE, OVERSAMPLE,LOG_LEVEL, LABEL_COL
ModuleNotFoundError: No module named 'preprocessing_config.py'; 'preprocessing_config' is not a package


## Load the dataset

In [None]:
from data_preprocessing.data_loading import load_data
train_folds, test_ds = load_data()

## Run Grid Search

copy this in the config file to the `preprocessing_config.py` file
```python
DATA_DIR = 'data/downloaded_dataset'
DEST_DIR = 'data/rearranged'
CSV_PATH = f'{DATA_DIR}/metadata.csv'
N_SPLITS = 5 #it's a positive integer
TEST_SIZE = 0.2 # it's a ratio therefore has to be between 0 and 1
OVERSAMPLE = True
LOG_LEVEL = 'INFO'
LABEL_COL = 'family'
```


In [None]:
from training.model_selection_utilis import build_param_grid, evaluated_cross_val

In [None]:
#modelnames = ['MobileNetV2', 'ResNet50']
modelnames = ['ResNet50']
freeze_options = [50]
dense_options = [[512]]
learning_rates = [1e-4]

param_grid = build_param_grid(modelnames,freeze_options, dense_options, learning_rates)


In [None]:
models_list, results_df = evaluated_cross_val(train_folds, input_shape=(256, 256, 3), num_classes=202,
                                              param_grid=param_grid, epochs=5)
