### Information:
Unfortunately, JupyterLab does not support displaying logs/status progress while a cell is running (at least on Windows). Instead, everything is displayed at the end. Colab, on the other hand, displays the logs/status progress correctly.


In [None]:
# True when the string form of the active IPython shell mentions
# "google.colab" (i.e. the notebook runs on Colab), False otherwise.
colab = "google.colab" in str(get_ipython())

In [None]:
import shutil
import zipfile
from pathlib import Path

if colab:
    from google.colab import files

## Check for GPU Usage

In [None]:
if colab:
    %tensorflow_version 2.x
import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name != "/device:GPU:0":
    raise SystemError("No GPU found, CPU will be used!")
print("Found GPU at: {}".format(device_name))

## Install necessary modules
Only necessary if you are working in Colab. If you use your local system, please install the modules listed in requirements.txt instead.

In [None]:
if colab:
    ! pip -q install pytorch-lightning --upgrade
    ! pip -q install neptune-client
    ! pip -q install kornia
    ! pip -q install split-folders

## Clone files from Github

In [None]:
if colab:
    ! git clone https://github.com/RobinMaas95/GTSRB_Visualization.git
else:
    print("You're not on Colab, are you sure you haven't cloned the repo already?")

## Setup paths and create target folder


In [None]:
# Define paths
base_path = Path.cwd()

## If we use a local machine, we are presumably in the code folder. If so, change to parent folder
if base_path.parent.name == "GTSRB_Visualization":
    base_path = base_path.parent
    %cd ..

colab_suffix = "GTSRB_Visualization" if colab else ""   

data_path = base_path.joinpath(colab_suffix,"data")
src_path = data_path.joinpath("cropped_training")
trg_path = data_path.joinpath("cropped_training_init")
code_path = base_path.joinpath(colab_suffix, "code")
get_dataset_path = code_path.joinpath("get_datasets.py")
initialize_dataset_path = code_path.joinpath("initialize_dataset.py")
build_model_path = code_path.joinpath("build_model.py")
visualization_path = code_path.joinpath("visualization.py")
visualization_with_nn_path = code_path.joinpath("visualization_with_nn.py")
mask_images_path = code_path.joinpath("mask_images.py")
visual_path = base_path.joinpath("visualizations")
masking_jsons_path = base_path.joinpath("masking_jsons")
masking_results_path = base_path.joinpath("masking_results")

# Create target folder
visual_path.mkdir(exist_ok=True, parents=True)

## Download Datasets

In [None]:
! python $get_dataset_path --crop True --working_dir $base_path
# Backup folder_from repo
data_path.joinpath("backup_repo").mkdir(exist_ok=True)
try:
    shutil.move(str(data_path.joinpath("cropped_test")), str(data_path.joinpath("backup_repo", "cropped_test")))
except FileNotFoundError:
    print(f"cropped_test not found. Probably it was already moved")

# Move new folder into data folder
for file_name in ["cropped_test", "cropped_training", "original_test", "original_training"
                  "GT-final_test.csv", "GTSRB_Final_Test_GT.zip", 
                  "GTSRB_Final_Test_Images.zip", 
                  "GTSRB_Final_Training_Images.zip"]:

    try:
        shutil.move(str(base_path.joinpath(file_name)), str(data_path.joinpath(file_name)))
    except FileNotFoundError:
        print(f"""{file_name} not found. Probably it was already moved or never existed 
              (cropped/original depending on --crop true/false)""")

## Initialize Datasets

In [None]:
# Run initialize_dataset.py, passing the "cropped_training" folder as
# --source_path and the "cropped_training_init" folder as --target_path.
!python $initialize_dataset_path \
    --source_path $src_path \
    --target_path $trg_path

## Build Model

In [None]:
# Remove the checkpoints folder of an earlier run; errors (e.g. the folder
# does not exist yet) are ignored.
shutil.rmtree(base_path / "checkpoints", ignore_errors=True)

In [None]:
# Folder the checkpoints are written to (kept as a Path object).
checkpoint_dest = base_path / "checkpoints"

# Dataset folders, stored as plain strings.
test_dataset = str(data_path / "cropped_test")
train_dataset = str(data_path / "cropped_training_init")

# Uncomment the following lines if you want to test all cells of this
# notebook with only one image per class (fastest way to test).
# train_dataset = data_path.joinpath("medium_dataset", "cropped_training_init")
# test_dataset = data_path.joinpath("medium_dataset", "cropped_test")

**ATTENTION**: 


The setting used in the thesis, with a momentum of `0.9`, seems to be unstable! This means that - depending on how the weights are initialized at the beginning - the CNN may not learn. If this is the case, it typically remains at a loss of about `3.8` and an accuracy of about `0.02`.


The lower the momentum is set, the more stable the training process is. However, this may require more epochs to obtain similar accuracy values. 


The CNN used for the evaluations in the thesis was trained over `15 epochs` with a momentum of `0.9`. In further tests, however, other CNNs with lower momentum could be trained to almost identical accuracy values. With a momentum of `0.7`, for example, this required `20 epochs`.



In [None]:
# Run build_model.py on the training/test dataset folders, writing to
# checkpoint_dest, with a momentum of 0.9.
#
# Possible flags for fast training:
# -d 0.01 --> Only one percent dropout range
# --epochs 2    --> Only two epochs
# NOTE(review): the flag descriptions above are taken from the original
# comment; confirm their exact meaning against build_model.py.

! python $build_model_path \
  --train_dataset $train_dataset \
  --test_dataset  $test_dataset \
  --destination $checkpoint_dest \
  --momentum 0.9

The value given here under `avg_test_acc` seems to be slightly higher than the value reached in `run_test_performance.ipynb`. However, since the value in `run_test_performance.ipynb` is the reproducible value, it is used within the thesis.

## Set mean/std variables
Set mean/std variables. 
You can find them in the logs above, close to the top,
just before the training begins.
E.g.:

```
/content/GTSRB_Visualization/code/build_model.py - INFO - Mean: [0.32352597020361307, 0.2917746800988572, 0.31248061517888454]
/content/GTSRB_Visualization/code/build_model.py - INFO - Std: [0.2770588879067902, 0.2602109151608635, 0.2735251700292777]
```

Replace the values of the _list variables with the lists at the end of the log lines:

 

In [None]:
# Per-channel statistics copied from the training log (three values each).
mean_list = [0.3232291265307408, 0.2916548292210219, 0.31239060828717274]
std_list = [0.2765753268041087, 0.26007308085671965, 0.27338755423173944]

# Space-separated string forms of the lists, used for the --mean / --std
# command-line arguments of the visualization scripts.
mean = " ".join(map(str, mean_list))
std = " ".join(map(str, std_list))

## Run visualizations

In [None]:
# Autoselect best checkpoint: take a checkpoint file from the first
# lightning_logs version folder that is not the rolling "last.ckpt".
checkpoint_results_path = checkpoint_dest.joinpath(
    "lightning_logs", "version_0", "checkpoints"
)

# Use a dedicated name for the candidates so the global `files` (which may be
# bound to something else in this notebook) is not overwritten. Sorting makes
# the pick independent of the file-system listing order.
checkpoint_candidates = sorted(
    path
    for path in checkpoint_results_path.glob("**/*")
    if path.is_file() and path.name != "last.ckpt"
)

# Fail right here with a clear message instead of a NameError on
# `best_checkpoint` in one of the later cells.
if not checkpoint_candidates:
    raise FileNotFoundError(
        f"No checkpoint (other than last.ckpt) found in {checkpoint_results_path}"
    )

best_checkpoint = str(checkpoint_candidates[-1])

### GradCam

In [None]:
# Run visualization.py with --vis "GradCam": the test dataset is the --src,
# the selected checkpoint the --model and visual_path the --dest.
# $mean and $std each expand to three space-separated numbers.
! python $visualization_path \
  --dest $visual_path --filetype "ppm" \
  --model $best_checkpoint \
  --src $test_dataset --vis "GradCam" \
  --mean $mean \
  --std $std

### GradCam++

In [None]:
# Run visualization.py with --vis "GradCam++": the test dataset is the --src,
# the selected checkpoint the --model and visual_path the --dest.
# $mean and $std each expand to three space-separated numbers.
! python $visualization_path \
  --dest $visual_path --filetype "ppm" \
  --model $best_checkpoint \
  --src $test_dataset --vis "GradCam++" \
  --mean $mean \
  --std $std

### Saliency Map

In [None]:
# Install nn_interpretability, make sure you push the restart button!
nn_path = code_path.joinpath("nn_interpretability")
! pip install -e $nn_path

In [None]:
# Run visualization_with_nn.py with --vis "Saliency": the test dataset is the
# --src, the selected checkpoint the --model and visual_path the --dest.
# $mean and $std each expand to three space-separated numbers.
! python $visualization_with_nn_path \
  --dest $visual_path --filetype "ppm" \
  --model $best_checkpoint \
  --src $test_dataset --vis "Saliency" \
  --mean $mean \
  --std $std

### Activation Maximation

In [None]:
# Run visualization_with_nn.py with --vis "Activation Maximation": the test
# dataset is the --src, the selected checkpoint the --model and visual_path
# the --dest. $mean and $std each expand to three space-separated numbers.
! python $visualization_with_nn_path \
  --dest $visual_path --filetype "ppm" \
  --model $best_checkpoint \
  --src $test_dataset --vis "Activation Maximation" \
  --mean $mean \
  --std $std

## Mask images

In [None]:
# Masking only needs three of the heatmap folders. They are moved out of the
# visualization folder into one separate directory, for easier looping.
target = base_path / "to_be_masked"
target.mkdir(parents=True, exist_ok=True)
for heatmap_dir in ("heatmap_grad_cam", "heatmap_grad_cam_pp", "heatmap_saliency"):
    shutil.move(str(visual_path / heatmap_dir), target)

In [None]:
# Run mask_images.py: the "to_be_masked" folder is passed as --heatmaps, the
# test dataset as --org_images, masking_jsons_path as --json_target and
# masking_results_path as --target.
! python $mask_images_path --heatmaps $target \
--json_target $masking_jsons_path --json_file _heatmap_masked.csv \
--org_images $test_dataset \
--target $masking_results_path

## Download files
Download the created datasets and results. Not necessary if you use a local system.

### Datasets

In [None]:
if colab:
    %cd /content
    # Datasets
    cropped_training = str(data_path.joinpath("cropped_training_init"))
    cropped_test = str(data_path.joinpath("cropped_test"))
    ! zip -r -q cropped_training_init.zip /content/data/cropped_training_init
    ! zip -r -q cropped_test.zip /content/data/cropped_test

    ! mkdir /content/datasets
    ! mv cropped_training_init.zip /content/datasets
    ! mv cropped_test.zip /content/datasets

    ! zip -r -q datasets.zip /content/datasets/
    
    # Results
    ! zip -r -q checkpoints.zip  /content/checkpoints
    ! zip -r -q masking_jsons.zip  /content/masking_jsons
    ! zip -r -q to_be_masked.zip  /content/to_be_masked
    ! zip -r -q visualizations.zip  /content/visualizations/
    ! zip -r -q masking_results.zip /content/masking_results

    ! mkdir /content/results
    ! mv checkpoints.zip  /content/results
    ! mv masking_jsons.zip  /content/results
    ! mv to_be_masked.zip  /content/results
    ! mv visualizations.zip /content/results
    ! mv masking_results.zip /content/results

    ! zip -r -q results.zip  /content/results  
    
    # Example for download
    # files.download(datasets.zip)
    # files.download(results.zip) 

    # Example for copying to Google Drive
    # You have to run the cell below first, to connect to Google Drive!
    ! cp datasets.zip /content/drive/MyDrive/
    ! cp results.zip /content/drive/MyDrive/

In [None]:
# Mount Google Drive at /content/drive. google.colab only exists on Colab, so
# the import is guarded like the other Colab-only cells; on a local system
# this cell is a no-op instead of raising ModuleNotFoundError.
if colab:
    from google.colab import drive

    drive.mount("/content/drive")