# Image recognition algorithm: climate regions of streetview panoramas

##### To run this in Google Colab: git clone the repo and use Google Drive to save results
##### It is recommended to use a GPU runtime

In [None]:
!git clone https://github.com/Quibusque/snc_project
!mv snc_project/* .

In [None]:
import os
import tensorflow as tf

from source.training import train_model, build_model, build_dataset
from source.utils import (
    prepare_dataframe_and_files_for_training,
    reset_images_position,
    make_labelled_dataframe,
)
from source.metrics import confusion_matrix, save_class_metrics
from source.plots import confusion_matrix_plot, accuracy_loss_plot

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so that paths under it (such as the
# checkpoint directory configured further down) are reachable from the notebook.
drive.mount("/content/drive")

##### Download the dataset from Kaggle. You must provide your personal Kaggle username and API key

In [None]:
!pip install opendatasets

In [None]:
import opendatasets

# Fetch the street-view photospheres dataset from Kaggle.
# NOTE(review): the paths configured below expect the data under
# "streetview-photospheres" in the working directory — confirm that this is
# where opendatasets places the download.
opendatasets.download(
    "https://www.kaggle.com/datasets/nikitricky/streetview-photospheres"
)

##### setup the directories and csv path (defaults should be correct)

In [None]:
# Root folder of the downloaded dataset; every other path hangs off it.
main_dir = "streetview-photospheres"
# CSV file handed to make_labelled_dataframe together with img_dir.
csv_path = os.path.join(main_dir, "images.csv")
# Image folders: the main image pool, plus the two folders passed to the
# file-preparation helpers as bad_img_dir and test_img_dir.
img_dir, bad_img_dir, test_img_dir = (
    os.path.join(main_dir, folder)
    for folder in ("images", "bad_images", "test_images")
)

##### setup the checkpoint directory (here we use google drive)

In [None]:
# Directory on the mounted Google Drive; it is passed to train_model and is
# also the root under which save_dir (results) is built.
checkpoint_dir = "/content/drive/MyDrive/checkpoints/"

##### (optional) reset image file positions if you have already run the code before

In [None]:
# Optional clean-up before a re-run.
# NOTE(review): presumably moves files from bad_img_dir and test_img_dir back
# into img_dir — confirm against source.utils.reset_images_position.
reset_images_position(img_dir, bad_img_dir, test_img_dir)

##### create the dataframe with images and labels

In [None]:
# df exposes a "label" column (inspected in the next cell); name_dict is later
# handed to confusion_matrix_plot.
# NOTE(review): name_dict presumably maps label ids to region names — confirm.
df, name_dict = make_labelled_dataframe(csv_path, img_dir)

##### see what the most populated regions are, to choose those with a large enough number of images

In [None]:
# How many of the most frequent labels to display.
num_entries_to_print = 15
# Assuming df is a pandas DataFrame, value_counts() orders labels by descending
# frequency, so head() shows the most populated ones.
print(df['label'].value_counts().head(num_entries_to_print))


### Multi-MIP Climate Change ATLAS reference regions
![Multi-MIP Climate Change ATLAS reference regions](https://raw.githubusercontent.com/SantanderMetGroup/ATLAS/main/reference-regions/reference_regions_names.png)

##### choose your labels (they should have a reasonably high number of entries)

In [None]:
# Label ids (values of df["label"]) that the classifier will distinguish.
chosen_labels = [5, 17, 16, 19, 4, 39, 38]
num_classes = len(chosen_labels)
# Map each label id to a contiguous class index 0 .. num_classes - 1, in the
# order the ids are listed above.
label_map = dict(zip(chosen_labels, range(num_classes)))

##### choose how many images to use for training for each label (excess images will be used later for testing)

In [None]:
# Per-label image count handed to prepare_dataframe_and_files_for_training.
samples_per_label = 9000

##### choose some parameters (model name is used for saved files with results)

In [None]:
# Seed forwarded to the dataframe preparation and to the training dataset build.
seed = 42
# Name forwarded to train_model and to the plotting / metrics helpers.
model_name = "my_model"
# save_dir is passed to the plotting and metrics helpers as the output
# location: <checkpoint_dir>/results/<model_name>
save_dir = os.path.join(checkpoint_dir, "results", model_name)

In [None]:
# Produce the dataframe used for training/validation (df_good) and the one
# used for testing (df_test).
# NOTE(review): going by the notebook text, df_good holds up to
# samples_per_label images per chosen label and df_test the excess; image files
# are presumably moved into bad_img_dir / test_img_dir accordingly — confirm in
# source.utils.
df_good, df_test = prepare_dataframe_and_files_for_training(
    df,
    chosen_labels,
    img_dir,
    bad_img_dir,
    test_img_dir,
    samples_per_label,
    seed,
)

##### build training, validation and test datasets

In [None]:
# With a validation_split given, build_dataset's result is unpacked into a
# (training, validation) pair.
# NOTE(review): 0.2 presumably reserves 20% of df_good for validation — confirm.
tra_ds, val_ds = build_dataset(
    img_dir, df_good, label_map, shuffle=True, seed=seed, validation_split=0.2
)

In [None]:
# Test images: no shuffling, no seed and no validation split; the result is
# used as a single dataset by the confusion-matrix cell further down.
test_ds = build_dataset(
    test_img_dir, df_test, label_map, shuffle=False, seed=None, validation_split=None
)

##### choose parameters for the model (defaults are ok)

In [None]:
# "Sparse" metric and loss take integer class indices as targets.
# NOTE(review): assumes build_dataset emits the label_map indices as labels — confirm.
metric = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")
loss = tf.keras.losses.SparseCategoricalCrossentropy()
dropout_rate = 0.2
# epochs is not consumed by build_model; it is passed to train_model below.
epochs = 10
model = build_model(num_classes, metric, loss, dropout_rate)

##### train the model

In [None]:
# Run training; the returned history is plotted by accuracy_loss_plot below.
history = train_model(
    model, tra_ds, val_ds, epochs, model_name, checkpoint_dir
)

##### plot and save accuracy and loss curves over epochs

In [None]:
# Plot the curves recorded in history; model_name and save_dir are passed for
# the saved output.
accuracy_loss_plot(history, model, model_name, save_dir)

##### plot and save confusion matrix on validation dataset

In [None]:
# Confusion matrix of the model over the validation dataset.
matrix_val = confusion_matrix(val_ds, model, num_classes)
# NOTE(review): "row" presumably selects row-wise normalisation — confirm in source.plots.
confusion_matrix_plot(matrix_val, "row", name_dict, chosen_labels, model_name, save_dir)

##### plot and save confusion matrix on test dataset

In [None]:
# Confusion matrix of the model over the test dataset; it is also the input to
# save_class_metrics below.
matrix_test = confusion_matrix(test_ds, model, num_classes)
# NOTE(review): this call uses the same model_name and save_dir as the
# validation plot. If the output filename is derived only from those two, this
# figure overwrites the validation one — confirm in source.plots.
confusion_matrix_plot(
    matrix_test, "row", name_dict, chosen_labels, model_name, save_dir
)

##### save class metrics (accuracy, loss and f1score) on the test dataset

In [None]:
# Derive the per-class metrics from the test confusion matrix; save_dir and
# model_name are passed for the saved output.
save_class_metrics(matrix_test, save_dir, model_name)