# Image recognition algorithm: climate regions of streetview panoramas

In [None]:
import os
from main import make_labelled_dataframe
from source.training import train_model, build_model, build_dataset
from source.utils import prepare_dataframe_and_files_for_training, reset_images_position
import tensorflow as tf

##### Download the dataset from Kaggle. You must use your personal Kaggle username and API key

In [None]:
!pip install opendatasets

In [None]:
import opendatasets

# Fetch the street-view photosphere dataset from Kaggle. Personal Kaggle
# credentials (username and key) are required for this step.
opendatasets.download(
    "https://www.kaggle.com/datasets/nikitricky/streetview-photospheres"
)

##### set up the directories and CSV path (defaults should be correct)

In [None]:
# Dataset root folder; its name matches the slug of the Kaggle dataset URL.
main_dir = "streetview-photospheres"

# CSV file that is handed to make_labelled_dataframe together with img_dir.
csv_path = os.path.join(main_dir, "images.csv")

# Image folders, all located directly under the dataset root.
# NOTE(review): bad_img_dir / test_img_dir presumably receive the images that
# are excluded from training / held out for testing -- confirm in source.utils.
img_dir, bad_img_dir, test_img_dir = (
    os.path.join(main_dir, sub_dir)
    for sub_dir in ("images", "bad_images", "test_images")
)


##### set up the checkpoint directory (here we use Google Drive)

In [None]:
# Colab-only step: attach Google Drive at /content/drive so that paths below
# that mount point (e.g. the checkpoint directory) can be used.
from google.colab import drive

drive.mount("/content/drive")

In [None]:
# Folder handed to train_model as its checkpoint directory. It sits under the
# Google Drive mount point (/content/drive) -- presumably so that checkpoints
# outlive the Colab session (confirm).
checkpoint_dir = "/content/drive/MyDrive/checkpoints/"

##### (optional) reset image file positions if you have already run the code before

In [None]:
# Optional: only needed when the notebook has already been run before.
# NOTE(review): presumably moves the files found in bad_img_dir and
# test_img_dir back into img_dir -- confirm against source.utils.
reset_images_position(img_dir, bad_img_dir, test_img_dir)

##### create the dataframe with images and labels

In [None]:
# Column names used in the dataframe for the image file name and its label;
# both can be chosen freely.
file_name_key = "file_name"
label_key = "label"

# Build the labelled dataframe from the CSV file and the image folder.
# NOTE(review): name_dict presumably maps label ids to readable names -- confirm.
df, name_dict = make_labelled_dataframe(
    csv_path,
    img_dir,
    label_key,
    file_name_key,
)

##### choose your labels (they should have a reasonably high number of entries)

In [None]:
# Label ids kept for training; every other label is left out of the model.
chosen_labels = [
    5, 17, 16, 19, 4,
    39, 38, 3, 36, 14,
]

# One output class per chosen label.
num_classes = len(chosen_labels)

##### choose how many images to use for training for each label (excess images will be used later for testing)

In [None]:
# Number of images per chosen label used for training; passed on to
# prepare_dataframe_and_files_for_training.
samples_per_label = 5000

##### choose the random seed and the model name (the name is used for the saved result files)

In [None]:
# Name given to train_model; it identifies the files saved with the results.
model_name = "modellino"

# Seed shared by the dataframe preparation and the dataset split.
seed = 42

In [None]:
# Split the labelled data into the part used for training (df_good) and a
# second dataframe (df_test).
# NOTE(review): df_test presumably holds the excess images kept for testing,
# and files are presumably moved between the three folders -- confirm.
df_good, df_test = prepare_dataframe_and_files_for_training(
    df, chosen_labels, file_name_key, label_key,  # data and column keys
    img_dir, bad_img_dir, test_img_dir,  # image folders
    samples_per_label, seed,  # sampling controls
)

##### training and validation datasets

In [None]:
# Build the training and validation datasets from the prepared dataframe,
# shuffled with the shared seed and with validation_split set to 0.2.
tra_ds, val_ds = build_dataset(
    img_dir,
    df_good,
    label_key,
    shuffle=True,
    seed=seed,
    validation_split=0.2,
)

##### choose parameters for the model (defaults are ok)

In [None]:
# Plain hyper-parameters.
epochs = 20
dropout_rate = 0.2

# The Sparse* variants expect integer class ids as targets (not one-hot
# vectors).
loss = tf.keras.losses.SparseCategoricalCrossentropy()
metric = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")

In [None]:
# Build the network with one output per chosen label.
model = build_model(num_classes, metric, loss, dropout_rate)

# Train on the training/validation datasets; model_name and checkpoint_dir
# are handed to train_model for the files it saves.
history = train_model(
    model,
    tra_ds,
    val_ds,
    epochs,
    model_name,
    checkpoint_dir,
)

In [None]:
# NOTE(review): this rebinds `model` to a new build_model(...) result, so the
# name no longer refers to the model trained in the previous cell. Presumably
# saved checkpoint weights are loaded into it in a later cell -- confirm.
model = build_model(num_classes, metric, loss, dropout_rate)