In [24]:
import pandas as pd
import numpy as np
import tensorflow as tf
from PIL import Image
import random
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, regularizers
from tensorboard.plugins.hparams import api as hp
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os

### Reads the Excel sheets and loads the indices as DataFrames

In [4]:
# Workbook holding one sheet of image indices per biome.
BIOME_WORKBOOK = "Biome_tags_Spreadsheet.xlsx"
BIOME_SHEETS = ["Robert", "Nathan", "Leo", "Jacob", "Arkesh"]

# A list of sheet names makes pandas read the workbook in a single call and
# return a {sheet_name: DataFrame} dict, instead of re-reading the file once
# per sheet. header=None keeps the first row as data; usecols=[0] keeps only
# the first column of each sheet.
biome_sheets = pd.read_excel(BIOME_WORKBOOK, sheet_name=BIOME_SHEETS, header=None, usecols=[0])

desert_indices = biome_sheets["Robert"]
aquatic_indices = biome_sheets["Nathan"]
tundra_indices = biome_sheets["Leo"]
forest_indices = biome_sheets["Jacob"]
grassland_indices = biome_sheets["Arkesh"]


### Converting the index DataFrames to lists and building matching type lists

In [5]:

def _first_column_as_ints(frame):
    """Return the first column of ``frame`` as a plain list of Python ints."""
    column_values = frame.iloc[:, 0].to_numpy()
    return column_values.astype(int).tolist()


# Replace each single-column DataFrame with a flat list of integer indices.
desert_indices = _first_column_as_ints(desert_indices)
aquatic_indices = _first_column_as_ints(aquatic_indices)
tundra_indices = _first_column_as_ints(tundra_indices)
forest_indices = _first_column_as_ints(forest_indices)
grassland_indices = _first_column_as_ints(grassland_indices)


In [6]:
# One float class id per image: 0 = desert, 1 = aquatic, 2 = tundra,
# 3 = forest, 4 = grassland.
desert_type = [0.0] * len(desert_indices)
aquatic_type = [1.0] * len(aquatic_indices)
tundra_type = [2.0] * len(tundra_indices)
forest_type = [3.0] * len(forest_indices)
grassland_type = [4.0] * len(grassland_indices)

### Combining the per-biome indices and types into single lists

In [7]:
# Concatenate the per-biome index lists in the order
# desert, aquatic, tundra, forest, grassland.
indices = [
    *desert_indices,
    *aquatic_indices,
    *tundra_indices,
    *forest_indices,
    *grassland_indices,
]

In [8]:
len(indices) # total number of image indices across the five biome lists; should equal len(types)

1089

In [9]:
# Concatenate the per-biome label lists in the same biome order used for
# `indices`, so position i of both lists refers to the same image.
types = [
    *desert_type,
    *aquatic_type,
    *tundra_type,
    *forest_type,
    *grassland_type,
]

In [10]:
# Every image index needs exactly one label. Fail loudly here instead of
# relying on comparing two displayed lengths by eye.
assert len(types) == len(indices), "indices and types must have the same length"
len(types)

1089

### Function for Loading Images

In [11]:
def load_label_images(indices, types, image_loc):
    """Load the PNG for each index and pair it with its label.

    Parameters
    ----------
    indices : sequence of int
        Image numbers. Each one is zero-padded to seven digits to form the
        file name ``<index>.png`` inside ``image_loc``.
    types : sequence
        Label for each image, aligned position-by-position with ``indices``.
    image_loc : str
        Directory that contains the PNG files.

    Returns
    -------
    dict
        Maps ``"img_<7-digit index>"`` to ``[pixel_array, label]``, where
        ``pixel_array`` is the image converted to a NumPy array and divided
        by 255.
    """
    images = {}

    for position, index in enumerate(indices):
        image_index = "{:07d}".format(index)
        # os.path.join picks the right separator for the current OS instead
        # of hard-coding a Windows backslash.
        image_path = os.path.join(image_loc, image_index + ".png")
        # The context manager closes the file handle once the pixel data has
        # been copied into the array.
        with Image.open(image_path) as png:
            pixels = np.array(png) / 255
        images["img_" + image_index] = [pixels, types[position]]

    return images

### Splitting into testing and training sets

In [12]:
# Fixed seed so the train/test partition is identical on every run.
SPLIT_SEED = 42


def split_biome(indices, labels, test_size=0.2, random_state=SPLIT_SEED):
    """Split one biome's image indices and labels into train and test parts.

    Parameters
    ----------
    indices : sequence of int
        Image indices for a single biome.
    labels : sequence
        Label for each index, aligned position-by-position with ``indices``.
    test_size : float, default 0.2
        Fraction of the samples placed in the test part.
    random_state : int, default SPLIT_SEED
        Seed passed to ``train_test_split`` so the shuffle is reproducible.

    Returns
    -------
    tuple
        ``(train_indices, train_labels, test_indices, test_labels)``, each a
        tuple. Indices and labels are shuffled together, so they stay paired.
    """
    ind_train, ind_test, lab_train, lab_test = train_test_split(
        indices, labels, test_size=test_size, random_state=random_state
    )
    return tuple(ind_train), tuple(lab_train), tuple(ind_test), tuple(lab_test)


# Each biome is split separately so every class keeps the same 80/20 ratio.
des_train_ind, des_train_typ, des_test_ind, des_test_typ = split_biome(desert_indices, desert_type)
aqu_train_ind, aqu_train_typ, aqu_test_ind, aqu_test_typ = split_biome(aquatic_indices, aquatic_type)
tun_train_ind, tun_train_typ, tun_test_ind, tun_test_typ = split_biome(tundra_indices, tundra_type)
for_train_ind, for_train_typ, for_test_ind, for_test_typ = split_biome(forest_indices, forest_type)
gra_train_ind, gra_train_typ, gra_test_ind, gra_test_typ = split_biome(grassland_indices, grassland_type)


### Loading Images and preparing train/test sets

In [13]:
# Raw string: the backslashes are Windows path separators, not escape
# sequences. In a normal string literal "\D" and "\l" are invalid escapes
# that Python warns about.
image_loc = r"D:\Dataset Images\lhq_256"
# One [indices, labels] pair per biome, in the order
# desert, aquatic, tundra, forest, grassland.
training_loop = [
    [des_train_ind, des_train_typ],
    [aqu_train_ind, aqu_train_typ],
    [tun_train_ind, tun_train_typ],
    [for_train_ind, for_train_typ],
    [gra_train_ind, gra_train_typ],
]
testing_loop = [
    [des_test_ind, des_test_typ],
    [aqu_test_ind, aqu_test_typ],
    [tun_test_ind, tun_test_typ],
    [for_test_ind, for_test_typ],
    [gra_test_ind, gra_test_typ],
]

In [14]:
# Merge the per-biome image dictionaries into one dict for training...
train_dict = {}
for biome_indices, biome_types in training_loop:
    train_dict.update(load_label_images(biome_indices, biome_types, image_loc))

# ...and one dict for testing.
test_dict = {}
for biome_indices, biome_types in testing_loop:
    test_dict.update(load_label_images(biome_indices, biome_types, image_loc))

In [15]:
# Each dict value is [image, label]; after transposing there is one row per
# image name and two columns, which are named here.
train_df = pd.DataFrame(train_dict).transpose().set_axis(['image', 'label'], axis="columns")

test_df = pd.DataFrame(test_dict).transpose().set_axis(['image', 'label'], axis="columns")


In [16]:
# Stack the per-row objects of each column into plain NumPy arrays for Keras.
train_img = np.array(list(train_df['image']))
train_lab = np.array(list(train_df['label']))

test_img = np.array(list(test_df['image']))
test_lab = np.array(list(test_df['label']))

### TensorBoard for Hyperparameter Tuning

In [70]:
%load_ext tensorboard
log_dir = 'D:/logs/hyperparams'
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir)
hparams_dir = os.path.join(log_dir, 'validation')

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [78]:
# Hyperparameter search space: L2 strength, dropout rate and optimizer name.
HP_L2 = hp.HParam(
    name='regularizer',
    domain=hp.RealInterval(min_value=0.005, max_value=0.5),
)
HP_DROPOUT = hp.HParam(
    name='dropout',
    domain=hp.RealInterval(min_value=0.1, max_value=0.5),
)
HP_OPTIMIZER = hp.HParam(
    name='optimizer',
    domain=hp.Discrete(values=['adam', 'sgd']),
)

# Tag under which each trial's accuracy is logged.
METRIC_ACCURACY = 'accuracy'

# Write the experiment-level description (hyperparameters and metric) once.
config_writer = tf.summary.create_file_writer('D:/logs/hparam_tuning')
with config_writer.as_default():
    hp.hparams_config(
        hparams=[HP_L2, HP_DROPOUT, HP_OPTIMIZER],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Validation Accuracy')],
    )

In [79]:
def train_test_model(hparams):
    """Build, train and evaluate one CNN for a given hyperparameter set.

    ``hparams`` is indexed with ``HP_L2`` (L2 factor of the hidden dense
    layer), ``HP_DROPOUT`` (dropout rate) and ``HP_OPTIMIZER`` (optimizer
    passed to ``compile``). The model is fitted for one epoch on the
    module-level ``train_img``/``train_lab`` arrays, with
    ``test_img``/``test_lab`` as validation data.

    Returns the accuracy reported by ``model.evaluate`` on the test arrays.
    """
    model = models.Sequential()

    # Convolutional feature extractor for 256x256 RGB inputs.
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))

    # Classifier head: regularised dense layer, dropout, 5-way softmax.
    model.add(layers.Flatten())
    l2_penalty = regularizers.l2(hparams[HP_L2])
    model.add(layers.Dense(64, activation='relu', kernel_regularizer=l2_penalty))
    model.add(layers.Dropout(hparams[HP_DROPOUT]))
    model.add(layers.Dense(5, activation="softmax"))

    model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss="sparse_categorical_crossentropy",
        metrics=[METRIC_ACCURACY],
    )

    model.fit(train_img, train_lab, epochs=1, validation_data=(test_img, test_lab))

    test_loss, test_accuracy = model.evaluate(test_img, test_lab)
    return test_accuracy

In [80]:
def run(run_dir, hparams):
    """Run one trial and log its hyperparameters and accuracy to ``run_dir``.

    A summary file writer is created for ``run_dir``. Inside its default
    context the hyperparameter values are recorded, the model is trained and
    evaluated via ``train_test_model``, and the resulting accuracy is written
    as a scalar under ``METRIC_ACCURACY`` at step 1.
    """
    trial_writer = tf.summary.create_file_writer(run_dir)
    with trial_writer.as_default():
        # Record the values used in this trial.
        hp.hparams(hparams)
        trial_accuracy = train_test_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, trial_accuracy, step=1)


In [84]:
# Three evenly spaced values per continuous hyperparameter.
l2_grid = np.linspace(0.005, 0.5, 3)
dropout_grid = np.linspace(0.1, 0.5, 3)

# Full grid search: every (l2, dropout, optimizer) combination is one trial,
# numbered consecutively from 0.
session_num = 0
for l2 in l2_grid:
    for dropout_rate in dropout_grid:
        for optimizer in HP_OPTIMIZER.domain.values:
            hparams = {HP_L2: l2, HP_DROPOUT: dropout_rate, HP_OPTIMIZER: optimizer}

            run_name = "run-%d" % session_num
            print('--- Starting trial: %s' % run_name)
            print({param.name: value for param, value in hparams.items()})

            run('D:/logs/' + run_name, hparams)
            session_num += 1


--- Starting trial: run-0
{'regularizer': 0.005, 'dropout': 0.1, 'optimizer': 'adam'}
--- Starting trial: run-1
{'regularizer': 0.005, 'dropout': 0.1, 'optimizer': 'sgd'}
--- Starting trial: run-2
{'regularizer': 0.005, 'dropout': 0.30000000000000004, 'optimizer': 'adam'}
--- Starting trial: run-3
{'regularizer': 0.005, 'dropout': 0.30000000000000004, 'optimizer': 'sgd'}

UnknownError: {{function_node __wrapped__FlushSummaryWriter_device_/job:localhost/replica:0/task:0/device:CPU:0}} FlushFileBuffers failed for: \\?\D:\logs\run-3\events.out.tfevents.1712246591.BOB_INSPIRON.7352.28.v2 : The volume for a file has been externally altered so that the opened file is no longer valid.
; Unknown error
	Failed to flush 1 events to D:/logs/run-3/events.out.tfevents.1712246591.BOB_INSPIRON.7352.28.v2
	Could not flush events file. [Op:FlushSummaryWriter] name: 