In [2]:
import pickle
import numpy as np
import pandas as pd
import math
import time
import os, re, sys, random, shutil #, cv2 

In [3]:
# parameters
DATA_SIZE=1500
LAND_USE_ARRAY_SIZE=250
TRIAL_SIZE=1000     # after running it once change to a larger number (up to 15000)
LOAD_CHUNK_SIZE=TRIAL_SIZE
DATA_RUN = 'local_VM' #  select from these options:'gcp' local_VM' , 'local' 

# on VM workbench in GCP when loading data locally from VM  - change to what is has to be 
VM_dataset_folder = "/home/jupyter/GlobalGreening"  # need to change this 
VM_images_dir = "zoomed_photos"
VM_masks_dir = "masks"
VM_output_folder = "/home/jupyter/GlobalGreening/training_outputs" # need to change this 


In [4]:
# Define where to store model outputs

timestamp = time.strftime("%Y%m%d-%H%M%S")

if DATA_RUN == 'local_VM':
    model_path = os.path.join(VM_output_folder, "models", f"{timestamp}_InceptionResNetV2-UNet.h5")
    results_path = os.path.join(VM_output_folder, "metrics", f"{timestamp}_model_training.csv")
    model_metrics_plot_path = os.path.join(VM_output_folder, "metrics", f"{timestamp}_model_metrics_plot.png")
    predictions_dir = os.path.join(VM_output_folder, f"{timestamp}_predictions")
        
print("--------")
print(f' model path is {model_path}')
print("--------")
print(f' results path is {results_path}')
print("--------")
print(f' model metrics path is {model_metrics_plot_path}')
print("--------")
print(f'predictions folder is {predictions_dir}')

--------
 model path is /home/jupyter/GlobalGreening/training_outputs/models/20230613-125635_InceptionResNetV2-UNet.h5
--------
 results path is /home/jupyter/GlobalGreening/training_outputs/metrics/20230613-125635_model_training.csv
--------
 model metrics path is /home/jupyter/GlobalGreening/training_outputs/metrics/20230613-125635_model_metrics_plot.png
--------
predictions folder is /home/jupyter/GlobalGreening/training_outputs/20230613-125635_predictions


In [5]:
!pwd

/home/jupyter/GlobalGreening


In [6]:
images_with_crap = os.listdir("./zoomed_photos")  # here all images
crap = [file for file in images_with_crap if not file.endswith(".png")]
len(images_with_crap), len(crap), crap

(15420,
 5,
 ['text.dot',
  '.ipynb_checkpoints',
  '.text.txt',
  'Untitled.ipynb',
  '.test.txt'])

In [7]:
images = ["zoomed_photos/" + image for image in images_with_crap if image.endswith(".png")]
len(images), images[0:5]

(15415,
 ['zoomed_photos/image6967_-105.84_37.47.png',
  'zoomed_photos/image8942_-104.94_37.51.png',
  'zoomed_photos/image1546_-108.32_38.79.png',
  'zoomed_photos/image7226_-105.75_40.5.png',
  'zoomed_photos/image380_-108.83_37.17.png'])

In [8]:
total_files = len(images)
total_files

15415

In [9]:
# set seed so it can be regenerated
random.seed( 3 )

# Randomly select files into three lists
random.shuffle(images)

# Calculate the number of files for each list
files_for_train = 8000  # Specify the desired number of files to select
files_for_val = int(files_for_train * 0.2)  # Specify the desired number of files to select
files_for_test = total_files - files_for_train - files_for_val # Specify the desired number of files to select

files_for_train, files_for_val, files_for_test

(8000, 1600, 5815)

In [13]:
# Split the files into three lists
images_path_train = images[:files_for_train]
images_path_val = images[files_for_train:(files_for_train+files_for_val)]
images_path_test = images[-files_for_test:]

In [14]:
print(len(images_path_train), images_path_train[0:20])
print('---')
print(len(images_path_val), images_path_val[0:20])
print('---')
print(len(images_path_test), images_path_test[0:20])

8000 ['zoomed_photos/image10952_-104.05_39.05.png', 'zoomed_photos/image2133_-108.06_39.78.png', 'zoomed_photos/image14647_-102.38_40.29.png', 'zoomed_photos/image11380_-103.83_37.26.png', 'zoomed_photos/image9141_-104.86_37.98.png', 'zoomed_photos/image4649_-106.91_38.84.png', 'zoomed_photos/image5620_-106.48_40.16.png', 'zoomed_photos/image5142_-106.69_39.82.png', 'zoomed_photos/image5266_-106.61_37.09.png', 'zoomed_photos/image1551_-108.32_39.01.png', 'zoomed_photos/image1371_-108.4_39.35.png', 'zoomed_photos/image12154_-103.49_38.2.png', 'zoomed_photos/image6759_-105.97_40.63.png', 'zoomed_photos/image9417_-104.73_37.73.png', 'zoomed_photos/image7712_-105.5_37.17.png', 'zoomed_photos/image3342_-107.51_39.22.png', 'zoomed_photos/image14668_-102.34_37.17.png', 'zoomed_photos/image12790_-103.19_37.26.png', 'zoomed_photos/image8785_-105.03_38.84.png', 'zoomed_photos/image8339_-105.24_39.86.png']
---
1600 ['zoomed_photos/image3070_-107.63_39.65.png', 'zoomed_photos/image14869_-102.25_37

In [16]:
import json

In [24]:
with open("images_path_train.txt", "w") as f:
    json.dump(images_path_train, f) 

with open("images_path_val.txt", "w") as f:
    json.dump(images_path_val, f) 
    
with open("images_path_test.txt", "w") as f:
    json.dump(images_path_test, f) 

In [21]:
with open("images_path_test_json.txt", "w") as f:
    json.dump(images_path_test, f) 

In [22]:
with open("images_path_test_json.txt", "r") as f:
    images_path_test_from_json = json.load(f)
#    x = json.load(f)

In [23]:
images_path_test_from_json

['zoomed_photos/image8174_-105.33_40.84.png',
 'zoomed_photos/image1009_-108.57_39.95.png',
 'zoomed_photos/image4815_-106.82_37.9.png',
 'zoomed_photos/image4049_-107.16_37.3.png',
 'zoomed_photos/image11515_-103.79_39.01.png',
 'zoomed_photos/image4392_-107.04_39.9.png',
 'zoomed_photos/image3251_-107.55_39.35.png',
 'zoomed_photos/image12943_-103.15_39.78.png',
 'zoomed_photos/image11097_-103.96_37.21.png',
 'zoomed_photos/image4011_-107.21_39.69.png',
 'zoomed_photos/image5999_-106.31_40.29.png',
 'zoomed_photos/image12602_-103.28_37.26.png',
 'zoomed_photos/image8048_-105.37_39.48.png',
 'zoomed_photos/image10049_-104.47_40.63.png',
 'zoomed_photos/image7251_-105.71_37.56.png',
 'zoomed_photos/image6800_-105.93_38.37.png',
 'zoomed_photos/image8168_-105.33_40.59.png',
 'zoomed_photos/image9483_-104.73_40.54.png',
 'zoomed_photos/image13106_-103.06_38.71.png',
 'zoomed_photos/image9866_-104.56_40.84.png',
 'zoomed_photos/image7660_-105.54_38.96.png',
 'zoomed_photos/image10048_-104