# Format Dataset

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

In [None]:
# Libraries
import os
import glob
import matplotlib.pyplot as plt
os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"
import cv2
import numpy as np
from tqdm.notebook import tqdm as tqdm
import tensorflow as tf
import random
from pathlib import Path

from utils.tools import *
from utils.preprocess import *
from utils.visualize import *
from utils.data import *

In [None]:
# Enumerate the GPUs TensorFlow can see; an empty list means CPU-only.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices), physical_devices)

# Select the working GPU (the first one) and let its memory grow on demand.
gpus = physical_devices
if gpus:
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as err:
        # Device settings can only be changed before TensorFlow initializes
        # its runtime; report it instead of aborting a re-run of this cell.
        print(err)
else:
    # Indexing gpus[0] on an empty list would raise IndexError.
    print("No GPU detected: running on CPU.")

In [None]:
## dataset import paths: every entry under ROOT that is neither hidden
## (leading '.') nor a '.yaml' file, as sorted Path objects
ROOT = Path('datasets/vineyard_pergola/')
DATASETS = sorted(
    ROOT / entry
    for entry in os.listdir(ROOT)
    if not (entry.startswith('.') or entry.endswith('.yaml'))
)

In [None]:
# define some variables and read config
config_path = 'utils/config.yaml'  # settings file parsed by read_yaml below
model_dir = Path('bin')  # NOTE(review): not used again in the visible cells
logs_dir = Path('logs')  # NOTE(review): not used again in the visible cells
# read_yaml is not defined in this notebook; it arrives through one of the
# `from utils.<module> import *` lines in the imports cell.
config = read_yaml(config_path)
# Bare last expression so the notebook renders the loaded settings.
config

In [None]:
# Show the sub-dataset paths collected from ROOT.
DATASETS

In [None]:
def format_subdataset(D):
    """Re-arrange one sub-dataset folder into ``images/`` and ``masks/`` trees.

    For every entry of ``D`` whose name starts with ``'I'``, the entry's
    directory listing is read and the items at positions 2 and 3 are taken as
    the mask file and the image file respectively. The two sorted path lists
    are handed to ``move_files``, which relocates them under ``D/masks`` and
    ``D/images``.

    Args:
        D: ``pathlib.Path`` of the sub-dataset folder.

    Raises:
        IndexError: if a sample folder holds fewer than four entries.
        OSError: if an output folder cannot be created (e.g. permissions).
    """
    ROOT_I = D.joinpath('images')
    ROOT_M = D.joinpath('masks')
    # Create each output folder independently. The former
    # try/mkdir/mkdir/except-pass skipped 'masks' whenever 'images' already
    # existed, and silently hid real failures such as permission errors.
    os.makedirs(ROOT_I, exist_ok=True)
    os.makedirs(ROOT_M, exist_ok=True)

    # Sample folders are the entries whose name starts with an upper-case 'I'
    # (so the lower-case 'images' folder created above is not picked up).
    sample_dirs = [D.joinpath(f) for f in os.listdir(D) if f.startswith('I')]

    LRM, LRI = [], []
    for sample in sample_dirs:
        # List each sample folder once so both picks come from one snapshot.
        # NOTE(review): os.listdir returns entries in arbitrary order, so the
        # positional picks [2] (mask) and [3] (image) depend on the file
        # system; selecting by file name would be safer — confirm the layout.
        entries = os.listdir(sample)
        LRM.append(str(sample.joinpath(entries[2])))
        LRI.append(str(sample.joinpath(entries[3])))
    LRM.sort()
    LRI.sort()

    move_files(l=LRM, root=ROOT_M)
    move_files(l=LRI, root=ROOT_I)

In [None]:
# NOTE(review): at this point LRM exists only as a local variable inside
# format_subdataset; a module-level LRM is first assigned in the
# "Format Tree Dataset" section, so on a fresh kernel run top to bottom this
# cell raises NameError.
LRM

In [None]:
# NOTE(review): at this point LRI exists only as a local variable inside
# format_subdataset; a module-level LRI is first assigned in the
# "Format Tree Dataset" section, so on a fresh kernel run top to bottom this
# cell raises NameError.
LRI

In [None]:
def move_files(l, root):
    """Move every file in ``l`` into its own numbered folder under ``root``.

    The paths are processed in sorted order; the i-th one (1-based) is moved
    to ``root/Image{i}/<original file name>``. The file name, the new folder
    and the source/destination pair are printed for each move.

    Args:
        l: iterable of source file paths.
        root: existing destination directory (``pathlib.Path`` or string).

    Raises:
        FileExistsError: if a target ``Image{i}`` folder already exists. This
            is kept on purpose so a second run cannot silently drop extra
            files into folders that were already populated.
    """
    # Accept plain strings as well as Path objects for the destination.
    root = Path(root)
    for i, f in enumerate(sorted(l)):
        # basename applies the platform's separator rules, unlike split('/').
        fname = os.path.basename(f)
        print(fname)
        destf = root.joinpath(f'Image{i+1}')
        os.mkdir(destf)
        print(destf)
        target = destf.joinpath(fname)
        os.replace(f, target)
        print(f, target)

In [None]:
# Apply the images/masks re-arrangement to every sub-dataset found under ROOT.
# NOTE: format_subdataset moves files on disk, so re-running this cell does
# not start from the same directory state.
for d in DATASETS:
    format_subdataset(d)

In [None]:
### DANGER ###

# for i in LRM+LRI:
#     os.remove(i)

# Display Some Images

In [None]:
import os 
from PIL import Image
from pathlib import Path

In [None]:
def display_samples(dataset='zucchini', i=0, n=5, s=0.25):
    """Display up to ``n`` down-scaled sample images of one sub-dataset.

    Args:
        dataset: dataset name, inserted into ``Agrivol_datasets/dataset_<name>/``.
            ``'zucchini'`` selects sub-folders starting with ``'z'``; any
            other name selects sub-folders starting with ``'d'``.
        i: index of the sub-dataset within the sorted sub-folder list.
        n: maximum number of images to show.
        s: scale factor applied to both width and height.

    Raises:
        IndexError: if ``i`` is outside the list of sub-datasets.
    """
    init = 'z' if dataset == 'zucchini' else 'd'

    ROOT = Path(f'Agrivol_datasets/dataset_{dataset}/')
    DATASETS = sorted([ROOT.joinpath(d) for d in os.listdir(ROOT) if d.startswith(init)])

    D = DATASETS[i]
    ROOT_I = D.joinpath('images')

    # Take the first file of every sample folder. Both listings are sorted so
    # the selection no longer depends on the arbitrary os.listdir order, and
    # empty sample folders are skipped instead of raising IndexError.
    LRI = []
    for f in sorted(os.listdir(ROOT_I)):
        sample_dir = ROOT_I.joinpath(f)
        files = sorted(os.listdir(sample_dir))
        if files:
            LRI.append(str(sample_dir.joinpath(files[0])))

    # Slicing caps the loop at the number of images actually available.
    for path in LRI[:max(n, 0)]:
        # The context manager closes the underlying file after resizing.
        with Image.open(path) as im:
            display(im.resize((int(im.width*s), int(im.height*s))))

In [None]:
# Show samples from the first zucchini sub-dataset (defaults n=5, s=0.25).
display_samples('zucchini',i=0)

# Format Tree Dataset

In [None]:
import os 
from PIL import Image
from pathlib import Path

In [None]:
s = 0.5

# Image and mask folders of the dataset_vite_1 sub-dataset.
ROOT_I = Path('datasets/dataset_vite/dataset_vite_1/images/')
ROOT_M = Path('datasets/dataset_vite/dataset_vite_1/masks/')
# Full path (as a string) of every entry in each folder, in os.listdir order.
LRI = [str(ROOT_I / name) for name in os.listdir(ROOT_I)]
LRM = [str(ROOT_M / name) for name in os.listdir(ROOT_M)]
    
# for j in range(5):
#     im = Image.open(LRI[j])
#     #display(im.resize((int(im.width*s),int(im.height*s))))
#     im = np.array(im)
#     print(im.min(), im.max())
#     plt.imshow(im)
#     plt.show()

In [None]:
# Inspect the mask paths in sorted order (LRM itself is left unchanged).
sorted(LRM)

In [None]:
# Give every image its own numbered folder: ROOT_I/Image<k>/<file name>.
# NOTE: not idempotent. LRI was captured before the move, so running this
# cell a second time fails at os.mkdir because the folders already exist.
for i, f in enumerate(sorted(LRI)):
    # basename applies the platform's separator rules, unlike split('/').
    fname = os.path.basename(f)
    destf = ROOT_I.joinpath(f'Image{i+1}')
    os.mkdir(destf)
    os.replace(f, destf.joinpath(fname))

In [None]:
# Echo the image root that the dataset loaders below read from.
ROOT_I

In [None]:
# Unlabelled RGB image pipeline: unshuffled batches of 128 images, each
# resized to 224x224 with bilinear interpolation.
# NOTE(review): the first cell of the "Load Datasets" section repeats this
# exact call, so one of the two is redundant.
img_ds = tf.keras.preprocessing.image_dataset_from_directory(
    ROOT_I,
    label_mode=None, class_names=None,
    color_mode="rgb",
    image_size=(224, 224), interpolation="bilinear",
    batch_size=128,
    shuffle=False, seed=None,
    follow_links=False,
)

# Load Datasets

In [None]:
# Unlabelled RGB image pipeline: unshuffled batches of 128 images, each
# resized to 224x224 with bilinear interpolation.
img_ds = tf.keras.preprocessing.image_dataset_from_directory(
    ROOT_I,
    label_mode=None, class_names=None,
    color_mode="rgb",
    image_size=(224, 224), interpolation="bilinear",
    batch_size=128,
    shuffle=False, seed=None,
    follow_links=False,
)

In [None]:
# Unlabelled single-channel mask pipeline with the same batching, size and
# ordering options as the image pipeline.
# NOTE(review): bilinear resizing can introduce intermediate values along
# mask edges; confirm this is intended before the masks are binarized.
mask_ds = tf.keras.preprocessing.image_dataset_from_directory(
    ROOT_M,
    label_mode=None, class_names=None,
    color_mode="grayscale",
    image_size=(224, 224), interpolation="bilinear",
    batch_size=128,
    shuffle=False, seed=None,
    follow_links=False,
)

In [None]:
# Masks loaded from a path containing 'albero' go through binarize_mask;
# every other dataset goes through normalize.
# NOTE: mask_ds is rebound, so re-running this cell stacks the mapping again.
mask_ds = mask_ds.map(binarize_mask if 'albero' in str(ROOT_M) else normalize)

In [None]:
# mode='torch' scales pixels to [0, 1] and then normalizes each channel with
# the ImageNet mean and standard deviation (per the Keras documentation).
# NOTE: img_ds is rebound, so re-running this cell applies the preprocessing
# a second time.
img_ds = img_ds.map(lambda x: tf.keras.applications.imagenet_utils.preprocess_input(x, mode='torch'))

In [None]:
# Preview the first four images of the first batch and print their range.
for i in img_ds:
    for n, j in enumerate(i):
        lo, hi = np.min(j), np.max(j)
        print(lo, hi)
        # When the notebook is run top to bottom, img_ds has already been
        # through preprocess_input(mode='torch'), so its values are no longer
        # 0..255 and j/255.0 renders an almost black frame. Stretch the actual
        # value range onto [0, 1], which imshow expects for float RGB data.
        span = hi - lo
        plt.imshow((j - lo) / span if span > 0 else np.zeros_like(j))
        plt.show()
        if n >= 3:
            break
    break

In [None]:
# Preview the first three masks of the first batch and print their range.
for i in mask_ds:
    for n, j in enumerate(i):
        #print(j)
        print(np.min(j), np.max(j))
        # NOTE(review): mask_ds was already mapped through binarize_mask or
        # normalize in an earlier cell, so the extra /255.0 looks redundant —
        # confirm against the range printed above.
        plt.imshow(j/255.0)
        plt.show()
        if n >= 2:
            break
    break

In [None]:
# Pair image batches with mask batches element-wise.
# NOTE(review): the pairing relies on both loaders (shuffle=False) listing
# their files in matching order — confirm the folder layouts correspond.
ds1 = tf.data.Dataset.zip((img_ds, mask_ds))

In [None]:
def _to_unit_range(img):
    """Min-max rescale ``img`` to [0, 1] so plt.imshow can render float RGB."""
    arr = np.asarray(img, dtype=np.float32)
    lo, hi = arr.min(), arr.max()
    return (arr - lo) / (hi - lo) if hi > lo else np.zeros_like(arr)


# Overlay the first image/mask pair of the first five batches, before and
# after random_resize_crop.
for i, (x, y) in enumerate(ds1):
    # When the notebook is run top to bottom, the images have already been
    # through preprocess_input(mode='torch'), so dividing by 255 would render
    # them almost black; rescale to the displayable range instead.
    plt.imshow(_to_unit_range(x[0]), alpha=.8)
    plt.imshow(y[0]/255.0, alpha=.2)
    plt.show()

    x_, y_ = random_resize_crop(x[0], y[0], 0.7)
    print(x_.shape, y_.shape)

    plt.imshow(_to_unit_range(x_), alpha=.8)
    plt.imshow(y_/255.0, alpha=.2)
    plt.show()

    # i > 3 stops after the fifth batch (i == 4).
    if i > 3:
        break

In [None]:
# Mean and standard deviation of y_, the cropped mask left over from the
# last iteration of the overlay loop.
np.mean(y_), np.std(y_)

In [None]:
# NOTE(review): y_ is passed to plt.hist unflattened; for a 2-D array each
# column is treated as a separate data set. If one histogram over all pixels
# is intended, flatten first (e.g. np.ravel(y_)) — confirm the shape of y_.
plt.hist(y_)

In [None]:
# Rescale the mask by 1/255.
# NOTE: y_ is rebound in place, so running this cell twice divides twice.
y_ = y_/255.0

In [None]:
# Same statistics after the 1/255 rescale.
np.mean(y_), np.std(y_)

In [None]:
# Value distribution after the 1/255 rescale.
plt.hist(y_)

In [None]:
# Round every value to the nearest integer.
# NOTE(review): this yields a 0/1 mask only if y_ lies in [0, 1] — confirm.
y_ = tf.math.round(y_)

In [None]:
# Same statistics after rounding.
np.mean(y_), np.std(y_)

In [None]:
# Value distribution after rounding.
plt.hist(y_)

# Dataset Check

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

In [None]:
# Libraries
import os
import glob
import matplotlib.pyplot as plt
os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"
import cv2
import numpy as np
from tqdm.notebook import tqdm as tqdm
import tensorflow as tf
import random
from pathlib import Path

from utils.tools import *
from utils.preprocess import *
from utils.visualize import *
from utils.data import *

In [None]:
# Enumerate the GPUs TensorFlow can see; an empty list means CPU-only.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices), physical_devices)

# Select the working GPU (the first one) and let its memory grow on demand.
gpus = physical_devices
if gpus:
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as err:
        # Device settings can only be changed before TensorFlow initializes
        # its runtime; report it instead of aborting a re-run of this cell.
        print(err)
else:
    # Indexing gpus[0] on an empty list would raise IndexError.
    print("No GPU detected: running on CPU.")

In [None]:
# define some variables and read config
config_path = 'utils/config.yaml'  # settings file parsed by read_yaml below
data_dir = Path('datasets')  # base folder joined with the config's dataset names
model_dir = Path('bin')  # NOTE(review): not used again in the visible cells
logs_dir = Path('logs')  # NOTE(review): not used again in the visible cells
# read_yaml is not defined in this notebook; it arrives through one of the
# `from utils.<module> import *` lines in the imports cell.
config = read_yaml(config_path)
# Bare last expression so the notebook renders the loaded settings.
config

In [None]:
# Every configured source folder except the target one, as sorted paths.
# NOTE(review): source_dataset is computed but not passed on —
# load_multi_dataset receives None as its first argument.
source_dataset = sorted(
    data_dir / name
    for name in config['SOURCE']
    if name != config['TARGET']
)
target_dataset = data_dir / config['TARGET']

ds_source, ds_target = load_multi_dataset(None, target_dataset, config)

In [None]:
# Size of the target dataset as reported by len().
len(ds_target)

# Old

In [None]:
import os

# Total number of files anywhere below the albero dataset folder.
count = sum(len(files) for _, _, files in os.walk('datasets/dataset_albero/'))
print('file count:', count)

In [None]:
# NOTE(review): the divisor 4 is unexplained — presumably the number of
# files stored per sample; confirm.
count/4

In [None]:
from PIL import Image

In [None]:
# Load one PNG from the working directory as a NumPy array.
# NOTE: this reuses the name `i`, which earlier cells use as a loop variable.
i = np.array(Image.open('20191010_L4_S_1100.png'))

In [None]:
# Value range and mean of the loaded array.
i.min(), i.max(), i.mean()

In [None]:
# NOTE(review): the /2.0 scaling is unexplained — presumably the array holds
# values in 0..2; check against the min/max printed in the previous cell.
plt.imshow(i/2.0)

In [None]:
# Spot check: open one file from the zucchini masks folder (rendered as the cell output).
Image.open('Agrivol_datasets/dataset_zucchini/zucchini_dataset1/masks/Image100/Segmentation0001.png')

In [None]:
# Spot check: open one file from the lattuga images folder (rendered as the cell output).
Image.open('Agrivol_datasets/dataset_lattuga/dataset_lattuga_4/images/Image100/Image0001.png')

In [None]:
# Spot check: open one file from the pero images folder (rendered as the cell output).
Image.open('Agrivol_datasets/dataset_pero/dataset_pero_4/images/Image100/Image0001.png')

In [None]:
# Spot check: open one file from the zucchini images folder (rendered as the cell output).
Image.open('Agrivol_datasets/dataset_zucchini/zucchini_dataset4/images/Image100/Image0001.png')