# Format Dataset

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

In [None]:
# Libraries
import os
import glob
import matplotlib.pyplot as plt
os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"
import cv2
import numpy as np
from tqdm.notebook import tqdm as tqdm
import tensorflow as tf
import random
from pathlib import Path

from utils.tools import *
from utils.preprocess import *
from utils.visualize import *
from utils.data import *

In [None]:
# Query the GPUs once; the list is empty on a CPU-only machine.
gpus = tf.config.list_physical_devices('GPU')
physical_devices = gpus  # both names are kept because the original cell defined both
print("Num GPUs:", physical_devices)

if gpus:
    # Select the working GPU: expose only the first device to TensorFlow and
    # enable memory growth on it.
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)
else:
    # Indexing gpus[0] would raise IndexError here, so skip the device setup.
    print("No GPU detected: running on CPU.")

In [None]:
# Root folder holding the sub-datasets to import.
ROOT = Path('../AgriSeg_Dataset/misc/')

# Every entry of ROOT except hidden ones and YAML files, as sorted Paths.
DATASETS = sorted(
    ROOT / entry
    for entry in os.listdir(ROOT)
    if not (entry.startswith('.') or entry.endswith('.yaml'))
)

In [None]:
# Paths used by the notebook, then the parsed YAML configuration (displayed).
config_path = 'utils/config.yaml'
model_dir, logs_dir = Path('bin'), Path('logs')
config = read_yaml(config_path)
config

In [None]:
DATASETS

In [None]:
# Work on the first sub-dataset.
D = DATASETS[0]

ROOT_I = D.joinpath('images')
ROOT_M = D.joinpath('masks')
# Create the two folders independently. With a shared try/except, an already
# existing images/ folder made the first mkdir raise and masks/ was never created.
ROOT_I.mkdir(exist_ok=True)
ROOT_M.mkdir(exist_ok=True)

# Sorted full paths (as str) of the entries whose name starts with 'image'.
LRM = sorted(str(ROOT_M.joinpath(f)) for f in os.listdir(ROOT_M) if f.startswith('image'))
LRI = sorted(str(ROOT_I.joinpath(f)) for f in os.listdir(ROOT_I) if f.startswith('image'))

In [None]:
len(LRI), len(LRM)

In [None]:
def format_subdataset(D):
    """Ensure ``D/images`` and ``D/masks`` exist and print their 'img*' entries.

    Args:
        D: sub-dataset folder (``pathlib.Path`` or ``str``).

    Prints two lists of full paths (as ``str``), masks first and then images,
    each sorted and restricted to entries whose name starts with ``'img'``.
    Nothing is moved: the ``move_files`` calls at the end are left commented out.
    """
    D = Path(D)
    ROOT_I = D.joinpath('images')
    ROOT_M = D.joinpath('masks')
    # Create each folder on its own: with a shared try/except an existing
    # images/ folder aborted the block before masks/ was created.
    ROOT_I.mkdir(exist_ok=True)
    ROOT_M.mkdir(exist_ok=True)

    # Join each entry to the folder it was listed from, so the printed paths
    # point at the actual files (same convention as the LRM/LRI cell above).
    LRM = sorted(str(ROOT_M.joinpath(f)) for f in os.listdir(ROOT_M) if f.startswith('img'))
    LRI = sorted(str(ROOT_I.joinpath(f)) for f in os.listdir(ROOT_I) if f.startswith('img'))

    print(LRM)
    print(LRI)
    #move_files(l=LRM, root=ROOT_M)
    #move_files(l=LRI, root=ROOT_I)

In [None]:
D = DATASETS[0]
# For every non-hidden, non-JSON entry of D: full path of the first item that
# os.listdir reports inside it (displayed as the cell output).
[
    str(D / f / os.listdir(D / f)[0])
    for f in os.listdir(D)
    if not (f.endswith('.json') or f.startswith('.'))
]

In [None]:
def move_files(l, root, verbose=True):
    """Move each path in ``l`` into its own numbered sub-folder of ``root``.

    The paths are processed in sorted order; the i-th one (1-based) is moved
    to ``root/Image{i}/<original file name>``.

    Args:
        l: iterable of source paths (``str`` or ``pathlib.Path``).
        root: destination folder (``str`` or ``pathlib.Path``); missing
            folders are created.
        verbose: when True (default) print the file name, the destination
            folder and the source/target pair for every move.

    Raises:
        OSError: if a source path cannot be moved (raised by ``os.replace``).
    """
    root = Path(root)
    for i, f in enumerate(sorted(l)):
        # basename handles the platform separator and accepts Path objects.
        fname = os.path.basename(f)
        if verbose:
            print(fname)
        destf = root.joinpath(f'Image{i+1}')
        # exist_ok keeps the call usable when the folder is already there.
        os.makedirs(destf, exist_ok=True)
        if verbose:
            print(destf)
        target = destf.joinpath(fname)
        os.replace(f, target)
        if verbose:
            print(f, target)

In [None]:
move_files(l=LRI, root=ROOT_I)

In [None]:
for d in DATASETS:
    format_subdataset(d)

In [None]:
### DANGER ###

# for i in LRM+LRI:
#     os.remove(i)

# Display some Images

In [None]:
import os 
from PIL import Image
from pathlib import Path

In [None]:
def display_samples(dataset='zucchini', i=0, n=5, s=0.25):
    """Display up to ``n`` down-scaled images of one sub-dataset.

    Args:
        dataset: dataset name; the data is read from
            ``Agrivol_datasets/dataset_{dataset}/``.
        i: index of the sub-dataset inside the sorted sub-dataset list.
        n: maximum number of images to display.
        s: scale factor applied to width and height before displaying.

    Sub-dataset folders are those starting with ``'z'`` for ``'zucchini'`` and
    with ``'d'`` for every other dataset name. One image is taken from each
    sample folder of ``images/``.
    """
    init = 'z' if dataset == 'zucchini' else 'd'

    ROOT = Path(f'Agrivol_datasets/dataset_{dataset}/')
    DATASETS = sorted(ROOT.joinpath(d) for d in os.listdir(ROOT) if d.startswith(init))

    ROOT_I = DATASETS[i].joinpath('images')

    # os.listdir order is arbitrary, so sort both levels to show the same
    # samples on every run; skip hidden entries, stray files and empty folders.
    LRI = []
    for folder in sorted(os.listdir(ROOT_I)):
        folder_path = ROOT_I.joinpath(folder)
        if folder.startswith('.') or not folder_path.is_dir():
            continue
        content = sorted(os.listdir(folder_path))
        if content:
            LRI.append(str(folder_path.joinpath(content[0])))

    # Slicing caps the loop at the number of available images (no IndexError).
    for path in LRI[:n]:
        # The context manager closes the file once the resized copy exists.
        with Image.open(path) as im:
            display(im.resize((int(im.width*s), int(im.height*s))))

In [None]:
display_samples('zucchini',i=0)

# Format Tree Dataset

In [None]:
import os 
from PIL import Image
from pathlib import Path

In [None]:
# Scale factor; only referenced by the commented-out preview code below.
s = 0.5

# Image and mask folders of the first "vite" sub-dataset.
ROOT_I, ROOT_M = (Path('datasets/dataset_vite/dataset_vite_1/images/'),
                  Path('datasets/dataset_vite/dataset_vite_1/masks/'))

# Full path (as str) of every entry, in os.listdir order (not sorted).
LRI = [str(ROOT_I / name) for name in os.listdir(ROOT_I)]
LRM = [str(ROOT_M / name) for name in os.listdir(ROOT_M)]
    
# for j in range(5):
#     im = Image.open(LRI[j])
#     #display(im.resize((int(im.width*s),int(im.height*s))))
#     im = np.array(im)
#     print(im.min(), im.max())
#     plt.imshow(im)
#     plt.show()

In [None]:
sorted(LRM)

In [None]:
# Move every image file of LRI into its own ROOT_I/Image{k}/ folder.
# Only regular files are considered, so re-running the cell (when LRI holds
# the already created ImageN folders or stale paths) is a no-op rather than an error.
image_files = [f for f in sorted(LRI) if os.path.isfile(f)]
for i, f in enumerate(image_files):
    fname = os.path.basename(f)  # portable replacement for split('/')
    destf = ROOT_I.joinpath(f'Image{i+1}')
    os.makedirs(destf, exist_ok=True)
    os.replace(f, destf.joinpath(fname))

In [None]:
ROOT_I

In [None]:
# Load the images under ROOT_I without labels and without shuffling:
# RGB, resized to 224x224 (bilinear), batches of 128.
img_ds = tf.keras.preprocessing.image_dataset_from_directory(
    ROOT_I,
    label_mode=None,
    class_names=None,
    color_mode="rgb",
    image_size=(224, 224),
    interpolation="bilinear",
    batch_size=128,
    shuffle=False,
    seed=None,
    follow_links=False,
)

# Load Datasets

In [None]:
ROOT_I = D.joinpath('images')
ROOT_M = D.joinpath('masks')

In [None]:
# Load the images under ROOT_I without labels and without shuffling:
# RGB, resized to 224x224 (bilinear), batches of 128.
img_ds = tf.keras.preprocessing.image_dataset_from_directory(
    ROOT_I,
    label_mode=None,
    class_names=None,
    color_mode="rgb",
    image_size=(224, 224),
    interpolation="bilinear",
    batch_size=128,
    shuffle=False,
    seed=None,
    follow_links=False,
)

In [None]:
# Load the masks under ROOT_M without labels and without shuffling:
# single-channel, resized to 224x224, batches of 128.
# NOTE(review): bilinear resizing can produce intermediate grey levels in a
# mask; the PyTorch dataset in this notebook uses NEAREST for masks - confirm
# that bilinear is intended here.
mask_ds = tf.keras.preprocessing.image_dataset_from_directory(
    ROOT_M,
    label_mode=None,
    class_names=None,
    color_mode="grayscale",
    image_size=(224, 224),
    interpolation="bilinear",
    batch_size=128,
    shuffle=False,
    seed=None,
    follow_links=False,
)

In [None]:
# Masks whose folder path contains 'albero' go through binarize_mask; all
# other masks go through normalize.
mask_ds = mask_ds.map(binarize_mask if 'albero' in str(ROOT_M) else normalize)

In [None]:
#img_ds = img_ds.map(lambda x: tf.keras.applications.imagenet_utils.preprocess_input(x, mode='torch'))

In [None]:
# Show the first four images of the first batch together with their value range.
for batch in img_ds:
    for sample in batch[:4]:
        print(np.min(sample), np.max(sample))
        plt.imshow(sample/255.0)
        plt.show()
    break

In [None]:
# Show the first three masks of the first batch together with their value range.
for batch in mask_ds:
    for sample in batch[:3]:
        print(np.min(sample), np.max(sample))
        plt.imshow(sample/255.0)
        plt.show()
    break

In [None]:
ds1 = tf.data.Dataset.zip((img_ds, mask_ds))

In [None]:
# For the first element of each batch, draw the mask over the image, once as
# loaded and once after random_resize_crop. Later cells reuse the x_ / y_
# values left over from the last iteration.
for i, (x, y) in enumerate(ds1):
    # Pair as loaded: image at 80% opacity, mask drawn on top at 20%.
    plt.imshow(x[0]/255.0, alpha=.8)
    plt.imshow(y[0]/255.0, alpha=.2)
    plt.show()
    
    # random_resize_crop comes from the project utils; the meaning of the
    # 0.7 argument is defined there - TODO confirm.
    x_, y_ = random_resize_crop(x[0], y[0], 0.7)
    print(x_.shape, y_.shape)
    
    # Same overlay for the cropped pair.
    plt.imshow(x_/255.0, alpha=.8)
    plt.imshow(y_/255.0, alpha=.2)
    plt.show()
    
    # Stops after the fifth batch (i = 0..4).
    if i > 3:
        break

In [None]:
np.mean(y_), np.std(y_)

In [None]:
plt.hist(y_)

In [None]:
y_ = y_/255.0

In [None]:
np.mean(y_), np.std(y_)

In [None]:
plt.hist(y_)

In [None]:
y_ = tf.math.round(y_)

In [None]:
np.mean(y_), np.std(y_)

In [None]:
plt.hist(y_)

# Annotations

In [None]:
import os
from matplotlib import image
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import re
import shutil
from pathlib import Path

In [None]:
root = "../AgriSeg_Dataset/misc/misc_1/"
annFile = root + "annotations.json"
masks = root + "masks/"
images = root + "images/"

# Create the folders the export loop actually writes to. The previous check
# created ./masks in the working directory, which is not where files are saved.
os.makedirs(masks, exist_ok=True)
os.makedirs(images, exist_ok=True)

# Folder that is searched recursively for the original .jpg images.
old = Path("../AgriSeg_Dataset/vineyard_real/vineyard_real_1/images/")

# NOTE(review): COCO is not imported in any visible cell; presumably it is
# pycocotools.coco.COCO made available elsewhere - confirm.
coco = COCO(annFile)

catIds = coco.getCatIds()
imgIds = coco.getImgIds()
annsIds = coco.getAnnIds()

In [None]:
annsIds

In [None]:
%matplotlib inline

# Build one binary mask per annotated image and copy the matching source image.

# Images whose id is at or above this value are skipped (same cut-off as before).
MAX_IMG_ID = 500

# Walk the source tree once instead of once per image.
old_files = list(old.rglob("*"))

# Iterate over image ids: loadImgs expects image ids, and the annotation ids
# that were passed before only match them by coincidence.
for img in coco.loadImgs(imgIds):
    if img['id'] >= MAX_IMG_ID:
        continue
    print(img)

    img_ann_ids = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(img_ann_ids)
    # Second-to-last token when splitting file_name on '/' and '.'.
    img_id = re.split(r'[/.]', img["file_name"])[-2]

    if not anns:
        continue

    # Union of all annotation masks: sum them, then clamp to {0, 1}.
    mask = np.zeros(coco.annToMask(anns[0]).shape)
    for ann in anns:
        mask += coco.annToMask(ann)
    mask[mask >= 1] = 1
    image.imsave(masks + img_id + ".png", mask, cmap='gray')

    # NOTE(review): this is a suffix match, so e.g. img_id "1" also matches
    # "11.jpg" - confirm that file names cannot collide this way.
    for src in old_files:
        if str(src).endswith(f"{img_id}.jpg"):
            print(src)
            shutil.copyfile(src, images + f"{img_id}.jpg")

In [None]:
len(annsIds)

# Dataset Check

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

In [None]:
# Libraries
import os
import glob
import matplotlib.pyplot as plt
os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"
import cv2
import numpy as np
from tqdm.notebook import tqdm as tqdm
import tensorflow as tf
import random
from pathlib import Path

from utils.tools import *
from utils.preprocess import *
from utils.visualize import *
from utils.data import *

In [None]:
# Query the GPUs once; the list is empty on a CPU-only machine.
gpus = tf.config.list_physical_devices('GPU')
physical_devices = gpus  # both names are kept because the original cell defined both
print("Num GPUs:", physical_devices)

if gpus:
    # Select the working GPU: expose only the first device to TensorFlow and
    # enable memory growth on it.
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)
else:
    # Indexing gpus[0] would raise IndexError here, so skip the device setup.
    print("No GPU detected: running on CPU.")

In [None]:
# Paths used by the notebook, then the parsed YAML configuration (displayed).
config_path = 'utils/config.yaml'
data_dir = Path('../AgriSeg_Dataset/')
model_dir, logs_dir = Path('bin'), Path('logs')
config = read_yaml(config_path)
config

In [None]:
config['SOURCE'] = ['pear']
config['TARGET'] = 'chard'

In [None]:
# Target folder, then the sorted source folders (every SOURCE name that is
# not the TARGET name).
target_dataset = data_dir / config['TARGET']
source_dataset = sorted(
    data_dir / name
    for name in config['SOURCE']
    if name != config['TARGET']
)

ds_source, ds_target = load_multi_dataset(source_dataset, target_dataset, config)

In [None]:
len(ds_source[0])

In [None]:
# Overlay the first element of each (x, y) pair yielded by the first source
# dataset and print the shapes of both.
# presumably x is an image batch and y the matching mask batch - TODO confirm
# against load_multi_dataset.
for i, (x, y) in enumerate(ds_source[0]):
    print(x.shape, y.shape)
    # x[0] at 60% opacity with y[0] drawn on top at 40%.
    plt.imshow(x[0], alpha=.6)
    plt.imshow(y[0], alpha=.4)
    plt.show()
    # Stops after the fifth pair (i = 0..4).
    if i > 3:
        break

# Old

In [None]:
import os

# Total number of files found anywhere below the dataset folder
# (0 when the folder does not exist, since os.walk then yields nothing).
count = sum(len(names) for _, _, names in os.walk('datasets/dataset_albero/'))
print('file count:', count)

In [None]:
count/4

In [None]:
from PIL import Image

In [None]:
i = np.array(Image.open('20191010_L4_S_1100.png'))

In [None]:
i.min(), i.max(), i.mean()

In [None]:
plt.imshow(i/2.0)

In [None]:
Image.open('Agrivol_datasets/dataset_zucchini/zucchini_dataset1/masks/Image100/Segmentation0001.png')

In [None]:
Image.open('Agrivol_datasets/dataset_lattuga/dataset_lattuga_4/images/Image100/Image0001.png')

In [None]:
Image.open('Agrivol_datasets/dataset_pero/dataset_pero_4/images/Image100/Image0001.png')

In [None]:
Image.open('Agrivol_datasets/dataset_zucchini/zucchini_dataset4/images/Image100/Image0001.png')

# PyTorch

In [None]:
import os
from pathlib import Path

import random
import torch
import torchvision.transforms as T
import PIL.Image as Image
import matplotlib.pyplot as plt

from utils.tools import read_yaml

In [None]:
# Paths used by the PyTorch section and the parsed YAML configuration.
config_path = 'utils/config.yaml'
data_dir, model_dir, logs_dir = Path('datasets'), Path('bin'), Path('logs')
config = read_yaml(config_path)

In [None]:
class SemanticSegmentationDataset(torch.utils.data.Dataset):
    """Image (semantic) segmentation dataset.

    Files are collected from ``root_dir/<sub-dataset>/images/<sample>/`` and
    ``root_dir/<sub-dataset>/masks/<sample>/``, one file per sample folder.
    ``__getitem__`` returns ``(image, mask)`` after the transform pipelines;
    the mask is 1.0 where the loaded mask is greater than 0 and 0.0 elsewhere.

    Args:
        root_dir: folder containing the sub-dataset folders.
        config: mapping read for ``IMG_SIZE``, ``RND_CROP``, ``RND_FLIP``,
            ``RND_JITTER_RNG`` and ``RND_GREY`` when ``augment`` is True.
        augment: whether to apply the random augmentations.
    """

    def __init__(self, root_dir, config, augment=False):
        self.root_dir = Path(root_dir)
        self.config = config
        self.augment = augment
        self.images, self.masks = [], []

        self.get_file_lists()
        self.get_transforms()

    @staticmethod
    def _first_files(folder):
        """Return the sorted absolute paths of the first file of each sample folder."""
        picked = []
        for sample_dir in folder.iterdir():
            # Stray files (e.g. .DS_Store) and hidden folders are not samples.
            if sample_dir.name.startswith('.') or not sample_dir.is_dir():
                continue
            # Sort and keep regular files only, so the choice is deterministic
            # and never a nested directory. An empty sample folder still
            # raises IndexError.
            files = sorted(p for p in sample_dir.glob('**/*') if p.is_file())
            picked.append(files[0].absolute())
        return sorted(picked)

    def get_file_lists(self):
        """Append the image and mask paths of every sub-dataset to the lists."""
        # Sorted so the sample order does not depend on the file system.
        for subdir in sorted(self.root_dir.iterdir()):
            if subdir.is_file() or subdir.name.startswith('.'):
                continue
            self.images += self._first_files(subdir.joinpath('images'))
            self.masks += self._first_files(subdir.joinpath('masks'))

    def get_transforms(self):
        """Build ``self.image_transforms`` and ``self.mask_transforms``."""
        cfg = self.config
        image_steps, mask_steps = [], []

        if self.augment:
            crop_scale = (cfg['RND_CROP'], 1.0)
            image_steps += [
                T.RandomResizedCrop(cfg['IMG_SIZE'],
                                    scale=crop_scale,
                                    interpolation=T.InterpolationMode.BILINEAR),
                T.RandomHorizontalFlip(cfg['RND_FLIP']),
                T.ColorJitter(brightness=cfg['RND_JITTER_RNG'],
                              contrast=cfg['RND_JITTER_RNG']),
                # Read from the instance config; this used to read the
                # notebook-level `config` global instead.
                T.RandomGrayscale(cfg['RND_GREY']),
            ]
            # Geometric steps only, listed in the same order as for the image.
            mask_steps += [
                T.RandomResizedCrop(cfg['IMG_SIZE'],
                                    scale=crop_scale,
                                    interpolation=T.InterpolationMode.NEAREST),
                T.RandomHorizontalFlip(cfg['RND_FLIP']),
            ]

        # Common tail: tensor conversion, then normalization for the image
        # (the standard ImageNet mean/std) and binarization for the mask.
        image_steps += [
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]),
        ]
        mask_steps += [
            T.ToTensor(),
            T.Lambda(lambda mask: torch.where(mask > 0, 1.0, 0.0)),
        ]

        self.image_transforms = T.Compose(image_steps)
        self.mask_transforms = T.Compose(mask_steps)

    def __len__(self):
        """Number of samples (length of the image list)."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return the transformed ``(image, mask)`` pair."""
        image = Image.open(self.images[idx]).convert('RGB')
        mask = Image.open(self.masks[idx]).convert('L')

        # One seed per sample, drawn from numpy's generator. Both pipelines
        # are re-seeded with it so that the random crop and flip are intended
        # to draw the same parameters for the image and its mask.
        self.seed = np.random.randint(2147483647)

        image = self.preprocess_image(image)
        mask = self.preprocess_mask(mask)
        return image, mask

    def preprocess_image(self, image):
        """Seed ``random`` and ``torch`` with ``self.seed`` and transform the image."""
        random.seed(self.seed)
        torch.manual_seed(self.seed)
        return self.image_transforms(image)

    def preprocess_mask(self, mask):
        """Seed ``random`` and ``torch`` with ``self.seed`` and transform the mask."""
        random.seed(self.seed)
        torch.manual_seed(self.seed)
        return self.mask_transforms(mask)

In [None]:
ds = SemanticSegmentationDataset('datasets/vineyard_pergola/', config, augment=True)
len(ds)

In [None]:
# Use at most 24 workers, but never more than the CPUs this machine reports.
dl = torch.utils.data.DataLoader(ds, batch_size=4, shuffle=True,
                                 num_workers=min(24, os.cpu_count() or 1))

In [None]:
# Inspect the first batch only: shapes, mask value range, and the first
# image/mask pair.
for img_batch, mask_batch in dl:
    print(img_batch.shape, mask_batch.shape)
    print(torch.min(mask_batch), torch.max(mask_batch))
    # ToTensor produces channel-first (C, H, W) tensors while imshow expects
    # (H, W, C). permute(1, 2, 0) keeps rows and columns in place, whereas
    # swapaxes(0, -1) gave (W, H, C), i.e. a transposed picture.
    plt.imshow(img_batch[0].permute(1, 2, 0))
    plt.show()
    plt.imshow(mask_batch[0].permute(1, 2, 0))
    plt.show()
    break

In [None]:
a = Path('datasets/chard/chard_1/images/Image1/')

In [None]:
list(a.iterdir())