In [1]:
import sys
import os
import cv2
import numpy as np

import matplotlib.pyplot as plt

# Add the src directory to the path. TEMPORARY FIX
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../..")))

from src.models.data_management.data_loader import DataLoaderManager

In [13]:
# Locate the shoreline patch dataset relative to the notebook's working
# directory and load its train/validation/test file listings.
relative_dir = "../../data/patchify_shoreline/"
data_path = os.path.abspath(os.path.join(os.getcwd(), relative_dir))

manager = DataLoaderManager()
data_loader = manager.load_data(data_path)

# Number of entries in the training image list.
n_train_images = len(data_loader['train']['images'])
print(n_train_images)

12389


In [22]:
# Count how many pixels carry each label value across all masks of the
# currently loaded dataset (train, validation and test combined).
subsets = ["train", "validation", "test"]

# Maps mask pixel value -> total number of pixels with that value.
labels = {}

for subset in subsets:
    print(f"Subset: {subset}")
    masks = data_loader[subset]["masks"]

    # Only the masks are needed for label statistics, so the images
    # themselves are not decoded here.
    for mask_path in masks:
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail loudly so the totals are never skewed.
            raise OSError(f"Could not read mask: {mask_path}")

        unique, counts = np.unique(mask, return_counts=True)

        for u, c in zip(unique, counts):
            # np.unique yields NumPy scalars; store built-in ints so the keys
            # and totals are plain Python values.
            u, c = int(u), int(c)
            labels[u] = labels.get(u, 0) + c

# Order by label value so the printed dict does not depend on which mask
# happened to be read first.
labels = dict(sorted(labels.items()))
print(labels)

Subset: train
Subset: validation
Subset: test
{0: 1155193552, 1: 4269360}


In [26]:
# Summarise the binary masks: absolute pixel counts per class, the overall
# pixel count, and the share of pixels labelled as shoreline.
total_pixels = sum(labels.values())

# (caption, mask value) pairs: 0 is reported as non-shoreline, 1 as shoreline.
for caption, label_value in (
    ("Total non-shoreline pixels: ", 0),
    ("Total shoreline pixels: ", 1),
):
    print(caption, labels[label_value])
print("Total pixels: ", total_pixels)

shoreline_share = labels[1] / total_pixels * 100
print("Shoreline percentage: ", shoreline_share)

Total non-shoreline pixels:  1155193552
Total shoreline pixels:  4269360
Total pixels:  1159462912
Shoreline percentage:  0.3682187636890968


# 4 Classes

- 0 = Not classified
- 1 = Land
- 2 = Sea
- 3 = Shoreline

In [3]:
# Locate the 4-class patch dataset relative to the notebook's working
# directory and load its train/validation/test file listings.
relative_dir = "../../data/patchify_4_classes/"
data_path = os.path.abspath(os.path.join(os.getcwd(), relative_dir))

manager = DataLoaderManager()
data_loader = manager.load_data(data_path)

# Number of entries in the training image list.
n_train_images = len(data_loader['train']['images'])
print(n_train_images)

21352


In [5]:
# Count how many pixels carry each label value across all masks of the
# currently loaded dataset (train, validation and test combined).
subsets = ["train", "validation", "test"]

# Human-readable name for each mask pixel value.
LABELS = {0: 'Not classified', 1: 'Land', 2: 'Sea', 3: 'Shoreline'}

# Maps mask pixel value -> total number of pixels with that value.
labels = {}

for subset in subsets:
    print(f"Subset: {subset}")
    masks = data_loader[subset]["masks"]

    # Only the masks are needed for label statistics, so the images
    # themselves are not decoded here.
    for mask_path in masks:
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail loudly so the totals are never skewed.
            raise OSError(f"Could not read mask: {mask_path}")

        unique, counts = np.unique(mask, return_counts=True)

        for u, c in zip(unique, counts):
            # np.unique yields NumPy scalars; store built-in ints so the keys
            # and totals are plain Python values.
            u, c = int(u), int(c)
            labels[u] = labels.get(u, 0) + c

# Report in label-value order; a value missing from LABELS is shown as
# "Unknown (<value>)" rather than aborting with a KeyError.
for k, v in sorted(labels.items()):
    name = LABELS.get(k, f"Unknown ({k})")
    print(f"{name}: {v}")

Subset: train
Subset: validation
Subset: test
Not classified: 844268873
Land: 622764355
Sea: 527712804
Shoreline: 3053392


In [12]:
# Express each of the four classes as a share of all counted pixels.
total_pixels = sum(labels.values())
print("Total pixels: ", total_pixels)
print("")

not_classified_pct = labels[0] / total_pixels * 100
print(f"Not classified percentage: {not_classified_pct:.3f}%")

land_pct = labels[1] / total_pixels * 100
print(f"Land percentage: {land_pct:.3f}%")

sea_pct = labels[2] / total_pixels * 100
print(f"Sea percentage: {sea_pct:.3f}%")

shoreline_pct = labels[3] / total_pixels * 100
print(f"Shoreline percentage: {shoreline_pct:.3f}%")


Total pixels:  1997799424

Not classified percentage: 42.260%
Land percentage: 31.173%
Sea percentage: 26.415%
Shoreline percentage: 0.153%


---

## Patchify reflect

In [16]:
# Load the patchify_reflect dataset listings, then count how many pixels
# carry each label value across all of its masks.
data_path = os.path.abspath(os.path.join(os.getcwd(), "../../data/patchify_reflect/"))
data_loader = DataLoaderManager().load_data(data_path)
print(len(data_loader['train']['images']))

subsets = ["train", "validation", "test"]

# Human-readable name for each mask pixel value.
LABELS = {0: 'Not classified', 1: 'Land', 2: 'Sea', 3: 'Shoreline'}

# Maps mask pixel value -> total number of pixels with that value.
labels = {}

for subset in subsets:
    print(f"Subset: {subset}")
    masks = data_loader[subset]["masks"]

    # Only the masks are needed for label statistics, so the images
    # themselves are not decoded here.
    for mask_path in masks:
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail loudly so the totals are never skewed.
            raise OSError(f"Could not read mask: {mask_path}")

        unique, counts = np.unique(mask, return_counts=True)

        for u, c in zip(unique, counts):
            # np.unique yields NumPy scalars; store built-in ints so the keys
            # and totals are plain Python values.
            u, c = int(u), int(c)
            labels[u] = labels.get(u, 0) + c

# Report in label-value order; a value missing from LABELS is shown as
# "Unknown (<value>)" rather than aborting with a KeyError.
for k, v in sorted(labels.items()):
    name = LABELS.get(k, f"Unknown ({k})")
    print(f"{name}: {v}")

25263
Subset: train
Subset: validation
Subset: test
Not classified: 544889699
Land: 884160498
Sea: 931962292
Shoreline: 4902647


In [18]:
# Express each of the four classes as a share of all pixels counted in
# `labels`. A class with no pixels is reported as 0% via .get(..., 0) instead
# of raising a KeyError, so the full breakdown (including shoreline) is
# always printed.
total_pixels = sum(labels.values())
print("Total pixels: ", total_pixels)
print("")
print(f"Not classified percentage: {labels.get(0, 0) / total_pixels * 100:.3f}%")
print(f"Land percentage: {labels.get(1, 0) / total_pixels * 100:.3f}%")
print(f"Sea percentage: {labels.get(2, 0) / total_pixels * 100:.3f}%")
print(f"Shoreline percentage: {labels.get(3, 0) / total_pixels * 100:.3f}%")


Total pixels:  2365915136

Not classified percentage: 23.031%
Land percentage: 37.371%
Sea percentage: 39.391%
