In [1]:
%load_ext autoreload
%autoreload 2

In [40]:
import os
import pandas as pd
import numpy as np
import glob
from tqdm import tqdm

In [3]:
import sys
# Add the parent directory to the module search path. Presumably this is what
# lets the `config` and `src.utils` imports in the following cell resolve when
# the notebook runs from a sub-directory of the repo; confirm against the layout.
sys.path.append('..')

In [4]:
import config
from src.utils import get_sn6_df, get_sn6_not_processed, cleanup_sn6_data

In [5]:
# Build the table for the 'train' split with the project helper `get_sn6_df`.
train_dataset = get_sn6_df(split='train')

In [6]:
# Show the `image_id` value stored in the first (positional) row.
train_dataset.iloc[0]['image_id']

'20190822070610_20190822070846_tile_3721'

In [7]:
# Build the table for the 'test' split with the same helper used for training.
test_dataset = get_sn6_df(split='test')

In [8]:
# Show the `image_path` value stored in the first (positional) row of the test table.
test_dataset.iloc[0]['image_path']

'/Users/akerke/Documents/stacked-unets/dataset/data-spacenet6/test_public/AOI_11_Rotterdam/SAR-Intensity/SN6_Test_Public_AOI_11_Rotterdam_SAR-Intensity_20190823073047_20190823073350_tile_7996.tif'

In [9]:
# Load the summary CSV named in `config`, collect the distinct values of its
# `ImageId` column, and display how many there are.
summary_df = pd.read_csv(config.sn6_summary_datapath)
image_ids = summary_df["ImageId"].unique()
len(image_ids)

3401

In [10]:
# Ask the project helper which of `image_ids` count as "not processed" with
# respect to `config.mask_train_dir`; the exact criterion lives in `src.utils`.
not_processed = get_sn6_not_processed(mask_train_dir=config.mask_train_dir, image_ids=image_ids)

In [11]:
# Number of entries the helper reported as not processed.
len(not_processed)

248

In [12]:
# (rows, columns) of the training table before the cleanup cell runs.
train_dataset.shape

(3401, 3)

In [13]:
# Pass the training table and the `not_processed` list through the project's
# cleanup helper, then display the resulting shape.
# NOTE(review): this rebinds `train_dataset` from itself, so the uncleaned table
# is no longer reachable and re-running the cell feeds the already-cleaned
# table back into `cleanup_sn6_data`. Whether that is harmless depends on the
# helper; consider binding the result to a new name.
train_dataset = cleanup_sn6_data(train_dataset, not_processed)
train_dataset.shape

(3153, 3)

In [14]:
# Display the cleaned training table (pandas renders a truncated preview).
train_dataset

Unnamed: 0,image_id,image_path,mask_path
0,20190822070610_20190822070846_tile_3721,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...
1,20190823065938_20190823070236_tile_2141,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...
2,20190823154802_20190823155103_tile_9773,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...
3,20190804122434_20190804122704_tile_6458,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...
4,20190823094036_20190823094408_tile_10244,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...
...,...,...,...
3396,20190823141628_20190823141922_tile_8731,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...
3397,20190823141628_20190823141922_tile_8739,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...
3398,20190822155902_20190822160145_tile_892,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...
3399,20190822134328_20190822134645_tile_7053,/Users/akerke/Documents/stacked-unets/dataset/...,/Users/akerke/Documents/stacked-unets/dataset/...


In [21]:
import cv2

def is_grayscale_image(image_path):
    """Report whether the image file at ``image_path`` decodes to a single channel.

    The file is read with ``cv2.IMREAD_UNCHANGED`` so OpenCV keeps the stored
    channel layout. With the default flag (``cv2.IMREAD_COLOR``) every image is
    converted to 3-channel BGR, which would make this check return ``False``
    for every readable file.

    Args:
        image_path: Path of the image file to read.

    Returns:
        bool: ``True`` for a 2-D array or a 3-D array whose last axis has
        length 1, ``False`` otherwise.

    Raises:
        OSError: If ``cv2.imread`` returns ``None`` (missing, unreadable or
            unsupported file).
    """
    image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"cv2.imread could not read an image from {image_path!r}")
    return image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1)

In [16]:
# Show the first `image_id` of the cleaned training table (positional lookup).
train_dataset.image_id.iloc[0]

'20190822070610_20190822070846_tile_3721'

In [18]:
# Build the path of the SAR-Intensity tile for positional row 0 of
# `train_dataset`. `mode` and `image_id` stay as globals because later cells
# read them.
index, mode = 0, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'

In [19]:
# Display the path that was just assembled.
image_path

'/Users/akerke/Documents/stacked-unets/dataset/data-spacenet6/train/AOI_11_Rotterdam/SAR-Intensity/SN6_Train_AOI_11_Rotterdam_SAR-Intensity_20190822070610_20190822070846_tile_3721.tif'

In [26]:
import rasterio as rs

def is_grayscale_image(image_path):
    """Tell whether the raster at ``image_path`` has exactly one band.

    Args:
        image_path: Path handed to ``rs.open``.

    Returns:
        bool: ``True`` when the opened dataset reports ``count == 1``.
    """
    with rs.open(image_path) as dataset:
        band_count = dataset.count
        return band_count == 1

# Rebuild the tile path from the `mode` and `image_id` globals set by the
# earlier path-construction cell; that cell must have run before this one.
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'

In [27]:
# NOTE(review): this notebook defines `is_grayscale_image` twice (an OpenCV
# version and a rasterio version); the call below uses whichever definition
# was executed most recently in the kernel.
print("Is the image grayscale?", is_grayscale_image(image_path))

Is the image grayscale? False


In [31]:
import rasterio

def inspect_image(image_path):
    """Print basic metadata of the raster at ``image_path``.

    Four lines are written to stdout: the dataset's ``shape``, ``count``
    (labelled as the number of channels), ``crs`` and ``transform``.

    Args:
        image_path: Path handed to ``rasterio.open``.

    Returns:
        None
    """
    with rasterio.open(image_path) as dataset:
        report = (
            ("Image shape:", dataset.shape),
            ("Number of channels:", dataset.count),
            ("Image CRS:", dataset.crs),
            ("Image transform:", dataset.transform),
        )
        for label, value in report:
            print(label, value)

In [32]:
# Print raster metadata for the SAR-Intensity tile at positional row 0 of `train_dataset`.
index, mode = 0, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'
inspect_image(image_path)

Image shape: (900, 900)
Number of channels: 4
Image CRS: EPSG:32631
Image transform: | 0.50, 0.00, 592736.98|
| 0.00,-0.50, 5747555.52|
| 0.00, 0.00, 1.00|


In [35]:
# Print raster metadata for the SAR-Intensity tile at positional row 3000 of `train_dataset`.
index, mode = 3000, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'
inspect_image(image_path)

Image shape: (900, 900)
Number of channels: 4
Image CRS: EPSG:32631
Image transform: | 0.50, 0.00, 592964.32|
| 0.00,-0.50, 5753176.32|
| 0.00, 0.00, 1.00|


In [34]:
# Print raster metadata for the SAR-Intensity tile at positional row 100 of `train_dataset`.
index, mode = 100, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'
inspect_image(image_path)

Image shape: (900, 900)
Number of channels: 4
Image CRS: EPSG:32631
Image transform: | 0.50, 0.00, 593466.16|
| 0.00,-0.50, 5745928.03|
| 0.00, 0.00, 1.00|


In [36]:
def get_image_dtype(image_path):
    """Return the data type reported for the first band of a raster.

    Args:
        image_path: Path handed to ``rs.open``.

    Returns:
        The first entry of the opened dataset's ``dtypes``.
    """
    with rs.open(image_path) as dataset:
        # Only band 0 is consulted; the remaining bands are assumed to share
        # its data type.
        return dataset.dtypes[0]

In [37]:
# Report the first-band data type of the SAR-Intensity tile at positional row 0.
index, mode = 0, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'
dtype = get_image_dtype(image_path)
print(f"Image data type: {dtype}")

Image data type: float32


In [38]:
# Report the first-band data type of the SAR-Intensity tile at positional row 300.
index, mode = 300, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'
dtype = get_image_dtype(image_path)
print(f"Image data type: {dtype}")

Image data type: float32


In [43]:
def check_image_range(image_path):
    """Return the smallest and largest pixel value found in a raster.

    Everything returned by the dataset's ``read()`` is loaded and the extremes
    are taken over that whole array.

    Args:
        image_path: Path handed to ``rs.open``.

    Returns:
        tuple: ``(min_value, max_value)`` of the array returned by ``read()``.
    """
    with rs.open(image_path) as dataset:
        pixels = dataset.read()
    # The array is already in memory, so the reduction can run after the
    # dataset has been closed.
    return pixels.min(), pixels.max()

# Collect every `.tif` path under the training SAR-Intensity directory.
# NOTE(review): `all_files` is not referenced by any cell visible in this
# notebook; the range checks below look tiles up by row index instead.
all_files = glob.glob(os.path.join(config.train_dir, 'SAR-Intensity', '*.tif'))

In [44]:
# Report the pixel value range of the SAR-Intensity tile at positional row 0.
index, mode = 0, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'
min_value, max_value = check_image_range(image_path)
print(f"Image pixel value range: {min_value} - {max_value}")

Image pixel value range: 0.0 - 88.83648681640625


In [45]:
# Report the pixel value range of the SAR-Intensity tile at positional row 3000.
index, mode = 3000, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'
min_value, max_value = check_image_range(image_path)
print(f"Image pixel value range: {min_value} - {max_value}")

Image pixel value range: 0.0 - 65.65650177001953


In [46]:
# Report the pixel value range of the SAR-Intensity tile at positional row 100.
index, mode = 100, "SAR-Intensity"
image_id = train_dataset["image_id"].iloc[index]
image_path = f'{config.train_dir}/{mode}/SN6_Train_AOI_11_Rotterdam_{mode}_{image_id}.tif'
min_value, max_value = check_image_range(image_path)
print(f"Image pixel value range: {min_value} - {max_value}")

Image pixel value range: 0.0 - 89.99523162841797
