In [1]:
import rasterio.windows
# from torch.utils.data import Dataset, ConcatDataset
import geopandas as gpd
import os
import sys
import rasterio as rio
import pandas as pd
from rasterio.features import rasterize
import numpy as np
from tqdm import tqdm
import time

from os.path import dirname as up
sys.path.append('/home/sushen/marine_debris_semester_project')
from data.utils_file import read_tif_image, pad
import model.random_forest.engineering_patches as eng

from feature_extraction import calculate_indices, calculate_texture

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the MARIDA scene mapping table and keep only the scenes that can be used.
df_map_scenes = pd.read_csv('/data/sushen/marinedebris/MARIDA/marida_mapping.csv')

# Remove rows whose 'mod' contains 'SR'. na=True treats a missing 'mod' as a
# match, so those rows are removed as well (the same outcome as comparing the
# contains() result with False). .copy() makes the filtered frame independent,
# so the column assignments below do not write into a slice of the CSV frame.
is_sr = df_map_scenes['mod'].str.contains('SR', na=True)
df_map_scenes = df_map_scenes[~is_sr].copy()

# The tile is the last '_'-separated token of the region name.
df_map_scenes['tile'] = df_map_scenes['region'].apply(lambda x: x.split('_')[-1])

# Keep only rows whose tif path mentions the tile named by the region.
df_map_scenes['tile_contained'] = [
    tile in tifpath
    for tile, tifpath in zip(df_map_scenes['tile'], df_map_scenes['tifpath'])
]
df_map_scenes = df_map_scenes[df_map_scenes['tile_contained']].reset_index(drop=True)

# NOTE(review): row label 15 (after the re-index above) is excluded by hand;
# the reason is not recorded in this notebook - TODO document it.
df_map_scenes = df_map_scenes.drop(index=15)

In [None]:

# Tile code used to pick the example scene for the single-scene cells below.
date_tile = '48MYU'

# Filter the mapping table once and reuse the result for every column,
# instead of repeating the same str.contains() scan three times.
tile_rows = df_map_scenes.loc[df_map_scenes['region'].str.contains(date_tile)]
if tile_rows.empty:
    # Fail with a clear message rather than an IndexError on `[0]` below.
    raise ValueError(f"no row of df_map_scenes has a region containing {date_tile!r}")

# For each column, show every match and keep the first one.
scene_names = tile_rows['tifpath'].values
print(scene_names)
scene_name = scene_names[0]

shp_names = tile_rows['s2name'].values
print(shp_names)
shp_name = shp_names[0]

region_names = tile_rows['region'].values
print(region_names)
region_name = region_names[0]

In [None]:
# i = 4
# scene_name = df_map_scenes.iloc[i]['tifpath']
# shp_name = df_map_scenes.iloc[i]['s2name']
# region_name = df_map_scenes.iloc[i]['region']

In [None]:
# df_map_scenes.reset_index()
# df_map_scenes.head()

In [None]:
# Root folders: the MARIDA data and the project's derived outputs.
hdf_path = '/data/sushen/marinedebris/project'
mask_conf_path = '/data/sushen/marinedebris/project/masks_conf/'
mask_id_path = '/data/sushen/marinedebris/project/masks_id/'
data_path = '/data/sushen/marinedebris/MARIDA'

# Both rasterized masks of a region share the same file name.
mask_file_name = region_name + ".tif"
mask_conf_file_path = os.path.join(mask_conf_path, mask_file_name)
mask_id_file_path = os.path.join(mask_id_path, mask_file_name)

# Inputs of the selected scene: its GeoTIFF and its annotation shapefile.
shp_file_path = os.path.join(data_path, 'shapefiles', shp_name)
tif_file_path = os.path.join(data_path, 'scenes', scene_name)

In [None]:
# Read the selected scene's annotation shapefile into a GeoDataFrame.
gdf = gpd.read_file(shp_file_path)
# Last expression of the cell: display the first rows as a quick check.
gdf.head()

In [None]:
# Open the scene GeoTIFF only long enough to copy its georeferencing metadata;
# no pixel data is read in this cell.
with rio.open(tif_file_path) as src:
    print(type(src))
    print(src.meta["count"])
    profile = src.profile
    transform = src.transform
    height, width = src.height, src.width
    crs = src.crs

# Reproject the annotations into the scene's coordinate reference system.
gdf = gdf.to_crs(crs)
# gdf.head()

In [None]:
def _rasterize_to_tif(geometries, values, out_path, scene_profile, scene_transform, out_shape):
    """Burn one value per geometry into a single-band uint8 GeoTIFF.

    The file is only created when ``out_path`` does not exist yet, so an
    existing mask is never overwritten.

    Parameters:
        geometries: geometries to burn (here the GeoDataFrame's geometry column).
        values: one burn value per geometry, in the same order.
        out_path: path of the GeoTIFF to create.
        scene_profile: rasterio profile of the source scene; it is copied,
            not modified, so it keeps describing the scene afterwards.
        scene_transform: affine transform of the source scene.
        out_shape: ``(height, width)`` of the raster to produce.
    """
    if os.path.exists(out_path):
        return

    mask = rasterize(zip(geometries, values), all_touched=True,
                     transform=scene_transform, out_shape=out_shape)

    # Same georeferencing as the scene, but a single uint8 band.
    mask_profile = dict(scene_profile, count=1, dtype="uint8")

    # Opening a file for writing fails when its folder is missing.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    print(f"writing mask to {out_path}")
    with rio.open(out_path, "w", **mask_profile) as dst:
        dst.write(mask[None])  # add the leading band axis expected by write()


# Rasterize geometry of shp into a mask with labels
_rasterize_to_tif(gdf.geometry, gdf.id, mask_id_file_path,
                  profile, transform, (height, width))

# Rasterize geometry of shp into a mask with conf levels
_rasterize_to_tif(gdf.geometry, gdf.conf, mask_conf_file_path,
                  profile, transform, (height, width))

In [None]:
# print(type(mask))
# print(mask.shape)
# print(np.nonzero(mask))
# print(mask[np.nonzero(mask)])

In [None]:
imagesize = 16*10 # 16 pixels around centroid, 10m per pixel
patch_px = imagesize // 10  # patch side length in pixels

# Build a square window centred on the annotation's centroid.
row = gdf.iloc[17] #17 problematic
minx, miny, maxx, maxy = row.geometry.centroid.buffer(imagesize // 2).bounds
window = rasterio.windows.from_bounds(minx, miny, maxx, maxy, transform=transform)

# Read the scene patch; an empty array means nothing could be read for the window.
image, _ = read_tif_image(tif_file_path, window)
image = image.astype("float")
print(image)
print(image.size)
if image.size == 0:
    print('true')

# Read the same window from both rasterized masks (first band only).
with rasterio.open(mask_id_file_path, "r") as src:
    mask_id = src.read(window=window)[0]
    print(mask_id)
    print(mask_id.size)

with rasterio.open(mask_conf_file_path, "r") as src:
    mask_conf = src.read(window=window)[0]
    print(mask_conf)
    print(mask_conf.size)

# Pad both masks against the same *unpadded* image. Feeding the image returned
# by the first pad() call into the second call would hand pad() an image that
# already has the target size.
# NOTE(review): pad() lives in data.utils_file and is not visible here; if it
# derives the padding from the image it receives, the old call order left
# mask_conf unpadded for patches cut off at the scene border - confirm.
image_padded, mask_id = pad(image, mask_id, patch_px)
_, mask_conf = pad(image, mask_conf, patch_px)

# Drop the band at index 9 (the tenth band) along the leading axis.
image = np.delete(image_padded, 9, axis = 0)

In [None]:
# Shapes of the patch and of its two masks, in that order.
for patch_array in (image, mask_id, mask_conf):
    print(patch_array.shape)

# Confidence mask values, then the annotation record the patch was cut around.
for item in (mask_conf, row):
    print(item)

In [None]:
# Spectral indices computed from the patch.
indices = calculate_indices(image)
print(indices.shape)
# A bare expression in the middle of a cell is never displayed, so the NaN
# positions of the indices are printed explicitly.
print(np.argwhere(np.isnan(indices)))

# Texture features computed from the patch.
textures = calculate_texture(image)
print(textures.shape)
# Last expression of the cell: positions of NaN values in the textures.
np.argwhere(np.isnan(textures))

In [None]:
# Move the leading axis to the end so each array can be depth-stacked with the
# 2-D masks. The results get new names instead of overwriting the inputs, so
# re-running this cell does not permute the axes a second time.
image_hwc = np.moveaxis(image, 0, -1)
print(image_hwc.shape)
indices_hwc = np.moveaxis(indices, 0, -1)
print(indices_hwc.shape)
textures_hwc = np.moveaxis(textures, 0, -1)
print(textures_hwc.shape)

# One feature vector per pixel: class id, confidence, then all features.
features = np.dstack((mask_id, mask_conf, image_hwc, indices_hwc, textures_hwc))
print(features.shape)

# Flatten the two spatial axes into a single row axis.
n_rows, n_cols = features.shape[:2]
features = np.reshape(features, (n_rows * n_cols, -1))
print(features.shape)

# Column 0 holds the class-id mask; keep only pixels with a value above 0.
features = features[features[:, 0] > 0]
features.shape

In [None]:
# features_formated_for_df = [features[:, 0], features[:, 1], features[:, 2:]]
# Column names, grouped in the order the arrays were stacked into `features`:
# labels, spectral bands, spectral indices, texture features.
label_columns = ['Class', 'Conf']
band_columns = ["B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B11", "B12"]
index_columns = ["NDVI", "FAI", "FDI", "SI", "NDWI", "NRD", "NDMI", "BSI"]
texture_columns = ["con", "dis", "homo", "ener", "cor", "asm"]

columns = label_columns + band_columns + index_columns + texture_columns
df = pd.DataFrame(features, columns = columns)

In [None]:
# Display the first 30 rows of the feature table built from `features`.
df.head(30)

In [4]:
# Project output folders and the MARIDA data folder.
hdf_path = '/data/sushen/marinedebris/project'
mask_conf_path = '/data/sushen/marinedebris/project/masks_conf/'
mask_id_path = '/data/sushen/marinedebris/project/masks_id/'
data_path = '/data/sushen/marinedebris/MARIDA'

# HDF5 file, inside the project folder, used to store the feature table.
dataset_name = os.path.join(hdf_path, 'dataset.h5')

In [None]:

# mode='w' starts a fresh file on every run, so after this cell the store
# holds only the rows of `df` under the key 'train'.
# The context manager closes the store even if append() raises, so a failed
# write does not leave the HDF5 file open.
with pd.HDFStore(dataset_name, mode = 'w') as hdf:
    hdf.append('train', df, format='table', data_columns=True)

In [11]:
# Read the 'train' table back; the context manager closes the store even if
# select() raises.
with pd.HDFStore(dataset_name, mode = 'r') as hdf_ss:
    df_train = hdf_ss.select('train')
# df_train.columns
# Last expression of the cell: the distinct values of the 'Class' column.
df_train['Class'].unique()
# df_train.loc[df_train['Class'] == 'unknown']

array([ 1.,  4., 15.,  7., 14.,  5.,  6., 13., 12., 10.,  8., 11.,  9.,
        3.,  2.])

In [10]:
# This reads the dataset.h5 stored in the MARIDA folder, which is a different
# file from `dataset_name`, and rebinds `df_train` to its 'train' table.
# The context manager closes the store even if select() raises.
with pd.HDFStore('/data/sushen/marinedebris/MARIDA/dataset.h5', mode = 'r') as hdf_ss:
    df_train = hdf_ss.select('train')
df_train.head()

Unnamed: 0,nm440,nm490,nm560,nm665,nm705,nm740,nm783,nm842,nm865,nm1600,nm2200,Confidence,Class,XCoords,YCoords,Date,Tile,Image
0,0.102549,0.099589,0.083065,0.060977,0.054536,0.056019,0.058678,0.049876,0.056887,0.042395,0.032179,High,Wakes,709065.0,9340935.0,1-12-19,48MYU,0
1,0.102549,0.102169,0.085815,0.064754,0.054536,0.056019,0.058678,0.052938,0.056887,0.042395,0.032179,High,Wakes,709065.0,9340925.0,1-12-19,48MYU,0
2,0.101787,0.103341,0.085705,0.06832,0.054536,0.055916,0.059498,0.053244,0.057091,0.042195,0.032279,High,Wakes,709065.0,9340915.0,1-12-19,48MYU,0
3,0.101787,0.101699,0.085045,0.063495,0.054328,0.05148,0.052932,0.051815,0.053328,0.039091,0.029878,High,Wakes,709055.0,9340905.0,1-12-19,48MYU,0
4,0.101787,0.100058,0.087575,0.065488,0.054536,0.055916,0.059498,0.052223,0.057091,0.042195,0.032279,High,Wakes,709065.0,9340905.0,1-12-19,48MYU,0


In [None]:
# Reload the 'train' table from `dataset_name`; the context manager closes the
# store even if select() raises.
with pd.HDFStore(dataset_name, mode = 'r') as hdf_ss:
    df_train = hdf_ss.select('train')
    # df_train.drop_duplicates()
    # print(len(df_train))

In [None]:
# Number of rows in the table loaded into `df_train`.
len(df_train)

In [None]:
# Nested progress bars over 63 x 5 iterations that only sleep 0.5 s each
# (about 2.5 minutes in total); no data is read or produced here.
for i in tqdm(np.arange(63)):
    for j in tqdm(np.arange(5)):
        time.sleep(0.5)

In [None]:
# Shell escape: run nvidia-smi and show its output in the cell.
!nvidia-smi

In [None]:
# Root folders: the MARIDA data and the project's derived outputs.
hdf_path = '/data/sushen/marinedebris/project'
mask_conf_path = '/data/sushen/marinedebris/project/masks_conf/'
mask_id_path = '/data/sushen/marinedebris/project/masks_id/'
data_path = '/data/sushen/marinedebris/MARIDA'

# Visit every row of the mapping table; each pass rebinds the per-scene names,
# so after the loop they describe the last scene only.
for i in tqdm(np.arange(len(df_map_scenes))):
    scene_row = df_map_scenes.iloc[i]
    region_name = scene_row['region']
    shp_name = scene_row['s2name']
    scene_name = scene_row['tifpath']

    # Both mask files of a region share the same file name.
    mask_file_name = region_name + ".tif"
    mask_conf_file_path = os.path.join(mask_conf_path, mask_file_name)
    mask_id_file_path = os.path.join(mask_id_path, mask_file_name)
    shp_file_path = os.path.join(data_path, 'shapefiles', shp_name)
    tif_file_path = os.path.join(data_path, 'scenes', scene_name)

    # Copy the scene's georeferencing metadata; no pixel data is read.
    with rio.open(tif_file_path) as src:
        profile = src.profile
        transform = src.transform
        height, width = src.height, src.width
        crs = src.crs

    # Load the scene's annotation shapefile.
    gdf = gpd.read_file(shp_file_path)



In [None]:
# gdf.head()

In [None]:
# Display the first rows of the scene mapping table.
df_map_scenes.head()