In [1]:
import glob
import geopandas as gpd
import os
import sys
import rasterio as rio
from rasterio.features import rasterize, geometry_window
import rasterio.windows
import pandas as pd
import numpy as np
from tqdm import tqdm
from joblib import load

from shapely.geometry import LineString, Polygon

sys.path.append('/home/sushen/marine_debris_semester_project')
# from model.random_forest.random_forest import get_random_forest
from data.utils_file import pad

from harmonization_scripts.feature_extraction import calculate_indices, calculate_texture

In [6]:
# Folder contents paths
list_tif = glob.glob("/data/sushen/marinedebris/floatingobjects/scenes/*.tif")
list_shp = glob.glob("/data/sushen/marinedebris/floatingobjects/shapefiles/*.shp")
output_path = '/data/sushen/marinedebris/project/floObj_shapefiles'

# Remove l2a files
list_tif = [ x for x in list_tif if "l2a" not in x ]

print(len(list_tif))
print(len(list_shp))

26
26


In [7]:
image_index = 6

tif_file_path = list_tif[image_index]
shp_file_path = list_shp[image_index]

# output_path = '/data/sushen/marinedebris/project/shapefiles_floObj'
shp_filename = os.path.basename(shp_file_path)
# output_shp_file_path = os.path.join(output_path, shp_filename)
 
print(shp_filename)
 

biscay_20180419.shp


In [8]:
# Rasterio Image opening
with rio.open(tif_file_path, "r") as src:
    crs = src.crs
    transform = src.transform

# Open geopandas dataframe and use corresponding CRS
gdf_lines = gpd.read_file(shp_file_path)
gdf_lines = gdf_lines.to_crs(crs)
len(gdf_lines)

879

In [9]:
# From the geometry object, create a window so that we read the scene only in that window

imagesize = 16*10
row_index = 0
row = gdf_lines.iloc[row_index]

minx, miny, maxx, maxy = row.geometry.centroid.buffer(imagesize // 2).bounds
window = rasterio.windows.from_bounds(minx, miny, maxx, maxy, transform = transform)
width = int(window.width)
height = int(window.height)
print(minx, miny, maxx, maxy)

# Open the scene using the window
with rio.open(tif_file_path, "r") as src_crop:
        transform_crop = src_crop.window_transform(window)
        image_crop = src_crop.read(window=window)
# print(image_crop)
      

603033.6838130676 4822395.829196161 603193.6838130676 4822555.829196161


In [7]:
# Calculate indices & textures
indices = calculate_indices(image_crop)
textures = calculate_texture(image_crop)

# Remove unused bands and reorder axis
image_crop = np.delete(image_crop, [9, 10], axis = 0)

image_crop = np.moveaxis(image_crop, (0, 1, 2), (2, 0, 1))
indices = np.moveaxis(indices, (0, 1, 2), (2, 0, 1))
textures = np.moveaxis(textures, (0, 1, 2), (2, 0, 1))

# Stacking features and reshaping into a 2d array of shape [#pixel, #bands]
features = np.dstack((image_crop, indices, textures))
sz1 = features.shape[0]
sz2 = features.shape[1]

features = np.reshape(features, (sz1*sz2, -1))  

IndexError: index 13 is out of bounds for axis 1 with size 13

In [None]:
# Create a 2d mask 
geometry_object = gdf_lines.iloc[[row_index]].geometry
mask_2d = rasterize(geometry_object, all_touched=True,
                transform=transform_crop, out_shape=(height, width))
print(mask_2d)

mask_2d = np.reshape(mask_2d, sz1*sz2)
bool_mask_2d = mask_2d > 0 
bool_mask_2d.shape

geometry_features = features[bool_mask_2d, :]
print(geometry_features.shape)

In [None]:
cl_path = '/data/sushen/marinedebris/project/rf_classifier.joblib'
rf_classifier = load(cl_path)

In [None]:
prediction = rf_classifier.predict(geometry_features)

In [None]:
print(prediction)

In [None]:
a = np.array([3, 0, 0, 0, 0, 1, 1, 4, 4, 4, 4, 4])
values, counts = np.unique(a, return_counts=True)
ind = np.argmax(counts)
# print(values)
# print(ind)
print(values[ind])  # prints the most frequent element

In [None]:
gdf_test = gpd.read_file('/data/sushen/marinedebris/project/floObj_shapefiles/kentpointfarm_20180710.shp')
gdf_test['marida_cla'].value_counts()