# batch chip planet images for Lucas to pick which ones to label


In [7]:
import os
import numpy as np
import rasterio
from rasterio import windows
from tqdm import tqdm

def get_windows(dataset, window_size, stride):
    """Yield square read windows tiling ``dataset`` in row-major order.

    Only windows that lie completely inside the raster are produced, so a
    trailing strip narrower than ``window_size`` along the right or bottom
    edge is not covered.

    Args:
        dataset: Object exposing ``height`` and ``width`` attributes.
        window_size: Side length of every window.
        stride: Step between consecutive window origins, along both axes.

    Yields:
        ``windows.Window(col_off, row_off, window_size, window_size)``.
    """
    yield from (
        windows.Window(left, top, window_size, window_size)
        for top in range(0, dataset.height - window_size + 1, stride)
        for left in range(0, dataset.width - window_size + 1, stride)
    )

window_size = 1024
stride = 1024

# A chip is kept only when strictly less than this percentage of its values
# equal the source raster's no-data value.
max_nodata_percentage = 10

## structure of file Lucas sent
# +-- root_folder_events(folder)
# |   +-- different_events(folder)
# |   |   +-- different_regions_of_the_event(folder)
# |   |   |   +-- images(.tif)


def _visible_entries(folder):
    """Return the names inside ``folder`` that do not start with a dot."""
    return [f for f in os.listdir(folder) if not f.startswith('.')]


def _count_nodata(chip, nodata_value):
    """Return how many values of ``chip`` are no-data.

    ``nodata_value`` may be ``None`` (nothing is counted) or NaN, which
    never compares equal to itself and therefore needs ``np.isnan``.
    """
    if nodata_value is None:
        return 0
    if np.isnan(nodata_value):
        return np.count_nonzero(np.isnan(chip))
    return np.count_nonzero(chip == nodata_value)


root_folder = r"E:\chips\events"
# Chips are written to a sibling "<root>_chipped" tree mirroring the
# event/region layout. Building the path explicitly (instead of replacing
# the substring "events") keeps event or region names that happen to contain
# "events" intact.
output_root = root_folder.rstrip("\\/") + "_chipped"

event_list = _visible_entries(root_folder)
print(event_list)
for event in tqdm(event_list):
    event_folder = os.path.join(root_folder, event)
    if not os.path.isdir(event_folder):
        continue  # stray file next to the event folders

    for region in _visible_entries(event_folder):
        region_folder = os.path.join(event_folder, region)
        if not os.path.isdir(region_folder):
            continue
        chip_folder = os.path.join(output_root, event, region)

        for image in _visible_entries(region_folder):
            # Only TIFF images are chipped; sidecar files are skipped.
            if not image.lower().endswith(('.tif', '.tiff')):
                continue
            image_path = os.path.join(region_folder, image)
            print(image_path)
            # Strip only the final extension, independent of the OS path
            # separator, so dotted names stay distinct.
            img_name = os.path.splitext(image)[0]

            with rasterio.open(image_path) as dataset:
                # Retrieve the no-data value from the original dataset
                nodata_value = dataset.nodata

                for window in get_windows(dataset, window_size, stride):
                    chip = dataset.read(window=window)

                    # Calculate the percentage of no-data values in the chip
                    nodata_count = _count_nodata(chip, nodata_value)
                    total_count = chip.size
                    nodata_percentage = (nodata_count / total_count) * 100

                    # Skip chips that are mostly empty
                    if nodata_percentage >= max_nodata_percentage:
                        continue

                    # Grid position of the chip, used in its file name
                    row_idx, col_idx = window.row_off // stride, window.col_off // stride
                    transform = dataset.window_transform(window)
                    chip_name = f'{img_name}__{row_idx}-{col_idx}.tif'
                    # Created lazily so regions without any kept chip
                    # leave no empty folder behind
                    os.makedirs(chip_folder, exist_ok=True)
                    chip_path = os.path.join(chip_folder, chip_name)

                    with rasterio.open(
                        chip_path,
                        'w',
                        driver='GTiff',
                        height=window.height,
                        width=window.width,
                        count=dataset.count,
                        dtype=chip.dtype,
                        crs=dataset.crs,
                        transform=transform,
                        nodata=nodata_value  # Set the no-data value for the chip
                    ) as chip_file:
                        chip_file.write(chip)



# Convert TIFF to JPG as a false-colour composite (FCC: NIR, red, green)
When uploading to Labelbox, select "image" as the project type and define the ontology as "segmentation".
Also, the data must be uploaded in JPG format and then queued as a batch.

In [9]:
import rasterio
import numpy as np
# from skimage import  img_as_ubyte
# from skimage import exposure
# import imageio
from osgeo import gdal
from glob import glob
from tqdm import tqdm
import os

root_folder = '/Users/zhijiezhang/Current_Projects/RGV_Project/RGV_240603_1/'
output_folder = '/Users/zhijiezhang/Current_Projects/RGV_Project/RGV_240603_1_jpg4/'
tif_list = glob(root_folder+'*.tif')
# exist_ok avoids the separate existence check (and its race)
os.makedirs(output_folder, exist_ok=True)

# Fixed stretch range applied to every image so that brightness stays
# comparable between scenes; values above tmin + scale saturate at 255.
scale = 6000


def _to_uint8(band, offset, value_range):
    """Linearly map ``band`` from [offset, offset + value_range] to 0-255.

    Values outside that range are clipped before the cast; without the clip
    anything above 255 would overflow in the uint8 conversion.
    """
    return np.clip((band - offset) / value_range * 255.0, 0, 255).astype("uint8")


for tif in tqdm(tif_list):
    tif_file = tif
    # Swap only the extension; a plain str.replace('tif', 'jpg') would also
    # rewrite any "tif" occurring elsewhere in the path.
    stem = os.path.splitext(os.path.basename(tif_file))[0]
    jpeg_file = os.path.join(output_folder, stem + '.jpg')

    # Open the TIFF file
    with rasterio.open(tif_file) as src:
        # Bands 4, 3, 2 feed the output R, G, B channels (the notebook
        # heading describes this as an NIR/red/green composite).
        red = src.read(4).astype("float32")
        green = src.read(3).astype("float32")
        blue = src.read(2).astype("float32")

        # Global extremes across the three bands; tmin is the stretch offset
        tmax = np.max([np.max(red), np.max(green), np.max(blue)])
        tmin = np.min([np.min(red), np.min(green), np.min(blue)])
        print(tif_file, tmax, tmin, scale)

        # Convert the TIFF data to uint8 and scale it to 0-255
        red = _to_uint8(red, tmin, scale)
        green = _to_uint8(green, tmin, scale)
        blue = _to_uint8(blue, tmin, scale)

        # Write the JPEG file
        with rasterio.open(jpeg_file, 'w', driver='JPEG',
                           width=src.width, height=src.height,
                           count=3, dtype='uint8',
                           crs=src.crs, transform=src.transform) as dst:
            dst.write(red, 1)
            dst.write(green, 2)
            dst.write(blue, 3)

100%|██████████| 4/4 [00:00<00:00, 50.61it/s]

/Users/zhijiezhang/Current_Projects/RGV_Project/RGV_240603_1/20200730_La_Feria_South_East.tif 8153.0 1033.0 6000
/Users/zhijiezhang/Current_Projects/RGV_Project/RGV_240603_1/20190626_La_Paloma_South_Central.tif 8806.0 784.0 6000
/Users/zhijiezhang/Current_Projects/RGV_Project/RGV_240603_1/20190626_La_Paloma_Northeast_East.tif 9092.0 799.0 6000
/Users/zhijiezhang/Current_Projects/RGV_Project/RGV_240603_1/20200730_Hargill_Edcouch_Elsa_Southeast.tif 7592.0 926.0 6000





In [10]:
import rasterio
import numpy as np
from glob import glob
from tqdm import tqdm
import os

# Folder holding the source TIFFs and the folder receiving the JPEGs
root_folder = '/Users/zhijiezhang/Current_Projects/RGV_Project/RGV_240603_1/'
tif_list = glob(root_folder + '*.tif')
output_folder = '/Users/zhijiezhang/Current_Projects/RGV_Project/RGV_240603_1_jpg4/'

# exist_ok=True replaces the check-then-create sequence, which could fail
# if the folder appeared between the check and the creation.
os.makedirs(output_folder, exist_ok=True)

def min_max_stretch(channel):
    """Linearly stretch ``channel`` so its minimum maps to 0 and maximum to 255.

    Args:
        channel: Numeric array holding one image band.

    Returns:
        A ``uint8`` array of the same shape. A channel with no value range
        (every value identical) is returned as all zeros, because the
        stretch would otherwise divide by zero and cast NaN to ``uint8``.
    """
    min_val = np.min(channel)
    value_range = np.max(channel) - min_val
    # "not > 0" also covers a NaN range, which would poison the whole band
    if not value_range > 0:
        return np.zeros(np.shape(channel), dtype='uint8')
    stretched = ((channel - min_val) / value_range * 255.0).astype('uint8')
    return stretched

# Convert every TIFF into a 3-band JPEG, stretching each band independently.
for tif in tqdm(tif_list):
    tif_file = tif
    # Swap only the extension; str.replace('tif', 'jpg') would also rewrite
    # a "tif" occurring inside the file name itself.
    stem = os.path.splitext(os.path.basename(tif_file))[0]
    jpeg_file = os.path.join(output_folder, stem + '.jpg')

    # Open the TIFF file
    with rasterio.open(tif_file) as src:
        # Read the TIFF data and stretch it; bands 4, 3, 2 become the
        # output R, G, B channels.
        red = min_max_stretch(src.read(4).astype("float32"))
        green = min_max_stretch(src.read(3).astype("float32"))
        blue = min_max_stretch(src.read(2).astype("float32"))

        # Write the JPEG file
        with rasterio.open(jpeg_file, 'w', driver='JPEG',
                           width=src.width, height=src.height,
                           count=3, dtype='uint8',
                           crs=src.crs, transform=src.transform) as dst:
            dst.write(red, 1)
            dst.write(green, 2)
            dst.write(blue, 3)


100%|██████████| 4/4 [00:00<00:00, 46.76it/s]


# Download labels from Labelbox
Note: you need to export and download the NDJSON file from the Labelbox website first.

In [None]:
import os
import json
import requests
from labelbox import Client

# Labelbox API key: read from the LABELBOX_API_KEY environment variable when
# it is set, so the real key never has to be pasted into the notebook;
# otherwise the placeholder below is used.
LABELBOX_API_KEY = os.environ.get('LABELBOX_API_KEY', 'your_key_here')

# Labelbox project whose labels are read from the export
PROJECT_ID = 'clwzukrot023a07xzgn8o195c'

# Seconds to wait on each mask download before giving up
REQUEST_TIMEOUT = 60

# Initialize Labelbox client
client = Client(api_key=LABELBOX_API_KEY)

# Load the NDJSON file manually
file_path = r"E:\RGV_DATA\RGV_30_labels.ndjson"

# Read the NDJSON file line by line
with open(file_path, encoding="utf-8") as f:
    lines = f.readlines()

# Parse the lines into a list of dictionaries (blank lines are skipped)
labels = [json.loads(line) for line in lines if line.strip()]

# Make and go to data directory
json_name = r"E:\RGV_DATA\RGV_30_labels"
os.makedirs(json_name, exist_ok=True)
os.chdir(json_name)

root_dir = os.getcwd()

headers = {
    'Authorization': f'Bearer {LABELBOX_API_KEY}'
}

# Categories that get a sub-directory inside every image directory
categories = {"Water", "No_water"}
# Object names are normalised to lower case below, so compare against
# lower-cased category names.
known_categories = {category.lower() for category in categories}

for cur_label in labels:
    try:
        objects = cur_label['projects'][PROJECT_ID]['labels'][0]['annotations']['objects']
    except (KeyError, IndexError):
        continue  # Skip rows without this project or without any label

    external_id = cur_label["data_row"]["external_id"]
    # Remove the ".jpg" suffix only; str.rstrip(".jpg") strips any trailing
    # run of the characters ".", "j", "p", "g" and can eat part of the name.
    if external_id.endswith(".jpg"):
        dirname = external_id[:-len(".jpg")]
    else:
        dirname = external_id
    print("Scraping data for", dirname)

    # Absolute paths are used instead of os.chdir so that an error in the
    # middle of the loop cannot leave the process in the wrong directory.
    image_dir = os.path.join(root_dir, dirname)
    try:
        # Creating image directory
        os.makedirs(image_dir, exist_ok=False)
    except FileExistsError:
        print("Using existing {} directory".format(dirname))

    for category in categories:
        try:
            # Creating directory for each category
            os.makedirs(os.path.join(image_dir, category), exist_ok=False)
        except FileExistsError:
            print("{} directory already exists".format(category))

    # NOTE(review): every object of a data row is written to this single
    # path, so with several objects only the last mask survives. The layout
    # is kept because downstream steps may rely on it - confirm whether
    # per-category files are wanted.
    out_path = os.path.join(json_name, external_id)
    if len(objects) > 1:
        print(f"Warning: {len(objects)} objects share {out_path}; only the last mask is kept")

    for obj in objects:
        try:
            mask_url = obj['mask']['url']
        except KeyError:
            print("Skipping object without a mask URL in", dirname)
            continue

        # Report object names that match none of the expected categories
        outdir = obj["name"].lower().replace(" ", "_")
        if outdir not in known_categories:
            print(outdir)

        # Download the label with authentication
        try:
            response = requests.get(mask_url, headers=headers, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()  # Raise an error for bad status codes
            with open(out_path, 'wb') as out_file:
                out_file.write(response.content)
            print(f"Downloaded {mask_url} to {out_path}")
        except requests.exceptions.RequestException as e:
            print(f"Error downloading {mask_url}: {e}")
