## Data Processing

This part is the first step for our model. It is really easy to implement, but you should be careful about your paths!

Import the required modules!

In [None]:
import geopandas as gpd
import numpy as np
import os
import rasterio
import rasterio.mask

from osgeo import gdal
from PIL import Image
from rasterio.features import rasterize
from shapely.geometry import Polygon
from shapely.ops import cascaded_union

### Split Image by pixel size

This part splits our original image into tiles of size (256, 256).  
As mentioned above, you should be careful about your paths!  

1. `in_path`: the path of the folder containing the original image, which can be downloaded from Google Drive. We've organized our images, so just download the entire folder.
2. `input_filename`: this parameter selects the region and date of the image to split.
3. `out_path`: the path where the tiles are saved after you split the original image.
4. `output_filename`: this parameter has to match `input_filename`. Please don't change its form.

In [None]:
# Source image to split: file name and the directory that contains it
input_filename = '0619_3.tif'
in_path = 'G:/0619/'

# Destination of the tiles produced by the split.
# The output_filename prefix has to match input_filename (see the example values).
output_filename = '0619_3_'
out_path = '.../data/0619/'

After setting the paths, just run the cell below.  
Once one .tif file has been split, you should update the paths for the next file.  
For example, to split the image named '0603_1.tif', edit the parameters above and re-run.  

In [None]:
# Size of each tile (pixels) and the overlap between neighbouring tiles.
# Tiles are cut every (tile_size - overlap) pixels in each direction.
tile_size_x = 256
tile_size_y = 256
overlap = 128

src_file = os.path.join(in_path, input_filename)
ds = gdal.Open(src_file)
if ds is None:
    # Without gdal.UseExceptions(), gdal.Open signals failure by returning None.
    raise FileNotFoundError(f"GDAL could not open {src_file}")
band = ds.GetRasterBand(1)
xsize = band.XSize
ysize = band.YSize

# The tiles are written into out_path, so make sure it exists.
os.makedirs(out_path, exist_ok=True)

for i in range(0, xsize, tile_size_x - overlap):
    for j in range(0, ysize, tile_size_y - overlap):
        width = min(tile_size_x, xsize - i)  # Adjust for remaining space in x direction
        height = min(tile_size_y, ysize - j)  # Adjust for remaining space in y direction
        tile_file = os.path.join(out_path, output_filename) + f"{i}_{j}.tif"
        # Call GDAL in-process instead of building a shell command: no quoting
        # problems with paths, and the source image is opened only once.
        tile_ds = gdal.Translate(
            tile_file,
            ds,
            format="GTiff",
            srcWin=[i, j, width, height],
        )
        if tile_ds is None:
            print(f"Failed to write tile: {tile_file}")
        # Dropping the reference closes the tile and flushes it to disk.
        tile_ds = None

The next step is eliminating the images that don't match the specified size (256, 256).  
This happens when tiles are cut at the edges of the image.

### Only 256*256

After you have finished the split, all your images are saved in "/split/Date (e.g. 0603)",  
so the parameter 'folder_path' is the path where you saved your split images.

In [None]:
def is_image_256x256(file_path):
    """Return True if the image at *file_path* is exactly 256x256 pixels.

    Args:
        file_path: Path of the image file to inspect.

    Returns:
        True when the file can be opened as an image and its size is
        (256, 256); False when the size differs or the file cannot be
        opened or identified as an image.
    """
    try:
        with Image.open(file_path) as img:
            return img.size == (256, 256)
    except (OSError, ValueError):
        # Missing, unreadable or unidentifiable files count as "not 256x256".
        # Anything else (KeyboardInterrupt, programming errors) propagates, so
        # an interrupted run is never mistaken for a wrongly sized image.
        return False

def delete_non_256x256_tiff_files(folder_path):
    """Delete every TIFF file in *folder_path* that is not a 256x256 image.

    Only direct entries of the folder whose name ends in ".tif" or ".tiff"
    (case-insensitive) are considered. Each removed file is reported on
    standard output.

    Args:
        folder_path: Directory containing the split tiles.
    """
    tiff_suffixes = (".tiff", ".tif")
    for entry in os.listdir(folder_path):
        if not entry.lower().endswith(tiff_suffixes):
            continue
        candidate = os.path.join(folder_path, entry)
        if is_image_256x256(candidate):
            continue
        os.remove(candidate)
        print(f"Deleted: {entry}")

# Example folder_path: the directory that holds the split tiles.
# Every .tif/.tiff file in it that is not 256x256 gets deleted.
folder_path = ".../data/0619"
delete_non_256x256_tiff_files(folder_path)

### Make Mask(mask = 0)

This process is the same as above. Just be careful about your paths!

In [None]:
def poly_from_utm(polygon, transform):
    """Convert a polygon from map coordinates to image (pixel) coordinates.

    Args:
        polygon: A shapely polygon, or an iterable of polygons whose union
            is a single polygon, in the same coordinate system as the raster.
        transform: Affine transform of the raster; its inverse is applied to
            every vertex.

    Returns:
        A shapely Polygon in image coordinates. Interior rings (holes) of
        the input are preserved.
    """
    # Local import: unary_union supersedes the deprecated cascaded_union and
    # accepts the same inputs (a single geometry or a sequence of them).
    from shapely.ops import unary_union

    poly = unary_union(polygon)
    to_pixel = ~transform  # invert once instead of once per vertex

    def ring_to_pixels(ring):
        # Only x and y are used; a z value, if present, is ignored.
        return [to_pixel * (x, y) for x, y, *_ in ring.coords]

    shell = ring_to_pixels(poly.exterior)
    # Keep the holes so that e.g. islands are not filled in by the mask.
    holes = [ring_to_pixels(ring) for ring in poly.interiors]
    return Polygon(shell, holes)

# raster_folder is the folder that holds the split images.
# This is an example path; please keep this form!
raster_folder = ".../data/0619"

# output_folder is the folder where the mask images are saved.
output_folder = ".../data/0619_mask"
os.makedirs(output_folder, exist_ok=True)

# You can get this shapefile from our Google Drive.
# We organize the names like this.
shape_path = ".../0619_lake.shp"  # Mask shapefile
# The shapefile is the same for every tile, so read it once, not once per tile.
train_df = gpd.read_file(shape_path)


def _iter_polygons(geometry):
    """Yield the Polygon parts of *geometry*; other geometry types are skipped."""
    if geometry is None or geometry.is_empty:
        return
    if geometry.geom_type == 'Polygon':
        yield geometry
    elif hasattr(geometry, 'geoms'):
        # Multi-part geometries must be walked through .geoms; iterating the
        # geometry object itself no longer works in Shapely 2.x.
        for part in geometry.geoms:
            yield from _iter_polygons(part)


# Flatten the shapefile into single polygons once; only the conversion to
# pixel coordinates depends on the individual tile.
lake_polygons = [
    part
    for geometry in train_df['geometry']
    for part in _iter_polygons(geometry)
]

for raster_filename in os.listdir(raster_folder):
    if not raster_filename.endswith('.tif'):
        continue
    raster_path = os.path.join(raster_folder, raster_filename)

    # Only the metadata is needed; capture it while the dataset is open.
    with rasterio.open(raster_path, "r") as src:
        raster_meta = src.meta

    im_size = (raster_meta['height'], raster_meta['width'])
    poly_shp = [
        poly_from_utm(polygon, raster_meta['transform'])
        for polygon in lake_polygons
    ]

    if poly_shp:
        mask = rasterize(shapes=poly_shp, out_shape=im_size)
    else:
        # rasterize() cannot be called without shapes; nothing is masked then.
        mask = np.zeros(im_size)

    mask = mask.astype("uint16")

    inverted_mask = 1 - mask   ## make mask = 0

    save_filename = f"{os.path.splitext(raster_filename)[0]}.tif"
    save_path = os.path.join(output_folder, save_filename)

    # Same georeferencing as the tile, but a single band.
    bin_mask_meta = raster_meta.copy()
    bin_mask_meta.update({'count': 1})
    with rasterio.open(save_path, 'w', **bin_mask_meta) as dst:
        dst.write(inverted_mask * 255, 1)

print("Processing complete.")