## PROCESS SATELLITE DATA TO MAKE IT READY FOR THE ML MODEL

**The purpose** 
> Preprocess satellite data so that it is ready to be used as input for the ML model

<hr/> 

## 1-Import necessary packages

In [30]:
import numpy as np
import rasterio
from rasterio import plot
import warnings
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shutil
import os, glob, functools, fnmatch, requests,  io, shutil, tarfile, json
from tqdm import tqdm
from rasterio.merge import merge
from IPython.display import Image
warnings.filterwarnings('ignore')

<hr/> 

## 4-Slice RGB Image into small tiles

In [4]:
def get_tile_name_path(dst_dir:str, index:int):
    '''
    Build the file name and destination path of the tile numbered ``index``.

    The name is the index left-padded with zeros to five characters followed
    by "_.tif" (index 7 gives "00007_.tif"). Returns a ``(name, path)`` tuple
    where ``path`` is ``dst_dir`` joined with that name.
    '''
    padded_index = str(index).zfill(5)
    tile_name = padded_index + "_.tif"
    return tile_name, os.path.join(dst_dir, tile_name)

def get_tile_transform(parent_transform, pixel_x:int,pixel_y:int):
    '''
    Build the affine transform of a tile cut out of a parent raster.

    parent_transform: affine transform of the parent raster; only its six
        coefficients a, b, c, d, e, f are read.
    pixel_x: column offset of the tile's upper-left pixel in the parent.
    pixel_y: row offset of the tile's upper-left pixel in the parent.

    Returns a rasterio.Affine that keeps the parent's a, b, d, e coefficients
    and moves the offsets (c, f) to the tile's upper-left corner.
    '''
    # Map the pixel (col, row) through the full affine:
    #   x = a*col + b*row + c      y = d*col + e*row + f
    # b and d are zero for north-up rasters, but dropping them would misplace
    # the tiles of a rotated or sheared raster.
    crs_x = (parent_transform.c
             + pixel_x * parent_transform.a
             + pixel_y * parent_transform.b)
    crs_y = (parent_transform.f
             + pixel_x * parent_transform.d
             + pixel_y * parent_transform.e)
    tile_transform = rasterio.Affine(parent_transform.a, parent_transform.b, crs_x,
                                     parent_transform.d, parent_transform.e, crs_y)
    return tile_transform
    
def get_tile_profile(parent_tif:rasterio.io.DatasetReader, pixel_x:int, pixel_y:int):
    '''
    Assemble the base GeoTIFF profile of the tile whose upper-left pixel sits
    at (pixel_x, pixel_y) in the parent raster.

    The tile keeps the parent's CRS, uses the parent's nodata value (0 when the
    parent declares none) and gets its own transform from get_tile_transform.
    Returns a dict with the keys driver, crs, nodata and transform.
    '''
    crs = parent_tif.crs
    nodata = parent_tif.nodata
    if nodata is None:
        # the parent declares no nodata value: fall back to 0
        nodata = 0
    transform = get_tile_transform(parent_tif.transform, pixel_x, pixel_y)
    return {
        "driver": "GTiff",
        "crs": crs,
        "nodata": nodata,
        "transform": transform,
    }

def generate_tiles(tif:rasterio.io.DatasetReader, size:int, dst_dir:str):
    '''
    Cut a raster into square size x size GeoTIFF tiles written to dst_dir.

    Tiles are produced column by column (outer loop over x, inner loop over y)
    and numbered from 1 in that order. At most the first three bands are kept.
    Reads use boundless=True, so the part of an edge tile that falls outside
    the raster is filled with the profile's nodata value.

    tif: open raster to slice.
    size: tile edge length in pixels; must be positive.
    dst_dir: folder receiving the tiles; created when it does not exist.

    Raises ValueError when size is not positive.
    '''
    if size <= 0:
        raise ValueError("size must be a positive number of pixels, got {}".format(size))
    # The GeoTIFF writer does not create missing folders, so make sure the
    # destination exists before the first tile is written.
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    # Read only the bands that are kept (at most the first three) instead of
    # loading every band and discarding the rest afterwards.
    band_indexes = list(range(1, min(tif.count, 3) + 1))
    i = 0
    for x in tqdm(range(0, tif.width, size)):
        for y in range(0, tif.height, size):
            # creating the tile specific profile
            profile = get_tile_profile(tif, x, y)
            # The window is given as ((row_start, row_stop), (col_start, col_stop)),
            # which is why y (rows) comes before x (columns).
            tile_data = tif.read(indexes=band_indexes,
                                 window=((y, y + size), (x, x + size)),
                                 boundless=True, fill_value=profile['nodata'])
            i += 1
            _, dst_tile_path = get_tile_name_path(dst_dir, i)
            c, h, w = tile_data.shape
            profile.update(
                height=h,
                width=w,
                count=c,
                dtype=tile_data.dtype,
            )
            with rasterio.open(dst_tile_path, "w", **profile) as dst:
                dst.write(tile_data)

In [5]:
# Folder that receives the small tiles, and the source raster to slice
dst_image_dir = "./IMG_SLICE"
RGB_path = "./RGB/RASTER_17.tif"
img = rasterio.open(RGB_path)

In [6]:
# Tile edge length in pixels: 512 can be swapped for 256, 224, ...
generate_tiles(tif=img, size=512, dst_dir=dst_image_dir)

100%|███████████████████████████████████████████| 12/12 [00:26<00:00,  2.18s/it]


<hr/> 

## 5- Filtering Unnecessary Data

In [22]:
# File names found in the tile folder; sorted because os.listdir returns
# entries in arbitrary order and the listing should be the same on every run.
tile_paths = sorted(os.listdir(dst_image_dir))

In [23]:
#tile_paths

In [24]:
# Glob pattern that matches every .tif file inside the tile folder
search_criteria = "*.tif"
q = os.path.join(dst_image_dir, search_criteria)
print(q)

./IMG_SLICE/*.tif


In [25]:
# Get all of the tile paths matching the pattern. glob returns matches in
# arbitrary order, so sort them to give later cells the same order on every run.
quad_files = sorted(glob.glob(q))

In [26]:
# Show the tile paths collected by glob
quad_files

['./IMG_SLICE/00001_.tif',
 './IMG_SLICE/00002_.tif',
 './IMG_SLICE/00003_.tif',
 './IMG_SLICE/00004_.tif',
 './IMG_SLICE/00005_.tif',
 './IMG_SLICE/00006_.tif',
 './IMG_SLICE/00007_.tif',
 './IMG_SLICE/00008_.tif',
 './IMG_SLICE/00009_.tif',
 './IMG_SLICE/00010_.tif',
 './IMG_SLICE/00011_.tif',
 './IMG_SLICE/00012_.tif',
 './IMG_SLICE/00013_.tif',
 './IMG_SLICE/00014_.tif',
 './IMG_SLICE/00015_.tif',
 './IMG_SLICE/00016_.tif',
 './IMG_SLICE/00017_.tif',
 './IMG_SLICE/00018_.tif',
 './IMG_SLICE/00019_.tif',
 './IMG_SLICE/00021_.tif',
 './IMG_SLICE/00022_.tif',
 './IMG_SLICE/00023_.tif',
 './IMG_SLICE/00024_.tif',
 './IMG_SLICE/00025_.tif',
 './IMG_SLICE/00026_.tif',
 './IMG_SLICE/00027_.tif',
 './IMG_SLICE/00028_.tif',
 './IMG_SLICE/00029_.tif',
 './IMG_SLICE/00030_.tif',
 './IMG_SLICE/00031_.tif',
 './IMG_SLICE/00032_.tif',
 './IMG_SLICE/00033_.tif',
 './IMG_SLICE/00034_.tif',
 './IMG_SLICE/00035_.tif',
 './IMG_SLICE/00036_.tif',
 './IMG_SLICE/00037_.tif',
 './IMG_SLICE/00038_.tif',
 

**Conclusion** 
> The folder ```./rgb_sclice_valid``` contains valid tiles that can be used as input for the ML model.

In [27]:
# Open dataset handles of the tiles that will be merged into one mosaic
src_files_to_mosaic = []

In [28]:
# Start from an empty list on every run: close and drop the handles left over
# from a previous execution, so re-running this cell neither appends duplicates
# nor leaks open files.
for stale in src_files_to_mosaic:
    stale.close()
src_files_to_mosaic.clear()

# Open every tile; the handles stay open because they are read again later.
# `src` ends up bound to the last dataset opened.
for f in quad_files:
    src = rasterio.open(f)
    src_files_to_mosaic.append(src)

In [31]:
# Create the mosaic: merge() returns the merged pixel array (its axes 1 and 2
# are used as height and width below) and the transform of the merged raster.
mosaic, out_trans = merge(src_files_to_mosaic)

In [32]:
# Start from the metadata of a tile (`src` is the last dataset opened by the
# loop over quad_files) and override every entry that describes the merged
# raster itself, so the profile always matches the array that gets written.
out_meta = src.meta.copy()
out_meta.update({
    "driver": "GTiff",
    "count": mosaic.shape[0],   # number of bands in the merged array
    "dtype": mosaic.dtype,
    "height": mosaic.shape[1],
    "width": mosaic.shape[2],
    "transform": out_trans,
})

In [33]:
# Folder and file name of the merged mosaic (temporary output)
temp_folder = r"./temp"
temp_file = "RASTER_17_merge.tif"
out_mosaic = os.path.join(temp_folder, temp_file)
# The GeoTIFF writer does not create missing folders, so make sure it exists.
os.makedirs(temp_folder, exist_ok=True)
# Write the mosaic to a raster file
with rasterio.open(out_mosaic, "w", **out_meta) as dest:
    dest.write(mosaic)
# NOTE(review): the datasets in src_files_to_mosaic are still open at this
# point; close them once no later cell needs to read from them.