## INSTALL PACKAGES

In [None]:
!pip install rasterio
!pip install tqdm
!pip install earthpy
!pip install bokeh
!pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
!pip install ipywidgets


## IMPORT LIBRARIES

In [32]:
import os
import rasterio
from tqdm import tqdm
import earthpy.plot as epp   
import matplotlib.pyplot as plt   
import numpy as np   
import glob
import googleapiclient.http
import ipywidgets as widgets
from IPython.display import display
from rasterio.merge import merge
from google.oauth2 import service_account
from googleapiclient.discovery import build
from google.colab import files
import ipywidgets as widgets
from IPython.display import display
import shutil

## Authenticate to Google Drive

In [4]:
# Location of the service-account key file inside the runtime
creds_file_path = '/content/credentials.json'

# OAuth scopes requested for the service account
drive_scopes = ['https://www.googleapis.com/auth/drive']

# Load the service-account credentials from the key file with those scopes
creds = service_account.Credentials.from_service_account_file(
    creds_file_path, scopes=drive_scopes
)

# Create the Drive v3 API client backed by the credentials above.
# NOTE(review): no cell visible here uses drive_service; the export step goes
# through the /content/gdrive path instead. Confirm this client is still needed.
drive_service = build('drive', 'v3', credentials=creds)


## FUNCTIONS 

In [5]:

def get_tile_name_path(dst_dir:str, index:int):
    '''
    Build the file name and the full path of the tile with the given index.

    The name is the index left-padded with zeros to five characters followed
    by "_.tif"; the path is that name joined onto dst_dir.

    Returns a (tile name, tile path) tuple.
    '''
    padded_index = str(index).zfill(5)
    tile_name = padded_index + "_.tif"
    return tile_name, os.path.join(dst_dir, tile_name)



def get_tile_transform(parent_transform, pixel_x:int,pixel_y:int):
    '''
    Build the affine transform of a tile whose upper-left corner sits at
    pixel offset (pixel_x, pixel_y) of the parent raster.

    Parameters:
        parent_transform: affine transform of the parent raster; its
            coefficients are read through the attributes a, b, c, d, e, f.
        pixel_x: column offset of the tile's upper-left pixel in the parent.
        pixel_y: row offset of the tile's upper-left pixel in the parent.

    Returns:
        A rasterio.Affine with the parent's a, b, d, e coefficients and the
        origin (c, f) moved to the tile's upper-left corner.
    '''
    # Map the upper-left corner of pixel (col=pixel_x, row=pixel_y) through the
    # full affine. The b and d terms must be included: they are zero for a
    # north-up raster, but leaving them out misplaces tiles of a rotated or
    # sheared parent.
    crs_x = (parent_transform.c
             + pixel_x * parent_transform.a
             + pixel_y * parent_transform.b)
    crs_y = (parent_transform.f
             + pixel_x * parent_transform.d
             + pixel_y * parent_transform.e)
    # Same pixel size / rotation as the parent, new origin
    tile_transform = rasterio.Affine(parent_transform.a, parent_transform.b, crs_x,
                                     parent_transform.d, parent_transform.e, crs_y)
    return tile_transform


    
def get_tile_profile(parent_tif:rasterio.io.DatasetReader, pixel_x:int, pixel_y:int):
    '''
    Assemble the base write profile for a tile cut from parent_tif at pixel
    offset (pixel_x, pixel_y).

    The profile carries the GTiff driver, the parent's CRS, the parent's
    nodata value (0 when the parent declares none) and the tile transform
    produced by get_tile_transform. Returns the profile as a dict.
    '''
    crs = parent_tif.crs
    # Fall back to 0 when the parent raster has no nodata value
    nodata_value = parent_tif.nodata
    if nodata_value is None:
        nodata_value = 0
    # Shift the parent's transform to the tile's pixel offset
    transform = get_tile_transform(parent_tif.transform, pixel_x, pixel_y)
    return {
        "driver": "GTiff",
        "crs": crs,
        "nodata": nodata_value,
        "transform": transform,
    }

    

def generate_tiles(tif:rasterio.io.DatasetReader, size:int, dst_dir:str):
    '''
    Cut the input raster into square tiles of size x size pixels and write
    each tile as a GeoTIFF into dst_dir.

    Parameters:
        tif: open rasterio dataset to cut up.
        size: tile edge length in pixels; must be positive.
        dst_dir: directory that receives the tiles (created if missing).

    Tiles are numbered from 1 in column-major order (all rows of the first
    column of tiles, then the next column) and named via get_tile_name_path.
    At most the first three bands are written. Tiles that reach past the
    raster edge are padded with the profile's nodata value.

    Raises:
        ValueError: if size is not positive.
    '''
    if size <= 0:
        raise ValueError("size must be a positive number of pixels, got {}".format(size))
    # Make sure the output directory exists before the first write
    os.makedirs(dst_dir, exist_ok=True)
    # Request only the bands that are kept (the first three at most) instead of
    # reading every band and discarding the rest afterwards
    band_indexes = list(range(1, min(tif.count, 3) + 1))
    tile_index = 0
    for x in tqdm(range(0, tif.width, size)):
        for y in range(0, tif.height, size):
            # Base profile (driver, CRS, nodata, transform) for this tile
            profile = get_tile_profile(tif, x, y)
            # Window is ((row_start, row_stop), (col_start, col_stop)); the
            # boundless read pads edge tiles with the nodata value
            tile_data = tif.read(indexes=band_indexes,
                                 window=((y, y + size), (x, x + size)),
                                 boundless=True, fill_value=profile['nodata'])
            tile_index += 1
            # Only the path is needed here; the bare name is discarded
            _, dst_tile_path = get_tile_name_path(dst_dir, tile_index)
            # Complete the profile with the tile's band count, size and dtype
            c, h, w = tile_data.shape
            profile.update(
                height=h,
                width=w,
                count=c,
                dtype=tile_data.dtype,
            )
            # Write the tile to disk
            with rasterio.open(dst_tile_path, "w", **profile) as dst:
                dst.write(tile_data)


## Define the Parent and Destination Directory Path

In [25]:

# Define the source file path
# NOTE(review): this assumes Google Drive is already mounted at /content/gdrive;
# no cell visible here mounts it. Confirm before a fresh Run All.
parent_tif = '/content/gdrive/MyDrive/RASTER_13.tif'

# Define the destination directory path and create it if needed
# (exist_ok avoids the separate check-then-create step)
dst_dir = '/content/IMG_SLICE'
os.makedirs(dst_dir, exist_ok=True)


# Define the tile size (edge length in pixels)
size = 1024

# Open the source file for reading
with rasterio.open(parent_tif) as src:
    # Generate the tiles and save them in the destination directory
    generate_tiles(src, size, dst_dir)


100%|██████████| 6/6 [00:29<00:00,  4.86s/it]


## VISUALIZE THE DATA FOR CLOUD COVER

In [None]:
# Collect the tile files in dst_dir, sorted by name. Only regular .tif files
# are kept so that stray directory entries are neither listed nor handed to
# rasterio.open() by the cells that consume patch_files.
patch_files = sorted(
    name for name in os.listdir(dst_dir)
    if name.lower().endswith(".tif") and os.path.isfile(os.path.join(dst_dir, name))
)

# Print the list of files in a table
print("List of files in directory: ")
print("{:<20} {:<20}".format("Filename", "Size (MB)"))
print("-" * 40)
for file in patch_files:
    file_path = os.path.join(dst_dir, file)
    file_size = round(os.path.getsize(file_path) / (1024 * 1024), 2)
    print("{:<20} {:<20}".format(file, file_size))

# The count comes from the same list that was printed, so table and total agree
num_files = len(patch_files)
# Print the number of tiles
print("Number of Patches:", num_files)

In [8]:
# Open every file named in patch_files with rasterio and keep the resulting
# dataset objects, in the same order, in patches_list. The datasets are left
# open here.
patches_list = [
    rasterio.open(os.path.join(dst_dir, file))
    for file in patch_files
]


In [9]:
# Iterator of (position, dataset) pairs over patches_list; enumerate() is
# already an iterator, so it can be advanced with next() directly.
iter_patch = enumerate(patches_list)

In [None]:
# Run cell as many times as required to step through and plot patches one at a time.
# Fetch the next (position, dataset) pair; None marks the end of the iterator.
# The default must not be unpacked directly: unpacking a sentinel string into
# two names raises ValueError instead of reporting completion.
next_patch = next(iter_patch, None)

if next_patch is None:
    # Nothing left to plot
    print("Iteration complete")
else:
    img_num, img_file = next_patch
    try:
        # Show the metadata of the image file
        print(img_file.meta)

        # Read bands 1-3 in a single call as a (3, height, width) array
        rgb = img_file.read([1, 2, 3])

        # Plot the RGB image using earthpy.plot.plot_rgb(); the title is the
        # dataset's own file name so it always matches the tile on disk
        im = epp.plot_rgb(rgb, figsize=(20, 20), stretch=True, str_clip=0.2,
                          title=os.path.basename(img_file.name))

        # Display the plot using matplotlib.pyplot.show()
        plt.show()

        # Print the shape of the RGB array
        print(rgb.shape)
    finally:
        # Close the image file to free up system resources, even if reading
        # or plotting failed
        img_file.close()


## FILTERING UNNECESSARY DATA & MERGING

In [11]:
# List the entries of the destination directory with os.listdir().
# Despite the variable name, os.listdir() returns bare entry names, not full paths.
# NOTE(review): tile_paths is not referenced by any later cell visible here;
# confirm whether it is still needed.
tile_paths = os.listdir(dst_dir)

In [12]:
# Glob pattern that matches files with the .tif extension
search_criteria = "*.tif"

# Join the destination directory and the pattern into one glob expression;
# q is what the following cell passes to glob.glob()
q = os.path.join(dst_dir, search_criteria)

# Print the resulting glob expression as a sanity check
print(q)


/content/IMG_SLICE/*.tif


In [13]:
# Get the file paths that match the glob expression in 'q'. glob.glob() returns
# matches in arbitrary order, so sort them to make the order of the mosaic
# inputs reproducible between runs.
quad_files = sorted(glob.glob(q))

In [14]:
# Start with an empty list; the next cell appends one opened raster dataset
# per tile file to it
src_files_to_mosaic = []

In [15]:
# Open every path in 'quad_files' with rasterio and append the dataset to
# 'src_files_to_mosaic'. The datasets are left open.
# Note: after the loop 'src' still refers to the last dataset opened; the cell
# that builds out_meta reads src.meta, so this loop variable must stay as is.
# Re-running this cell without first resetting 'src_files_to_mosaic' appends
# the same files a second time.
for f in quad_files:
  src = rasterio.open(f)
  src_files_to_mosaic.append(src)
  

In [16]:
# Combine the opened tile datasets into a single mosaic with rasterio's merge().
# It returns the mosaic array and the transform of the merged raster; later
# cells read mosaic.shape[1] as the height and mosaic.shape[2] as the width.
mosaic, out_trans = merge(src_files_to_mosaic)


In [17]:

# Build the metadata for the output mosaic from the first opened tile, taken
# from the list itself rather than from a loop variable left over by another
# cell, then override the fields that describe the merged array.
out_meta = src_files_to_mosaic[0].meta.copy()
out_meta.update({"driver": "GTiff",
                 # band count, height and width come from the merged array
                 "count": mosaic.shape[0],
                 "height": mosaic.shape[1],
                 "width": mosaic.shape[2],
                 "transform": out_trans
                  }
)


In [34]:
# Temporary folder that receives the merged RGB mosaic
temp_folder = "/content/IMG_SLICE_MERGED"

# Create the folder if it doesn't exist (exist_ok avoids the separate
# check-then-create step)
os.makedirs(temp_folder, exist_ok=True)

# Extract only the filename from the path string
mosaic_file = os.path.basename(parent_tif)
# The mosaic keeps the same file name as the parent raster
temp_file = mosaic_file

# Full path of the mosaic file inside the temporary folder
out_mosaic = os.path.join(temp_folder, temp_file)

# Open the mosaic file in write mode; **out_meta passes the prepared metadata
# as keyword arguments so the file is created with the metadata held in out_meta
with rasterio.open(out_mosaic, "w", **out_meta) as dest:

    # Write the mosaic array to the output file
    dest.write(mosaic)


In [None]:

# Load the merged mosaic file back from disk
with rasterio.open(out_mosaic, "r") as mosaic_ds:
    mosaic = mosaic_ds.read()

# File size in megabytes. Kept under its own name so the tile size stored in
# `size` by the tiling cell is not overwritten.
mosaic_size_mb = os.path.getsize(out_mosaic) / (1024 * 1024)

# Plot the mosaic array; imshow expects (rows, cols, bands), so move the band
# axis to the end
plt.imshow(mosaic.transpose((1,2,0)))
plt.show()

# Print the size of the image
print("Size (MB):", mosaic_size_mb)


## EXPORT FILE TO G-DRIVE

In [48]:
# Name of the target folder inside Google Drive
gdrive_folder = "IMG_SLICE_MERGED"

# Create the Google Drive folder if it doesn't exist. os.mkdir (not makedirs)
# is deliberate: if the parent Drive path is missing this fails loudly instead
# of silently creating a local directory tree.
gdrive_folder_path = os.path.join('/content/gdrive/MyDrive', gdrive_folder)
if not os.path.exists(gdrive_folder_path):
  os.mkdir(gdrive_folder_path)

# Copy the file to the Google Drive folder; shutil.copy returns the path of
# the newly created file
exported_path = shutil.copy(out_mosaic, gdrive_folder_path)

# Verify the copied file itself (not just the folder): it must exist and have
# the same size as the source
if os.path.isfile(exported_path) and os.path.getsize(exported_path) == os.path.getsize(out_mosaic):
    print("File exported to Google Drive successfully!")
else:
    print("File export to Google Drive failed.")

File exported to Google Drive successfully!


## DELETE THE TEMPORARY FOLDERS

In [29]:
# Define the confirmation dialog box: a prompt plus Yes / No buttons
yes_button = widgets.Button(description='Yes')
no_button = widgets.Button(description='No')
confirmation_box = widgets.Box([
    widgets.Label(value='Are you sure you want to delete the folders?'),
    yes_button,
    no_button
])

# Define the callback function for the Yes button
def on_yes_button_clicked(button):
    '''
    Delete the temporary mosaic folder and the tile folder if they exist.

    shutil.rmtree is used instead of a shell `rm -r` so that paths containing
    spaces or shell metacharacters are handled safely and a failed deletion
    raises instead of being reported as a success.
    '''
    for folder in (temp_folder, dst_dir):
        if os.path.isdir(folder):
            shutil.rmtree(folder)
            print(f"{folder} folder deleted successfully!")

# Define the callback function for the No button
def on_no_button_clicked(button):
    '''Report that nothing was deleted.'''
    print('Deletion cancelled.')

# Attach the callbacks to the buttons
yes_button.on_click(on_yes_button_clicked)
no_button.on_click(on_no_button_clicked)

# Display the confirmation box
display(confirmation_box)


Box(children=(Label(value='Are you sure you want to delete the folders?'), Button(description='Yes', style=But…

Deletion cancelled.
