In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The absolute paths used by later cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Creating functions to split an image into grid patches, convert the patches to
 tensors, and then combine all the tensors.

In [None]:
import os
import torch
import tifffile as tiff
from torchvision import transforms
from PIL import Image

def split_image_to_grids(image_path, grid_size=10):
    """
    Splits a 2D TIFF image into non-overlapping grid_size x grid_size patches and
    converts each patch to a PyTorch tensor.

    Patches are collected row by row (top to bottom, and left to right within a
    row). Rows and columns left over when the image height or width is not an
    exact multiple of grid_size are dropped, so only full-size patches are returned.

    Args:
        image_path (str): Path to the TIFF image file.
        grid_size (int): Side length, in pixels, of each square patch. Must be
            positive. Default is 10.

    Returns:
        list: One PyTorch tensor per patch, produced by torchvision's ToTensor.
            For a 2D patch ToTensor adds a leading channel dimension, giving
            shape (1, grid_size, grid_size); uint8 input is rescaled to
            [0.0, 1.0], other dtypes are converted without rescaling.
            The list is empty when the image is smaller than grid_size in
            either dimension.

    Raises:
        ValueError: If grid_size is not positive, or if the image is not 2D.
    """
    # Fail fast with a clear message: a step of 0 would otherwise surface as a
    # cryptic range() error, and a negative step would silently yield no patches.
    if grid_size <= 0:
        raise ValueError(f"grid_size must be a positive integer, got {grid_size}.")

    # Load the TIFF image using tifffile
    image = tiff.imread(image_path)

    # Ensure the image is 2D
    if image.ndim != 2:
        raise ValueError("The TIFF image must be 2D.")

    # Get the dimensions of the image
    height, width = image.shape

    # Transformation to convert numpy array to tensor
    transform = transforms.ToTensor()

    # List to store the tensors
    grid_tensors = []

    # Stop at (dimension - grid_size) so that every extracted patch is full-size;
    # any partial strip along the bottom/right edge is skipped.
    for y in range(0, height - grid_size + 1, grid_size):
        for x in range(0, width - grid_size + 1, grid_size):
            # Extract the grid patch and convert it to a tensor
            grid = image[y:y + grid_size, x:x + grid_size]
            grid_tensors.append(transform(grid))

    return grid_tensors

def process_tiff(tiff_path, grid_size=10, save_path="data.pth"):
    """
    Processes a TIFF file into grid patches and saves the patches as a single
    stacked PyTorch tensor in a .pth file.

    Args:
        tiff_path (str): Path to the TIFF file.
        grid_size (int): Size of each grid patch.
        save_path (str): Path to save the output tensor file.

    Raises:
        ValueError: If no patches could be extracted from the image (in addition
            to any ValueError raised by split_image_to_grids).
    """
    tensor_patches = split_image_to_grids(tiff_path, grid_size)

    # torch.stack fails with an opaque error on an empty list; report which
    # file produced no patches instead.
    if not tensor_patches:
        raise ValueError(
            f"No {grid_size}x{grid_size} patches could be extracted from {tiff_path}; "
            "check that the image is at least grid_size pixels in both dimensions."
        )

    # Stack the per-patch tensors along a new leading dimension and save them
    torch.save(torch.stack(tensor_patches), save_path)

def combine_tensors(tensor_files, save_path="combined_data.pth"):
    """
    Merges several saved tensor files into a single tensor file.

    Each file is loaded with torch.load, the loaded tensors are concatenated
    along dimension 0 in the order given, and the result is written with
    torch.save.

    Args:
        tensor_files (list): Paths of the tensor files to merge, in order.
        save_path (str): Destination path for the merged tensor file.
    """
    # Read every input file, keeping the caller's ordering
    loaded = [torch.load(path) for path in tensor_files]

    # Join along the first dimension and write the result in one step
    torch.save(torch.cat(loaded, dim=0), save_path)

# Confirmation message so the cell shows visible output once the helpers above are defined.
print("Functions defined.")


Functions defined.


Inputting two sample TIFF files


In [None]:
# Path to the lake raster (Lakes_in_California.tif) on the mounted Drive.
# NOTE(review): this cell's comments and log message call this the "basin" data
# although the file and variable names refer to lakes - confirm the intended term.
tiff_path_lake = "/content/drive/MyDrive/EE ML PFAS/Raster_Data/Mapping/WaterWays/Raster file/Lakes_in_California.tif"  # Replace with your actual path to the lake TIFF file
grid_size = 10  # Side length of each square patch passed to process_tiff; change if needed
save_path_lake = "/content/drive/MyDrive/EE ML PFAS/Raster_Data/Mapping/WaterWays/Raster file/processed_lake.pth"  # Where the stacked patch tensor for this raster is saved (a '.pth' file)

# Split the lake raster into patches and save them as one tensor file
process_tiff(tiff_path_lake, grid_size, save_path_lake)
print(f"Processed basin TIFF file saved to {save_path_lake}.")

Processed basin TIFF file saved to /content/drive/MyDrive/EE ML PFAS/Raster_Data/Mapping/WaterWays/Raster file/processed_lake.pth.


In [None]:
# Path to the river raster (Rivers_in_California.tif) on the mounted Drive
tiff_path_river = "/content/drive/MyDrive/EE ML PFAS/Raster_Data/Mapping/WaterWays/Raster file/Rivers_in_California.tif"  # Replace with your actual path to the river TIFF file
grid_size = 10  # Side length of each square patch passed to process_tiff; change if needed
save_path_river = "/content/drive/MyDrive/EE ML PFAS/Raster_Data/Mapping/WaterWays/Raster file/processed_rivers.pth"  # Where the stacked patch tensor for this raster is saved

# Split the river raster into patches and save them as one tensor file
process_tiff(tiff_path_river, grid_size, save_path_river)
print(f"Processed river TIFF file saved to {save_path_river}.")


Processed river TIFF file saved to /content/drive/MyDrive/EE ML PFAS/Raster_Data/Mapping/WaterWays/Raster file/processed_rivers.pth.


Running the combine function on the two processed files

In [None]:
# Tensor files to combine: the outputs saved by the lake and river cells above
# (this cell depends on save_path_lake and save_path_river being defined there).
tensor_files = [save_path_lake, save_path_river]  # Order here is the order of patches in the combined tensor
# Relative path: the combined file is written to the kernel's current working
# directory, not to the Drive folder used for the per-raster files.
save_path_combined = "combined_data.pth"  # Define the save path for the combined data

# Concatenate the lake ("basin") and river patch tensors into one file
combine_tensors(tensor_files, save_path_combined)
print(f"Combined tensor file saved to {save_path_combined}.")


Combined tensor file saved to combined_data.pth.


Checking the output.

In [None]:
import torch

# Load the combined tensor file
combined_tensor_path = "combined_data.pth"  # Path to the combined tensor file
combined_tensors = torch.load(combined_tensor_path)

# Summarise the whole tensor first: the file holds one entry per patch, so
# printing every patch floods the cell output (the notebook runtime truncates it).
print(f"Combined tensor shape: {tuple(combined_tensors.shape)}, dtype: {combined_tensors.dtype}")

# Print only the first few patches as a spot check
preview_count = 3
for i, tensor in enumerate(combined_tensors[:preview_count]):
    print(f"Tensor {i+1}:\n", tensor)
    print(f"Shape: {tensor.shape}\n")


Streaming output truncated to the last 5000 lines.
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0039, 0.0039, 0.0039,
          0.0039, 0.0039],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0039, 0.0039, 0.0039, 0.0039,
          0.0039, 0.0039],
         [0.0000, 0.0000, 0.0000, 0.0039, 0.0039, 0.0000, 0.0000, 0.0039,
          0.0039, 0.0039],
         [0.0039, 0.0039, 0.0039, 0.0039, 0.0000, 0.0000, 0.0000, 0.0039,
          0.0000, 0.0000]]])
Shape: torch.Size([1, 10, 10])

Tensor 19681:
 tensor([[[0.0039, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0039],
         [0.0039, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000],
         [0.0039, 0.0039, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000],
         [0.0039, 0.0039, 0.0039, 0.0039, 0.0039, 0.0039, 0.0039, 0.0039,
          0.0039, 0.0039],
         [0.0039, 0.0000, 0.0039, 0.0039, 0.0039, 0.0039, 0.0039, 0.0000,
    

This is the final code: given a folder containing all the TIFF files, it runs all of the above steps on every file and produces a single combined result.

In [None]:
import os

def process_all_tiffs(folder_path, grid_size=10, save_path="combined_data.pth"):
    """
    Processes all TIFF files in a folder into grid patches and saves the patches
    from every file, concatenated, as one PyTorch tensor in a .pth file.

    Files are processed in sorted filename order so the patch order in the
    combined tensor is reproducible. Each file's patches are first written to
    its own temporary file, and all temporary files are removed afterwards,
    even if processing fails partway through.

    Args:
        folder_path (str): Path to the folder containing the TIFF files. Only
            names ending in ".tif" are processed.
        grid_size (int): Size of each grid patch.
        save_path (str): Path to save the output tensor file.

    Raises:
        ValueError: If the folder contains no ".tif" files.
    """
    import tempfile

    # Get a sorted list of all TIFF files in the folder (os.listdir order is arbitrary)
    tiff_names = sorted(
        filename for filename in os.listdir(folder_path) if filename.endswith(".tif")
    )
    if not tiff_names:
        raise ValueError(f"No .tif files found in {folder_path}.")

    # Every input gets its own temporary file. Reusing a single path would make
    # each file overwrite the previous one, so the combined result would only
    # contain repeated copies of the last file's patches.
    with tempfile.TemporaryDirectory() as temp_dir:
        tensor_files = []
        for index, filename in enumerate(tiff_names):
            tiff_path = os.path.join(folder_path, filename)
            temp_path = os.path.join(temp_dir, f"patches_{index}.pth")

            # Process the TIFF file and save the tensor patches
            process_tiff(tiff_path, grid_size, save_path=temp_path)
            tensor_files.append(temp_path)

        # Combine all tensor files into one; the temporary directory (and the
        # per-file tensors in it) is deleted when this block exits.
        combine_tensors(tensor_files, save_path)

    print(f"Processed all TIFF files in {folder_path} and saved the combined tensor to {save_path}.")

# Folder on the mounted Drive that holds the TIFF files to process
folder_path = "/content/drive/MyDrive/EE ML PFAS/Raster_Data/Mapping/WaterWays/Raster file"  # Replace with your actual folder path
grid_size = 10  # Side length of each square patch; change if needed
# The combined output is written into the same folder that is scanned; it ends
# in ".pth", so the ".tif" filter in process_all_tiffs does not pick it up.
save_path = "/content/drive/MyDrive/EE ML PFAS/Raster_Data/Mapping/WaterWays/Raster file/processed_all_waterways.pth"  # Define the save path for the combined data

# Process all TIFF files in the folder
process_all_tiffs(folder_path, grid_size, save_path)
