## Illogical transitions in pairs
- This script wraps up a batch of vector files located in a folder and creates a series of illogical transition files based on all the possible pairs of the input data.

- Inputs
    - Path of the folder with the vector files.
    - Path of the csv with all the illogical transitions (val1, val2) without headers.
    - Column reference name of the vector files for rasterization.
    - Rasterization properties: data type, rows, columns...
    - Path of the csv file (without headers) containing either:
        - 1 column table with all the accurate names.
        - 2 column table with the values and its corresponding names.
- Outputs
    - Path of the output folder with the final illogical transitions.
- Processing
    - Read all the vector input files.
    - Optional: Dissolve them to make the rest of the process easier.
    - Check whether the input column holds numeric values or text.
    - Create the corresponding dictionary for classification.
    - Homogenize the reference column with the dictionary.
    - Create intermediate vector files.
    - Rasterize the reference column.
    - Compare all the years in pairs creating the corresponding illogical files.
        - Raster files with ID values.
        - csv with ID values and pair accumulated values.
- Author
    - Rubén Crespo Ceballos

In [1]:
from osgeo import gdal
import rasterio
from rasterio import features
from rasterio.transform import from_origin
from osgeo import gdal, ogr, osr
import os
from difflib import SequenceMatcher
import numpy as np
import random
import itertools
import pandas as pd
import geopandas as gpd
import re
import csv

In [None]:
def create_folder_if_not_exists(folder_path):
    """
    Ensure a folder is present, creating it (with any missing parents) if needed.

    Parameters:
    folder_path (str): Location of the folder to ensure.

    Returns:
    None. A message telling whether the folder was created or was already
    there is printed.
    """
    if os.path.exists(folder_path):
        print(f"Folder already exists at: {folder_path}")
        return

    os.makedirs(folder_path)
    print(f"Folder created at: {folder_path}")

def get_vector_file_list(path):
    """
    List the shapefiles (``.shp``) located directly inside a folder.

    Parameters:
    - path (str): path of the folder with the resources.

    Returns:
    - File_list (list): full paths (``path`` joined with each file name),
      sorted alphabetically.
    """
    # os.listdir() makes no promise about ordering, so the result is sorted to
    # keep every later step (pairing, zipping) reproducible between runs.
    # The extension match stays case-sensitive because later steps rename the
    # files with a literal ".shp" replacement.
    File_list = sorted(
        os.path.join(path, file)
        for file in os.listdir(path)
        if file.endswith(".shp")
    )
    return File_list

def get_raster_file_list(path):
    """
    List the GeoTIFF files (``.tif`` / ``.tiff``) located directly inside a folder.

    Parameters:
    - path (str): path of the folder with the resources.

    Returns:
    - File_list (list): full paths (``path`` joined with each file name),
      sorted alphabetically.
    """
    # os.listdir() makes no promise about ordering, so the result is sorted to
    # keep the raster pairing reproducible between runs.
    File_list = sorted(
        os.path.join(path, file)
        for file in os.listdir(path)
        if file.endswith((".tif", ".tiff"))
    )
    return File_list

def get_csv_file_list(path):
    """
    List the csv files located directly inside a folder.

    Parameters:
    - path (str): path of the folder with the resources.

    Returns:
    - File_list (list): full paths (``path`` joined with each file name),
      sorted alphabetically.
    """
    # os.listdir() makes no promise about ordering, so the result is sorted to
    # keep the listing reproducible between runs.
    File_list = sorted(
        os.path.join(path, file)
        for file in os.listdir(path)
        if file.endswith(".csv")
    )
    return File_list

def update_names_based_on_similarity(unique_names, gdf, column_name, similarity_threshold=0):
    """
    Match every name of a column against a reference list and store the
    closest reference name in a new 'valid_text' column.

    Similarity is ``difflib.SequenceMatcher(...).ratio()``. The reference name
    with the strictly highest ratio wins (the first one in ``unique_names``
    on ties) and is accepted only when that ratio reaches the threshold.

    Parameters:
    - unique_names (list): list of the reference (valid) names.
    - gdf (GeoDataFrame): GeoDataFrame whose names need to be updated.
    - column_name (str): name of the column holding the names to match.
    - similarity_threshold (float): minimum ratio required to accept a match.

    Returns:
    - gdf. The same object, updated in place with the 'valid_text' column.
      Rows whose value is not a string (None, NaN, numbers) or whose best
      ratio is below the threshold hold None.
    """
    def closest_reference_name(raw_name):
        # Returns the accepted reference name for raw_name, or None.
        best_ratio = 0
        best_name = None
        for candidate in unique_names:
            ratio = SequenceMatcher(None, candidate, raw_name).ratio()
            if ratio > best_ratio:
                best_ratio = ratio
                best_name = candidate
        return best_name if best_ratio >= similarity_threshold else None

    # Many rows share the same name: each distinct name is matched only once.
    best_match_cache = {}
    matched_names = []
    total_elements = len(gdf)

    # The progress counter is positional, so it works for any index type
    # (e.g. the string index left behind by a dissolve).
    for position, raw_name in enumerate(gdf[column_name], start=1):
        if isinstance(raw_name, str):
            if raw_name not in best_match_cache:
                best_match_cache[raw_name] = closest_reference_name(raw_name)
            matched_names.append(best_match_cache[raw_name])
        else:
            # SequenceMatcher cannot compare None/NaN/numbers; leave unmatched.
            matched_names.append(None)

        print(f"Processing element {position}/{total_elements}", end="\r") # This is to track the process

    # dtype=object keeps the None placeholders of unmatched rows untouched.
    gdf['valid_text'] = pd.Series(matched_names, index=gdf.index, dtype=object)

    return gdf

def create_identifier_dictionary(list):
    """
    Assign a consecutive integer id (starting at 1) to every distinct name.

    Names are sorted before numbering, so the same set of names always gets
    the same ids. Repeated names are collapsed first; otherwise a duplicate
    would consume an id and leave gaps in the numbering.

    Parameters:
    - list (list): list of strings.

    Returns:
    - value_to_text_dict (dict): dictionary of text: id.
    """
    # NOTE: the parameter name shadows the builtin `list`; it is kept because
    # renaming it would break keyword callers.
    value_to_text_dict = {value: index for index, value in enumerate(sorted(set(list)), start=1)}
    return value_to_text_dict

def csv_to_dict(file_path):
    """
    Build a dictionary out of a two column csv (text, value) without header.

    Every row is read as data: a header row would fail the float conversion
    of its second column, so the file must not contain one.

    Parameters:
    - file_path (string): path of the file.

    Returns:
    - result_dict (dict): dictionary of value: text, where the key is the
      second column converted to float and the value is the first column.
    """
    result_dict = {}

    # ISO-8859-1 is used because the names may contain accented characters.
    with open(file_path, mode='r', newline='', encoding='ISO-8859-1') as file:
        reader = csv.reader(file)

        for row in reader:
            if not row:
                # Blank lines (typically a trailing newline) carry no data.
                continue
            value = row[0]  # First column: the text
            key = row[1]    # Second column: the numeric code
            result_dict[float(key)] = value

    return result_dict

def csv_to_list(file_path):
    """
    Collect the first column of a csv file into a list.

    Parameters:
    - file_path (string): Path to the CSV file.

    Returns:
    - list: First field of every non-empty row, in file order.
    """
    with open(file_path, mode='r', newline='', encoding='ISO-8859-1') as handle:
        # csv.reader yields an empty list for blank lines; those are dropped.
        return [record[0] for record in csv.reader(handle) if record]

def rasterize_geodataframe_by_column(gdf, value_to_index, resolution, nodata_value, data_type, output_path):
    """
    Rasterizes a geodataframe, burning its 'raster_val' column, and writes a GeoTIFF.

    The geometries are burned one class at a time (one pass per entry of
    value_to_index) and the passes are combined with a per-pixel maximum.

    Parameters:
    - gdf (GeoDataFrame): GeoDataFrame to be rasterized. It must contain the
      'raster_val' and 'geometry' columns.
    - value_to_index (dict): dictionary of text: raster value; only its values
      are compared against 'raster_val'.
    - resolution (int): pixel size of the raster, in the units of the gdf CRS.
    - nodata_value: value used to initialise the raster and stored as nodata.
    - data_type: dtype used for the numpy array, the rasterization and the
      output profile.
      NOTE(review): np.full needs a numpy-compatible dtype here, which a GDAL
      constant such as gdal.GDT_UInt16 is not - confirm what callers pass.
    - output_path (str): output of the raster file.

    Returns:
    - None. Rasterizes the geodataframe.
    """

    # Get the bounds of the GeoDataFrame
    xmin, ymin, xmax, ymax = gdf.total_bounds
    # Calculate the number of pixels in x and y directions (truncated)
    cols = int((xmax - xmin) / resolution)
    rows = int((ymax - ymin) / resolution)
    # Create a north-up transform anchored at the upper-left corner
    transform = from_origin(xmin, ymax, resolution, resolution)

    # Start from an array filled with the nodata value
    rasterized_array = np.full((rows, cols), nodata_value, dtype=data_type)

    total_values = len(value_to_index)

    # Rasterize each unique value separately
    for idx, (text_value, value) in enumerate(value_to_index.items()):
        print(f"Processing {idx + 1} out of {total_values}")
        mask = gdf['raster_val'] == value
        shapes = gdf.loc[mask, 'geometry']
        if shapes.empty:
            # value_to_index contains all the possible classes, but some of them may not exist in a certain file
            continue

        temp_raster = features.rasterize(
            shapes=shapes,
            out_shape=(rows, cols),
            transform=transform,
            all_touched=True, # A pixel is burned as soon as the polygon outline touches it
            default_value=value,
            dtype=data_type, # must match the dtype of the accumulated array
        )
        # Keep the larger value per pixel.
        # NOTE(review): this only keeps the burned classes when nodata_value
        # is lower than every class value; where polygons overlap, the
        # highest class value wins.
        rasterized_array = np.maximum(rasterized_array, temp_raster)

    crs = gdf.crs

    # Define the metadata for the raster
    profile = {
        'driver': 'GTiff',
        'height': rows,
        'width': cols,
        'count': 1,
        'dtype': data_type,
        'crs': crs,
        'transform': transform,
        'nodata': nodata_value,  # Set the nodata value in the profile metadata
        'compress': 'deflate',  # Compression method
        'tiled': True,  # Enable tiling
        # NOTE(review): 'legend' is not a standard rasterio profile key -
        # verify how the GTiff driver handles it.
        'legend': {str(key): value for key, value in value_to_index.items()}
    }

    # Write the raster array to a GeoTIFF file
    with rasterio.open(output_path, 'w', **profile) as dst:
        dst.write(rasterized_array, 1)
    

def gdal_rasterize_from_shapefile(shapefile_path, resolution, nodata_value, data_type, output_path, cols=None, rows=None):
    """
    Rasterizes the 'raster_val' attribute of a vector file using GDAL directly.

    The raster is anchored at the upper-left corner of the layer extent, is
    initialised with the nodata value and is written as a tiled, deflate
    compressed GeoTIFF.

    Parameters:
    - shapefile_path (string): path of the vector file. Its layer must have a
      'raster_val' attribute.
    - resolution (int or float): Resolution of the raster (pixel size).
    - nodata_value: The value to use for no-data pixels.
    - data_type: GDAL data type for the output raster (e.g., gdal.GDT_UInt16).
    - output_path (str): Path to save the output raster.
    - cols (int, optional): Number of columns in the output raster.
    - rows (int, optional): Number of rows in the output raster.
      When either of them is missing, both are derived from the layer extent
      and the resolution.

    Returns:
    - None. The function writes the raster to the specified output path.

    Raises:
    - FileNotFoundError: the vector file cannot be opened.
    - ValueError: the resulting raster would have no rows or no columns.
    - RuntimeError: the output raster cannot be created.
    """

    # Open the vector file using OGR (returns None when GDAL exceptions are off)
    shapefile = ogr.Open(shapefile_path)
    if shapefile is None:
        raise FileNotFoundError(f"Unable to open vector file: {shapefile_path}")
    layer = shapefile.GetLayer()

    # OGR returns the extent as (xmin, xmax, ymin, ymax)
    xmin, xmax, ymin, ymax = layer.GetExtent()

    # If cols and rows are not provided, calculate them based on resolution
    if cols is None or rows is None:
        cols = int((xmax - xmin) / resolution)
        rows = int((ymax - ymin) / resolution)

    if cols < 1 or rows < 1:
        raise ValueError(
            f"Raster of {cols} columns x {rows} rows cannot be created for {shapefile_path}; "
            "check the resolution against the layer extent."
        )

    # Create a new raster dataset
    raster_ds = gdal.GetDriverByName('GTiff').Create(
        output_path, cols, rows, 1, data_type,
        options=['COMPRESS=DEFLATE', 'TILED=YES']
    )
    if raster_ds is None:
        raise RuntimeError(f"Unable to create raster file: {output_path}")

    # Set the geotransform (north-up, anchored at the upper-left corner)
    geotransform = (xmin, resolution, 0, ymax, 0, -resolution)
    raster_ds.SetGeoTransform(geotransform)

    # Set the CRS (coordinate reference system) from the vector layer
    srs = layer.GetSpatialRef()
    if srs:
        raster_ds.SetProjection(srs.ExportToWkt())

    # SetNoDataValue only records metadata; Fill makes the pixels that no
    # geometry touches actually hold the nodata value instead of 0.
    band = raster_ds.GetRasterBand(1)
    band.SetNoDataValue(nodata_value)
    band.Fill(nodata_value)

    # Rasterize the layer
    gdal.RasterizeLayer(
        raster_ds,  # Output raster dataset
        [1],        # Raster band to write to
        layer,      # Input OGR layer to rasterize
        options=['ATTRIBUTE=raster_val', 'ALL_TOUCHED=TRUE']
    )

    # Flush and release the GDAL objects (child objects first) so the file is
    # fully written and closed.
    band.FlushCache()
    band = None
    raster_ds = None
    layer = None
    shapefile = None

    print(f"Rasterization complete: {output_path}")

def check_same_dimensions(raster_files):
    """
    Check whether all the input rasters have the same dimensions.

    Parameters:
    - raster_files (list): List of raster file paths.

    Returns:
    - dimensions_list (list). The (rows, columns) shape of every raster, in
      the same order as raster_files. It is returned whether or not the
      shapes agree; a message reporting the outcome is printed.
    """
    dimensions_list = []

    # Collect the shape of every raster
    for file_path in raster_files:
        with rasterio.open(file_path) as src:
            dimensions_list.append(src.shape)

    if len(set(dimensions_list)) == 1:
        print(f"All the elements have the same dimensions{dimensions_list[0]}")
    else:
        print("The dimensions are note the same")

    return dimensions_list


def read_csv_in_pairs(csv_file):
    """
    Reads a two column csv (no header) and transforms it into a list of value pairs.

    Parameters:
    - csv_file (str): path of the csv file. Every non-blank line must hold
      exactly two comma separated numbers.

    Returns:
    - rule_values_list: list of (float, float) tuples, in file order.
    """
    rule_values_list = []
    with open(csv_file, 'r') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not rules.
                continue
            # Split the line on the comma and convert both values to floats
            rule_value_1, rule_value_2 = map(float, line.split(","))
            rule_values_list.append((rule_value_1, rule_value_2))
    return rule_values_list

def generate_random_pairs(paths_list):
    """
    Build every unordered pair of the input elements and return them shuffled.

    Parameters:
    - paths_list (list): list of paths.

    Returns:
    - list of 2-tuples: each combination appears once, in random order.
    """
    shuffled_pairs = [*itertools.combinations(paths_list, 2)]
    random.shuffle(shuffled_pairs)
    return shuffled_pairs


def _year_label_from_path(raster_path):
    """Return the last '_' separated token of a raster file name, ignoring the extension and any '_optimized' suffix."""
    stem = os.path.splitext(os.path.basename(raster_path))[0]
    pipeline_suffix = "_optimized"
    # The intermediate vector files are saved as "<name>_optimized"; without
    # stripping it every raster would be labelled "optimized" and all the
    # pairs would overwrite the same output file.
    while stem.endswith(pipeline_suffix) and len(stem) > len(pipeline_suffix):
        stem = stem[:-len(pipeline_suffix)]
    return stem.split('_')[-1]


def compare_rasters(raster_pair, output, rule_table_path, nodata_value):
    """
    Flags the pixels of two rasters whose value pair is an illogical transition.

    Band 1 of both rasters is compared pixel by pixel. Every (value1, value2)
    combination listed in the rule table receives a unique id (1, 2, 3... in
    order of first appearance) that is written to the output raster; all the
    other pixels are set to 0. The id / value pair table is also exported as
    "illogical_table_<year1>_<year2>.csv" in the output folder.

    Parameters:
    - raster_pair (list): the two raster paths to compare (first, second).
    - output (path): folder where the output files are written.
    - rule_table_path (str): path of the csv with the illogical (val1, val2) pairs.
    - nodata_value: nodata value stored in the output raster.

    Returns:
    - output_loc (str): path of the output raster file.
    - comparison_df: dataframe with the columns 'univalue', 'Val1_<year1>'
      and 'Val2_<year2>'.

    Raises:
    - RuntimeError: an input raster cannot be opened or the output raster
      cannot be created.
    - ValueError: the two rasters do not have the same width and height.
    """
    # Open raster files
    ds1 = gdal.Open(raster_pair[0])
    ds2 = gdal.Open(raster_pair[1])

    if not ds1 or not ds2:
        raise RuntimeError(f"Unable to open raster files: {raster_pair[0]}, {raster_pair[1]}")

    # Get the first raster information
    width = ds1.RasterXSize
    height = ds1.RasterYSize
    geotransform = ds1.GetGeoTransform()
    projection = ds1.GetProjection()

    # Pixels are compared by position, so a size mismatch must stop here
    if (ds2.RasterXSize, ds2.RasterYSize) != (width, height):
        raise ValueError(
            f"Rasters have different dimensions: {width}x{height} "
            f"vs {ds2.RasterXSize}x{ds2.RasterYSize}"
        )

    # Read the rule table once; a set gives constant time membership tests
    rule_pairs = set(read_csv_in_pairs(rule_table_path))

    # Create output raster
    driver = gdal.GetDriverByName("GTiff")
    year1 = _year_label_from_path(raster_pair[0])
    year2 = _year_label_from_path(raster_pair[1])
    output_filename = f"{year1}_{year2}_illogical_transitions.tif" #Customize
    output_loc = os.path.join(output, output_filename)

    output_ds = driver.Create(output_loc, width, height, 1, gdal.GDT_Int16, options= ['COMPRESS=DEFLATE', 'TILED=YES']) # GDT_Int32
    if output_ds is None:
        raise RuntimeError(f"Unable to create raster file: {output_loc}")
    output_band = output_ds.GetRasterBand(1)
    output_band.SetNoDataValue(nodata_value)
    output_ds.SetGeoTransform(geotransform)
    output_ds.SetProjection(projection)

    band1 = ds1.GetRasterBand(1)
    band2 = ds2.GetRasterBand(1)

    unique_value_dict = {} # pairs : unique_value
    correct_value = 0      # value written where the transition is not illogical

    # Process the rasters in square blocks to keep memory usage bounded
    block_size = 256  # Adjust the block size as needed
    for y in range(0, height, block_size):
        block_height = min(block_size, height - y)
        for x in range(0, width, block_size):
            print(f"Comparing pixels at rows/columns ({x},{y}) from ({width}, {height})", end='\r')
            block_width = min(block_size, width - x)

            block1 = band1.ReadAsArray(x, y, block_width, block_height)
            block2 = band2.ReadAsArray(x, y, block_width, block_height)

            output_block = np.full(block1.shape, correct_value, dtype=np.int16)

            # Work on the distinct value pairs of the block instead of on
            # every pixel. They are visited in order of first appearance
            # (row-major), so the ids are assigned in scan order.
            flat_pairs = np.column_stack((block1.ravel(), block2.ravel()))
            present_pairs, first_seen = np.unique(flat_pairs, axis=0, return_index=True)
            for pair_position in np.argsort(first_seen):
                value1, value2 = present_pairs[pair_position].tolist()
                if (value1, value2) not in rule_pairs:
                    continue
                unique_value = unique_value_dict.setdefault((value1, value2), len(unique_value_dict) + 1)
                output_block[(block1 == value1) & (block2 == value2)] = unique_value

            # Write the block at its own offset
            output_band.WriteArray(output_block, x, y)

    # Flush and close datasets (bands first)
    output_band.FlushCache()
    output_band = None
    band1 = None
    band2 = None
    ds1 = None
    ds2 = None
    output_ds = None

    # Create the dataframe
    unique_values = list(unique_value_dict.values())
    value_pairs = list(unique_value_dict.keys())

    comparison_df = pd.DataFrame({
        'univalue': unique_values,
        f'Val1_{year1}': [pair[0] for pair in value_pairs],
        f'Val2_{year2}': [pair[1] for pair in value_pairs]
                                })

    # Lets export the csv just in case
    comparison_df.to_csv(os.path.join(output, f"illogical_table_{year1}_{year2}.csv"), index=False)

    return output_loc, comparison_df


def vectorize_raster(raster_path, output_path, df):
    """
    Vectorizes an input raster and attaches the attributes of its table.

    Band 1 is polygonized, each polygon carrying its pixel value as
    'univalue'. The polygons are joined with df on 'univalue' (polygons whose
    value is not in df are dropped), dissolved by 'univalue' and saved as a
    shapefile named after the raster.

    Parameters:
    - raster_path (str): path of the input raster.
    - output_path (path): folder of the output file.
    - df (dataframe): dataframe related to the input raster; it must contain
      a 'univalue' column.

    Returns:
    - None. It produces the vector file.
    """
    # Everything needed from the dataset is read while it is still open
    with rasterio.open(raster_path) as src:
        # Read raster data as numpy array
        data = src.read(1)
        # Get affine transform and CRS of the raster
        transform = src.transform
        raster_crs = src.crs

    # Vectorize the raster data
    raster_shapes = features.shapes(data, transform=transform)

    # Convert the vectorized shapes into a GeoDataFrame
    gdf = gpd.GeoDataFrame.from_features(
        [
            {"geometry": geo_shape, "properties": {"univalue": value}}
            for geo_shape, value in raster_shapes
        ],
        crs=raster_crs
    )

    # Merge the dataframe with the GeoDataFrame
    merged_gdf = gdf.merge(df, on='univalue')
    # Dissolve based on a column value
    dissolved_gdf = merged_gdf.dissolve(by='univalue')

    # Save the merged GeoDataFrame as a new shapefile
    filename = os.path.join(output_path, os.path.basename(raster_path).split('.')[0])
    dissolved_gdf.to_file(f'{filename}.shp', driver='ESRI Shapefile')
    return

def add_name_columns_to_dataframe(df, column_name, names_dictionary):
    """
    Append a name column for each 'Val<N>...' column of the dataframe.

    The columns are scanned in order looking for 'Val1', then 'Val2', and so
    on. Each one found yields a new column called column_name + N holding the
    text that names_dictionary associates with the value.

    Parameters:
    - df (dataframe): dataframe related to the input raster.
    - column_name (str): prefix of the name columns appended to the df.
    - names_dictionary (dict): dictionary of text: value.

    Returns:
    - df (dataframe). The same dataframe, updated with the name columns.
    """
    # The lookup goes from value to text, i.e. the dictionary reversed
    value_to_text = dict(zip(names_dictionary.values(), names_dictionary.keys()))

    next_suffix = 1
    # Only the columns present before any name column is added are scanned
    for existing_column in list(df.columns):
        if not existing_column.startswith(f'Val{next_suffix}'):
            continue
        df[f"{column_name}{next_suffix}"] = df[existing_column].map(value_to_text)
        next_suffix += 1
    return df

def dissolve_geodataframe(gdf, column):
    """
    Merge the geometries of a GeoDataFrame that share the same column value.

    Parameters:
    gdf (GeoDataFrame): Input GeoDataFrame to be dissolved.
    column (str): Name of the column whose unique values drive the dissolve.

    Returns:
    GeoDataFrame: The dissolved GeoDataFrame. The geometry counts before and
    after the operation are printed.
    """
    geometries_before = len(gdf)
    print(f"Initial number of geometries: {geometries_before}")

    merged_gdf = gdf.dissolve(by=column)

    geometries_after = len(merged_gdf)
    print(f"Final number of geometries after dissolve: {geometries_after}")

    return merged_gdf

def map_gdf_based_on_column_type(gdf, column_name, names_dictionary, names_list):
    """
    Adds the 'raster_val' column used for rasterization, based on the data type of a column.

    - Text column: every name is matched to the closest entry of names_list
      (similarity threshold 0.5, stored in 'valid_text') and then translated
      to its raster value through names_dictionary.
    - Any other column: its values are used directly as raster values.

    Parameters:
    gdf (GeoDataFrame): The input GeoDataFrame.
    column_name (str): The name of the column to be mapped.
    names_dictionary (dict): A dictionary where keys are valid text and values are unique raster values.
    names_list (list): The valid names the text column is matched against.

    Returns:
    GeoDataFrame: The modified GeoDataFrame with a new column 'raster_val' representing unique identifiers.
    """
    column = gdf[column_name]
    # Text can be stored as the generic object dtype or as a dedicated string
    # dtype depending on the pandas version, so both are checked.
    holds_text = pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)

    if holds_text:
        print(f"The column '{column_name}' contains strings.")
        gdf = update_names_based_on_similarity(names_list, gdf, column_name, similarity_threshold=0.5)
        print("names updated")
        # Add a new column to the GeoDataFrame containing the unique identifiers
        gdf['raster_val'] = gdf["valid_text"].map(names_dictionary)
    else:
        print(f"The column '{column_name}' does not contain strings.")
        # The values already are the raster values: copy them one to one
        gdf['raster_val'] = gdf[column_name]
    return gdf

In [None]:
"""Specify all the inputs"""
# Path for the vector inputs
input_path = r"Y:\z_resources\im-nca-senegal\v2_shp_occsol_anat\23-12-22\shp_occsol_anat\testing"

# Path for the output illogical files
output_path = input_path + r"\output_path"

# Path of the illogical rules csv (two columns: val1, val2; no header)
rule_table_path = r"Y:\z_resources\im-nca-senegal\v2_shp_occsol_anat\23-12-22\shp_occsol_anat\illogical_transitions.csv"

create_folder_if_not_exists(output_path)

# Get the original vector files
vector_file_list = get_vector_file_list(input_path)

"""We have here two situations we have to develop"""
# In both situations names_dictionary must end up as {text: raster value}:
# that is the direction used to build 'raster_val' and to name the values of
# the illogical tables. The second situation overrides the first one, so
# comment out the one that does not apply.

# If we have only strings with no values so we append a created value.
names_list_path = r"Y:\z_resources\im-nca-senegal\v2_shp_occsol_anat\23-12-22\shp_occsol_anat\testing\names_list.csv"
names_list = csv_to_list(names_list_path)
names_dictionary = create_identifier_dictionary(names_list) # {text: created id}

# If we have both strings and values
names_dictionary_path = r"Y:\z_resources\ruben\landcover_vector_files_copy\names_dictionary.csv" # Two columns (text, value); it must NOT have headers
code_to_label = csv_to_dict(names_dictionary_path) # {value: text}
# csv_to_dict is keyed by value, so it is reversed to the {text: value}
# direction expected by the rest of the script.
names_dictionary = {label: code for code, label in code_to_label.items()}
names_list = list(names_dictionary)

"""For rasterization"""
# Specify the column of the vector file
column_name = 'leyenda'
# Define the resolution of your raster.
resolution = 30  # in meters
# Define the nodata value of your raster.
nodata_value = 0

# Define the data type of the raster.
data_type = gdal.GDT_UInt16
"""
gdal.GDT_Byte,
gdal.GDT_Int16,
gdal.GDT_UInt16,
gdal.GDT_Int32,
gdal.GDT_UInt32,
gdal.GDT_Float32,
gdal.GDT_Float64
"""

# Define the rows and columns for the rasterization (All the files must have the same dimensions). You can take as reference any on the inputs
rows = 9999
columns = 9999

Folder already exists at: Y:\z_resources\ruben\ladncover_test\output_files


In [None]:
"""OPTIONAL: For very dense vector files. We are going to dissolve it based on the designed column, and  Optimise vector files to the fullest"""
dissolved_files_path = input_path + r"\dissolved_files"
create_folder_if_not_exists(dissolved_files_path)

# Each input is dissolved by the reference column and saved next to the
# others with the "_optimized" suffix.
for file in list(vector_file_list):
    dissolved_name = os.path.basename(file).replace(".shp", "_optimized.shp")
    gdf = gpd.read_file(file)
    print("gdf opened")
    gdf = dissolve_geodataframe(gdf, column_name)
    gdf.to_file(os.path.join(dissolved_files_path, dissolved_name), driver='ESRI Shapefile')

# The dissolved files become the input of the next step
vector_file_list = get_vector_file_list(dissolved_files_path)

In [None]:
"""Create the "raster_val" for rasterization according to the content of the columnn"""
optimized_vector_files_path = input_path + r"\optimized_vector_files"
create_folder_if_not_exists(optimized_vector_files_path)

for file in vector_file_list[:]:
    gdf = gpd.read_file(file)
    print(f"gdf {os.path.basename(file)} opened")
    # Text can be stored as the generic object dtype or as a dedicated string
    # dtype depending on the pandas version, so both are checked.
    column_holds_text = (
        pd.api.types.is_object_dtype(gdf[column_name])
        or pd.api.types.is_string_dtype(gdf[column_name])
    )
    if column_holds_text:
        print(f"The column '{column_name}' contains strings.")
        gdf = update_names_based_on_similarity(names_list, gdf, column_name, similarity_threshold=0.3)
        print("names updated")
        # Add a new column to the GeoDataFrame containing the unique identifiers
        gdf['raster_val'] = gdf["valid_text"].map(names_dictionary)

    else:
        print(f"The column '{column_name}' contains values.")
        # The values already are the raster values: copy them one to one
        gdf['raster_val'] = gdf[column_name]

    # Opening and saving the file takes a lot of time
    gdf.to_file(os.path.join(optimized_vector_files_path, os.path.basename(file).replace(".shp", "_optimized.shp")) , driver='ESRI Shapefile')

# Read the new inputs
vector_file_list = get_vector_file_list(optimized_vector_files_path)


In [None]:
"""Transform the vector files and convert them into rasters"""
rasterized_files_path = input_path + r"\rasterized_files_path"
create_folder_if_not_exists(rasterized_files_path)

for file in list(vector_file_list):
    vector_name = os.path.basename(file)
    # The raster keeps the name of the vector file, with the .tif extension
    output_path_file = os.path.join(rasterized_files_path, vector_name.replace(".shp", ".tif"))

    print(f"{vector_name} opened")
    print("Rasterizing: " , output_path_file)

    # GDAL based rasterization of the 'raster_val' attribute. The size of
    # each raster is derived from its own extent (cols/rows are not forced).
    # rasterize_geodataframe_by_column is the slower geopandas/rasterio
    # alternative and is not used here.
    gdal_rasterize_from_shapefile(file, resolution, nodata_value, data_type, output_path_file, cols=None, rows=None)


In [None]:
"""Check the dimensions of the rasters"""
# List the rasters produced by the rasterization step
raster_list = get_raster_file_list(rasterized_files_path)
# Prints whether every raster has the same (rows, columns) shape; the pair
# comparison that follows works pixel by pixel, so the shapes must agree.
check_same_dimensions(raster_list)

All the elements have the same dimensions(129, 262)


In [None]:
"""Create the illogical transitions 1 to 1 and vectorize it"""
illocical_raster_nodata_value = 0

raster_list = get_raster_file_list(rasterized_files_path)
all_pairs_path_list = generate_random_pairs(raster_list)

# Every pair of rasters produces one illogical raster, its csv table and
# (optionally) the vectorized version of the raster.
for raster_pair in list(all_pairs_path_list):
    comparison_result = compare_rasters(raster_pair, output_path, rule_table_path, illocical_raster_nodata_value)
    illogical_raster_path, illogical_df = comparison_result
    illogical_df = add_name_columns_to_dataframe(illogical_df, column_name, names_dictionary)
    # Comment the next line out if the vectorization is run independently
    vectorize_raster(illogical_raster_path, output_path, illogical_df)


Folder already exists at: Y:\z_resources\ruben\ladncover_test\illogical_files
Comparing pixels at rows/columns (256,0) from (262, 129)

In [None]:
"""If the vectorization is independent"""
Illogical_raster_list = get_raster_file_list(output_path)
illogical_csvs = get_csv_file_list(output_path)

# Each raster is paired with its table through the file names
# ("<pair>_illogical_transitions.tif" <-> "illogical_table_<pair>.csv")
# rather than through the position in two separate directory listings, which
# is not guaranteed to line up.
raster_suffix = "_illogical_transitions.tif"
for illogical_raster_path in Illogical_raster_list:
    raster_name = os.path.basename(illogical_raster_path)
    if not raster_name.endswith(raster_suffix):
        # Not an illogical transitions raster
        continue

    pair_label = raster_name[:-len(raster_suffix)]
    csv_file = os.path.join(output_path, f"illogical_table_{pair_label}.csv")
    if csv_file not in illogical_csvs:
        print(f"No illogical table found for {raster_name}, skipping it")
        continue

    illogical_df = pd.read_csv(csv_file)
    illogical_df = add_name_columns_to_dataframe(illogical_df, column_name, names_dictionary)
    vectorize_raster(illogical_raster_path, output_path, illogical_df)