In [1]:
import os
import rasterio

In [5]:
def check_tif_values(directory):
    """
    Reads every .tif file in the given directory using rasterio,
    checks if the values of the tif are in 0, 1, 2, or 3,
    and returns a list of .tif files which contain values other than these.
    """
    invalid_tif_files = []

    for filename in os.listdir(directory):
        if filename.endswith('.tif'):
            filepath = os.path.join(directory, filename)
            with rasterio.open(filepath) as src:
                data = src.read(1)  # Reading the first band

                # Check if the values are outside the range 0, 1, 2, 3
                if not ((data == 0) | (data == 1) | (data == 2) | (data == 3)).all():
                    invalid_tif_files.append(filename)

    return invalid_tif_files

In [6]:
# Example usage
directory = '/home/data/kenya/labels/'
result = check_tif_values(directory)

In [7]:
print(result)

['kenol1_1252.tif', 'kenol1_920.tif', 'kenol1_2192.tif', 'kenol1_485.tif', 'kenol1_990.tif', 'kenol2_289.tif', 'kenol1_929.tif', 'kenol1_1316.tif', 'kenol2_749.tif', 'kenol2_216.tif', 'kenol1_1321.tif', 'kenol1_994.tif', 'kenol2_221.tif', 'kenol2_287.tif', 'kenol2_288.tif', 'kenol2_218.tif', 'kenol1_2170.tif', 'kenol2_512.tif', 'kenol1_1351.tif', 'kenol2_901.tif', 'kenol1_1233.tif', 'kenol1_903.tif', 'kenol2_219.tif', 'kenol1_497.tif', 'kenol1_2107.tif', 'kenol1_2201.tif', 'kenol2_810.tif', 'kenol1_486.tif', 'kenol2_1678.tif', 'kenol1_1930.tif', 'kenol1_760.tif', 'kenol1_1164.tif', 'kenol2_293.tif']


In [9]:
for file in result:
    os.remove(os.path.join(directory, file))
    os.remove(os.path.join('/home/data/kenya/images/', file))