In [2]:
import os
import shutil
import geopandas as gpd
import rasterio
from rasterio.features import geometry_mask
from shapely.geometry import box
from PIL import Image

In [3]:
for part in os.listdir("./datasets"):
    part_path = os.path.join("./datasets", part)
    for date in os.listdir(part_path):
        date_path = os.path.join(part_path, date)
        for file in os.listdir(date_path):
            if file.endswith(".tif"):
                file_path = os.path.join(date_path, file)
                tiff_image = Image.open(file_path)
                tiff_image.save(f"./for_label/{os.path.splitext(file)[0]}.png", 'PNG')

KeyboardInterrupt: 

In [2]:
source_paths = ["./perfect_data", "./normal_data"]
target_path = "./dataset"

In [3]:
for source_path in source_paths:
    for road_folder in os.listdir(source_path):
        road_folder_path = os.path.join(source_path, road_folder)
        if os.path.isdir(road_folder_path):
            for date in os.listdir(road_folder_path):
                target_date_path = os.path.join(target_path, date)
                if os.path.exists(target_date_path) == False:
                    os.makedirs(target_date_path)
                date_path = os.path.join(road_folder_path, date)
                if os.path.isdir(date_path):
                    for file in os.listdir(date_path):
                        file_path = os.path.join(date_path, file)
                        if file.endswith(".tif"):
                            new_name = os.path.join(target_date_path, road_folder + "_" + date + ".tif")
                            shutil.copy(file_path, new_name)

In [3]:
shapefile_path = "./train/pac_2024_training.shp"
shapes = gpd.read_file(shapefile_path)

In [7]:
target_path = "./datasets/dataset_part_0"

In [10]:
# for date in os.listdir(target_path):
date = "2016-07-16_2016-08-24"
date_path = os.path.join(target_path, date)
raster_files = [os.path.join(date_path, f) for f in os.listdir(date_path) if f.endswith('.tif')]

raster_bounds = []

for raster_file in raster_files:
    with rasterio.open(raster_file) as src:
        bounds = src.bounds 
        raster_bounds.append(box(bounds.left, bounds.bottom, bounds.right, bounds.top))

raster_geometries = gpd.GeoSeries(raster_bounds)

filtered_shapes = shapes[shapes.geometry.apply(lambda x: raster_geometries.intersects(x).any())]
filtered_shapefile_path = os.path.join(date_path, date + ".shp")
filtered_shapes.to_file(filtered_shapefile_path)

In [5]:
for file in os.listdir(target_path):
    file_path = os.path.join(target_path, file)
    if file.endswith(".tif.aux.xml"):
        if os.path.exists(file_path):
            os.remove(file_path)

In [9]:
smallest_width = []
smallest_height = []
for road_folder in os.listdir(source_path):
    road_folder_path = os.path.join(source_path, road_folder)
    if os.path.isdir(road_folder_path):
        for date in os.listdir(road_folder_path):
            date_path = os.path.join(road_folder_path, date)
            if os.path.isdir(date_path):
                for file in os.listdir(date_path):
                    file_path = os.path.join(date_path, file)
                    if file.endswith(".tif"):
                        with rasterio.open(file_path) as dataset:
                            pixel_width = dataset.width
                            pixel_height = dataset.height
                            smallest_width.append(pixel_width)
                            smallest_height.append(pixel_height)

sorted_width = sorted(smallest_width)
sorted_height = sorted(smallest_height)

# print top 20 smallest
print(sorted_width[20:])
print(sorted_height[20:])


[320, 329, 329, 329, 329, 329, 332, 332, 335, 335, 335, 335, 336, 336, 336, 336, 336, 336, 336, 340, 346, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 357, 357, 366, 366, 366, 366, 376, 376, 376, 376, 376, 381, 381, 382, 382, 382, 382, 382, 384, 384, 388, 388, 388, 388, 389, 389, 389, 389, 389, 390, 394, 394, 394, 401, 401, 401, 401, 403, 407, 407, 407, 407, 407, 410, 410, 410, 416, 417, 417, 419, 419, 420, 420, 420, 422, 422, 424, 433, 433, 433, 433, 433, 433, 436, 436, 437, 437, 437, 437, 439, 439, 448, 448, 448, 448, 448, 452, 452, 459, 459, 459, 459, 459, 459, 459, 463, 463, 463, 463, 463, 463, 463, 469, 469, 469, 469, 469, 474, 474, 474, 474, 474, 480, 480, 480, 480, 480, 480, 480, 481, 481, 481, 481, 484, 486, 486, 486, 486, 486, 486, 486, 493, 493, 493, 493, 504, 504, 504, 504, 504, 510, 510, 510, 510, 510, 510, 510, 516, 516, 516, 516, 533, 533, 533, 534, 534, 535, 540, 540, 540, 540, 544, 544, 552, 552, 552, 553, 553, 553, 569, 569, 569, 569, 569, 583, 583,

In [6]:
tif_count = 0
for date in os.listdir(target_path):
    date_path = os.path.join(target_path, date)
    for root, dirs, files in os.walk(date_path):
        for file in files:
            if file.endswith(".tif"):
                tif_count += 1

print(tif_count)

625


In [7]:
parts = 15

for i in range(parts):
    part_path = "./datasets/dataset_part_" + str(i)
    if os.path.exists(part_path) == False:
        os.makedirs(part_path)

to_moves = []
current_count = 0
current_part = 0
for date in os.listdir(target_path):
    date_path = os.path.join(target_path, date)
    if os.path.exists(date_path) == False:
        continue
    to_moves.append(date_path)
    for root, dirs, files in os.walk(date_path):
        for file in files:
            if file.endswith(".tif"):
                current_count += 1
    if (current_count >= tif_count/parts):
        print(current_count)

        for to_move in to_moves:
            new_path = os.path.join("./datasets", "dataset_part_" + str(current_part), os.path.basename(to_move))
            shutil.move(to_move, new_path)

        to_moves = []
        current_count = 0
        current_part += 1

42
42
49
45
49
49
43
45
52
46
56
45
43


In [14]:
tif_count = 0
for i in range(8):
    part_path = os.path.join("./datasets", "dataset_part_" + str(i))
    for date in os.listdir(part_path):
        date_path = os.path.join(part_path, date)
        for root, dirs, files in os.walk(date_path):
            for file in files:
                if file.endswith(".tif"):
                    tif_count += 1

364
