In [1]:
import rasterio
import shutil
import numpy as np

import os
import pandas as pd


In [2]:
def getPatchWithCsv(gps_csv, tif, patch_size, outfolder, pad=None, month="July"):
    """Cut square patches out of a raster around the points listed in a CSV.

    The CSV is filtered to the rows whose ``Date`` column equals ``month``.
    For every remaining row a ``patch_size`` x ``patch_size`` pixel window,
    positioned around the row's (``lon``, ``lat``), is read from ``tif`` and
    at most the first six bands are kept. Patches containing NaN are skipped
    and reported on stdout; all others are written as GeoTIFF-style rasters
    (same metadata as the source, with updated size/transform/band count) to
    ``<outfolder>/train`` when the row's ``folder`` value is ``'training'``
    and to ``<outfolder>/test`` otherwise.

    Args:
        gps_csv: Path to a CSV with the columns ``Date``, ``lat``, ``lon``,
            ``folder``, ``class`` and ``pad``.
        tif: Path to the raster, opened with ``rasterio.open``.
        patch_size: Side length of each patch, in pixels.
        outfolder: Directory under which ``train``, ``test`` and (when
            ``pad`` is given) ``pad_test`` are created.
        pad: Optional value; every written patch whose ``pad`` column equals
            it is additionally copied into ``<outfolder>/pad_test``.
        month: Value matched against the ``Date`` column; also used as the
            prefix of every output file name.

    Returns:
        None. Results are the files written below ``outfolder``.
    """
    train_folder = os.path.join(outfolder, "train")
    test_folder = os.path.join(outfolder, "test")
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # Stays None when no pad filter is requested, so the copy step below can
    # never touch an undefined folder.
    pad_test_folder = None
    if pad is not None:
        pad_test_folder = os.path.join(outfolder, "pad_test")
        os.makedirs(pad_test_folder, exist_ok=True)

    points = pd.read_csv(gps_csv)
    print(points.shape)

    points = points[points['Date'] == month].reset_index(drop=True)
    print(points.shape)
    print(points['lat'])

    # Iterate the underlying arrays so each value keeps the scalar type it had
    # with positional Series access (the values are formatted into file names).
    rows = zip(
        points['lon'].to_numpy(),
        points['lat'].to_numpy(),
        points['folder'].to_numpy(),
        points['class'].to_numpy(),
        points['pad'].to_numpy(),
    )

    half = patch_size // 2
    with rasterio.open(tif) as dataset:
        for p_lon, p_lat, fd, cls, case_pad in rows:
            # Built before any check so skip messages always name the patch
            # currently being processed.
            out_tif = f"{month}_{cls}_{str(p_lon)}_{str(p_lat)}_{str(case_pad)}_{patch_size}.tif"

            # clip
            py, px = dataset.index(p_lon, p_lat)
            window = rasterio.windows.Window(px - half, py - half, patch_size, patch_size)
            clip = dataset.read(window=window)[0:6, :, :]

            # NOTE(review): assumes a non-boundless read returns a cropped
            # array when the window runs past the raster edge - confirm
            # against the rasterio docs. Such a patch would not match the
            # size/transform written below, so it is skipped.
            if clip.shape[1:] != (patch_size, patch_size):
                print(f"skipping {out_tif}: window exceeds raster bounds, got shape {clip.shape}")
                continue

            if np.isnan(clip).any():
                print(f"out tif name is {out_tif}")
                continue

            # meta data update
            meta = dataset.meta.copy()
            meta.update({
                'width': patch_size,
                'height': patch_size,
                'transform': rasterio.windows.transform(window, dataset.transform),
                # Equals 6 for rasters with six or more bands; stays correct
                # for rasters with fewer.
                'count': clip.shape[0],
            })

            # save clip (separate local: the `outfolder` argument is not overwritten)
            dest_folder = train_folder if fd == 'training' else test_folder
            out_path = os.path.join(dest_folder, out_tif)
            with rasterio.open(out_path, 'w', **meta) as dst:
                dst.write(clip)

            # Exact, type-sensitive comparison against the CSV's `pad` value.
            if pad_test_folder is not None and case_pad == pad:
                shutil.copy(out_path, os.path.join(pad_test_folder, out_tif))



In [3]:
# Inputs for the grid-split extraction run.
gps_csv = "./data/split_grid_best.csv"
tifs = [
    "./All_Paddock_22_SEP_2024_ortho_bgrent.tiff",
    "./All_Paddock_26_JUL_2024_ortho_bgrent.tiff"
]
patch_sizes = [50, 64]
outfolder_base = "./data/patch_grid"

# Visit every (orthomosaic, patch size) pairing, orthomosaic-major.
for tif, patch_size in [(t, s) for t in tifs for s in patch_sizes]:
    # The acquisition month is taken from the orthomosaic's file name.
    month = "Sep" if "JUL" not in tif else "July"
    outfolder = os.path.join(outfolder_base, f"patch_{patch_size}")

    if os.path.exists(outfolder) is False:
        os.makedirs(outfolder)

    getPatchWithCsv(gps_csv=gps_csv, tif=tif, patch_size=patch_size,
                    outfolder=outfolder, month=month)
    

(554, 22)
(199, 22)
0      30.873353
1      30.873651
2      30.873453
3      30.873592
4      30.873491
         ...    
194    30.877140
195    30.877245
196    30.876973
197    30.876931
198    30.876887
Name: lat, Length: 199, dtype: float64
(554, 22)
(199, 22)
0      30.873353
1      30.873651
2      30.873453
3      30.873592
4      30.873491
         ...    
194    30.877140
195    30.877245
196    30.876973
197    30.876931
198    30.876887
Name: lat, Length: 199, dtype: float64
(554, 22)
(355, 22)
0      30.874439
1      30.875598
2      30.873419
3      30.873981
4      30.874123
         ...    
350    30.876726
351    30.876701
352    30.876698
353    30.876714
354    30.876715
Name: lat, Length: 355, dtype: float64
(554, 22)
(355, 22)
0      30.874439
1      30.875598
2      30.873419
3      30.873981
4      30.874123
         ...    
350    30.876726
351    30.876701
352    30.876698
353    30.876714
354    30.876715
Name: lat, Length: 355, dtype: float64


In [16]:
# Inputs for the per-pad extraction run: one split CSV per pad.
gps_csvs = [
    "./data/split_pad3.csv",
    "./data/split_pad5.csv",
    "./data/split_pad9.csv"
]
tifs = [
    "./All_Paddock_22_SEP_2024_ortho_bgrent.tiff",
    "./All_Paddock_26_JUL_2024_ortho_bgrent.tiff"
]
patch_sizes = [20, 50, 224]
outfolder_base = "./data/patch"

for gps_csv in gps_csvs:
    # File stem of the CSV (e.g. "split_pad3"); it names the output folder.
    pad, _ext = os.path.splitext(os.path.basename(gps_csv))
    pad_folder = os.path.join(outfolder_base, f"{pad}")
    if os.path.exists(pad_folder) is False:
        os.makedirs(pad_folder)

    # Visit every (orthomosaic, patch size) pairing, orthomosaic-major.
    for tif, patch_size in [(t, s) for t in tifs for s in patch_sizes]:
        # The acquisition month is taken from the orthomosaic's file name.
        month = "Sep" if "JUL" not in tif else "July"
        outfolder = os.path.join(pad_folder, f"patch_{patch_size}")

        if os.path.exists(outfolder) is False:
            os.makedirs(outfolder)

        # Only the part of the stem after the last underscore is used as the
        # pad filter value.
        getPatchWithCsv(gps_csv=gps_csv, tif=tif, patch_size=patch_size,
                        outfolder=outfolder, pad=pad.rsplit('_', 1)[-1],
                        month=month)

(554, 19)
(199, 19)
0      30.874053
1      30.875312
2      30.876165
3      30.876606
4      30.876083
         ...    
194    30.874548
195    30.874515
196    30.874319
197    30.874327
198    30.874573
Name: lat, Length: 199, dtype: float64
(554, 19)
(199, 19)
0      30.874053
1      30.875312
2      30.876165
3      30.876606
4      30.876083
         ...    
194    30.874548
195    30.874515
196    30.874319
197    30.874327
198    30.874573
Name: lat, Length: 199, dtype: float64
(554, 19)
(199, 19)
0      30.874053
1      30.875312
2      30.876165
3      30.876606
4      30.876083
         ...    
194    30.874548
195    30.874515
196    30.874319
197    30.874327
198    30.874573
Name: lat, Length: 199, dtype: float64
(554, 19)
(355, 19)
0      30.876442
1      30.875234
2      30.876479
3      30.876380
4      30.873735
         ...    
350    30.874340
351    30.874369
352    30.874282
353    30.874339
354    30.874365
Name: lat, Length: 355, dtype: float64
(554, 19)
(355, 