In [16]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
import numpy as np
import pandas as pd
import ast 
import geopandas as gpd
from shapely.geometry import Point
import shutil
from tqdm import tqdm
from pathlib import Path
from shapely import wkt
import s3fs
from src.gelos_config import GELOSConfig
from src.data_cleaner import _construct_file_paths, _construct_dem_path, drop_rows, filter_by_n_dates, gen_thumbnail_urls

# S3 filesystem handle created with anon=True (anonymous access).
# NOTE(review): `s3` is not referenced by any cell visible in this notebook — confirm it is still needed.
s3 = s3fs.S3FileSystem(anon=True)

# TODO: Move file-path construction into the data generation logic.

# Land-use/land-cover classes used by the cleaner, one row per class:
# (class code as a string, display name, hex colour).
_LULC_TABLE = (
    ('1', 'Water', '#419bdf'),
    ('2', 'Trees', '#397d49'),
    ('5', 'Crops', '#e49635'),
    ('7', 'Built area', '#c4281b'),
    ('8', 'Bare ground', '#a59b8f'),
    ('11', 'Rangeland', '#e3e2c3'),
)

# class code -> hex colour
color_dict = {code: hex_color for code, _name, hex_color in _LULC_TABLE}

# class code -> display name
lulc = {code: name for code, name, _hex_color in _LULC_TABLE}
class DataCleaner:
    """Clean the generated chip metadata and export the retained chips.

    ``clean`` reads ``chip_metadata.csv`` from ``<working>/<version>``, keeps the
    successful chips of the supported land-cover classes, optionally thins out
    over-represented classes, adds id/location/display columns, and writes the
    resulting table plus the re-numbered chip files to ``<output>/<version>``.
    """

    def __init__(self, config: GELOSConfig):
        """Store the configuration and resolve the directories used by ``clean``.

        Args:
            config: configuration object; this class reads ``dataset.version``,
                ``directory.working``, ``directory.output`` and (in ``clean``)
                ``lulc.sampling_factor`` from it.
        """
        self.config = config
        self.version = self.config.dataset.version
        self.working_dir = Path(self.config.directory.working)
        self.output_dir = Path(self.config.directory.output)

    def clean(self):
        """Run the cleaning pipeline and write its results to the output directory.

        Side effects: creates ``<output>/<version>`` (including missing parents),
        writes ``gelos_chip_tracker.geojson`` into it, copies the files of every
        retained chip from the working directory under the chip's new id, and
        creates ``<version>.zip`` inside ``<output>/<version>``.

        Raises:
            FileNotFoundError: if the metadata CSV or one of the expected chip
                files is missing (raised by ``pd.read_csv`` / ``shutil.copy2``).
        """
        source_dir = self.working_dir / self.version
        target_dir = self.output_dir / self.version

        metadata_df = pd.read_csv(source_dir / "chip_metadata.csv")
        # Parse the WKT footprints; rows with a missing footprint are skipped by
        # dropna() and therefore end up without a geometry after index alignment.
        metadata_df['chip_footprint'] = gpd.GeoSeries(
            metadata_df['chip_footprint'].dropna().map(wkt.loads), crs=4326
        )
        metadata_gdf = gpd.GeoDataFrame(metadata_df, geometry='chip_footprint', crs=4326)
        metadata_gdf = metadata_gdf[metadata_gdf['status'] == 'success']

        # ensure only desired lulc classes are present
        metadata_gdf = metadata_gdf[metadata_gdf['lulc'].isin([1, 2, 5, 7, 8, 11])]

        # filter rows where there are insufficient samples: keep the rows for which
        # filter_by_n_dates (project helper) returns a truthy value for every modality
        for modality in ['s1rtc', 's2l2a', 'lc2l2']:
            keep_mask = metadata_gdf.apply(
                lambda row: filter_by_n_dates(row, modality, required_dates=4), axis=1
            )
            metadata_gdf = metadata_gdf[keep_mask]

        # optionally thin out over-represented classes
        sampling_factor = self.config.lulc.sampling_factor
        if sampling_factor:
            metadata_gdf = self._balance_classes(metadata_gdf, sampling_factor)

        # create metadata columns
        # Work on an explicit copy: the frame is a filtered selection at this point
        # and assigning columns to it can trigger pandas' SettingWithCopyWarning.
        metadata_gdf = metadata_gdf.copy()
        metadata_gdf['id'] = np.arange(0, len(metadata_gdf))
        # Compute the centroids once and reuse them for both coordinates.
        # NOTE(review): the footprints are in EPSG:4326, so these are centroids in
        # degrees; geopandas warns about this — confirm the accuracy is acceptable.
        centroids = metadata_gdf.geometry.centroid
        metadata_gdf['lat'] = centroids.y
        metadata_gdf['lon'] = centroids.x
        metadata_gdf = metadata_gdf.rename(columns={"chip_index": "original_id"})
        metadata_gdf.index = metadata_gdf['id']
        # class codes become strings so they match the keys of `lulc` / `color_dict`
        metadata_gdf['lulc'] = metadata_gdf['lulc'].astype(int).astype(str)
        metadata_gdf['category'] = metadata_gdf['lulc'].map(lulc)
        metadata_gdf['color'] = metadata_gdf['lulc'].map(color_dict)

        for image in ["lc2l2", "s1rtc", "s2l2a"]:
            metadata_gdf[f"{image}_thumbs"] = metadata_gdf.apply(
                gen_thumbnail_urls, axis=1, image=image
            )

        for modality in ["lc2l2", "s1rtc", "s2l2a"]:
            metadata_gdf[f"{modality}_paths"] = metadata_gdf.apply(
                _construct_file_paths, modality=modality, axis=1
            )
        # the DEM has its own path helper and is added after the dated modalities
        metadata_gdf["dem_paths"] = metadata_gdf.apply(_construct_dem_path, axis=1)

        # parents=True so the export also works when the output root does not exist yet
        target_dir.mkdir(parents=True, exist_ok=True)

        # save to geojson
        metadata_gdf.to_file(target_dir / 'gelos_chip_tracker.geojson', driver='GeoJSON', index=False)

        # copy files to destination folder
        for _, row in tqdm(metadata_gdf.iterrows(), total=len(metadata_gdf), desc="copying files to output dir..."):
            self._copy_chip_files(row, source_dir, target_dir)

        # zip folder
        # NOTE(review): this archives the *working* version directory, not the
        # cleaned output written above — confirm that is the intended content.
        folder_to_zip = source_dir
        output_zip_file = target_dir / self.version
        shutil.make_archive(output_zip_file, 'zip', folder_to_zip)

    def _balance_classes(self, metadata_gdf, sampling_factor):
        """Drop rows from over-represented lulc classes and return the reduced frame.

        The number of rows dropped from a class is proportional to how far the
        class is above the smallest class, scaled so that class sizes end up
        between ``min_count`` and ``min_count * sampling_factor``. Nothing is
        dropped when all classes already have the same size or when the largest
        class is already within ``min_count * sampling_factor``.

        Args:
            metadata_gdf: frame with an ``lulc`` column.
            sampling_factor: allowed ratio between largest and smallest class.
        """
        # rows per class; size() does not depend on which column happens to be non-null
        class_counts = metadata_gdf.groupby("lulc").size()
        max_count = class_counts.max()
        min_count = class_counts.min()

        # use sampling factor to calculate correction factor, for proportional class drop quantities
        max_distance = max_count - min_count
        max_end_value = min_count * sampling_factor
        max_distance_to_max_end_value = max_count - max_end_value

        # max_distance == 0 (all classes equally large) would make the division
        # below undefined; there is nothing to rebalance in that case anyway.
        if not (max_distance > 0 and max_distance_to_max_end_value > 0):
            return metadata_gdf

        correction_factor = max_distance_to_max_end_value / max_distance
        for lulc_class, class_count in class_counts.items():
            class_distance = class_count - min_count
            drop_quantity = int(correction_factor * class_distance)
            # drop_rows is a project helper; presumably it removes `drop_quantity`
            # rows of `lulc_class` — confirm against src.data_cleaner.
            metadata_gdf = drop_rows(metadata_gdf, lulc_class, drop_quantity)
        return metadata_gdf

    def _copy_chip_files(self, row, source_dir, target_dir):
        """Copy one chip's .tif/.png files, renaming them from original id to new id.

        Source names carry the original id and the per-date position
        (``<platform>_<original_id>_<i>_<date>``); destination names carry only the
        new id and the date (``<platform>_<id>_<date>``).
        """
        original_id = row['original_id']
        new_id = row['id']
        for col in ["s2l2a_dates", "s1rtc_dates", "lc2l2_dates"]:
            platform = col[:-6]  # strip the "_dates" suffix
            for i, date in enumerate(row[col].split(',')):
                for extension in ("tif", "png"):
                    src_file = source_dir / f"{platform}_{original_id:06}_{i}_{date}.{extension}"
                    dst_file = target_dir / f"{platform}_{new_id:06}_{date}.{extension}"
                    shutil.copy2(src_file, dst_file)
        src_file = source_dir / f"dem_{original_id:06}.tif"
        dst_file = target_dir / f"dem_{new_id:06}.tif"
        shutil.copy2(src_file, dst_file)


In [3]:
# Build the cleaner from the YAML configuration file.
config = GELOSConfig.from_yaml('/app//config.yml')
cleaner = DataCleaner(config)
# The following cells are the body of `DataCleaner.clean` run step by step, so they
# refer to the instance as `self`; the sampling cell at the end refers to it as `cleaner`.
self = cleaner


In [5]:
# Read the raw chip metadata and turn the WKT footprints into a geometry column.
metadata_df = pd.read_csv(self.working_dir / self.version / "chip_metadata.csv")
footprints = metadata_df['chip_footprint'].dropna().map(wkt.loads)
metadata_df['chip_footprint'] = gpd.GeoSeries(footprints, crs=4326)
metadata_gdf = gpd.GeoDataFrame(metadata_df, geometry='chip_footprint', crs=4326)

# Keep only the chips whose status is 'success' ...
succeeded = metadata_gdf['status'] == 'success'
metadata_gdf = metadata_gdf[succeeded]

# ... and, of those, only the desired lulc classes.
wanted_lulc = metadata_gdf['lulc'].isin([1, 2, 5, 7, 8, 11])
metadata_gdf = metadata_gdf[wanted_lulc]
metadata_gdf.head()

Unnamed: 0,chip_index,aoi_index,s2l2a_dates,s1rtc_dates,lc2l2_dates,lulc,chip_footprint,epsg,status,s2l2a_scene_ids,s1rtc_scene_ids,lc2l2_scene_ids,lulc_scene_ids,dem_scene_ids
318,318,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2.0,"POLYGON ((21.82991 4.28125, 21.82992 4.28993, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper..."
475,475,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2.0,"POLYGON ((21.57035 4.22938, 21.57036 4.23806, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper..."
1461,1461,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2.0,"POLYGON ((21.11185 4.09062, 21.11185 4.09931, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper..."
1550,1550,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2.0,"POLYGON ((21.09455 4.08194, 21.09456 4.09062, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper..."
1551,1551,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2.0,"POLYGON ((21.1032 4.08194, 21.1032 4.09062, 21...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper..."


In [7]:

# Keep a row only if filter_by_n_dates accepts it for every modality
# (called with required_dates=4).
for modality in ['s1rtc', 's2l2a', 'lc2l2']:
    keep_mask = metadata_gdf.apply(
        lambda chip: filter_by_n_dates(chip, modality, required_dates=4),
        axis=1,
    )
    metadata_gdf = metadata_gdf[keep_mask]


In [8]:


# get sampling factor, max count, and min count
sampling_factor = self.config.lulc.sampling_factor
if sampling_factor:
    # rows per class; size() does not depend on which column happens to be non-null
    class_counts = metadata_gdf.groupby("lulc").size()
    max_count = class_counts.max()
    min_count = class_counts.min()

    # use sampling factor to calculate correction factor, for proportional class drop quantities
    max_distance = max_count - min_count
    max_end_value = min_count * sampling_factor
    max_distance_to_max_end_value = max_count - max_end_value

    # use correction factor to determine proportion of samples above min to drop for each class
    # the number of samples dropped will be proportional to the number of samples above minimum
    # this scales the number of samples between min and min * sampling factor
    #
    # max_distance == 0 means every class already has the same size; the division
    # below would be undefined and there is nothing to rebalance, so skip it.
    if max_distance > 0 and max_distance_to_max_end_value > 0:
        correction_factor = max_distance_to_max_end_value / max_distance

        for lulc_class, class_count in class_counts.items():
            class_distance = class_count - min_count
            drop_quantity = int(correction_factor * class_distance)
            # drop_rows is a project helper; presumably it removes `drop_quantity`
            # rows of `lulc_class` — confirm against src.data_cleaner.
            metadata_gdf = drop_rows(metadata_gdf, lulc_class, drop_quantity)
    

In [9]:

# create metadata columns
# Work on an explicit copy: the frame is a filtered selection at this point and
# assigning columns to it can trigger pandas' SettingWithCopyWarning.
metadata_gdf = metadata_gdf.copy()
metadata_gdf['id'] = np.arange(0, len(metadata_gdf))
# Compute the centroids once and reuse them for both coordinates.
# NOTE(review): the footprints are in EPSG:4326, so these are centroids in degrees;
# geopandas warns about this — confirm the accuracy is acceptable.
centroids = metadata_gdf.geometry.centroid
metadata_gdf['lat'] = centroids.y
metadata_gdf['lon'] = centroids.x
metadata_gdf = metadata_gdf.rename(columns={"chip_index": "original_id"})
metadata_gdf.index = metadata_gdf['id']
# class codes become strings so they match the keys of `lulc` / `color_dict`
metadata_gdf['lulc'] = metadata_gdf['lulc'].astype(int).astype(str)
metadata_gdf['category'] = metadata_gdf['lulc'].map(lulc)
metadata_gdf['color'] = metadata_gdf['lulc'].map(color_dict)



  metadata_gdf['lat'] = metadata_gdf.geometry.centroid.y

  metadata_gdf['lon'] = metadata_gdf.geometry.centroid.x


In [10]:
# Preview the first rows to check the newly added id/lat/lon/category/color columns.
metadata_gdf.head()

Unnamed: 0_level_0,original_id,aoi_index,s2l2a_dates,s1rtc_dates,lc2l2_dates,lulc,chip_footprint,epsg,status,s2l2a_scene_ids,s1rtc_scene_ids,lc2l2_scene_ids,lulc_scene_ids,dem_scene_ids,id,lat,lon,category,color
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,318,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.82991 4.28125, 21.82992 4.28993, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",0,4.285593,21.825585,Trees,#397d49
1,475,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.57035 4.22938, 21.57036 4.23806, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",1,4.233723,21.566028,Trees,#397d49
2,1461,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.11185 4.09062, 21.11185 4.09931, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",2,4.094963,21.107529,Trees,#397d49
3,1550,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.09455 4.08194, 21.09456 4.09062, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",3,4.08628,21.09023,Trees,#397d49
4,1551,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.1032 4.08194, 21.1032 4.09062, 21...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,S1A_IW_GRDH_1SDV_20230218T041643_20230218T0417...,"LC09_L2SP_179057_20230217_02_T1,LC09_L2SP_1790...","60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",4,4.086279,21.098879,Trees,#397d49


In [11]:
# Add one "<modality>_thumbs" column per imagery modality; each value is whatever
# gen_thumbnail_urls returns for that row and modality.
for image in ("lc2l2", "s1rtc", "s2l2a"):
    thumbs_column = f"{image}_thumbs"
    thumbs = metadata_gdf.apply(gen_thumbnail_urls, axis=1, image=image)
    metadata_gdf[thumbs_column] = thumbs

In [13]:
# Preview the first rows to check the newly added *_thumbs columns.
metadata_gdf.head()

Unnamed: 0_level_0,original_id,aoi_index,s2l2a_dates,s1rtc_dates,lc2l2_dates,lulc,chip_footprint,epsg,status,s2l2a_scene_ids,...,lulc_scene_ids,dem_scene_ids,id,lat,lon,category,color,lc2l2_thumbs,s1rtc_thumbs,s2l2a_thumbs
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,318,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.82991 4.28125, 21.82992 4.28993, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,...,"60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",0,4.285593,21.825585,Trees,#397d49,https://gelos-fm.s3.amazonaws.com/thumbnails/l...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...
1,475,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.57035 4.22938, 21.57036 4.23806, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,...,"60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",1,4.233723,21.566028,Trees,#397d49,https://gelos-fm.s3.amazonaws.com/thumbnails/l...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...
2,1461,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.11185 4.09062, 21.11185 4.09931, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,...,"60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",2,4.094963,21.107529,Trees,#397d49,https://gelos-fm.s3.amazonaws.com/thumbnails/l...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...
3,1550,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.09455 4.08194, 21.09456 4.09062, ...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,...,"60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",3,4.08628,21.09023,Trees,#397d49,https://gelos-fm.s3.amazonaws.com/thumbnails/l...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...
4,1551,0,20230218202304192023071320231230,20230218202304192023071220231227,20230217202305242023092120231218,2,"POLYGON ((21.1032 4.08194, 21.1032 4.09062, 21...",32634,success,S2A_MSIL2A_20230218T085021_R107_T34NEK_2023022...,...,"60N-2023,34N-2023,01N-2023","Copernicus_DSM_COG_10_N04_00_E021_00_DEM,Coper...",4,4.086279,21.098879,Trees,#397d49,https://gelos-fm.s3.amazonaws.com/thumbnails/l...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...,https://gelos-fm.s3.amazonaws.com/thumbnails/s...


In [19]:

    
# Add a "<modality>_paths" column for every modality. The DEM uses its own helper
# (it takes no modality argument); the imagery modalities share _construct_file_paths.
for modality in ["lc2l2", "s1rtc", "s2l2a", "dem"]:
    if modality != "dem":
        metadata_gdf[f"{modality}_paths"] = metadata_gdf.apply(
            _construct_file_paths, modality=modality, axis=1
        )
    else:
        metadata_gdf["dem_paths"] = metadata_gdf.apply(_construct_dem_path, axis=1)


In [23]:
# Spot-check the `lc2l2_thumbs` value generated for the first row.
metadata_gdf.iloc[0]['lc2l2_thumbs']

'https://gelos-fm.s3.amazonaws.com/thumbnails/lc2l2_000000_20230217.png,https://gelos-fm.s3.amazonaws.com/thumbnails/lc2l2_000000_20230524.png,https://gelos-fm.s3.amazonaws.com/thumbnails/lc2l2_000000_20230921.png,https://gelos-fm.s3.amazonaws.com/thumbnails/lc2l2_000000_20231218.png'

In [None]:

source_dir = self.working_dir / self.version
export_dir = self.output_dir / self.version
# parents=True so the export also works when the output root does not exist yet
export_dir.mkdir(parents=True, exist_ok=True)

# save to geojson
metadata_gdf.to_file(export_dir / 'gelos_chip_tracker.geojson', driver='GeoJSON', index=False)

# copy files to destination folder, renaming them from the original chip id to the new id
# (source names also carry the per-date position `i`; destination names do not)
for index, row in tqdm(metadata_gdf.iterrows(), total=len(metadata_gdf), desc="copying files to output dir..."):
    original_id = row['original_id']
    new_id = row['id']
    for col in ["s2l2a_dates", "s1rtc_dates", "lc2l2_dates"]:
        platform = col[:-6]  # strip the "_dates" suffix
        for i, date in enumerate(row[col].split(',')):
            for extension in ("tif", "png"):
                src_file = source_dir / f"{platform}_{original_id:06}_{i}_{date}.{extension}"
                dst_file = export_dir / f"{platform}_{new_id:06}_{date}.{extension}"
                shutil.copy2(src_file, dst_file)
    src_file = source_dir / f"dem_{original_id:06}.tif"
    dst_file = export_dir / f"dem_{new_id:06}.tif"
    shutil.copy2(src_file, dst_file)

# zip folder
# NOTE(review): this archives the *working* version directory, not the cleaned
# output written above — confirm that is the intended content.
folder_to_zip = source_dir
output_zip_file = export_dir / self.version
shutil.make_archive(output_zip_file, 'zip', folder_to_zip)


## Get a subset of successful chips

In [51]:
# `cleaner` is not defined by any earlier cell visible in this notebook; reuse the
# DataCleaner instance created above (bound to `self`) so the cell runs on a fresh kernel.
cleaner = self

metadata_gdf_sample = metadata_gdf.head(10)
cleaner.sample_dir = Path('/home/benchuser/code/data/')
cleaner.output_version = 'v0.40'
sample_source_dir = cleaner.working_dir / cleaner.version
sample_export_dir = cleaner.sample_dir / cleaner.output_version
# parents=True so the cell also works when the sample root does not exist yet
sample_export_dir.mkdir(parents=True, exist_ok=True)
# NOTE(review): this writes the full table, while only the first 10 chips' files
# are copied below — confirm whether `metadata_gdf_sample` was intended here.
metadata_gdf.to_file(sample_export_dir / 'cleaned_df.geojson', driver='GeoJSON', index=False)

for index, row in tqdm(metadata_gdf_sample.iterrows(), total=len(metadata_gdf_sample), desc="copying files to output dir..."):
    # The id columns are named `original_id` / `id` after the "create metadata columns"
    # cell; `original_chip_id` / `chip_id` do not exist in this frame.
    original_id = row['original_id']
    new_id = row['id']
    for col in ["s2l2a_dates", "s1rtc_dates", "lc2l2_dates"]:
        platform = col[:-6]  # strip the "_dates" suffix
        # NOTE(review): the export cell above parses these columns with `.split(',')`;
        # confirm which representation the `*_dates` columns hold.
        for i, date in enumerate(ast.literal_eval(row[col])):
            for extension in ("tif", "png"):
                src_file = sample_source_dir / f"{platform}_{original_id:06}_{i}_{date}.{extension}"
                dst_file = sample_export_dir / f"{platform}_{new_id:06}_{date}.{extension}"
                shutil.copy2(src_file, dst_file)
    src_file = sample_source_dir / f"dem_{original_id:06}.tif"
    dst_file = sample_export_dir / f"dem_{new_id:06}.tif"
    shutil.copy2(src_file, dst_file)

copying files to output dir...: 100%|██████████| 10/10 [00:00<00:00, 227.68it/s]
