# Demo of CW-Tiler - Atlanta - V3

This is a walk through for creation of the Atlantas Dataset.  

This demo will walk through the use of the tiler to create utm tiles with the SpaceNet Data Repository.  We will be taking advantage of cloud optimized geotiffs.  

For more information about SpaceNet visit https://spacenetchallenge.github.io/



In [2]:
# Import base tools

## Note, for mac osx compatability import something from shapely.geometry before importing fiona or geopandas
## https://github.com/Toblerity/Shapely/issues/553  * Import shapely before rasterio or fioana
from shapely import geometry
import rasterio
import random
from cw_tiler import main
from cw_tiler import utils
from cw_tiler import vector_utils
import numpy as np
import geopandas as gpd
import os
from tqdm import tqdm
# Setting Certificate Location for Ubuntu/Mac OS locations (Rasterio looks for certs in centos locations)  This is important for s3 access  (s3 must be configured using aws cli / boto3)
os.environ['CURL_CA_BUNDLE']='/etc/ssl/certs/ca-certificates.crt'

In [156]:
from stac_tools.stac_item import spacenetStacItem
import glob
import os



## Read List of Images into GeoDataframe

In [2]:

import geopandas as gpd

imageDF = gpd.read_file("/raid/nfs/workingDir/cw-tiler/tmpGeoFiles/Atlanta_data.shp")

In [3]:
imageDF.columns

Index(['image_area', 'image_id', 'image_path', 'image_type', 'imd_path',
       'xml_path', 'geometry'],
      dtype='object')

In [4]:
print("Total Images = {}".format(imageDF.shape))
imageDF.head()

Total Images = (81, 7)


Unnamed: 0,image_area,image_id,image_path,image_type,imd_path,xml_path,geometry
0,529.597357,Atlanta_nadir14_catid_10300100039AB000,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,PAN,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,"POLYGON ((750151.0046244338 3715733.306822867,..."
1,529.577299,Atlanta_nadir14_catid_10300100039AB000,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,MS,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,"POLYGON ((750148.3462557473 3715732.979846669,..."
2,529.597357,Atlanta_nadir14_catid_10300100039AB000,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,Pan-Sharpen,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,"POLYGON ((750151.0046244338 3715733.306822867,..."
3,963.246923,Atlanta_nadir39_catid_1030010003832800,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,Pan-Sharpen,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,"POLYGON ((753426.2862091018 3713963.090985105,..."
4,963.246923,Atlanta_nadir39_catid_1030010003832800,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,PAN,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,/raid/nfs/data/Datasets/CosmiQ_General_Study/A...,"POLYGON ((753426.2862091018 3713963.090985105,..."


## Create JSON Work order Files for Processing Raw Imagery into Tiles.  

In [164]:
### 
# Feed index file to chip all other images into.  The Shape File has a set of cells.  CW-Tiler will clip imagery to each cell extent.  
tile_index_shp = '/raid/nfs/workingDir/cw-tiler/tmpGeoFiles/AOI_6_Atlanta_Cells_v1.shp'

# Pick location for Cells
results_data_base_location = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav4/'

## Location to write Command Script to:
command_script_location = '/raid/nfs/workingDir/cw-tiler/Atlanta_AOI_parallelScript_v1.sh'

import json
commandScript = []
## Python promt calling cw_tiler_AOI_6_Atlanta.py to process
pythonCommand = 'python cw_tiler_AOI_6_Atlanta.py {}'
## Iterate through each image to create the work order command.  This includes the creation of a json file with the details of the work order.  
for idx, image_row in imageDF.iterrows():
        workorder = image_row.to_dict()
        workorder['geometry']=workorder['geometry'].to_wkt()
        if workorder['image_type'] == 'Pan-Sharpen':
            ## Pan Sharpen tile should bee 900 x 900px
            workorder['tile_pixel_size'] = 900
        elif workorder['image_type'] == 'PAN':
            ## Pan  tile should bee 900 x 900px
            workorder['tile_pixel_size'] = 900
        elif workorder['image_type'] == 'MS':
            ## Pan  tile should bee 225 x 225px
            workorder['tile_pixel_size'] = 225
            
        workorder['tile_index_shp'] = tile_index_shp
        workorder['results_data_base_location'] = results_data_base_location
        
        ## Worker Orders should work on a _cog for quicker tiling.  This is not required
        workorder['image_path'] = workorder['image_path'].replace('.tif', '_cog.tif')
        
        ## Save Work order
        workorder_location = os.path.join(results_data_base_location, "{}_{}_v1.json".format(workorder['image_id'], workorder['image_type']))
        
        #print(workorder_location)
        with open(workorder_location, 'w') as fp:
            json.dump(workorder, fp, indent=4)
            
        commandScript.append(pythonCommand.format(workorder_location))

with open(command_script_location, 'w') as fp:
    
    fp.writelines(["{}\n".format(item)  for item in commandScript])
        
 

In [7]:
## Create Train /Test / Split

In [35]:
import os
import json
import glob
tile_index_shp = '/raid/nfs/workingDir/cw-tiler/AOI_6_Atlanta_Cells_v1.shp'
results_data_base_location = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav4/'

results = glob.glob(os.path.join(results_data_base_location, '**', '*.shp'), recursive=True)
## Each Imagery Workfile generates a ShapeFile with results.  

In [36]:
## Process Vector File

'/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/Atlanta_nadir14_catid_10300100039AB000/Pan-Sharpenworkorder.shp'

In [37]:
import geopandas as gpd
import pandas as pd
import numpy as np
baseIndexFile = gpd.read_file(tile_index_shp)
baseIndexFile1 = baseIndexFile.copy()

## Analyze Results to check all tiles are made.  
for result in results:
    if os.path.basename(result) == 'Pan-Sharpenworkorder.shp':
        tmpFile = gpd.read_file(result)
        idName = os.path.basename(os.path.dirname(result))
        tmpFile[idName] = tmpFile['result']
        tmpFile = tmpFile.drop(columns=['result', 'geometry'])
        baseIndexFile = pd.merge(baseIndexFile, tmpFile, how='outer', on='FID')
    if os.path.basename(result) == 'spacenet-buildingsworkorder.shp':
        tmpFile = gpd.read_file(result)
        #Index(['FID', 'object_cou', 'object_are', 'geometry'], dtype='object')
        idName = 'spacenet-buildings'
        tmpFile['spacenet-buildings_count'] = tmpFile['object_cou']
        tmpFile['spacenet-buildings_area'] = tmpFile['object_are']
        tmpFile = tmpFile.drop(columns=['object_cou', 'object_are','geometry'])
        baseIndexFile = pd.merge(baseIndexFile, tmpFile, how='outer', on='FID')
    
    if os.path.basename(result) == 'spacenet-tile-idworkorder.shp':
        tmpFile = gpd.read_file(result)
        #Index(['FID', 'object_cou', 'object_are', 'geometry'], dtype='object')
        tmpFile['label_suffix'] = tmpFile['tile_index']
        tmpFile['image_suffix'] = tmpFile['tile_ind_1']
        tmpFile = tmpFile.drop(columns=['tile_index', 'tile_ind_1','geometry'])
        baseIndexFile = pd.merge(baseIndexFile, tmpFile, how='outer', on='FID')
        

baseIndexFile.crs = baseIndexFile1.crs
#baseIndexFile.to_file('/raid/nfs/workingDir/cw-tiler/AOI_6_Atlanta_PAN_details.geojson', driver='GeoJSON')

In [38]:
baseIndexFile.head()
baseIndexFile['_TotalCount'] = baseIndexFile.drop(columns=['FID', 'geometry', 'spacenet-buildings_count', 'spacenet-buildings_area', 'label_suffix', 'image_suffix']).sum(numeric_only=True, axis=1)

In [39]:
#Split Train/Test/Validate
TrainSplit = .50
TestSplit  = .40
ValidateSplit = .10
totalTiles = np.sum(np.sum(baseIndexFile.drop(columns=['FID', 'geometry', "_TotalCount"])==0))
totalSpatialTiles = baseIndexFile.shape[0]
print("Total Tiles Spatially = {}".format(totalSpatialTiles))
print("Total Tiles Total = {}".format(totalTiles))
print("Train Tiles = {}, {}% of Tiles".format(totalSpatialTiles*TrainSplit, TrainSplit*100))
print("Test Tiles = {}, {}% of Tiles".format(totalSpatialTiles*TestSplit*1, TestSplit*100))
print("Validate Tiles = {}, {}% of Tiles".format(totalSpatialTiles*ValidateSplit, ValidateSplit*100))

Total Tiles Spatially = 2318
Total Tiles Total = 62170
Train Tiles = 1159.0, 50.0% of Tiles
Test Tiles = 927.2, 40.0% of Tiles
Validate Tiles = 231.8, 10.0% of Tiles


In [40]:
## Assign all tiles to Train by default
baseIndexFile['Category'] = 'Train'
## Assign All Tiles that have a Total Count of missing Tiles >0 To Test  This is only neccessary because all imagery does not have the same extent.  \
## Therefore along the edges there are some nadir angles that don't have complete tiles.  These are passed to test because we only need one tile from each
baseIndexFile.loc[baseIndexFile['_TotalCount']>0, 'Category']='Test'
baseIndexFile['Category'].value_counts()

Train    2090
Test      228
Name: Category, dtype: int64

In [41]:
## Assign more tiles to Test based on the requires split
TestIndex = np.random.choice(baseIndexFile[baseIndexFile['Category']=='Train']['FID'].values, np.int(np.round(totalSpatialTiles*TestSplit - float(baseIndexFile['Category'].value_counts()['Test']))), replace=False)
print(len(TestIndex))    
baseIndexFile.iloc[TestIndex, baseIndexFile.columns.get_loc("Category")]='Test'
baseIndexFile['Category'].value_counts()

699


Train    1391
Test      927
Name: Category, dtype: int64

In [42]:
## Assign Tiles to Validate
TestIndex = np.random.choice(baseIndexFile[baseIndexFile['Category']=='Train']['FID'].values, int(totalSpatialTiles*ValidateSplit), replace=False)
print(len(TestIndex))    
baseIndexFile.iloc[TestIndex, baseIndexFile.columns.get_loc("Category")]='Validate'
baseIndexFile['Category'].value_counts()
#baseIndexFile.to_file("/raid/nfs/workingDir/cw-tiler/AOI_6_Atlanta_Train_Test_Validate_Split.geojson", driver='GeoJSON')

231


Train       1160
Test         927
Validate     231
Name: Category, dtype: int64

In [44]:
## Split Test into 3 categories based on Nadir Angle
TestIndex = np.random.choice(baseIndexFile[baseIndexFile['Category']=='Test']['FID'].values, int(totalSpatialTiles*TestSplit/3), replace=False)
print(len(TestIndex))    
baseIndexFile.iloc[TestIndex, baseIndexFile.columns.get_loc("Category")]='Test_Very-Off-Nadir'
TestIndex = np.random.choice(baseIndexFile[baseIndexFile['Category']=='Test']['FID'].values, int(totalSpatialTiles*TestSplit/3), replace=False)
print(len(TestIndex))    
baseIndexFile.iloc[TestIndex, baseIndexFile.columns.get_loc("Category")]='Test_Off-Nadir'
print(len(TestIndex))    
baseIndexFile.loc[baseIndexFile['Category']=='Test', "Category"]='Test_Nadir'
baseIndexFile['Category'].value_counts()





309
309
309


Train                  1160
Test_Very-Off-Nadir     309
Test_Nadir              309
Test_Off-Nadir          309
Validate                231
Name: Category, dtype: int64

In [45]:
## Write Resulting geojson File
baseIndexFile.to_file("/raid/nfs/workingDir/cw-tiler/AOI_6_Atlanta_Train_Test_Validate_Split_V3.geojson", driver='GeoJSON')

In [47]:
baseIndexFile.groupby("Category")['spacenet-buildings_count', 'spacenet-buildings_area'].describe().reset_index()

Unnamed: 0_level_0,Category,spacenet-buildings_count,spacenet-buildings_count,spacenet-buildings_count,spacenet-buildings_count,spacenet-buildings_count,spacenet-buildings_count,spacenet-buildings_count,spacenet-buildings_count,spacenet-buildings_area,spacenet-buildings_area,spacenet-buildings_area,spacenet-buildings_area,spacenet-buildings_area,spacenet-buildings_area,spacenet-buildings_area,spacenet-buildings_area
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
0,Test_Nadir,309.0,51.912621,45.281251,0.0,15.0,42.0,75.0,205.0,309.0,16977.266444,14795.469159,0.0,6408.938288,13790.524743,23214.448014,82848.486821
1,Test_Off-Nadir,309.0,57.877023,49.052428,0.0,18.0,49.0,86.0,222.0,309.0,18902.260714,17435.747904,0.0,8464.256954,16179.726708,25075.588407,197721.69584
2,Test_Very-Off-Nadir,309.0,55.18123,47.517524,0.0,15.0,44.0,81.0,207.0,309.0,16331.746063,14090.254241,0.0,7521.933113,13595.710858,21080.191473,95643.674119
3,Train,1160.0,59.581034,52.583886,0.0,15.0,46.0,93.0,297.0,1160.0,18145.884553,15339.270785,0.0,7681.139083,15153.94363,24286.449631,154871.791252
4,Validate,231.0,57.891775,49.816459,0.0,16.0,48.0,84.5,201.0,231.0,17714.313251,14370.438253,0.0,8356.967146,15452.977571,23996.264816,106009.227127


Unnamed: 0,FID,geometry_x,result,geometry_y,test
0,0,"POLYGON ((732701 3719289, 732251 3719289, 7322...",1,"POLYGON ((732701 3719289, 732251 3719289, 7322...",1
1,1,"POLYGON ((732701 3719739, 732251 3719739, 7322...",0,"POLYGON ((732701 3719739, 732251 3719739, 7322...",0
2,2,"POLYGON ((732701 3720189, 732251 3720189, 7322...",0,"POLYGON ((732701 3720189, 732251 3720189, 7322...",0
3,3,"POLYGON ((732701 3720639, 732251 3720639, 7322...",0,"POLYGON ((732701 3720639, 732251 3720639, 7322...",0
4,4,"POLYGON ((732701 3721089, 732251 3721089, 7322...",0,"POLYGON ((732701 3721089, 732251 3721089, 7322...",0


In [4]:
### Read Final Tile Split details

final_Tile_Split = gpd.read_file("/raid/nfs/workingDir/cw-tiler/AOI_6_Atlanta_Train_Test_Validate_Split_V4.geojson", driver='GeoJSON')


In [74]:
print(final_Tile_Split['Category'].value_counts())
final_Tile_Split[final_Tile_Split['Category']=='Train'][['label_suffix', 'image_suffix']].head()

Train                  1160
Test_Very-Off-Nadir     309
Test_Nadir              309
Test_Off-Nadir          309
Validate                231
Name: Category, dtype: int64


Unnamed: 0,label_suffix,image_suffix
1,732251_3719739_label.geojson,732251_3719739_image.tif
2,732251_3720189_label.geojson,732251_3720189_image.tif
5,732251_3721539_label.geojson,732251_3721539_image.tif
6,732251_3721989_label.geojson,732251_3721989_image.tif
7,732251_3722439_label.geojson,732251_3722439_image.tif


In [72]:
## Move files from initial processing into seperate Train/Test/Validate Split using mv command
import shutil
from tqdm import tqdm
results_data_base_location = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/'
processedData_path = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav3/'
commandPathList = []
for idx, row in tqdm(final_Tile_Split[final_Tile_Split['Category']=='Train'].iterrows()):
    results = glob.glob(os.path.join(results_data_base_location, '**', '*{}'.format(row['image_suffix'])), recursive=True)
    for result in results:
        newPath = os.path.join(processedData_path, 'spacenet_4_Train', result.replace('/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/', ''))
        if not os.path.exists(os.path.dirname(newPath)):
            os.makedirs(os.path.dirname(newPath))
        commandPath = "mv {} {}".format(result, newPath)
        commandPathList.append(commandPath)
        


## Makes sure folder exists
for pathName in tqdm(list(set([os.path.dirname(item.split(' ')[2]) for item in commandPathList]))):
    if not os.path.exists(pathName):
            os.makedirs(pathName)
        
        
with open('/raid/nfs/workingDir/cw-tiler/Atlanta_AOI_mv_Train_Data.sh', 'w') as fp:
    fp.writelines(["{}\n".format(item)  for item in commandPathList])
    
# To Execute command use sh ./Atlanta_AOI_mv_Train_Data.sh
  

In [88]:
## Work with clipping Labels.  

In [None]:
import shutil
from tqdm import tqdm
results_data_base_location = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/'
processedData_path = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav3/'
commandPathList = []
for idx, row in tqdm(final_Tile_Split[final_Tile_Split['Category']=='Train'].iterrows()):
    results = glob.glob(os.path.join(results_data_base_location, '**', '*{}'.format(row['label_suffix'])), recursive=True)
    for result in results:
        newPath = os.path.join(processedData_path, 'spacenet_4_Train', result.replace('/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/', ''))
        if not os.path.exists(os.path.dirname(newPath)):
            os.makedirs(os.path.dirname(newPath))
        commandPath = "mv {} {}".format(result, newPath)
        commandPathList.append(commandPath)
with open('/raid/nfs/workingDir/cw-tiler/Atlanta_AOI_mv_Train_Data_label.sh', 'w') as fp:
         fp.writelines(["{}\n".format(item)  for item in commandPathList]) 
for pathName in tqdm(list(set([os.path.dirname(item.split(' ')[2]) for item in commandPathList]))):
    if not os.path.exists(pathName):
            os.makedirs(pathName)

In [68]:
## Creation of Test Data Set


/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav3/spacenet_4_Train/Atlanta_nadir14_catid_10300100039AB000/PAN/Atlanta_nadir14_catid_10300100039AB000_PAN_732251_3719739_image.tif


In [5]:
import pandas as pd
Off_nadir=25
Very_Off_nadir=40

columnList = [column for column in  final_Tile_Split.columns if column[0:3]=='Atl']
AOI_List_Dict = []
for column in columnList:
    nameParts = column.split('_')
    nadirAngle = int(nameParts[1].replace('nadir', ""))
    if nadirAngle <= 25:
        nadirClass = 'Nadir'
    elif nadirAngle < 40:
        nadirClass = 'Off-Nadir'
    else:
        nadirClass = 'Very-off-Nadir'
    
    AOI_List_Dict.append({'FolderName': column,
     'nadir_angle': nadirAngle,
    'nadir_class': nadirClass})
    
        

In [6]:
final_Tile_Split.columns[3][0:3]

'Atl'

In [7]:
aoi_Dict = pd.DataFrame(AOI_List_Dict)
aoi_Dict.head()
aoi_Dict['nadir_class'].value_counts()

Nadir             11
Very-off-Nadir     9
Off-Nadir          7
Name: nadir_class, dtype: int64

In [8]:
NadirList = aoi_Dict[aoi_Dict['nadir_class']=='Nadir']['FolderName'].values
OffNadirList = aoi_Dict[aoi_Dict['nadir_class']=='Off-Nadir']['FolderName'].values
VeryOffNadirList = aoi_Dict[aoi_Dict['nadir_class']=='Very-off-Nadir']['FolderName'].values

In [10]:
final_Tile_Split = gpd.read_file("/raid/nfs/workingDir/cw-tiler/AOI_6_Atlanta_Train_Test_Validate_Split_V4.geojson", driver='GeoJSON')

In [11]:
final_Tile_Split['Category'].value_counts()

Train                  1160
Test_Off-Nadir          309
Test_Very-Off-Nadir     309
Test_Nadir              309
Validate                231
Name: Category, dtype: int64

In [None]:
import shutil
from tqdm import tqdm
import glob
results_data_base_location = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/'
processedData_path = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav3/'
commandPathList = []

NadirList = aoi_Dict[aoi_Dict['nadir_class']=='Nadir']['FolderName'].values
OffNadirList = aoi_Dict[aoi_Dict['nadir_class']=='Off-Nadir']['FolderName'].values
VeryOffNadirList = aoi_Dict[aoi_Dict['nadir_class']=='Very-off-Nadir']['FolderName'].values

testCategoryList = ["Test_Nadir","Test_Off-Nadir", "Test_Very-Off-Nadir"]
nadirCategoryListList = [NadirList, OffNadirList, VeryOffNadirList]
rowList = []

for testCategory, nadirCategoryList in zip(testCategoryList, nadirCategoryListList):
    for idx, row in tqdm(final_Tile_Split[final_Tile_Split['Category']==testCategory].iterrows()):

        
        rowTmp = row[nadirCategoryList]
        baseID = np.random.choice(rowTmp[rowTmp>=450].index,1)[0]
        print(baseID)
        row['TestimageID'] = baseID


        results = glob.glob(os.path.join(results_data_base_location, baseID, '**', '*{}'.format(row['image_suffix'])), recursive=True)
        for result in results:
            newPath = os.path.join(processedData_path, 'spacenet_4_Test', result.replace('/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/', ''))
            if not os.path.exists(os.path.dirname(newPath)):
                os.makedirs(os.path.dirname(newPath))
            commandPath = "cp {} {}".format(result, newPath)
            commandPathList.append(commandPath)
            

        results = glob.glob(os.path.join(results_data_base_location, '**', '*{}'.format(row['label_suffix'])), recursive=True)
        for result in results:
            newPath = os.path.join(processedData_path, 'spacenet_4_Test', result.replace('/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/', ''))
            if not os.path.exists(os.path.dirname(newPath)):
                os.makedirs(os.path.dirname(newPath))
            commandPath = "cp {} {}".format(result, newPath)
            commandPathList.append(commandPath)

        rowList.append(row)

    
final_test_train_split_update = gpd.GeoDataFrame(rowList)
final_test_train_split_update.crs = final_Tile_Split.crs
#final_test_train_split_update.to_file("/raid/nfs/workingDir/cw-tiler/AOI_6_Atlanta_Train_Test_Validate_Split_V3p3.geojson", driver='GeoJSON')

        
with open('/raid/nfs/workingDir/cw-tiler/Atlanta_AOI_mv_Test_Data_v4.sh', 'w') as fp:
         fp.writelines(["{}\n".format(item)  for item in commandPathList]) 
        
for pathName in tqdm(list(set([os.path.dirname(item.split(' ')[2]) for item in commandPathList]))):
    if not os.path.exists(pathName):
            os.makedirs(pathName)

test


'cp /raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/Atlanta_nadir23_catid_103001000352C200/PAN/Atlanta_nadir23_catid_103001000352C200_PAN_732251_3719289_image.tif /raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav3/spacenet_4_Test/Atlanta_nadir23_catid_103001000352C200/PAN/Atlanta_nadir23_catid_103001000352C200_PAN_732251_3719289_image.tif'

'cp /raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/geojson/spacenet-buildings/spacenet-buildings_748901_3745389_label.geojson /raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav3/spacenet_4_Test/geojson/spacenet-buildings/spacenet-buildings_748901_3745389_label.geojson'

In [None]:
import shutil
from tqdm import tqdm
results_data_base_location = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/'
processedData_path = '/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav3/'
commandPathList = []


rowList = []


for idx, row in tqdm(final_Tile_Split[final_Tile_Split['Category']=='Validate'].iterrows()):
    results = glob.glob(os.path.join(results_data_base_location, '**', '*{}'.format(row['image_suffix'])), recursive=True)
    for result in results:
        newPath = os.path.join(processedData_path, 'spacenet_4_Validate', result.replace('/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/', ''))
        if not os.path.exists(os.path.dirname(newPath)):
            os.makedirs(os.path.dirname(newPath))
        commandPath = "mv {} {}".format(result, newPath)
        commandPathList.append(commandPath)

    results = glob.glob(os.path.join(results_data_base_location, '**', '*{}'.format(row['label_suffix'])), recursive=True)
    for result in results:
        newPath = os.path.join(processedData_path, 'spacenet_4_Validate', result.replace('/raid/nfs/data/Datasets/CosmiQ_General_Study/AOI_6_Atlanta/processedDatav2/', ''))
        if not os.path.exists(os.path.dirname(newPath)):
            os.makedirs(os.path.dirname(newPath))
        commandPath = "mv {} {}".format(result, newPath)
        commandPathList.append(commandPath)
         

        
with open('/raid/nfs/workingDir/cw-tiler/Atlanta_AOI_mv_Validate_Data_v2.sh', 'w') as fp:
         fp.writelines(["{}\n".format(item)  for item in commandPathList]) 
        
for pathName in tqdm(list(set([os.path.dirname(item.split(' ')[2]) for item in commandPathList]))):
    if not os.path.exists(pathName):
            os.makedirs(pathName)