# RTS Dataset Formatting

# TODO

-  calculate area in script? (update to derived in metadata?)

# Set-Up

In [1]:
import uuid
import numpy as np
import pandas as pd
import geopandas as gpd
import warnings
import re
from datetime import datetime
from custom_functions import *
from pathlib import Path

In [2]:
pd.set_option("display.max_columns", 100)

Workaround for horizontal scrollbars not working in Firefox:

In [3]:
from IPython.display import display, HTML
display(HTML("<style>.jp-OutputArea-output {display:flex}</style>"))

## User-Defined Input

Before starting, copy your new shapefile into the input_data directory. It is preferred that your file use EPSG:3413 (WGS 84 / NSIDC Sea Ice Polar Stereographic North) as the coordinate system, but this script will perform the transformation if necessary.

Provide the file name to the data:

In [4]:
new_data_file = 'rts_dataset_test_polygons_new.geojson'  # set this
new_data_filepath = Path('..') / 'input_data' / new_data_file

Provide the names of any metadata fields in your new file that are not already in the official RTS Data Set (please check the list to ensure that the field has not been included previously) that you would like to be included in the compiled data set:

In [5]:
# Provide new metatdata fields as a list of the character column names. If there are no new fields, leave the code assigning an empty list.
# If your new file is a shapefile, also provide a list of the abbreviated names
# Example:
# new_fields = ['CustomColumn1', 'CustomColumn2']
# Shapefile example:
# new_fields_abbreviated = ['CstmCl1', 'CstmCl2']
new_fields = []

Have you already created RTS centroid columns, or would you like them to be created within this script? Provide either True, if the columns do not exist yet, or False, if you have already created them:

In [6]:
# Example: 
# calculate_centroid = False
calculate_centroid = True

Would you like your formatted new data to be output in its own file (in which case you will email the file of new features to us to merge with the compiled data set) or appended the compiled dataset (in which case you will commit your updated file to your forked github repository and create a pull request to add the file to the official github repository). Your decision here should mostly be based on your comfort with github. If you have no idea what the second half of that sentence means, please opt for the separate file and email it to us.

In [7]:
# Example
# separate_file = True
separate_file = True

# Import Metadata Description File

In [8]:
col_metadata = pd.read_csv(Path('../input_data/metadata_description.csv'))

required_fields = list(col_metadata[col_metadata.Required == 'True'].FieldName.values)

generated_fields = list(col_metadata[col_metadata.Required == 'Generated'].FieldName.values)

optional_fields = list(col_metadata[col_metadata.Required == 'False'].FieldName.values)

all_fields = required_fields + generated_fields + optional_fields + new_fields

# Import Official and New RTS Data Files

In [9]:
rts_file = 'rts_dataset_test_polygons_current.geojson'
rts_data_filepath = Path('..') / 'input_data' / rts_file

rts_data = (
    gpd.read_file(rts_data_filepath)
    .filter(items = required_fields + generated_fields + optional_fields + ['geometry'])
)

rts_data.ContributionDate = pd.to_datetime(rts_data.ContributionDate)
    
for field in required_fields: # Check if all required columns are present
    if field not in rts_data.columns:
        raise ValueError('{field} is missing. Has the RTS data set been modified since download?'.format(field = repr(field)))

rts_data


Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,MergedRTS,StabilizedRTS,ContributionDate,UID,Area,geometry
0,70.01668,68.33918,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,b4bae416-9fde-5d91-920d-731bcf042b2d,7581.395967,"POLYGON ((2007198.307 865988.469, 2007189.916 ..."
1,70.01622,68.33917,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,10f75ab9-2297-5b04-97ad-559b34fa020f,3621.349764,"POLYGON ((2007253.161 866032.001, 2007235.776 ..."
2,70.01648,68.33242,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,ff0d265e-385c-53c2-9c3a-28e885a220d2,1339.292585,"POLYGON ((2007310.378 865857.070, 2007340.692 ..."
3,70.0155,68.3295,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,3482.02968,"POLYGON ((2007453.557 865845.775, 2007456.723 ..."
4,70.01451,68.33296,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,7c64ad8e-07be-5ba5-8f97-19374f809af1,134.941981,"POLYGON ((2007514.965 865926.094, 2007508.132 ..."
5,70.01437,68.33493,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-09-30",WorldView-2,4.0,Positive,Polygon,,,2023-09-01,e36abf57-ffb3-5c6a-be32-d8278f385a73,411.580601,"POLYGON ((2007496.803 865994.362, 2007488.866 ..."


In [10]:
new_data = gpd.read_file(new_data_filepath)

# convert to EPSG:3413 if necessary
if new_data.crs != 'EPSG:3413':
    new_data = new_data.to_crs('EPSG:3413')

# calculate centroid, if requested
if calculate_centroid:
    if re.search('\\.shp', str(new_data_filepath)):
        new_data = new_data.drop(['CntrdLt', 'CntrdLn'], axis = 1)
        new_data["CntrdLt"] = new_data.to_crs(4326).centroid.y.round(5)
        new_data["CntrdLn"] = new_data.to_crs(4326).centroid.x.round(5)

    elif re.search('\\.geojson', str(new_data_filepath)):
        new_data["CentroidLat"] = new_data.to_crs(4326).centroid.y.round(5)
        new_data["CentroidLon"] = new_data.to_crs(4326).centroid.x.round(5)

# select correct columns
if re.search('\\.geojson', str(new_data_filepath)):
    new_data = (
        new_data    
        .filter(items = required_fields + optional_fields + new_fields + ['geometry'])
        )
elif re.search('\\.shp', str(new_data_filepath)):
    new_data = (
        new_data    
        .rename(columns = dict(
            {key:value for key, value 
                 in zip(
                     ['CntrdLt', 'CntrdLn', 'ReginNm', 'CretrLb', 'BasMpDt', 'BsMpSrc', 'BsMpRsl', 'TrnClss', 'LablTyp'], 
                     required_fields
                     )},
            **{key:value for key, value 
               in zip(
                   ['MrgdRTS', 'StblRTS', 'ContrDt', 'UID'],
                   generated_fields,
                   )},
            **{key:value for key, value 
               in zip(
                   ['BsMpID', 'Area'], 
                   optional_fields,
                   )},
            **{key:value for key, value 
               in zip(
                   new_fields_abbreviated, 
                   new_fields
                   )}
            )
                )
        .filter(items = required_fields + optional_fields + new_fields + ['geometry'])
        )

for field in [item for item in required_fields]: # Check if all required columns are present
    if field not in new_data.columns:
        raise ValueError('{field} is missing. Ensure that all required fields are present prior to running this script'
                         .format(field = repr(field)))

for field in [item for item in new_fields]: # Check if all new columns are present
    if field not in new_data.columns:
        raise ValueError('{field} is missing. Did you specify the name of the new metadata field correctly?'.format(field = repr(field)))

new_data

Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,geometry
0,70.01655,68.33926,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007199.012 865984.608, 2007188.217 ..."
1,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ..."
2,70.01652,68.33235,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007340.152 865838.514, 2007326.959 ..."
3,70.01531,68.33115,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007453.557 865845.775, 2007456.416 ..."
4,70.01457,68.33342,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007492.587 865949.766, 2007492.342 ..."
5,70.01448,68.33495,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007479.747 865990.850, 2007472.484 ..."
6,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-9-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ..."


# Check Metadata Format of New Data

In [11]:
run_formatting_checks(new_data)

Formatting looks good!


# Generate UIDs

Set seed for UID generation (R) by concatenating all required metadata columns (except UID) into a single string

In [12]:
new_data.CentroidLat = np.round(new_data.CentroidLat, 5)
new_data.CentroidLon = np.round(new_data.CentroidLon, 5)
c = new_data.BaseMapResolution == new_data.BaseMapResolution.astype(int)
new_data.loc[c,'BaseMapResolutionStr'] = new_data.BaseMapResolution.astype(int).astype(str)
new_data.loc[~c,'BaseMapResolutionStr'] = new_data.BaseMapResolution.astype(str)

In [13]:
new_data['seed'] = (
    new_data[[
        'CentroidLat', 
        'CentroidLon', 
        'RegionName', 
        'CreatorLab', 
        'BaseMapDate', 
        'BaseMapSource', 
        'BaseMapResolutionStr', 
        'TrainClass',
        'LabelType'
    ]].apply(
        lambda row: ''.join(row.values.astype(str)),
        axis = 1
    )
)
new_data.seed

0    70.0165568.33926Yamal-GydanRodenhizer2023-05-0...
1    70.0154368.34071Yamal-GydanRodenhizer2023-05-0...
2    70.0165268.33235Yamal-GydanRodenhizer2023-05-0...
3    70.0153168.33115Yamal-GydanRodenhizer2023-05-0...
4    70.0145768.33342Yamal-GydanRodenhizer2023-05-0...
5    70.0144868.33495Yamal-GydanRodenhizer2023-05-0...
6    70.0154368.34071Yamal-GydanRodenhizer2022-05-0...
Name: seed, dtype: object

Generate UIDs

In [14]:
new_data['UID'] = [str(uuid.uuid5(uuid.NAMESPACE_DNS, name = seed)) for seed in new_data.seed]
new_data.UID

0    697680a4-9707-59fb-aabb-540308cf0705
1    edb47fed-2c5d-59f0-9609-dd230ab25a58
2    d10b5ffe-ff73-57cf-a12b-4b74142f0b98
3    1fae5c95-c99a-5400-ad29-6273ecbbaf94
4    e9627675-6398-5d4f-a3c0-19d0ef5511df
5    f6593433-6229-5324-85cf-e3edccae5420
6    aedeff78-0897-5159-aefd-c5a5885475c8
Name: UID, dtype: object

# Check for Intersections with RTS Data Set

Find intersecting RTS polygons from the official RTS data set and retrieve their UIDs. Create an empty column for the UIDs of polygons that have been repeated that will be manually populated.

In [15]:
intersections = []
for idx in range(0,new_data.shape[0]):
    new_intersections = get_intersecting_uids(new_data.iloc[[idx]], rts_data)
    intersections = intersections + new_intersections
    
new_data['Intersections'] = intersections

adjacent_polys = []
for idx in range(0,new_data.shape[0]):
    new_adjacent_polys = get_touching_uids(new_data.iloc[[idx]], rts_data)
    adjacent_polys = adjacent_polys + new_adjacent_polys
    
new_data['AdjacentPolys'] = adjacent_polys

new_data.Intersections = remove_adjacent_polys(new_data.Intersections, new_data.AdjacentPolys)
new_data.drop('AdjacentPolys', axis=1)

overlapping_data = new_data.copy()
overlapping_data = overlapping_data[overlapping_data.Intersections.str.len() > 0]

if overlapping_data.shape[0] > 0:
    if 'RepeatRTS' not in list(overlapping_data.columns.values):
        overlapping_data['RepeatRTS'] = ['']*overlapping_data.shape[0]
    if 'MergedRTS' not in list(overlapping_data.columns.values):
        overlapping_data['MergedRTS'] = ['']*overlapping_data.shape[0]
    if 'StabilizedRTS' not in list(overlapping_data.columns.values):
        overlapping_data['StabilizedRTS'] = ['']*overlapping_data.shape[0]

    overlapping_data['AccidentalOverlap'] = ['']*overlapping_data.shape[0]

    print(overlapping_data)

    overlapping_data.to_file(
        Path('..') / 'python_output' / (str(new_data_file).split('.')[0] + "_overlapping_polygons.geojson")
        )

    print(
        'Overlapping polygons have been saved to ' + 
         str(Path('..') / 'python_output' / (str(new_data_file).split('.')[0] + "_overlapping_polygons.geojson"))
         )

else:
    print('There were no overlapping polygons. Proceed to the next code chunk without any manual editing.')

   CentroidLat  CentroidLon   RegionName  CreatorLab            BaseMapDate  \
0     70.01655     68.33926  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   
2     70.01652     68.33235  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   
3     70.01531     68.33115  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   
4     70.01457     68.33342  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   
5     70.01448     68.33495  Yamal-Gydan  Rodenhizer  2023-05-01,2023-09-30   

  BaseMapSource  BaseMapResolution TrainClass LabelType  \
0   WorldView-2                4.0   Positive   Polygon   
2   WorldView-2                4.0   Positive   Polygon   
3   WorldView-2                4.0   Positive   Polygon   
4   WorldView-2                4.0   Positive   Polygon   
5   WorldView-2                4.0   Positive   Polygon   

                                            geometry BaseMapResolutionStr  \
0  POLYGON ((2007199.012 865984.608, 2007188.217 ...                    4   
2  POLYGON ((200

At this point, you will need to manually check all polygons with intersections against the polygons in the official RTS data set in your preferred GIS software and save the output to

In [16]:
str(Path('..') / 'python_output' / str(new_data_file).split('.')[0]) + "_overlapping_polygons_edited.geojson"

'../python_output/rts_dataset_test_polygons_new_overlapping_polygons_edited.geojson'

When possible/necessary, try to find imagery that matches the date of the intersecting polygons - this may require contacting the lab that did the original delineation.

Your job is to inspect each of the polygons listed in the 'Intersections' column compared to the new RTS feature and manually copy and paste the UIDs from the 'Intersections' column into the 'RepeatRTS', 'StabilizedRTS', 'MergedRTS', or 'AccidentalOverlap' based on the relationship between the two polygons.

- Paste the UID into the RepeatRTS column when the new RTS feature is the same RTS feature as the RTS feature in the 'Intersections' column, but was delineated at a different point in time, by a different lab at the same point in time, or from different imagery at the same point in time. The RTS feature is the same when it was the result of the same RTS initiation event.

- Paste the UID into the StabilizedRTS column when the RTS feature in the 'Intersections' column is a stabilized RTS scar as of the date of the imagery used in the new RTS delineations.

- Paste the UID into the MergedRTS column when multiple RTS features in the 'Intersections' column merged to form the new RTS feature.

- Paste the UID into the AccidentalOverlap column when inaccuracies in delineation of separate RTS features lead to overlap (e.g. features that are very close to each other and the polygons barely touch). 

When this is done, each of the UIDs in the Intersections column should have been copied into one (and only one) of the 'RepeatRTS', 'StabilizedRTS', 'MergedRTS', or 'AccidentalOverlap' columns.


# Load Manually Edited File and Join to New Data

Add the 'RepeatRTS', 'StabilizedRTS', and 'MergedRTS' columns that you just edited back into `new_data`.

In [17]:
edited_file = Path('..') / 'python_output' / (str(new_data_file).split('.')[0] + "_overlapping_polygons_edited.geojson")

if Path.exists(edited_file):
    overlapping_data = (
        gpd.read_file(edited_file)
        .filter(items = ['UID', 'Intersections', 'RepeatRTS', 'MergedRTS', 'StabilizedRTS', 'AccidentalOverlap'])
        )

    new_data = pd.merge(new_data, 
                        overlapping_data, 
                        how = 'outer',
                        on = ['UID', 'Intersections'])

    new_data.loc[~new_data.RepeatRTS.isnull(), 'UID'] = new_data.RepeatRTS[~new_data.RepeatRTS.isnull()]

else:
    new_data['RepeatRTS'] = ['']*new_data.shape[0]
    new_data['MergedRTS'] = ['']*new_data.shape[0]
    new_data['StabilizedRTS'] = ['']*new_data.shape[0]
    new_data['AccidentalOverlap'] = ['']*new_data.shape[0]
    
    warnings.warn("No manually edited file has been imported. This is okay if there were no overlapping polygons, but is a problem otherwise.")

new_data

Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,geometry,BaseMapResolutionStr,seed,UID,Intersections,AdjacentPolys,RepeatRTS,MergedRTS,StabilizedRTS,AccidentalOverlap
0,70.01655,68.33926,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007199.012 865984.608, 2007188.217 ...",4,70.0165568.33926Yamal-GydanRodenhizer2023-05-0...,,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,
1,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ...",4,70.0154368.34071Yamal-GydanRodenhizer2023-05-0...,edb47fed-2c5d-59f0-9609-dd230ab25a58,,,,,,
2,70.01652,68.33235,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007340.152 865838.514, 2007326.959 ...",4,70.0165268.33235Yamal-GydanRodenhizer2023-05-0...,ff0d265e-385c-53c2-9c3a-28e885a220d2,ff0d265e-385c-53c2-9c3a-28e885a220d2,,ff0d265e-385c-53c2-9c3a-28e885a220d2,,,
3,70.01531,68.33115,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007453.557 865845.775, 2007456.416 ...",4,70.0153168.33115Yamal-GydanRodenhizer2023-05-0...,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,,,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,
4,70.01457,68.33342,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007492.587 865949.766, 2007492.342 ...",4,70.0145768.33342Yamal-GydanRodenhizer2023-05-0...,,7c64ad8e-07be-5ba5-8f97-19374f809af1,,,,,7c64ad8e-07be-5ba5-8f97-19374f809af1
5,70.01448,68.33495,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007479.747 865990.850, 2007472.484 ...",4,70.0144868.33495Yamal-GydanRodenhizer2023-05-0...,e36abf57-ffb3-5c6a-be32-d8278f385a73,e36abf57-ffb3-5c6a-be32-d8278f385a73,,e36abf57-ffb3-5c6a-be32-d8278f385a73,,,
6,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-9-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ...",4,70.0154368.34071Yamal-GydanRodenhizer2022-05-0...,aedeff78-0897-5159-aefd-c5a5885475c8,,,,,,


# Check for Intersections within New RTS Data Set

Intersections within the new data set are assumed to be repeat delineations of the same RTS feature. If this is not true (e.g. if you have delineated an old RTS scar and an active RTS feature on top of it), this code will not assign UIDs properly. In this case, please get in touch with us to determine how to proceed.

In [18]:
new_data["ContributionDate"] = datetime.today().strftime('%Y-%m-%d')

intersections = []
for idx in range(0,new_data.shape[0]):
    new_intersections = get_intersecting_uids(new_data.iloc[[idx]], new_data.drop([idx]))
    intersections = intersections + new_intersections
    
new_data['SelfIntersectionIndices'] = intersections

adjacent_polys = []
for idx in range(0,new_data.shape[0]):
    new_adjacent_polys = get_touching_uids(new_data.iloc[[idx]], new_data.drop(idx))
    adjacent_polys = adjacent_polys + new_adjacent_polys
    
new_data['AdjacentPolys'] = adjacent_polys

new_data.Intersections = remove_adjacent_polys(new_data.Intersections, new_data.AdjacentPolys)
new_data.drop('AdjacentPolys', axis=1)


new_data.loc[new_data.SelfIntersectionIndices.str.len() > 0, 'UID'] = (
    new_data[new_data.SelfIntersectionIndices.str.len() > 0]
    .apply(get_earliest_uid, df = new_data, axis = 1)
)

new_data

Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,geometry,BaseMapResolutionStr,seed,UID,Intersections,AdjacentPolys,RepeatRTS,MergedRTS,StabilizedRTS,AccidentalOverlap,ContributionDate,SelfIntersectionIndices
0,70.01655,68.33926,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007199.012 865984.608, 2007188.217 ...",4,70.0165568.33926Yamal-GydanRodenhizer2023-05-0...,,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,,2024-01-18,
1,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ...",4,70.0154368.34071Yamal-GydanRodenhizer2023-05-0...,aedeff78-0897-5159-aefd-c5a5885475c8,,,,,,,2024-01-18,aedeff78-0897-5159-aefd-c5a5885475c8
2,70.01652,68.33235,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007340.152 865838.514, 2007326.959 ...",4,70.0165268.33235Yamal-GydanRodenhizer2023-05-0...,ff0d265e-385c-53c2-9c3a-28e885a220d2,ff0d265e-385c-53c2-9c3a-28e885a220d2,,ff0d265e-385c-53c2-9c3a-28e885a220d2,,,,2024-01-18,
3,70.01531,68.33115,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007453.557 865845.775, 2007456.416 ...",4,70.0153168.33115Yamal-GydanRodenhizer2023-05-0...,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,,,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,,2024-01-18,
4,70.01457,68.33342,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007492.587 865949.766, 2007492.342 ...",4,70.0145768.33342Yamal-GydanRodenhizer2023-05-0...,,7c64ad8e-07be-5ba5-8f97-19374f809af1,,,,,7c64ad8e-07be-5ba5-8f97-19374f809af1,2024-01-18,
5,70.01448,68.33495,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007479.747 865990.850, 2007472.484 ...",4,70.0144868.33495Yamal-GydanRodenhizer2023-05-0...,e36abf57-ffb3-5c6a-be32-d8278f385a73,e36abf57-ffb3-5c6a-be32-d8278f385a73,,e36abf57-ffb3-5c6a-be32-d8278f385a73,,,,2024-01-18,
6,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-9-30",WorldView-2,4.0,Positive,Polygon,"POLYGON ((2007289.337 866129.959, 2007283.521 ...",4,70.0154368.34071Yamal-GydanRodenhizer2022-05-0...,aedeff78-0897-5159-aefd-c5a5885475c8,,,,,,,2024-01-18,edb47fed-2c5d-59f0-9609-dd230ab25a58


# Check Completeness of Intersection Information

In [19]:
check_intersection_info(new_data)

Intersection information is complete.


# Final Column Selection

In [20]:
new_data = add_empty_columns(
    new_data, 
    [col for col in optional_fields if col not in ['StabilizedRTS', 'MergedRTS']]
)

new_data = new_data[all_fields + ['geometry']]

new_data

Unnamed: 0,CentroidLat,CentroidLon,RegionName,CreatorLab,BaseMapDate,BaseMapSource,BaseMapResolution,TrainClass,LabelType,MergedRTS,StabilizedRTS,ContributionDate,UID,BaseMapID,Area,geometry
0,70.01655,68.33926,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,"b4bae416-9fde-5d91-920d-731bcf042b2d,10f75ab9-...",,2024-01-18,,,,"POLYGON ((2007199.012 865984.608, 2007188.217 ..."
1,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-18,aedeff78-0897-5159-aefd-c5a5885475c8,,,"POLYGON ((2007289.337 866129.959, 2007283.521 ..."
2,70.01652,68.33235,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-18,ff0d265e-385c-53c2-9c3a-28e885a220d2,,,"POLYGON ((2007340.152 865838.514, 2007326.959 ..."
3,70.01531,68.33115,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,297dc622-3584-5d79-8b7b-b4f5a67fa8a4,2024-01-18,,,,"POLYGON ((2007453.557 865845.775, 2007456.416 ..."
4,70.01457,68.33342,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-18,,,,"POLYGON ((2007492.587 865949.766, 2007492.342 ..."
5,70.01448,68.33495,Yamal-Gydan,Rodenhizer,"2023-05-01,2023-09-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-18,e36abf57-ffb3-5c6a-be32-d8278f385a73,,,"POLYGON ((2007479.747 865990.850, 2007472.484 ..."
6,70.01543,68.34071,Yamal-Gydan,Rodenhizer,"2022-05-01,2022-9-30",WorldView-2,4.0,Positive,Polygon,,,2024-01-18,aedeff78-0897-5159-aefd-c5a5885475c8,,,"POLYGON ((2007289.337 866129.959, 2007283.521 ..."


In [21]:
if separate_file:
    new_data.to_file(Path('..') / 'python_output' / (str(new_data_file).split('.')[0] + "_formatted.geojson"))
else:
    rts_data = add_empty_columns(
        rts_data, 
        [col for col in optional_fields]
        )
    rts_data.ContributionDate = [value.strftime('%Y-%m-%d') for value in rts_data.ContributionDate]
    
    rts_data = rts_data[all_fields + ['geometry']]
    updated_data = pd.concat([rts_data, new_data])
    updated_data.to_file(Path('..') / 'python_output' / rts_file)

  updated_data = pd.concat([rts_data, new_data])


Now you are ready to submit

In [22]:
if separate_file:
    print(str(Path('..') / 'python_output' / (str(new_data_file).split('.')[0] + "_formatted.geojson")))
else:
    print(str(Path('..') / 'python_output' / rts_file))

../python_output/rts_dataset_test_polygons_current.geojson
