### Download a training dataset from tiles and seasons

1. Create a dict of tiles and seasons
2. Check which ones are already downloaded
3. Download the missing ones
4. Add progress bar/indicator

In [1]:
import re
import shutil
from datetime import date, datetime
from distutils.dir_util import copy_tree
from pathlib import Path

import geopandas as gpd

#### 1. Create a dict of tiles and seasons

In [2]:
# Pattern for parsing product folder names (presumably Sentinel-2 product
# titles -- TODO confirm). It is compiled with re.VERBOSE, so the line breaks
# and indentation inside the pattern are ignored by the regex engine.
# Named groups: mission, product_level, sensing_time, processing_baseline,
# relative_orbit, utm_code, latitude_band, square, year, month, day and
# product_time. utm_code + latitude_band + square together form the tile name.
title_regex = re.compile(r"""^(?P<mission>S2[A-B])_MSI
                        (?P<product_level>L[1-2][A-C])_
                        (?P<sensing_time>\d{8}T\d{6})_
                        (?P<processing_baseline>N\d{4})_
                        (?P<relative_orbit>R\d{3})_T
                        (?P<utm_code>\d{2})
                        (?P<latitude_band>\w{1})
                        (?P<square>\w{2})_
                        (?P<year>\d{4})
                        (?P<month>\d{2})
                        (?P<day>\d{2})T
                        (?P<product_time>\d{6})""",re.VERBOSE)

In [3]:
# Folder whose entries are scanned for already-downloaded product folders.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
PATH_IMAGES = Path(r"C:\Users\Fabian\Documents\Masterarbeit_Daten\Training_data_raw")

In [7]:
# Load the GeoJSON tile layer into a GeoDataFrame.
tiles_germany = gpd.read_file(r"../data/tiles_germany.geojson")

In [8]:
# Inspect which columns the tile layer provides.
tiles_germany.columns

Index(['osm_id', 'tile_name', 'centroid_of_tile', 'geometry'], dtype='object')

In [9]:
# Unique tile names found in the tile layer.
tile_names = set(tiles_germany["tile_name"])

In [10]:
# Lookup of tile name -> {season: already downloaded?}; every flag starts as False.
# dict.fromkeys is evaluated once per tile, so each tile owns its own flag dict.
downloaded_tiles = {
    name: dict.fromkeys(("spring", "summer", "autumn", "winter"), False)
    for name in tile_names
}

In [None]:
# season_start = {
#     'winter':'2018-01-01',
#     'spring':'2018-04-01',
#     'summer':'2018-07-01',
#     'autumn':'2018-10-01'
# }

# season_end = {
#     'winter':'2018-03-31',
#     'spring':'2018-06-30',
#     'summer':'2018-09-30',
#     'autumn':'2018-11-30'
# }

In [11]:
# Display the tile -> season-flag lookup.
downloaded_tiles

{'31UGQ': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32TMT': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32UMA': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '33UUQ': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32ULB': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32UQC': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32UQD': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32UNU': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32UMV': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32UQV': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32UME': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '32UPE': {'spring': False, 'summer': False, 'autumn': False, 'winter': False},
 '33UVT': {'spring': False, 'summer': Fa

In [12]:
# Start and end date of each season window in 2018.
# NOTE(review): no window covers December 2018 -- confirm this is intended.
seasons_dict = {
    "winter": dict(start_date=date(2018, 1, 1), end_date=date(2018, 3, 31)),
    "spring": dict(start_date=date(2018, 4, 1), end_date=date(2018, 6, 30)),
    "summer": dict(start_date=date(2018, 7, 1), end_date=date(2018, 9, 30)),
    "autumn": dict(start_date=date(2018, 10, 1), end_date=date(2018, 11, 30)),
}

In [24]:
# Print the date window of every season.
# Iterate over the dict *values*: enumerate(seasons_dict.items()) yields
# (index, (name, window)) tuples, so indexing the item with "start_date"
# would raise a TypeError.
for dates in seasons_dict.values():
    start_date, end_date = dates["start_date"], dates["end_date"]
    print(start_date, "-", end_date)

2018-01-01 - 2018-03-31
2018-04-01 - 2018-06-30
2018-07-01 - 2018-09-30
2018-10-01 - 2018-11-30


In [13]:
# Season names, in the order they were inserted into seasons_dict.
seasons_dict.keys()

dict_keys(['winter', 'spring', 'summer', 'autumn'])

In [14]:
# Preview the first five (tile name, season flags) pairs.
list(downloaded_tiles.items())[:5]

[('31UGQ',
  {'spring': False, 'summer': False, 'autumn': False, 'winter': False}),
 ('32TMT',
  {'spring': False, 'summer': False, 'autumn': False, 'winter': False}),
 ('32UMA',
  {'spring': False, 'summer': False, 'autumn': False, 'winter': False}),
 ('33UUQ',
  {'spring': False, 'summer': False, 'autumn': False, 'winter': False}),
 ('32ULB',
  {'spring': False, 'summer': False, 'autumn': False, 'winter': False})]

In [16]:
# Scan PATH_IMAGES for product folders, mark the (tile, season) combinations
# that are already available in downloaded_tiles, and copy each matching
# product folder into to_directory.
to_directory = r"C:\Users\Fabian\Documents\Github_Masterthesis\Solarpark-detection\data_local\training_data_raw"
for folder in PATH_IMAGES.iterdir():
    regex_match = re.match(title_regex, folder.name)
    # Skip entries that are not parseable product names; also skip plain files
    # (e.g. archives with a matching name) because only directories can be
    # copied as a tree.
    if regex_match is None or not folder.is_dir():
        continue

    utm_code = regex_match.group("utm_code")
    latitude_band = regex_match.group("latitude_band")
    square = regex_match.group("square")
    tile_name = f"{utm_code}{latitude_band}{square}"
    if tile_name not in downloaded_tiles:
        continue

    year = int(regex_match.group("year"))
    month = int(regex_match.group("month"))
    day = int(regex_match.group("day"))
    product_date = date(year, month, day)

    for season, season_dates in seasons_dict.items():
        if season_dates["start_date"] <= product_date <= season_dates["end_date"]:
            downloaded_tiles[tile_name][season] = True
            # shutil.copytree replaces distutils' copy_tree (distutils is
            # deprecated and removed from the stdlib in Python 3.12).
            # dirs_exist_ok=True (Python 3.8+) keeps the "merge into an
            # existing folder" behaviour, and building the target with Path
            # avoids the invalid "\{" escape of the former f-string.
            shutil.copytree(folder, Path(to_directory) / folder.name, dirs_exist_ok=True)
            # The season windows do not overlap, so the first hit is the only one.
            break


In [78]:
# Inspect the per-tile season flags.
downloaded_tiles.values()

dict_values([{'spring': False, 'summer': False, 'autumn': False, 'winter': False}, {'spring': False, 'summer': True, 'autumn': False, 'winter': False}, {'spring': True, 'summer': False, 'autumn': False, 'winter': False}, {'spring': False, 'summer': True, 'autumn': False, 'winter': False}, {'spring': False, 'summer': True, 'autumn': False, 'winter': False}, {'spring': False, 'summer': False, 'autumn': False, 'winter': False}, {'spring': False, 'summer': True, 'autumn': False, 'winter': False}, {'spring': False, 'summer': True, 'autumn': False, 'winter': False}, {'spring': False, 'summer': True, 'autumn': False, 'winter': False}, {'spring': False, 'summer': True, 'autumn': False, 'winter': False}, {'spring': True, 'summer': False, 'autumn': False, 'winter': False}, {'spring': False, 'summer': False, 'autumn': False, 'winter': False}, {'spring': False, 'summer': False, 'autumn': False, 'winter': False}, {'spring': False, 'summer': True, 'autumn': False, 'winter': False}, {'spring': False,

In [71]:
# Spot-check a single entry: the start date of the winter window.
seasons_dict['winter']['start_date']

datetime.date(2018, 1, 1)

In [79]:
def count_true_values(d):
    """Count how many times the boolean ``True`` occurs in a nested dict.

    The values of ``d`` are walked recursively: dicts are searched through
    their values, lists and tuples through their items, at any nesting depth
    (including lists nested inside lists). Only real ``bool`` objects are
    counted; other truthy values such as ``1`` or ``"True"`` are ignored.
    Dictionary keys are never inspected.

    Args:
        d: Dictionary whose (possibly nested) values are searched.

    Returns:
        int: Number of ``True`` values found.
    """

    def _count(value):
        # Count the True values contained in a single (possibly nested) value.
        if isinstance(value, bool):
            return int(value)
        if isinstance(value, dict):
            return sum(_count(item) for item in value.values())
        if isinstance(value, (list, tuple)):
            return sum(_count(item) for item in value)
        return 0

    return sum(_count(value) for value in d.values())

In [80]:
# Total number of (tile, season) flags that are set to True.
count = count_true_values(downloaded_tiles)
print(count)

49
