The TimeMatch dataset contains SITS from Sentinel2 Level-1C products in top-of-atmosphere reflectance. Four Sentinel-2 tiles are chosen in various climates: 33UVP (Austria), 32VNH (Denmark), 30TXT (mid-west France), and 31TCJ (southern France), abbreviated as AT1, DK1, FR1, and FR2, respectively.

The 20m bands are bilinearly interpolated to 10m.

In [1]:
import os
import zarr
import json
import pickle
import numpy as np

## Visualize blocks

In [2]:
# import geopandas
# gdf = geopandas.read_file(r"Exercise4\timematch_data\denmark\32VNH\2017\meta\blocks\blocks_denmark_32VNH_2017.shp")

# gdf = gdf.drop(columns=['id'])

# # print(type(gdf))
# print('gdf Projection: ', gdf.crs)
# print(gdf)

# _ = gdf.to_crs(epsg=4326).plot(
# # gdf.to_crs(epsg=32634).plot(
#     cmap='Set3',
#     edgecolor='black', legend=True, figsize=(8, 8)
#     )

## Traverse data folder and Count zarr files

In [3]:
def traverse_directory(path, level=0):
    zarr_count = 0
    
    if not path.endswith('.zarr'):
        print('\t' * level + os.path.basename(path))
    else:
        zarr_count += 1
        return zarr_count
    
    # Traverse the directory
    with os.scandir(path) as it:
        for entry in it:
            if entry.is_dir():
                # Recursively traverse subdirectories
                zarr_count += traverse_directory(entry.path, level + 1)
            elif entry.is_file():
                if entry.name.endswith('.zarr'):
                    zarr_count += 1
                else:
                    print('\t' * (level + 1) + entry.name)
    return zarr_count

zarr_files_count = traverse_directory('Exercise4')

print(f"Number of '.zarr' files: {zarr_files_count}")

Exercise4
	timematch_data
		denmark
			32VNH
				2017
					data
						.zgroup
					meta
						blocks
							blocks_denmark_32VNH_2017.cpg
							blocks_denmark_32VNH_2017.dbf
							blocks_denmark_32VNH_2017.prj
							blocks_denmark_32VNH_2017.shp
							blocks_denmark_32VNH_2017.shx
						dates.json
						labels.json
						labels_cleaned.json
						metadata.pkl
						parcels
Number of '.zarr' files: 4255


## Read zarr

In [4]:
# zarr_arr = zarr.load(r"C:\Users\yanni\Downloads\gewxwrika3\Exercise4\timematch_data\denmark\32VNH\2017\data\4333.zarr")
zarr_arr = zarr.load(r"Exercise4\timematch_data\denmark\32VNH\2017\data\0.zarr")
print(zarr_arr.shape)
print(np.unique(zarr_arr))
print(len(np.unique(zarr_arr)))

(52, 10, 896)
[   0  244  250 ... 7886 7889 7903]
7107


In [5]:
zarr_arr = zarr.load(r"Exercise4\timematch_data\denmark\32VNH\2017\data\1979.zarr")
print(zarr_arr.shape)
print(np.unique(zarr_arr))
print(len(np.unique(zarr_arr)))

(52, 10, 761)
[    0    99   104 ... 10976 11001 11021]
9748


In [6]:
zarr_arr = zarr.open(r"Exercise4\timematch_data\denmark\32VNH\2017\data\2879.zarr"
                    , mode='r')
print(zarr_arr.info)

Type               : zarr.core.Array
Data type          : uint16
Shape              : (52, 10, 130)
Chunk shape        : (52, 10, 130)
Order              : C
Read-only          : True
Compressor         : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)
Store type         : zarr.storage.DirectoryStore
No. bytes          : 135200 (132.0K)
No. bytes stored   : 53120 (51.9K)
Storage ratio      : 2.5
Chunks initialized : 1/1



In [7]:
zarr_arr = zarr.open(r"Exercise4\timematch_data\denmark\32VNH\2017\data")

print(zarr_arr.info)

print(zarr_arr["0.zarr"])

Name        : /
Type        : zarr.hierarchy.Group
Read-only   : False
Store type  : zarr.storage.DirectoryStore
No. members : 4255
No. arrays  : 4255
No. groups  : 0
Arrays      : 0.zarr, 1.zarr, 10.zarr, 100.zarr, 1000.zarr, 1001.zarr,
            : 1002.zarr, 1003.zarr, 1004.zarr, 1005.zarr, 1006.zarr, 1007.zarr,
            : 1008.zarr, 1009.zarr, 101.zarr, 1010.zarr, 1011.zarr, 1012.zarr,
            : 1013.zarr, 1014.zarr, 1015.zarr, 1016.zarr, 1017.zarr, 1018.zarr,
            : 1019.zarr, 102.zarr, 1020.zarr, 1021.zarr, 1022.zarr, 1023.zarr,
            : 1024.zarr, 1025.zarr, 1026.zarr, 1028.zarr, 1029.zarr, 103.zarr,
            : 1030.zarr, 1031.zarr, 1032.zarr, 1033.zarr, 1034.zarr, 1035.zarr,
            : 1036.zarr, 1037.zarr, 1038.zarr, 1039.zarr, 104.zarr, 1040.zarr,
            : 1042.zarr, 1043.zarr, 1044.zarr, 1045.zarr, 1046.zarr, 1047.zarr,
            : 1048.zarr, 1049.zarr, 105.zarr, 1050.zarr, 1051.zarr, 1052.zarr,
            : 1053.zarr, 1054.zarr, 1055.zarr, 

## Read Metadata for the Original Whole Dataset

In [8]:
# import random
with open(r'Exercise4\timematch_data\denmark\32VNH\2017\meta\metadata.pkl', 'rb') as f:
    metadata_pkl = pickle.load(f)

for key, value in metadata_pkl.items():
    if len(value) > 10:
        print(key, len(value))
    else:
        print(key, value)

print()
print(metadata_pkl['cloudy_pct'][0])
print()
print(metadata_pkl['parcels'][0])
print(metadata_pkl['parcels'][1063])
print(metadata_pkl['parcels'][997])
print(metadata_pkl['parcels'][30234])
print(metadata_pkl['parcels'][-1])

# # get 100 random indices of metadata_pkl['parcels']
# random_indices = random.sample(range(0, len(metadata_pkl['parcels'])), 1000)

# print(type(metadata_pkl['parcels'][30234]['n_pixels'])) # <class 'numpy.int32'>

# get all indices of metadata_pkl['parcels']
random_indices = range(0, len(metadata_pkl['parcels']))

print("\nFind Small Parcels")

for i in random_indices:
    if metadata_pkl['parcels'][i]['n_pixels'] < 32:
        print(i, metadata_pkl['parcels'][i]['n_pixels'])

start_date 20170101
end_date 20171231
dates 52
cloudy_pct 52
parcels 60332

7.68

{'id': '22-0', 'label': 216.0, 'n_pixels': 896, 'block': 13, 'geometric_features': [2841.5190707380984, 89390.00914778022, 0.03178788208915471, 0.28607918263090676]}
{'id': '13-0', 'label': 1.0, 'n_pixels': 221, 'block': 84, 'geometric_features': [899.2983518692513, 22638.624728159, 0.03972407169904898, 0.4384920634920635]}
{'id': '1-0', 'label': 11.0, 'n_pixels': 491, 'block': 85, 'geometric_features': [1076.1948832426972, 49044.169701451785, 0.021943380625951144, 0.4280732345248474]}
{'id': '245-0', 'label': 411.0, 'n_pixels': 414, 'block': 99, 'geometric_features': [1247.3504491896092, 41397.50783522958, 0.030131051708579056, 0.45494505494505494]}
{'id': '50-0E', 'label': 252.0, 'n_pixels': 176, 'block': 99, 'geometric_features': [874.4891528567739, 17686.431536811797, 0.04944406965512794, 0.2962962962962963]}

Find Small Parcels


Κανένα Parcel δεν έχει λιγότερα από 32 pixels

## Dates json and dates metadata have the same information

In [9]:
f = open(r"Exercise4\timematch_data\denmark\32VNH\2017\meta\dates.json")
dates_json = json.load(f)

# f = open("demofile.txt", "w")
print(len(dates_json), len(metadata_pkl['dates']))
for day_dic, day_json in zip(metadata_pkl['dates'], dates_json):
    
    # f.write(str(day_json) + "\n")
    # f.write("a")
    
    print(day_dic, day_json)

52 52
20170113 20170113
20170222 20170222
20170301 20170301
20170311 20170311
20170321 20170321
20170324 20170324
20170403 20170403
20170423 20170423
20170430 20170430
20170503 20170503
20170513 20170513
20170523 20170523
20170602 20170602
20170619 20170619
20170622 20170622
20170702 20170702
20170704 20170704
20170709 20170709
20170712 20170712
20170717 20170717
20170719 20170719
20170806 20170806
20170808 20170808
20170813 20170813
20170816 20170816
20170818 20170818
20170821 20170821
20170823 20170823
20170826 20170826
20170910 20170910
20170912 20170912
20170917 20170917
20170920 20170920
20170925 20170925
20170927 20170927
20171002 20171002
20171012 20171012
20171022 20171022
20171025 20171025
20171027 20171027
20171030 20171030
20171106 20171106
20171114 20171114
20171119 20171119
20171121 20171121
20171126 20171126
20171129 20171129
20171201 20171201
20171209 20171209
20171216 20171216
20171219 20171219
20171229 20171229
