### **For a given AOI, run Deforestation model (Unet-Diff) inference**

In [62]:
import os
import geopandas as gp
import numpy as np
import torch

import rasterio
import re
import tempfile
import pyproj
import uuid
import json
import geojson
from pathlib import Path
from os.path import join


from geojson import Feature

import rasterio.mask
from rasterio import Affine
from rasterio.plot import reshape_as_raster
from rasterio.merge import merge
from rasterio.warp import calculate_default_transform, reproject, Resampling

from shapely import wkt
from shapely.geometry import Polygon, box
from shapely.ops import transform


from pathlib import Path
from datetime import datetime, timedelta
from sentinel2download.downloader import Sentinel2Downloader, logger
from sentinel2download.overlap import Sentinel2Overlap


from time_dependent.data_prepare.prepare_tif import prepare_data
from time_dependent.data_prepare.landcover import LandcoverPolygons
from time_dependent.model import load_model


### 0. Setting up parameters
#### Read input from the environment and set up output folders and filenames

In [84]:
# Request parameters are read from environment variables; os.getenv returns
# None for any variable that is not set.
REQUEST_ID = os.getenv('REQUEST_ID')
START_DATE = os.getenv('START_DATE')
END_DATE = os.getenv('END_DATE')
# NOTE(review): the environment lookup for AOI is disabled here; AOI is the
# hard-coded WKT polygon assigned right below.
# AOI = os.getenv('AOI')
AOI = "POLYGON ((8.5162410750000390 50.0260314940001081, 8.5158796300000859 50.0269889830001944, 8.5184841149999784 50.0275382990001276, 8.5158920279999997 50.0327034010001057, 8.5189819329999636 50.0333328250001728, 8.5189971920000858 50.0346412670000404, 8.5245599749999883 50.0371131900000137, 8.5411300660001075 50.0441131600001086, 8.5482006070000693 50.0465087900000754, 8.5522546770001782 50.0475234980000323, 8.5593004230001384 50.0488510129999966, 8.5548954010001808 50.0550117490001298, 8.5530500420001658 50.0563774100000387, 8.5530567170000609 50.0575828550000210, 8.5530920040000638 50.0601120000001174, 8.5529613500000892 50.0612602240000797, 8.5522604000000229 50.0619773870001836, 8.5529508600001236 50.0666084300001444, 8.5511627190000468 50.0692863470001157, 8.5445032120001656 50.0690193180001302, 8.5433311470000604 50.0696372990001919, 8.5419759760001170 50.0709648140001491, 8.5415248870000369 50.0717391970001700, 8.5414047240001310 50.0759773260000998, 8.5388774880000824 50.0796051030001763, 8.5369453430001840 50.0799713130001010, 8.5309686650000458 50.0811004640001443, 8.5290689460001090 50.0820388800000273, 8.5283260340001448 50.0824050900001225, 8.5277976990001889 50.0826644900000701, 8.5255699170000980 50.0837707520001345, 8.5249490730000730 50.0832366940001634, 8.5241708760000279 50.0824317940001720, 8.5235195159999648 50.0816268930001343, 8.5228462220000551 50.0806732190000616, 8.5224790570001119 50.0800704960001895, 8.5222902310001700 50.0795822150000163, 8.5221729280001455 50.0790290840001830, 8.5220851890000517 50.0784606930001246, 8.5221042640001201 50.0780029310001282, 8.5221948610000595 50.0775108330000762, 8.5223646170001075 50.0767822260000912, 8.5226259240001241 50.0758895870000629, 8.5228281010000728 50.0750732420000872, 8.5231428149999715 50.0742416380000464, 8.5233802790000937 50.0735702510001488, 8.5237302780001869 50.0728912360000322, 8.5240926750000767 50.0722427360001916, 8.5248222350000447 50.0711936949999767, 8.5254230500000858 
50.0702018729999736, 8.5259141930001192 50.0694923400001812, 8.5261430740001742 50.0690193180001302, 8.5263071060000470 50.0684356700001558, 8.5263996129999668 50.0679168710001363, 8.5264854420000802 50.0675315860001433, 8.5265235890001350 50.0672798160000525, 8.5265531530000658 50.0669097910001142, 8.5263986580001756 50.0662612910001030, 8.5261058800001024 50.0655517580000833, 8.5256805420000887 50.0650253300000827, 8.5250625610000270 50.0644264220000537, 8.5244979860001422 50.0638771070001667, 8.5236711500000979 50.0632705680001777, 8.5227413170001682 50.0627593990000719, 8.5207157140001755 50.0617065430001844, 8.5172824860001128 50.0642395010001451, 8.5176134110000703 50.0650177000001122, 8.5111980440001389 50.0676155090000634, 8.5102252949999997 50.0694618230000401, 8.5059823990001746 50.0719261170000323, 8.5065879820000987 50.0728302010000448, 8.5013723380001807 50.0754470840001318, 8.5014228830000320 50.0759048460001281, 8.5017356870001208 50.0814285290000498, 8.4990386960001842 50.0820121770001947, 8.4951124200000550 50.0845718390000343, 8.4948902120001435 50.0858535770000799, 8.4902992240000685 50.0849189750001642, 8.4890184390000627 50.0864257810001732, 8.4885625829999753 50.0882530210001846, 8.4820413600001530 50.0909385670001370, 8.4787282940001205 50.0941390999999996, 8.4792652120001435 50.0943565370001807, 8.4785346980000895 50.0963745120000681, 8.4734630580000498 50.0984992990000819, 8.4737300880001385 50.0994606020001356, 8.4726333610000779 50.0999069220001161, 8.4741706860001500 50.1012573240001302, 8.4756526950001216 50.1012840270001334, 8.4780197140001405 50.1031913760000975, 8.4809980380001662 50.1052131660000555, 8.4781503670001825 50.1073341380000556, 8.4759769430001484 50.1087532050001414, 8.4775171280001018 50.1097068790001572, 8.4794740670001261 50.1104164130000527, 8.4810657489999812 50.1098518380001678, 8.4876604080000106 50.1074981680001201, 8.4917078020000645 50.1064376830001947, 8.4920988080000939 50.1071205130000408, 8.4924507150001887 
50.1077270510001540, 8.4940099710000254 50.1069259640000269, 8.4960842130000742 50.1058731070000363, 8.4971437450001872 50.1069450370000595, 8.4979095460001872 50.1077079780000645, 8.4988098150001861 50.1072654720000514, 8.4997673030001692 50.1067962650000140, 8.5021352760000468 50.1077270510001540, 8.5014886860000161 50.1081314080000766, 8.4998502740000390 50.1091461180000124, 8.5041379930001426 50.1130104070001039, 8.5067529680000575 50.1153678900000159, 8.5070905680000806 50.1156806940001616, 8.5080881130000989 50.1165809640000361, 8.5089378350000970 50.1166763310000007, 8.5092430120001836 50.1167526240000143, 8.5094623570001318 50.1168479920000891, 8.5097637180001016 50.1170120240001893, 8.5101194380000607 50.1171989450000979, 8.5103406910001809 50.1172752390000369, 8.5110616690001279 50.1174316410001666, 8.5119266500000208 50.1175003050001919, 8.5125160220001135 50.1174774180001350, 8.5127811430000975 50.1191635130001032, 8.5116395940000871 50.1193313600000465, 8.5136594760000435 50.1218948380000597, 8.5169401170001606 50.1211242669999706, 8.5187692630001379 50.1212310790001538, 8.5206651690001536 50.1200904860001515, 8.5219001770000204 50.1203384400000687, 8.5254011160000687 50.1176261900001805, 8.5305414200001337 50.1158714300001407, 8.5391530990000319 50.1195182810001825, 8.5415468210000540 50.1205139160000499, 8.5528583530000333 50.1252632130001530, 8.5572919840001305 50.1270828260001622, 8.5590391150001892 50.1281738290001044, 8.5624294290001899 50.1298332220001726, 8.5653619770001796 50.1310348510000381, 8.5702009209999801 50.1321983330001899, 8.5729742060000262 50.1325340270000765, 8.5770635600001697 50.1338806150001233, 8.5789041510001312 50.1347732539999811, 8.5901174540000511 50.1403732310001828, 8.5919513700001175 50.1421318060000658, 8.5949792870000579 50.1469154350000395, 8.5952692030001003 50.1473808290001557, 8.6007671360001723 50.1572914120001201, 8.5969696050000834 50.1580085750000535, 8.5910387050001873 50.1601982130000010, 8.5902387660001409 
50.1595460590000357, 8.5893535609999994 50.1644515990000741, 8.5890342730000384 50.1665688680001836, 8.5887789990000556 50.1682589999999990, 8.5887277710000944 50.1683432340000763, 8.5886747560000458 50.1684302580001713, 8.5919713980001688 50.1672973630001593, 8.5943527220000000 50.1700820930001328, 8.5967216490001874 50.1732063300000846, 8.6022138590000168 50.1714973450000343, 8.6046915050000621 50.1739959720000570, 8.6080217360001825 50.1755371090001177, 8.6087093350001282 50.1758613590001232, 8.6079788210000743 50.1765975960000219, 8.6070165630000588 50.1771774290001531, 8.6058588020000570 50.1780929560001141, 8.6048555370001054 50.1789932259999887, 8.6044206620001091 50.1797065740000789, 8.6038646710000002 50.1809768690001192, 8.6052684790001877 50.1816101079999726, 8.6083106990001852 50.1826591500001200, 8.6107254020000710 50.1832656860000270, 8.6135034550001137 50.1838874830000350, 8.6153697970000280 50.1842002870001807, 8.6166019450001841 50.1856231690001096, 8.6180667890000677 50.1868705750001141, 8.6199598320001201 50.1882514949999745, 8.6229162210000254 50.1900558470000533, 8.6325492849999819 50.1956672670000330, 8.6358165730001701 50.1974296569999865, 8.6379241940000000 50.2012367260001611, 8.6402397149999999 50.2008323669999754, 8.6422195430000670 50.2018280030001165, 8.6466197969999712 50.2040405270000747, 8.6507663730000672 50.2069969180001294, 8.6524562830001059 50.2082481390000339, 8.6608133320000888 50.2086448680000785, 8.6624965660001294 50.2100753780001128, 8.6642026910001277 50.2120094300000801, 8.6689043040000229 50.2169914240001845, 8.6711235050000823 50.2161788940001088, 8.6724586480001449 50.2172012340001288, 8.6799049390000960 50.2136116030000608, 8.6817693710000299 50.2125511160001565, 8.6854143150001732 50.2151489250000509, 8.6868810660001259 50.2138290410001105, 8.6903772350000850 50.2155761730001018, 8.6917877210000825 50.2149963380001623, 8.6939907070001254 50.2167968750001705, 8.6954307560001780 50.2161979670000278, 8.7003850940001257 
50.2179565440001170, 8.7014570240001490 50.2170677180001235, 8.7055616390000523 50.2190399170001456, 8.7056531900000778 50.2195892330001357, 8.7108812330001797 50.2222709660000532, 8.7103643410000586 50.2229576110000835, 8.7096939100001123 50.2262229920000323, 8.7113761910001131 50.2262573230000839, 8.7116260529999749 50.2248497020000855, 8.7128248220001296 50.2250366210001857, 8.7158079140000382 50.2259140020001951, 8.7165384280000922 50.2244110120001892, 8.7193241110001054 50.2253112790001524, 8.7215595240000425 50.2214660640000830, 8.7236967090000803 50.2213287350001565, 8.7239961620000486 50.2200737010000466, 8.7276105870000720 50.2190933230000951, 8.7310705190001840 50.2177238460000694, 8.7322654719999999 50.2157821650000642, 8.7334718710000629 50.2153854360001901, 8.7332916259999820 50.2123336790000394, 8.7313737860001197 50.2118797300001916, 8.7318134309999778 50.2065048220001131, 8.7305831910001643 50.2052879330000792, 8.7307300560001408 50.2035980240001436, 8.7290792459999693 50.2023773190001066, 8.7196912760000487 50.1988563539999859, 8.7183628080000517 50.1961631780001198, 8.7140235900000107 50.1944999700001517, 8.7126779550001743 50.1948661800000764, 8.7090625750001891 50.1933441160001053, 8.7058610920000206 50.1919899000001237, 8.7055854800001384 50.1918601990001321, 8.7060585020000190 50.1915855410001654, 8.7078227999999740 50.1905441290001590, 8.7111635210001168 50.1885871880001559, 8.7093544010001551 50.1860122680000700, 8.7077074040000184 50.1836204530001737, 8.7044916160000412 50.1844978340001830, 8.7041502000000719 50.1839065560001245, 8.7029752730001064 50.1819877640001550, 8.7022123340001372 50.1806907660001116, 8.7027635580000720 50.1805648800001904, 8.7040405270001315 50.1801528930001268, 8.7056341169999882 50.1795310980000977, 8.7063140870001234 50.1793327340000133, 8.7067823410001211 50.1791954050001436, 8.7071619040000314 50.1791648870000699, 8.7077789300000745 50.1791572560001100, 8.7084646220001787 50.1791687020001405, 8.7092170720000581 
50.1759758000001170, 8.7122678750001796 50.1723785410001142, 8.7143115990001547 50.1713294990001373, 8.7146158230000879 50.1703186040001015, 8.7164354330001856 50.1696281440001712, 8.7177486420000605 50.1689910900000200, 8.7190752020000559 50.1676254280001785, 8.7202215200000008 50.1662216200000444, 8.7227849960000299 50.1636161800001332, 8.7289409640001168 50.1631164550000790, 8.7326421729999879 50.1619224560001271, 8.7358350750000113 50.1628608700001450, 8.7375888820001251 50.1633758540001509, 8.7386837010001841 50.1625823970001079, 8.7436532980000834 50.1645774850001658, 8.7476511009999740 50.1667060850000439, 8.7496986390001439 50.1678009040001598, 8.7511358270000414 50.1691017150000675, 8.7572250370000688 50.1715431220001733, 8.7579946510001605 50.1719322200000306, 8.7602863310000316 50.1724891660001049, 8.7629966730001456 50.1727104189999977, 8.7629947670001229 50.1731529250000676, 8.7630281440001454 50.1791076660000499, 8.7636432650001552 50.1791534430000183, 8.7786836630000948 50.1802291870000658, 8.7776451110001403 50.1783409130001132, 8.7791023250001103 50.1783638010000459, 8.7792148600000814 50.1771850590000668, 8.7825393680001866 50.1771011350000435, 8.7890062340000004 50.1761512750001657, 8.7898654940000824 50.1760482780000530, 8.7938861850001331 50.1729774470001075, 8.7942895890000727 50.1726684570000998, 8.7963514330001544 50.1745529180001313, 8.7973613730000579 50.1741371160000540, 8.7995100010000442 50.1732330330000877, 8.8002901080001834 50.1714248660001658, 8.8001346580001041 50.1695060740000258, 8.7979240419999769 50.1690139780001800, 8.7976980210000306 50.1675148010000385, 8.7949676510000927 50.1670875550001938, 8.7949466699999999 50.1668815610001957, 8.7950000770001111 50.1642379770001412, 8.7945747370000618 50.1636085499999993, 8.7941761010001187 50.1630096439999988, 8.7906360630001359 50.1636581420000311, 8.7898969650000822 50.1623954780001782, 8.7895545970001763 50.1617889400001218, 8.7900228490000245 50.1616325380001058, 8.7911291130000677 
50.1612548820001507, 8.7903470990000301 50.1588745110000787, 8.7841272350000281 50.1584777840000697, 8.7835683820000554 50.1584396360000824, 8.7838964470000747 50.1567459110000300, 8.7839288710001142 50.1558456410001554, 8.7825088500001129 50.1556091300001867, 8.7824974060000613 50.1541557319999995, 8.7824859610000772 50.1527328500000635, 8.7824687950001135 50.1505317700000433, 8.7812910070001635 50.1432685850001008, 8.7817478190001452 50.1426277169999821, 8.7832889560000353 50.1414718630001062, 8.7806463240000312 50.1367340080000758, 8.7791023250001103 50.1364517220001176, 8.7779769900001270 50.1362190240000700, 8.7773113260001310 50.1360359200000403, 8.7766323089999787 50.1358299260000422, 8.7762308129999838 50.1357040400001210, 8.7755270000001246 50.1354255670000839, 8.7749338150001677 50.1351814280001804, 8.7737092970000958 50.1346054080001409, 8.7729406359999871 50.1341705330001446, 8.7726383220000343 50.1340141310001854, 8.7724323270001037 50.1339111329999696, 8.7720890040000654 50.1337165840001262, 8.7717351920001079 50.1334419260001596, 8.7713546740000652 50.1331367500001193, 8.7709350590000668 50.1327285780001262, 8.7705354680001051 50.1321640020001382, 8.7702589030000695 50.1316986100001714, 8.7698497760000578 50.1308021550001399, 8.7696142200001077 50.1299400330000253, 8.7697000490000008 50.1287994389999767, 8.7698755270001243 50.1276588450000986, 8.7707338330001221 50.1260719310000695, 8.7758522030001131 50.1210060110001336, 8.7796602260001464 50.1177215569999817, 8.7804584510000154 50.1165618900000709, 8.7810812000001306 50.1153144840000664, 8.7812633509999998 50.1139717110000902, 8.7811641689999647 50.1130294800001934, 8.7806940069999655 50.1119575500001133, 8.7802085870001179 50.1114387510000938, 8.7796792990000654 50.1109809890000406, 8.7790908820001050 50.1106224070000508, 8.7770290380000802 50.1096420290000992, 8.7762470239999857 50.1092681890000904, 8.7750072480001222 50.1087493890001383, 8.7730941780000649 50.1084136960001842, 8.7716932299999826 
50.1082496640001409, 8.7700996400001259 50.1082954410001093, 8.7684259420000217 50.1084861750000528, 8.7661628730000984 50.1090164190000564, 8.7645769120000523 50.1094512950001558, 8.7633409510000320 50.1098365780001700, 8.7619514460000687 50.1103858960001389, 8.7605123510001022 50.1112442009999768, 8.7565603260000557 50.1139945980000903, 8.7552404410001827 50.1146430980001014, 8.7538166050001109 50.1150741580000840, 8.7520275110000512 50.1154022210001244, 8.7502069479999705 50.1155662540001003, 8.7484674440001413 50.1155395500000509, 8.7466955190001272 50.1153907780001759, 8.7448177330000476 50.1151008620001335, 8.7432413090001546 50.1146736150000720, 8.7413387300000522 50.1140480049999724, 8.7396526340001515 50.1132965100000547, 8.7385807030000251 50.1127662650001753, 8.7369279860001257 50.1120300300000849, 8.7351217260001022 50.1113014219999968, 8.7262821200001781 50.1085548400001244, 8.7270555510001486 50.1080780030000597, 8.7285413730001551 50.1069145210000215, 8.7310886380001307 50.1051063540000996, 8.7323637020001570 50.1051483159999975, 8.7337064750001332 50.1051940930001365, 8.7371740350001801 50.1043128970000566, 8.7405624390000298 50.1040763850000417, 8.7419023520001247 50.1039619449999805, 8.7422056210000960 50.1029663080001342, 8.7424297340000976 50.1029548650001857, 8.7427253730000984 50.1029129030000604, 8.7430925370001091 50.1028213490001235, 8.7434196480000992 50.1026802070001622, 8.7438917150001316 50.1023902889999704, 8.7446584709999797 50.1017379760000949, 8.7448472969999784 50.1015319830000294, 8.7452659609999728 50.1011886600001048, 8.7455244060001291 50.1010017390000257, 8.7457103730001791 50.1009063720000540, 8.7459526060000599 50.1007766730000412, 8.7462654110000813 50.1006469720000496, 8.7472572330000844 50.1003189090001797, 8.7477540960000510 50.1001205450001521, 8.7481241230000251 50.0999526970001057, 8.7484846120001407 50.0996704110000337, 8.7486000060000606 50.0995750420000263, 8.7489318859999798 50.0993003840000597, 8.7492399210000258 
50.0989074710001319, 8.7485446930000421 50.0980529780000552, 8.7498188020000498 50.0961265560001721, 8.7488641730001859 50.0943298340001775, 8.7462759020001499 50.0935821540001029, 8.7431755060001137 50.0925254820000987, 8.7416191110000341 50.0895729060000576, 8.7404947280000442 50.0891532910000592, 8.7380228050000142 50.0879096980000895, 8.7345638280000912 50.0861625680001339, 8.7312211990000002 50.0812873850001097, 8.7326164240000708 50.0806159980000416, 8.7292633070000534 50.0756797800000300, 8.7284698490000778 50.0748367300000723, 8.7277259820000950 50.0741615290000368, 8.7248497000001635 50.0740280150000103, 8.7243366240001592 50.0705604550001908, 8.7241163260000576 50.0697326660001636, 8.7237968449999812 50.0685653690001118, 8.7216014850000647 50.0674514770001338, 8.7198324200001593 50.0665245060001212, 8.7183628080000517 50.0650711050001860, 8.7179336560000706 50.0631103510001481, 8.7178344740000284 50.0624046330000283, 8.7169532770000728 50.0620498660000521, 8.7058277140001223 50.0610771180000711, 8.6997241980001263 50.0592231760001596, 8.6950902930001348 50.0590324400001805, 8.6864519110000007 50.0590858470000626, 8.6823749550000571 50.0588684090001834, 8.6808309549999763 50.0586814880001043, 8.6792325970000661 50.0584220890000324, 8.6770439150001835 50.0579109200000971, 8.6749715800000331 50.0572738650000701, 8.6732082370000967 50.0569572450001488, 8.6714382170001727 50.0568122860001381, 8.6700181959999991 50.0567665109999780, 8.6642618180001705 50.0555076600000461, 8.6625394829999891 50.0555801400001315, 8.6537008289999999 50.0556907660000547, 8.6500825890001352 50.0542106639999815, 8.6420631410001079 50.0533790600001112, 8.6323070530000336 50.0544242860000850, 8.6239223480000646 50.0544700630000534, 8.6230058680000639 50.0535354620000703, 8.6193552020001789 50.0507392890000915, 8.6156177520000004 50.0487480170000367, 8.6130447380000987 50.0466918940001619, 8.6081132900001194 50.0434875490001332, 8.6054935450001722 50.0394668580000825, 8.5972404480000364 
50.0424423220000563, 8.5952272409999750 50.0389976510001020, 8.5920438770000374 50.0316467290001015, 8.5918512350000924 50.0298233030000574, 8.5916509629999993 50.0296134960001950, 8.5915508260001729 50.0293159490001358, 8.5913734430001796 50.0288620010000500, 8.5911407470001109 50.0276985170000899, 8.5909900670000638 50.0265960700000960, 8.5909061430000406 50.0258941650001816, 8.5908908850000216 50.0242080680000001, 8.5909137720000217 50.0236358640001413, 8.5909280770000578 50.0232849120000012, 8.5909080510000422 50.0230865480001512, 8.5913209910000887 50.0176925650001749, 8.5810146330001658 50.0172920240001417, 8.5761814110001069 50.0155754080000179, 8.5736999520000268 50.0202674860001366, 8.5725154870000324 50.0219192510000994, 8.5714149479999833 50.0232849120000012, 8.5696287160001816 50.0255470280001759, 8.5644273750001503 50.0264930720001075, 8.5619544989999667 50.0256652840001834, 8.5525121690001242 50.0249557500000606, 8.5486507410000172 50.0247459410000488, 8.5410814290000872 50.0243377689999988, 8.5377140040000654 50.0232315070001619, 8.5318202970001380 50.0212974540000914, 8.5188446040000940 50.0205688470001064, 8.5162410750000390 50.0260314940001081))" #os.getenv('AOI')
# Credentials and folders come from the environment; each value is None when
# its variable is not set.
SENTINEL2_GOOGLE_API_KEY = os.getenv('SENTINEL2_GOOGLE_API_KEY')
SATELLITE_CACHE_FOLDER = os.getenv('SENTINEL2_CACHE')
OUTPUT_FOLDER = os.getenv('OUTPUT_FOLDER')


# Download settings used by load_images(): LOAD_DIR is passed as the
# downloader's output_dir, PRODUCT_TYPE / BANDS / CONSTRAINTS are forwarded
# unchanged to the download call. PRODUCT_TYPE also selects the file-name
# pattern matched in filter_by_date().
LOAD_DIR = SATELLITE_CACHE_FOLDER
PRODUCT_TYPE = 'L1C'
BANDS = {'TCI','B01','B02','B04','B05','B08','B8A','B09','B10','B11','B12'}
CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 0.1, 'CLOUDY_PIXEL_PERCENTAGE': 15.0, }
# NOTE(review): not referenced in the visible part of this notebook.
CLOUDS_PROBABILITY_THRESHOLD = 1
# Only consulted by the (currently commented-out) remove_not_used_dates() call.
REMOVE_OTHER_DATES = True


# NOTE(review): hard-coded; the environment lookup is kept in the trailing
# comment for reference.
CODE_FOLDER = Path('/code')  #Path(os.getenv('CODE_FOLDER'))

# NOTE(review): MODEL_PATH is not referenced in the visible part of this notebook.
MODEL_PATH = CODE_FOLDER/'models'/'unet_diff.pth'
# Destination folder passed to prepare_data() (a plain string, not a Path).
PREPARED_DATA_FOLDER = '/output/prepared'
# Destination folder passed to prepare_clouds() (a Path).
CLOUD_DATA_FOLDER = CODE_FOLDER/'data'/'clouds'


# Retry policy of load_images(): at most MAX_SHIFT_ITERS download attempts per
# tile, moving the search window MAX_SHIFT days back after a failed attempt.
MAX_SHIFT_ITERS = 2
MAX_SHIFT = 30

In [85]:
# Use the sample period only as a fallback: dates supplied through the
# START_DATE / END_DATE environment variables (read in the setup cell) must not
# be silently overwritten by a hard-coded debugging value.
START_DATE = START_DATE or '2020-07-01'
END_DATE = END_DATE or '2020-09-25'

### 1. Transform AOI to GeoJSON file

In [86]:
# Parse the AOI WKT string into a one-row GeoDataFrame declared as EPSG:4326
# and write it as GeoJSON. The file name is relative, so the file lands in the
# current working directory; the same name is handed to Sentinel2Overlap below.
aoi = gp.GeoDataFrame(geometry=[wkt.loads(AOI)], crs="epsg:4326")
aoi_filename = "provided_aoi.geojson"
aoi.to_file(aoi_filename, driver="GeoJSON") 

In [87]:
# Presumably determines which Sentinel-2 tiles intersect the AOI file
# (NOTE(review): confirm against the sentinel2download package). The result is
# used below through its `Name` column.
s2overlap = Sentinel2Overlap(aoi_path=aoi_filename)
overlap_tiles = s2overlap.overlap_with_geometry()
# De-duplicated first three characters of every tile name (e.g. "32U" for "32UMA").
landcover_tiles = set(overlap_tiles.Name.apply(lambda x:x[:3]).to_list())

In [88]:
def shift_date(date, delta=5, format='%Y-%m-%d'):
    """Move a date string `delta` days into the past.

    Args:
        date: Date string laid out according to `format`.
        delta: Number of days to subtract (a negative value moves forward).
        format: strptime/strftime pattern used for both parsing and output.

    Returns:
        The shifted date rendered with the same `format`.
    """
    parsed = datetime.strptime(date, format)
    shifted = parsed - timedelta(days=delta)
    return shifted.strftime(format)

def diff_date(date_a, date_b, format='%Y-%m-%d'):
    """Return `date_a - date_b` as a `datetime.timedelta`.

    Both arguments are date strings parsed with `format`; the result is
    negative when `date_a` lies before `date_b`.
    """
    parsed_a, parsed_b = (datetime.strptime(value, format) for value in (date_a, date_b))
    return parsed_a - parsed_b

In [89]:
# Calendar year of the midpoint between START_DATE and END_DATE.
start_timestamp = datetime.strptime(START_DATE, '%Y-%m-%d')
end_timestamp = datetime.strptime(END_DATE, '%Y-%m-%d')
year = (start_timestamp + (end_timestamp - start_timestamp) / 2).year
year

2020

In [90]:
def load_images(tiles, start_date, end_date):
    """Download Sentinel-2 products for every tile, widening the search backwards on failure.

    For each tile the window [start_date, end_date] is tried first. Whenever the
    downloader returns nothing, the window is moved back by MAX_SHIFT days (the
    new window ends where the previous one started) and the download is retried,
    for at most MAX_SHIFT_ITERS attempts in total.

    Args:
        tiles: Iterable of tile names (e.g. "32UMA").
        start_date: First day of the initial search window, '%Y-%m-%d'.
        end_date: Last day of the initial search window, '%Y-%m-%d'.

    Returns:
        dict mapping tile name -> whatever `Sentinel2Downloader.download`
        returned for it. Tiles for which nothing was found are omitted.
        (NOTE(review): downstream code unpacks each entry as a 2-tuple whose
        first element is a file path — confirm against the downloader.)
    """
    loader = Sentinel2Downloader(SENTINEL2_GOOGLE_API_KEY)
    loadings = dict()

    for tile in tiles:
        start, end = start_date, end_date
        # Reset per tile so a result from the previous tile (or no attempt at
        # all when MAX_SHIFT_ITERS <= 0) can never be mistaken for this tile's.
        loaded = None

        print(f"Loading images for tile: {tile}...")
        for _ in range(MAX_SHIFT_ITERS):
            loaded = loader.download(PRODUCT_TYPE,
                                     [tile],
                                     start_date=start,
                                     end_date=end,
                                     output_dir=LOAD_DIR,
                                     bands=BANDS,
                                     constraints=CONSTRAINTS)
            if loaded:
                break

            # Shift relative to the window that just failed, not to the original
            # one; otherwise every retry after the first repeats the same query.
            failed_start, failed_end = start, end
            end = failed_start
            start = shift_date(failed_start, delta=MAX_SHIFT)
            print(f"For tile: {tile} and dates {failed_start} {failed_end} proper images not found! Shift dates to {start} {end}!")

        if loaded:
            loadings[tile] = loaded
            print(f"Loading images for tile {tile} finished")
        else:
            print(f"Images for tile {tile} were not loaded!")

    return loadings

In [91]:
# One download per reference date: each search window covers the 10 days that
# end on START_DATE / END_DATE respectively.
loadings_start_date = load_images(overlap_tiles.Name.values, shift_date(START_DATE, delta=10), START_DATE)
loadings_end_date = load_images(overlap_tiles.Name.values, shift_date(END_DATE, delta=10), END_DATE)

Loading images for tile: 32UMA...
Loading images for tile 32UMA finished
Loading images for tile: 32UMA...
Loading images for tile 32UMA finished


In [92]:
def filter_by_date(loadings, func=max, filtered=None, tag='start'):
    """Pick one acquisition date per tile and collect the band files of that date.

    Args:
        loadings: dict tile -> iterable of 2-tuples whose first element is a
            file path (as returned by load_images).
        func: Aggregator applied to the list of acquisition dates found in the
            paths (`max` keeps the latest, `min` the earliest).
        filtered: Optional dict to extend; a fresh dict is created when omitted.
        tag: Key under which the result for each tile is stored
            (e.g. 'start' or 'end').

    Returns:
        `filtered`, where `filtered[tile][tag]` is a dict with
        `paths` (band name -> file path), `date` ('%Y%m%d' string) and
        `folder` (directory of the files of the selected date).
        Tiles that raise during processing are reported with print() and skipped.
    """
    # File-name suffix that identifies each required band, per product type.
    band_suffixes = {
        'L2A': {'B8A': 'B8A_20m.jp2', 'B11': 'B11_20m.jp2', 'B04': 'B04_10m.jp2',
                'B08': 'B08_10m.jp2', 'B12': 'B12_20m.jp2', 'TCI': 'TCI_10m.jp2'},
        'L1C': {'B8A': 'B8A.jp2', 'B11': 'B11.jp2', 'B04': 'B04.jp2',
                'B08': 'B08.jp2', 'B12': 'B12.jp2', 'TCI': 'TCI.jp2'},
    }

    # A dict() default would be created once and shared by every call.
    if filtered is None:
        filtered = dict()

    def _find_agg_date(folders, func=func):
        """Extract the first `_<digits>T<digits>_` date of each item and aggregate with `func`."""
        dates = list()
        for folder in folders:
            search = re.search(r"_(\d+)T\d+_", str(folder))
            dates.append(datetime.strptime(search.group(1), '%Y%m%d'))
        return datetime.strftime(func(dates), '%Y%m%d')

    def _get_folder(path):
        """Directory part of `path`, rebuilt as an absolute '/'-rooted path."""
        return os.path.join('/', *path.split('/')[:-1])

    for tile, items in loadings.items():
        try:
            suffixes = band_suffixes.get(PRODUCT_TYPE)
            if suffixes is None:
                raise ValueError(f"Unsupported PRODUCT_TYPE: {PRODUCT_TYPE}")

            last_date = _find_agg_date(items)
            bands_paths = dict()
            # Reset per tile so a folder from a previous tile can never leak in.
            folder_path = None
            for path, _ in items:
                if last_date not in path:
                    continue
                for band, suffix in suffixes.items():
                    if suffix in path:
                        bands_paths[band] = path
                # Take the folder from a file of the selected date, not from the
                # first downloaded file, which may belong to another product.
                if folder_path is None:
                    folder_path = _get_folder(path)

            if folder_path is None:
                raise ValueError(f"No files found for date {last_date}")

            info_dict = {
                tag: dict(paths=bands_paths,
                          date=last_date,
                          folder=folder_path)
            }
            filtered.setdefault(tile, dict()).update(info_dict)

        except Exception as ex:
            # Best effort: a broken tile must not abort the remaining ones.
            print(f"Error for {tile}: {str(ex)}")
    return filtered

In [93]:
# For both download windows keep the most recent acquisition (func=max) per
# tile. The second call receives the dict produced by the first one, so every
# tile ends up with a 'start' and an 'end' entry.
filtered = filter_by_date(loadings_start_date, func=max, tag='start')
filtered = filter_by_date(loadings_end_date, func=max, filtered=filtered, tag='end')

In [94]:
def get_tile_and_images_folders(fitered, idx=0):
    """Return one tile name together with its start-date and end-date image folders.

    Args:
        fitered: dict tile -> {'start': {'folder': ...}, 'end': {'folder': ...}}
            as produced by filter_by_date. (The misspelt parameter name is kept
            so that existing keyword callers keep working.)
        idx: Position of the wanted tile in the alphabetically sorted tile names.

    Returns:
        Tuple `(tile, start_folder, end_folder)`.

    Raises:
        IndexError: if `idx` is out of range (e.g. the dict is empty).
        KeyError: if the tile lacks a 'start' or 'end' entry.
    """
    # Read from the argument, not from a same-looking global variable.
    tile = sorted(fitered.keys())[idx]
    entry = fitered[tile]
    return tile, entry['start']['folder'], entry['end']['folder']

# Select the first tile (alphabetical order) and the folders that hold its
# start-date and end-date images; the trailing expression displays both folders.
tile, start_date_folder, end_date_folder = get_tile_and_images_folders(filtered)
start_date_folder, end_date_folder

('/input/SENTINEL2_CACHE/S2A_MSIL1C_20200626T104031_N0209_R008_T32UMA_20200626T125124',
 '/input/SENTINEL2_CACHE/S2A_MSIL1C_20200921T103031_N0209_R108_T32UMA_20200921T142406')

In [95]:
import shutil

def remove_not_used_dates(start_date_folder, end_date_folder, cache_dir=SATELLITE_CACHE_FOLDER):
    """Delete everything in `cache_dir` except the two product folders in use.

    DESTRUCTIVE: every other entry directly inside `cache_dir` is removed.

    Args:
        start_date_folder: '/'-separated path of the start-date product folder.
        end_date_folder: '/'-separated path of the end-date product folder.
        cache_dir: Cache directory that must be the parent of both folders.

    Raises:
        ValueError: if `cache_dir` is None or is not the parent directory of
            both folders (guards against wiping an unrelated directory).
    """
    if cache_dir is None:
        raise ValueError('cache_dir is not valid')

    start_split, end_split = start_date_folder.split('/'), end_date_folder.split('/')
    # Compare normalised paths so that an equivalent spelling of the same
    # directory (trailing slash, Path object) is not rejected.
    cache_root = os.path.normpath(str(cache_dir))
    parents = (os.path.join('/', *start_split[:-1]), os.path.join('/', *end_split[:-1]))
    if any(os.path.normpath(parent) != cache_root for parent in parents):
        raise ValueError('cache_dir is not valid')

    used_dates = {start_split[-1], end_split[-1]}
    for entry in os.listdir(cache_root):
        if entry in used_dates:
            continue
        entry_path = os.path.join(cache_root, entry)
        # shutil.rmtree only accepts real directories; stray files and symlinks
        # in the cache are removed with os.remove instead of aborting the sweep.
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            shutil.rmtree(entry_path)
        else:
            os.remove(entry_path)

# if REMOVE_OTHER_DATES:            
#     remove_not_used_dates(start_date_folder, end_date_folder)

### Prepare images (calculating NDMI and NDVI, scaling, merging to TIFF)

In [96]:
from time_dependent.data_prepare.prepare_tif import search_band, to_tiff, merge, scale_img, get_ndvi, get_ndmi




def prepare_data(data_folder, save_path):
    """Build the merged model-input GeoTIFF for one Sentinel-2 product folder.

    Steps: convert TCI, B08, B8A, B11, B12 and B04 from .jp2 to .tif, derive the
    NDVI (from B04 and B08) and NDMI (from B11 and B8A) rasters, scale all seven
    layers and merge the scaled files into a single file in `save_path`.
    Every .tif in `data_folder` is deleted afterwards, also when a step fails.

    Args:
        data_folder: Folder that contains the .jp2 band files of one product.
        save_path: Output folder; created when missing.

    Returns:
        Path of the merged file, `<save_path>/all_merged_<product folder name>.tif`.
    """
    # Bind the helpers locally. A later cell imports search_band / to_tiff /
    # merge from the prepare_clouds module under the same global names, so a
    # global lookup would silently pick up the wrong implementation whenever
    # this function is called after that cell has run.
    from time_dependent.data_prepare.prepare_tif import (
        search_band, to_tiff, merge, scale_img, get_ndvi, get_ndmi)

    img_folder = data_folder
    # normpath drops a trailing '/', which would otherwise give an empty name.
    product_name = os.path.basename(os.path.normpath(data_folder))

    os.makedirs(save_path, exist_ok=True)
    save_file_merged = join(save_path, f'all_merged_{product_name}.tif')

    # Paths are kept WITHOUT extension; '.jp2' / '.tif' is appended where needed.
    # NOTE(review): assumes search_band returns the file name without extension.
    bands = ['TCI', 'B08', 'B8A', 'B11', 'B12']
    band_paths = {band: join(img_folder, search_band(band, img_folder, 'jp2'))
                  for band in bands}
    b4_name = join(img_folder, search_band('B04', img_folder, 'jp2'))
    ndvi_name = join(img_folder, 'ndvi')
    ndmi_name = join(img_folder, 'ndmi')

    try:
        print('\nall bands are converting to *tif...\n')
        for band_name in band_paths.values():
            print(band_name[-3:])
            to_tiff(f'{band_name}.jp2')
        to_tiff(f'{b4_name}.jp2')

        print('\nndvi band is processing...')
        get_ndvi(f'{b4_name}.tif', f"{band_paths['B08']}.tif", f'{ndvi_name}.tif')

        print('\nndmi band is processing...')
        get_ndmi(f"{band_paths['B11']}.tif", f"{band_paths['B8A']}.tif", f'{ndmi_name}.tif')

        print('\nall bands are scaling to 8-bit images...\n')
        band_names_scaled = []
        for band_name in [*band_paths.values(), ndvi_name, ndmi_name]:
            print(band_name)
            band_names_scaled.append(scale_img(f'{band_name}.tif'))

        print('\nall bands are being merged...\n')
        print(band_names_scaled)
        merge(save_file_merged, *band_names_scaled)
    finally:
        # Remove the intermediate rasters from the image folder even on failure
        # so a broken run does not leave large temporary files behind.
        if os.path.isdir(img_folder):
            for item in os.listdir(img_folder):
                if item.endswith('.tif'):
                    os.remove(join(img_folder, item))
    return save_file_merged


In [97]:
# Build one merged raster per reference date; both files are written to
# PREPARED_DATA_FOLDER and their paths are kept for the following steps.
os.makedirs(PREPARED_DATA_FOLDER, exist_ok=True)

start_date_merged_path = prepare_data(start_date_folder, PREPARED_DATA_FOLDER)
end_date_merged_path = prepare_data(end_date_folder, PREPARED_DATA_FOLDER)


all bands are converting to *tif...

TCI
Input file size is 10980, 10980
0...10...20...30...40...50...60...70...80...90...100 - done.
B08
Input file size is 10980, 10980
0...10...20...30...40...50...60...70...80...90...100 - done.
B8A
Input file size is 5490, 5490
0...10...20...30...40...50...60...70...80...90...100 - done.
B11
Input file size is 5490, 5490
0...10...20...30...40...50...60...70...80...90...100 - done.
B12
Input file size is 5490, 5490
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 10980, 10980
0...10...20...30...40...50...60...70...80...90...100 - done.

ndvi band is processing...
0 .. 10 .. 20 .. 30 .. 40 .. 50 .. 60 .. 70 .. 80 .. 90 .. 100 - Done

ndmi band is processing...
0 .. 10 .. 20 .. 30 .. 40 .. 50 .. 60 .. 70 .. 80 .. 90 .. 100 - Done

all bands are scaling to 8-bit images...

/input/SENTINEL2_CACHE/S2A_MSIL1C_20200626T104031_N0209_R008_T32UMA_20200626T125124/T32UMA_20200626T104031_TCI
Input file size is 10980, 10980
0...10..


Processing file     1 of     7,  0.000% completed in 0 minutes.
Filename: /input/SENTINEL2_CACHE/S2A_MSIL1C_20200921T103031_N0209_R108_T32UMA_20200921T142406/T32UMA_20200921T103031_TCI_scaled.tif
File Size: 10980x10980x3
Pixel Size: 10.000000 x -10.000000
UL:(399960.000000,5600040.000000)   LR:(509760.000000,5490240.000000)
Copy 0,0,10980,10980 to 0,0,10980,10980.
Copy 0,0,10980,10980 to 0,0,10980,10980.
Copy 0,0,10980,10980 to 0,0,10980,10980.

Processing file     2 of     7, 14.286% completed in 0 minutes.
Filename: /input/SENTINEL2_CACHE/S2A_MSIL1C_20200921T103031_N0209_R108_T32UMA_20200921T142406/T32UMA_20200921T103031_B08_scaled.tif
File Size: 10980x10980x1
Pixel Size: 10.000000 x -10.000000
UL:(399960.000000,5600040.000000)   LR:(509760.000000,5490240.000000)
Copy 0,0,10980,10980 to 0,0,10980,10980.

Processing file     3 of     7, 28.571% completed in 0 minutes.
Filename: /input/SENTINEL2_CACHE/S2A_MSIL1C_20200921T103031_N0209_R108_T32UMA_20200921T142406/T32UMA_20200921T103031_

In [98]:
from time_dependent.data_prepare.prepare_clouds import search_band, to_tiff, merge, detect_clouds


def prepare_clouds(data_folder, save_path):
    """Build a cloud raster for one Sentinel-2 product folder.

    The cloud-detection bands found in ``data_folder`` are converted from
    JP2 to GeoTIFF, merged into one temporary raster and passed to
    ``detect_clouds``; the result is written to
    ``<save_path>/<product>_clouds.tiff``. The merged raster and every
    ``*.tif`` file inside ``data_folder`` are removed afterwards.

    :param data_folder: product folder containing the ``*.jp2`` band files
    :param save_path: folder that receives the cloud raster
    """
    img_folder = data_folder
    # normpath drops a trailing separator, so the product name is never empty
    # (an empty name would make every product write the same "_clouds.tiff").
    tile_folder = os.path.basename(os.path.normpath(data_folder))
    print(tile_folder)
    bands = ['B01', 'B02', 'B04', 'B05', 'B08', 'B8A', 'B09', 'B10', 'B11', 'B12']

    # NOTE(review): search_band presumably returns the band file name without
    # its extension, since ".jp2" is appended below - confirm.
    band_names = [join(img_folder, search_band(band, img_folder, 'jp2')) for band in bands]

    print('\nall bands are converting to *tif...\n')

    for band_name in band_names:
        print(band_name[-3:])
        to_tiff(f'{band_name}.jp2')

    print('\n all bands are being merged...\n')

    save_file_merged = join(save_path, f'{tile_folder}_full_merged.tif')
    merge(save_file_merged, *band_names)

    save_file_clouds = join(save_path, f'{tile_folder}_clouds.tiff')
    detect_clouds(save_file_merged, save_file_clouds)
    os.remove(save_file_merged)

    # drop the intermediate GeoTIFFs created next to the source JP2 files
    for item in os.listdir(img_folder):
        if item.endswith('.tif'):
            os.remove(join(img_folder, item))

    #os.system(f'rm {join(granule_folder, tile_folder, 'IMG_DATA')}*.jp2')
    print('\ntemp files have been deleted\n')

In [99]:
# Produce one cloud raster per acquisition date inside CLOUD_DATA_FOLDER.
os.makedirs(CLOUD_DATA_FOLDER, exist_ok=True)

for product_folder in (start_date_folder, end_date_folder):
    prepare_clouds(product_folder, CLOUD_DATA_FOLDER)

S2A_MSIL1C_20200626T104031_N0209_R008_T32UMA_20200626T125124

all bands are converting to *tif...

B01
Input file size is 1830, 1830
0...10...20...30...40...50...60...70...80...90...100 - done.
B02
Input file size is 10980, 10980
0...10...20...30...40...50...60...70...80...90...100 - done.
B04
Input file size is 10980, 10980
0...10...20...30...40...50...60...70...80...90...100 - done.
B05
Input file size is 5490, 5490
0...10...20...30...40...50...60...70...80...90...100 - done.
B08
Input file size is 10980, 10980
0...10...20...30...40...50...60...70...80...90...100 - done.
B8A
Input file size is 5490, 5490
0...10...20...30...40...50...60...70...80...90...100 - done.
B09
Input file size is 1830, 1830
0...10...20...30...40...50...60...70...80...90...100 - done.
B10
Input file size is 1830, 1830
0...10...20...30...40...50...60...70...80...90...100 - done.
B11
Input file size is 5490, 5490
0...10...20...30...40...50...60...70...80...90...100 - done.
B12
Input file size is 5490, 5490
0...10

predict.
resize.
save cloud.

temp files have been deleted



In [100]:
import os
import re
import cv2
import torch
import logging
import imageio
import rasterio
import argparse
import geopandas
import numpy as np
import segmentation_models_pytorch as smp


from geopandas import GeoSeries
from scipy import spatial
from shapely.geometry import Polygon
from shapely.ops import unary_union
from torchvision import transforms
from torch import nn
from tqdm import tqdm
from rasterio.windows import Window
from rasterio.plot import reshape_as_image
from skimage.exposure import match_histograms


# Cloud-raster values strictly above this threshold are treated as cloud in postprocessing().
CLOUDS_PROBABILITY_THRESHOLD = 15
# How many nearest mask centroids are checked for every clearcut polygon.
NEAREST_POLYGONS_NUMBER = 10
# In this part of the notebook it is only referenced by commented-out code in postprocessing().
DATES_FOR_TILE = 2

# Silence every Python warning for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

# NOTE(review): the returned value is discarded, so this line has no effect;
# possibly os.environ.setdefault(...) was intended - confirm.
os.environ.get('CUDA_VISIBLE_DEVICES', '0')

# Only the message format is configured; no level is passed.
# NOTE(review): with the default root level the logging.info calls in this
# notebook are likely not displayed - confirm.
logging.basicConfig(format='%(asctime)s %(message)s')


def predict(image_tensor, model, channels, neighbours, size, device):
    """Run ``model`` on a single tile and return the prediction as a NumPy array.

    :param image_tensor: tensor holding ``count_channels(channels) * neighbours``
        planes of ``size`` x ``size`` values
    :param model: object whose ``predict`` returns a pair; only the first item is used
    :param channels: channel names understood by ``count_channels``
    :param neighbours: number of stacked images per channel
    :param size: tile edge length in pixels
    :param device: torch device the batch is moved to

    :return: array of shape ``(size, size)``
    """
    plane_count = count_channels(channels) * neighbours
    batch = image_tensor.view((1, plane_count, size, size)).to(device, dtype=torch.float)
    prediction, _ = model.predict(batch)
    return prediction.view(size, size).detach().cpu().numpy()


def diff(img1, img2):
    """Build the model input from two images of the same tile.

    ``img2`` is histogram-matched to ``img1``; the normalized difference
    ``(img1 - img2) / (img1 + img2)`` is shifted and scaled by
    ``(d + 1) * 127`` and stacked with ``img1`` and the matched ``img2``
    along the last axis, all cast to ``uint8``.

    NOTE(review): pixels where ``img1 + img2`` is zero divide by zero -
    confirm the resulting values are acceptable for the model.
    """
    matched = match_histograms(img2, img1, multichannel=True)
    normalized = (img1 - matched) / (img1 + matched)
    shifted = (normalized + 1) * 127
    layers = (
        shifted.astype(np.uint8),
        img1.astype(np.uint8),
        matched.astype(np.uint8),
    )
    return np.concatenate(layers, axis=-1)


def mask_postprocess(mask):
    """Clean a predicted mask: one 3x3 erosion followed by a 5x5 morphological closing."""
    eroded = cv2.erode(mask, np.ones((3, 3), np.uint8), iterations=1)
    return cv2.morphologyEx(eroded, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8))


def predict_raster(img_current, img_previous, 
                   channels, 
                   network = 'unet-diff',
                   model_weights_path = '/code/models/unet_diff.pth',
                   input_size=56, neighbours=3):
    """Predict a clearcut mask for a pair of rasters, tile by tile.

    Both rasters are read in non-overlapping ``input_size`` x ``input_size``
    windows; each pair of windows is turned into a difference image
    (``diff``), fed to the model and post-processed (``mask_postprocess``).
    Pixels in the right/bottom remainder that does not fill a whole tile are
    left at zero.

    :param img_current: path of the first raster; its metadata is returned
    :param img_previous: path of the second raster
        (NOTE(review): assumed to share the grid of ``img_current`` - confirm)
    :param channels: channel names, see ``count_channels``
    :param network: network name passed to ``load_model``
    :param model_weights_path: weights file passed to ``load_model``
    :param input_size: tile edge length in pixels
    :param neighbours: number of stacked images per channel

    :return: ``(mask, meta)`` - float32 mask of the raster's height x width and
        the raster metadata with ``count`` set to 1 and ``dtype`` to float32
    """
    model, device = load_model(network, model_weights_path)
    to_tensor = transforms.ToTensor()

    with rasterio.open(img_current) as source_current, \
         rasterio.open(img_previous) as source_previous:

        meta = source_current.meta
        meta['count'] = 1

        clearcut_mask = np.zeros((source_current.height, source_current.width))
        # Rows are bounded by the raster height and columns by its width, so
        # non-square rasters are tiled correctly as well.
        tile_rows = source_current.height // input_size
        tile_cols = source_current.width // input_size
        for tile_row in tqdm(range(tile_rows)):
            row_start = tile_row * input_size
            row_stop = row_start + input_size
            for tile_col in range(tile_cols):
                col_start = tile_col * input_size
                col_stop = col_start + input_size

                # Window takes (col_off, row_off, width, height)
                window = Window(col_start, row_start, input_size, input_size)
                image_current = reshape_as_image(source_current.read(window=window))
                image_previous = reshape_as_image(source_previous.read(window=window))

                difference_image = diff(image_current, image_previous)
                image_tensor = to_tensor(difference_image.astype(np.uint8)).to(device, dtype=torch.float)

                predicted = predict(image_tensor, model, channels, neighbours, input_size, device)
                predicted = mask_postprocess(predicted)
                clearcut_mask[row_start:row_stop, col_start:col_stop] += predicted
    meta['dtype'] = 'float32'
    return clearcut_mask.astype(np.float32), meta



def count_channels(channels):
    """Return how many raster planes the given channel names occupy.

    ``rgb`` counts as three planes, each of ``ndvi``, ``ndmi``, ``b08``,
    ``b8a``, ``b11`` and ``b12`` as one. Names are matched case-insensitively.

    :raises Exception: when a name is not one of the known channels
    """
    planes_per_channel = {
        'rgb': 3,
        'ndvi': 1,
        'ndmi': 1,
        'b08': 1,
        'b8a': 1,
        'b11': 1,
        'b12': 1,
    }

    total = 0
    for raw_name in channels:
        key = raw_name.lower()
        if key not in planes_per_channel:
            raise Exception('{} channel is unknown!'.format(key))
        total += planes_per_channel[key]
    return total


def scale(tensor, max_value):
    """Stretch ``tensor`` so that its maximum becomes ``max_value``.

    When the maximum is not positive the input object is returned unchanged.
    """
    peak = tensor.max()
    if not peak > 0:
        return tensor
    return tensor / peak * max_value


def save_raster(raster_array, meta, save_path, filename):
    """Write the predicted raster as ``predicted_<name>.png`` and ``predicted_<name>.tif``.

    ``<name>`` is the part of ``filename`` after the last ``_all_merged``
    (the whole ``filename`` when that marker is absent). The same 2-D array
    is written to every band declared by ``meta['count']``.

    :param raster_array: array to store
    :param meta: rasterio metadata used to create the GeoTIFF
    :param save_path: output folder, created when missing
    :param filename: base name of the source raster
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path, exist_ok=True)
        logging.info("Data directory created.")

    stem = filename.split('_all_merged')[-1]
    target = os.path.join(save_path, f'predicted_{stem}')

    cv2.imwrite(f'{target}.png', raster_array)

    with rasterio.open(f'{target}.tif', 'w', **meta) as dst:
        band = 1
        while band <= meta['count']:
            dst.write(raster_array, band)
            band += 1


def polygonize(raster_array, meta, transform=True, mode=cv2.RETR_TREE):
    """Convert a binary raster into a list of shapely polygons.

    The array is multiplied by 255, cast to ``uint8`` and traced with
    ``cv2.findContours``; every contour with more than two points becomes one
    ``Polygon``.

    :param raster_array: 2-D mask (boolean or 0/1 values)
    :param meta: raster metadata; ``meta['transform']`` is used when
        ``transform`` is true
    :param transform: when true, pixel coordinates are mapped through the
        raster's affine transform; otherwise they are kept as pixel indices
    :param mode: contour retrieval mode passed to ``cv2.findContours``

    :return: list of ``shapely.geometry.Polygon``
    """
    raster_array = (raster_array * 255).astype(np.uint8)

    # findContours returns 3 values on OpenCV 3.x and 2 on 4.x; the contour
    # list is the second-to-last item in both cases.
    contours = cv2.findContours(raster_array, mode, cv2.CHAIN_APPROX_SIMPLE)[-2]

    affine = meta['transform'] if transform else None

    polygons = []
    for contour in tqdm(contours):
        # contours come as (n_points, 1, 2); flatten to (n_points, 2)
        points = contour.reshape(contour.shape[0], contour.shape[2])
        if len(points) <= 2:
            continue
        if transform:
            # Affine on the left: right-multiplication (tuple * Affine) is
            # deprecated by the affine package.
            coords = [affine * (x, y) for x, y in points]
        else:
            coords = [tuple(point) for point in points]
        polygons.append(Polygon(coords))

    return polygons


def save_polygons(polygons, save_path, filename):
    """Write ``polygons`` to ``<save_path>/<filename>.geojson``.

    :param polygons: object supporting ``len()`` and ``to_file`` (a
        GeoDataFrame in this notebook)
    :param save_path: output folder, created when missing
    :param filename: output file name without extension

    :return: ``100`` when ``polygons`` is empty (nothing is written),
        otherwise ``None``
    """
    if len(polygons) == 0:
        logging.info('no_polygons detected')
        return 100

    if not os.path.exists(save_path):
        os.makedirs(save_path, exist_ok=True)
        logging.info("Data directory created.")

    polygons.to_file(os.path.join(save_path, f"{filename}.geojson"), driver='GeoJSON')

    # Report success only after the file has actually been written.
    logging.info(f'{filename} saved.')
    print(f'{filename} saved.')


def intersection_poly(test_poly, mask_poly):
    """Tell whether two valid geometries have a non-empty intersection.

    Returns ``False`` when either geometry is invalid. An invalid
    intersection result is repaired with ``buffer(0)`` before the emptiness
    check.
    """
    if not (test_poly.is_valid and mask_poly.is_valid):
        return False

    overlap = test_poly.intersection(mask_poly)
    if not overlap.is_valid:
        overlap = overlap.buffer(0)
    return not overlap.is_empty


def morphological_transform(img):
    """Apply a 5x5 morphological closing, then dilate once with a 3x3 kernel."""
    closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, np.ones((5, 5), np.uint8))
    return cv2.dilate(closed, np.ones((3, 3), np.uint8), iterations=1)


def postprocessing(tile, cloud_files, clearcuts, src_crs, year, aoi):
    """Flag clearcut polygons that intersect clouds or forest polygons.

    :param tile: tile name passed to ``LandcoverPolygons``
    :param cloud_files: paths of cloud rasters; band 1 is read from each
    :param clearcuts: list of clearcut polygons
    :param src_crs: CRS of the clearcuts, also used for the forest polygons
    :param year: landcover year passed to ``LandcoverPolygons``
    :param aoi: area of interest passed to ``LandcoverPolygons``

    :return: GeoDataFrame with ``geometry``, ``forest`` and ``clouds`` columns;
        a flag is 1 when the clearcut intersects one of its nearest masks
    """

    def get_intersected_polygons(polygons, masks, mask_column_name):
        """Finding in GeoDataFrame with clearcuts the masked polygons.

        :param polygons: GeoDataFrame with clearcuts and mask columns
        :param masks: list of masks (e.g., polygons of clouds)
        :param mask_column_name: name of mask column in polygons GeoDataFrame

        :return: GeoDataFrame with filled mask flags in corresponding column
        """
        if len(masks) == 0:
            # Nothing to intersect with: every clearcut stays unflagged.
            # (Assigning an empty list would fail with a length mismatch.)
            polygons[mask_column_name] = [0] * len(polygons)
            return polygons

        centroids = [[mask.centroid.x, mask.centroid.y] for mask in masks]
        kdtree = spatial.KDTree(centroids)

        masked_values = []
        for _, clearcut in polygons.iterrows():
            polygon = clearcut['geometry']
            centre = polygon.centroid
            # Query with plain coordinates instead of relying on the shapely
            # Point being convertible to an array.
            _, idxs = kdtree.query([centre.x, centre.y], k=NEAREST_POLYGONS_NUMBER)
            masked_value = 0
            for idx in idxs:
                # KDTree pads missing neighbours with index == number of points
                if idx >= len(masks):
                    break
                if intersection_poly(polygon, masks[idx].buffer(0)):
                    masked_value = 1
                    break
            masked_values.append(masked_value)
        polygons[mask_column_name] = masked_values
        return polygons

    landcover = LandcoverPolygons(tile, src_crs, year, aoi)
    forest_polygons = landcover.get_polygon()

#     cloud_files = [f"{img_path}/{tile}_{i}/clouds.tiff" for i in range(DATES_FOR_TILE)]
    cloud_polygons = []
    for cloud_file in cloud_files:
        with rasterio.open(cloud_file) as src:
            clouds = src.read(1)
            meta = src.meta
        clouds = morphological_transform(clouds)
        clouds = (clouds > CLOUDS_PROBABILITY_THRESHOLD).astype(np.uint8)
        if clouds.sum() > 0:
            cloud_polygons.extend(polygonize(clouds, meta, mode=cv2.RETR_LIST))

    n_clearcuts = len(clearcuts)
    polygons = {'geometry': clearcuts,
                'forest': np.zeros(n_clearcuts),
                'clouds': np.zeros(n_clearcuts)}

    polygons = geopandas.GeoDataFrame(polygons, crs=src_crs)

    if len(cloud_polygons) > 0:
        polygons = get_intersected_polygons(polygons, cloud_polygons, 'clouds')
    polygons = get_intersected_polygons(polygons, forest_polygons, 'forest')
    return polygons
    

In [101]:
"""
Model's helpers
"""
import io
import os
import logging
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google.oauth2 import service_account
import geopandas as gpd
from rasterio.mask import mask as riomask
import urllib
import rasterio
from shapely.geometry import MultiPolygon
from rasterio import features
import shapely
import pandas as pd
import cv2
import numpy as np


# from config import SCOPES, LANDCOVER_POLYGONS_PATH, SENTINEL_TILES, LANDCOVER_GEOJSON


# OAuth scope requested for the service account in weights_exists_or_download().
SCOPES = ['https://www.googleapis.com/auth/drive.file']
# Working folder for downloaded landcover rasters, crops and GeoJSON files;
# it is created as soon as this cell runs.
LANDCOVER_POLYGONS_PATH = '/code/data/landcovers'
os.makedirs(LANDCOVER_POLYGONS_PATH, exist_ok=True)
# Despite its name this is the KML file read as SENTINEL_TILES by
# LandcoverPolygons.create_polygon(); prepare_landcover() never deletes it.
LANDCOVER_FILENAME = 'S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml'
SENTINEL_TILES = os.path.join(LANDCOVER_POLYGONS_PATH, LANDCOVER_FILENAME)


# NOTE(review): logging.basicConfig is also called in an earlier cell; a
# repeated call normally changes nothing - confirm.
logging.basicConfig(format='%(asctime)s %(message)s')


class LandcoverPolygons:
    """
    LandcoverPolygon class to access forest polygons. Before usage,
    be sure that SENTINEL_TILES file is downloaded.
    SENTINEL_TILES_POLYGONS = 'https://sentinel.esa.int/documents/247904/1955685/S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml'
    
    :param tile: tile name (str), e.g. '36UYA'
    :param crs: coordinate system (str), e.g. 'EPSG:4326'
    :param year: landcover year, forwarded to ``prepare_landcover``
    :param aoi: area of interest, forwarded to ``prepare_landcover``
    """

    def __init__(self, tile, crs, year, aoi):
        self.tile = tile
        self.crs = crs
        # The landcover files are requested by the first three characters of the tile name.
        self.LANDCOVER_GEOJSON = prepare_landcover(year, [tile[:3]], LANDCOVER_POLYGONS_PATH, aoi)
        gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'

    def get_polygon(self):
        """Return the forest polygons of the tile as a list, in ``self.crs``.

        The per-tile GeoJSON is read when it exists, otherwise it is built
        by ``create_polygon``. An empty list is returned when there are no
        forest polygons, so callers always receive the same type.
        """
        polygon_path = os.path.join(LANDCOVER_POLYGONS_PATH, f'{self.tile}.geojson')
        logging.info(f'LANDCOVER_POLYGONS_PATH: {polygon_path}')
        if os.path.isfile(polygon_path):
            logging.info(f'{self.tile} forests polygons file exists.')
            polygons = gpd.read_file(polygon_path)
        else:
            logging.info(f'{self.tile} forests polygons file does not exist. Creating polygons...')
            polygons = self.create_polygon()

        if len(polygons) == 0:
            logging.info('No forests polygons.')
            return []
        return list(polygons.to_crs(self.crs)['geometry'])

    def create_polygon(self):
        """Select the landcover polygons that intersect the tile footprint.

        A non-empty selection is also written to
        ``<LANDCOVER_POLYGONS_PATH>/<tile>.geojson``.

        :raises FileNotFoundError: when the SENTINEL_TILES file is missing
        :raises IndexError: when the tile name is not present in SENTINEL_TILES
        :return: GeoDataFrame with the selected polygons (possibly empty)
        """
        if not os.path.isfile(SENTINEL_TILES):
            logging.error(f'{SENTINEL_TILES} does not exist')
            raise FileNotFoundError(SENTINEL_TILES)

        logging.info(f'read forests_polygons_file: {SENTINEL_TILES}, for tile {self.tile}')

        sentinel_tiles = gpd.read_file(SENTINEL_TILES, driver='KML')
        sentinel_tiles = sentinel_tiles[sentinel_tiles['Name'] == self.tile]

        logging.info(f'sentinel_tiles for {self.tile}: {sentinel_tiles}')

        if sentinel_tiles.empty:
            # Same exception type the bare ``.values[0]`` lookup would raise,
            # but with an explanation.
            raise IndexError(f'tile {self.tile} not found in {SENTINEL_TILES}')

        bounding_polygon = sentinel_tiles['geometry'].values[0]
        polygons = gpd.read_file(self.LANDCOVER_GEOJSON)
        polygons = polygons[polygons['geometry'].intersects(bounding_polygon)]
        polygon_path = os.path.join(LANDCOVER_POLYGONS_PATH, f'{self.tile}.geojson')

        logging.info(f'forests_polygons_file_path: {polygon_path}')

        if polygons.empty:
            return polygons
        polygons.to_file(polygon_path, driver='GeoJSON')
        return polygons


def landcover_annual(year, landcover_tiles, output_path, aoi):
    """Download annual landcover rasters, crop them to the AOI and merge them.

    For every tile the raster is downloaded into ``output_path`` (skipped
    when the file is already there), cropped to ``aoi`` and saved as
    ``<tile>_crop.tif`` containing 1 where the landcover class is 2 ("Trees")
    and 0 elsewhere. All crops are then warped to EPSG:4326 into
    ``landcover<year>.tif`` with ``gdalwarp``.

    :param year: landcover year (int; ``year + 1`` is used in the file name)
    :param landcover_tiles: iterable of landcover tile names, e.g. ``['32U']``
    :param output_path: folder for downloads, crops and the merged raster
    :param aoi: object providing ``to_crs`` and ``geometry`` (a GeoDataFrame
        is assumed - TODO confirm)
    """
    # ``import urllib`` alone does not guarantee that the ``request``
    # submodule is loaded.
    import urllib.request

    # landcover_classes = {
    #    1: "Water",
    #    2: "Trees",
    #    4: "Flooded vegetation",
    #    5: "Crops",
    #    7: "Built Area",
    #    8: "Bare ground",
    #   9: "Snow/Ice",
    #   10: "Clouds",
    #    11: "Rangeland"
    #}   
    EPSG = "EPSG:4326"
    landcover_downloaded = []
    os.makedirs(output_path, exist_ok=True)

    for tile_i in landcover_tiles:
        tile_url = f"https://lulctimeseries.blob.core.windows.net/lulctimeseriespublic/lc{year}/{tile_i}_{year}0101-{year+1}0101.tif"
        path = f"{output_path}/{os.path.basename(tile_url)}"

        if not os.path.exists(path):
            urllib.request.urlretrieve(tile_url, path)
        else:
            print('File already exists')
        landcover_downloaded.append(path)

    crops = []
    for path in landcover_downloaded:
        # The context manager closes the dataset once the crop is in memory.
        with rasterio.open(path, 'r') as src:
            profile = src.profile
            aoi_crs = aoi.to_crs(src.crs)
            crop, crop_transform = riomask(src, aoi_crs.geometry, all_touched=False, crop=True)
        profile['width'] = crop.shape[2]
        profile['height'] = crop.shape[1]
        profile['transform'] = crop_transform
        crop_name = os.path.join(output_path, os.path.split(path)[1].split('_')[0]+'_crop.tif')
        with rasterio.open(crop_name, 'w', **profile, nbits=1) as dst:
            # keep only landcover class 2 ("Trees") as a binary mask
            dst.write(np.where(crop==2, 1, 0).astype(np.uint8))
        crops.append(crop_name)

    landcover_name = os.path.join(output_path, f'landcover{year}.tif')
    listToStr = ' '.join(crops)
    command = ' '.join([f"gdalwarp --config GDAL_CACHEMAX 3000 -wm 3000 -t_srs {EPSG}", listToStr, landcover_name])
    print(command)
    os.system(command)
    print(f'{landcover_name} was merged')

    
def rescale(img, ratio):
    """Resize ``img`` by ``ratio`` along both axes using area interpolation.

    The target size is truncated to whole pixels.
    """
    rows, cols = img.shape[0], img.shape[1]
    # cv2.resize expects the size as (width, height)
    target_size = (int(cols * ratio), int(rows * ratio))
    return cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)

    
def mask_to_polygons(mask: np.ndarray, transform) -> MultiPolygon:
    """
    Converts raster mask to shapely MultiPolygon

    Only pixels with a value greater than zero are vectorized. An invalid
    result is repaired with ``buffer(0)``; when that yields a single
    ``Polygon`` it is wrapped into a ``MultiPolygon`` again.

    :param mask: 2-D raster mask
    :param transform: affine transform mapping pixel to world coordinates
    """
    shapes = features.shapes(
        mask.astype(np.uint8),
        mask=(mask > 0),
        transform=transform)

    polygons = MultiPolygon([shapely.geometry.shape(geometry) for geometry, _ in shapes])
    if not polygons.is_valid:
        polygons = polygons.buffer(0)
        # ``geom_type`` replaces the deprecated ``type`` attribute
        if polygons.geom_type == 'Polygon':
            polygons = MultiPolygon([polygons])
    return polygons
    
    
def create_landcover_gdf(landcover_dir, output_dir, filename):
    """Vectorize every ``*_crop.tif`` in ``landcover_dir`` into one GeoJSON file.

    Each crop is smoothed (downscale by 0.5, morphological close and open
    with a 10x10 elliptical kernel, upscale by 2), converted to polygons and
    reprojected to EPSG:4326. Polygon areas are computed in the raster's CRS
    before reprojection.

    :param landcover_dir: folder containing the ``*_crop.tif`` rasters
    :param output_dir: folder for the GeoJSON, created when missing
    :param filename: output file name without extension

    :return: path of the written ``<filename>.geojson``
    """
    landcover_names = [name for name in os.listdir(landcover_dir) if name.endswith('_crop.tif')]
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))

    gdfs = []
    for name in tqdm(landcover_names):
        lc_fullpath = os.path.join(landcover_dir, name)
        print(lc_fullpath)
        with rasterio.open(lc_fullpath, 'r') as src:
            data = src.read().squeeze()
            src_transform = src.transform
            crs = src.crs

        data = rescale(data, 0.5)
        data = cv2.morphologyEx(data, cv2.MORPH_CLOSE, kernel)
        data = cv2.morphologyEx(data, cv2.MORPH_OPEN, kernel)
        data = rescale(data, 2)

        # ``.geoms`` is the supported way to iterate a MultiPolygon; direct
        # iteration was removed in Shapely 2.
        current_polygons = [
            poly for poly in mask_to_polygons(data, src_transform).geoms if poly.area
        ]
        current_areas = [poly.area for poly in current_polygons]
        current_tilename = [name] * len(current_areas)

        current_gdf = gpd.GeoDataFrame(
            {'area': current_areas, 'names': current_tilename, 'geometry': current_polygons},
            crs=crs)
        gdfs.append(current_gdf.to_crs('EPSG:4326'))

    gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs = "EPSG:4326")

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{filename}.geojson")
    gdf.to_file(output_path, driver='GeoJSON')
    return output_path


def prepare_landcover(year, landcover_tiles, output_path, aoi):
    """Return the landcover GeoJSON for the first tile and year, building it when missing.

    Every entry of ``output_path`` other than the tiles KML
    (``LANDCOVER_FILENAME``) and ``*_landcover.geojson`` files is deleted
    first. If a file whose name contains ``<tile>_<year>_landcover`` remains,
    its path is returned; otherwise the landcover is downloaded, cropped and
    vectorized.

    NOTE(review): the reused GeoJSON is identified by tile and year only,
    although it was cropped to the ``aoi`` of the run that created it -
    confirm this is intended when the AOI changes.

    :param year: landcover year
    :param landcover_tiles: list of landcover tile names; the first one names the file
    :param output_path: existing working folder
    :param aoi: area of interest forwarded to ``landcover_annual``

    :return: path of the landcover GeoJSON
    """
    filename = f'{landcover_tiles[0]}_{year}_landcover'

    for file in os.listdir(output_path):
        keep = file == LANDCOVER_FILENAME or file.endswith('_landcover.geojson')
        file_path = os.path.join(output_path, file)
        # os.remove cannot delete directories (e.g. ".ipynb_checkpoints"),
        # so they are skipped instead of aborting the whole run.
        if not keep and not os.path.isdir(file_path):
            os.remove(file_path)

    for file in os.listdir(output_path):
        if filename in file:
            return os.path.join(output_path, file)

    landcover_annual(year, landcover_tiles, output_path, aoi)
    landcover_geojson = create_landcover_gdf(output_path, output_path, filename)
    return landcover_geojson


def weights_exists_or_download(path, file_id):
    """Make sure the model weights exist at ``path``, downloading them if needed.

    When ``path`` is missing, the Google Drive file ``file_id`` is downloaded
    with the service account named by the ``CREDENTIAL_FILE`` environment
    variable. The data is written to ``<path>.part`` and moved to ``path``
    only after the download has finished, so an interrupted download is
    never mistaken for valid weights.

    :param path: target location of the weights file
    :param file_id: Google Drive file id

    :return: ``path``
    """
    if Path(path).exists():
        return path

    creds_file = os.environ.get('CREDENTIAL_FILE')
    creds = service_account.Credentials.from_service_account_file(creds_file, scopes=SCOPES)

    service = build('drive', 'v3', credentials=creds)
    request = service.files().get_media(fileId=file_id)

    Path(path).parent.mkdir(parents=True, exist_ok=True)
    partial_path = f'{path}.part'
    # The context manager closes the handle even when a chunk fails.
    with io.FileIO(partial_path, mode='wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f'Download {int(status.progress() * 100)}')
    os.replace(partial_path, path)

    return path


In [102]:
# Display the merged end-date raster path that the inference cell below reads.
end_date_merged_path

'/output/prepared/all_merged_S2A_MSIL1C_20200921T103031_N0209_R108_T32UMA_20200921T142406.tif'

In [103]:
# Inference inputs and parameters.
img_start_path, img_end_path = start_date_merged_path, end_date_merged_path
network = 'unet-diff'
model_weights_path = MODEL_PATH
save_path = OUTPUT_FOLDER
channels = ['RGB', 'B08', 'B8A', 'B11', 'B12', 'NDVI', 'NDMI']
threshold = 0.4
# Set to True to skip the model run and reuse a previously saved prediction.
polygonize_only = False

filename = img_start_path.split('/')[-1].split('.')[0] # Path(img_start_path).stem
predicted_filename = f'predicted_{filename}'

if not polygonize_only:
    # NOTE(review): predict_raster's first two parameters are named
    # img_current and img_previous, while the start-date raster is passed
    # first here - confirm the intended order.
    raster_array, meta = predict_raster(
        img_start_path,
        img_end_path,
        channels,
        network,
        model_weights_path
    )
    save_raster(raster_array, meta, save_path, filename)
else:
    # The context manager closes the dataset; no explicit close is needed.
    with rasterio.open(os.path.join(save_path, f'{predicted_filename}.tif')) as src:
        raster_array = src.read()
        raster_array = np.moveaxis(raster_array, 0, -1)
        meta = src.meta



# inference(start_date_merged_path, end_date_merged_path, tile)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 196/196 [26:16<00:00,  8.04s/it]


In [104]:
logging.info('Polygonize raster array of clearcuts...')
clearcuts = polygonize(raster_array > threshold, meta)
logging.info('Filter polygons of clearcuts')
# Only the rasters written by prepare_clouds ("*_clouds.tiff") are used; any
# other entry in the folder would otherwise be opened as a cloud raster.
cloud_files = sorted(
    os.path.join(CLOUD_DATA_FOLDER, file)
    for file in os.listdir(CLOUD_DATA_FOLDER)
    if file.endswith('_clouds.tiff')
)
polygons = postprocessing(tile,
                          cloud_files,
                          clearcuts,
                          meta['crs'],
                          year,
                          aoi)

save_polygons(polygons, save_path, predicted_filename)


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8561/8561 [00:01<00:00, 5422.83it/s]


gdalwarp --config GDAL_CACHEMAX 3000 -wm 3000 -t_srs EPSG:4326 /code/data/landcovers/32U_crop.tif /code/data/landcovers/landcover2020.tif
Copying color table from /code/data/landcovers/32U_crop.tif to new file.
Creating output file that is 2797P x 1793L.
Processing input file /code/data/landcovers/32U_crop.tif.
Using internal nodata values (e.g. 0) for image /code/data/landcovers/32U_crop.tif.
Copying nodata values from source /code/data/landcovers/32U_crop.tif to destination /code/data/landcovers/landcover2020.tif.
0...10...20...30...40...50...60...70...80...90...100 - done.
/code/data/landcovers/landcover2020.tif was merged


  0%|                                                                                                                                | 0/1 [00:00<?, ?it/s]

/code/data/landcovers/32U_crop.tif


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.21it/s]


predicted_all_merged_S2A_MSIL1C_20200626T104031_N0209_R008_T32UMA_20200626T125124 saved.
