In [None]:
%%capture
!pip -q install geopandas
!pip -q install geojson
!pip -q install --upgrade folium
!pip -q install geemap
!pip -q install rasterio
!pip install fastai --upgrade

In [None]:
import geojson
import rasterio as rio
from rasterio.plot import show
import geopandas as gpd
import ee
import geemap
import geemap.eefolium as emap
from matplotlib import pyplot as plt
import numpy
from fastai.vision.all import *

In [None]:
# Mount Google Drive (Colab only) so the dataset archives stored there can be read.
# NOTE(review): later cells use the relative path 'drive/My Drive/...', which assumes
# the working directory is /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Load the full .tif data

We have 4 different archives, one for each quarter of the year. The labels are contained in the **landcovernet_full_1** archive.

In [None]:
# Remove a leftover 'landcovernet' folder from a previous run, if there is one.
# ignore_errors=True keeps a fresh runtime (where the folder does not exist yet)
# from failing with FileNotFoundError.
shutil.rmtree('landcovernet', ignore_errors=True)

In [None]:
%%capture
# Extract archive 1 of 4 from the mounted Drive into the current working directory
# (%%capture hides the long unzip log). Per the notebook text, this archive also holds the labels.
# NOTE(review): the rename cell further down expects a 'landcovernet_1' folder — presumably created here.
!unzip 'drive/My Drive/Omdena/My work/landcovernet_full_1.zip'

In [None]:
%%capture
# Extract archive 2 of 4 into the current working directory (log hidden by %%capture).
# NOTE(review): presumably creates the 'landcovernet_2' folder used by the rename cell — confirm.
!unzip 'drive/My Drive/Omdena/My work/landcovernet_full_2.zip'

In [None]:
%%capture
# Extract archive 3 of 4 into the current working directory (log hidden by %%capture).
# NOTE(review): presumably creates the 'landcovernet_3' folder used by the rename cell — confirm.
!unzip 'drive/My Drive/Omdena/My work/landcovernet_full_3.zip'

In [None]:
%%capture
# Extract archive 4 of 4 into the current working directory (log hidden by %%capture).
# NOTE(review): presumably creates the 'landcovernet_4' folder used by the rename cell — confirm.
!unzip 'drive/My Drive/Omdena/My work/landcovernet_full_4.zip'

Rename the folders, adding the date to the folder name. This is done because for each location, we have up to 4 different shots. This way, we'll have a distinct folder for each location and date in our final landcovernet folder.

In [None]:
# Append the acquisition date found in the band file names to every location folder,
# e.g. 'landcovernet_3/33PWQ_28' -> 'landcovernet_3/33PWQ_28_20180708'.
# The capture group keeps the leading underscore so it can be appended as-is.
date_pattern = re.compile(r'[0-9]+[A-Z]+_[0-9]+(_2018[0-9]+).*')

for i in range(4) :
  for loc in Path(f'landcovernet_{i+1}').ls() :
    # A path with more than three '_'-separated parts already carries a date: skip it,
    # which makes the cell safe to re-run.
    if len(str(loc).split('_')) > 3 : continue
    if 'ipynb_checkpoints' in str(loc) : shutil.rmtree(loc); continue

    # Reset for every folder so a folder without band files can never inherit
    # the date of the folder processed before it.
    date = None
    for file in loc.ls() :
      if '2018_LC_10m' in str(file) : continue # label file: its name holds no acquisition date
      found = date_pattern.findall(file.stem)
      if found : date = found[0]

    if date is None :
      print(f'No dated band file found in {loc}, folder left unchanged')
      continue
    os.rename(loc, str(loc)+date)

In [None]:
# Destination folder for the merged quarters; nothing happens if it already exists
os.makedirs('landcovernet_full_tif', exist_ok=True)

In [None]:
# Merge the four per-quarter folders into landcovernet_full_tif.
# `gsutil rsync -r` is used here as a recursive copy between local directories.
# The date suffix added to the folder names above is what lets shots of the same
# location from different quarters sit side by side in the merged folder.
!gsutil rsync -r 'landcovernet_1' 'landcovernet_full_tif'
!gsutil rsync -r 'landcovernet_2' 'landcovernet_full_tif'
!gsutil rsync -r 'landcovernet_3' 'landcovernet_full_tif'
!gsutil rsync -r 'landcovernet_4' 'landcovernet_full_tif'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying file://landcovernet_1/landcovernet_3/33PWQ_28_20180708/33PWQ_28_20180708_B02_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33PWQ_28_20180708/33PWQ_28_20180708_B03_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33PWQ_28_20180708/33PWQ_28_20180708_B04_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33PWQ_28_20180708/33PWQ_28_20180708_B08_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33PWQ_29_20180708/33PWQ_29_20180708_B02_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33PWQ_29_20180708/33PWQ_29_20180708_B03_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33PWQ_29_20180708/33PWQ_29_20180708_B04_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33PWQ_29_20180708/33PWQ_29_20180708_B08_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33RXQ_00_20180701/33RXQ_00_20180701_B02_10m.tif...
Copying file://landcovernet_1/landcovernet_3/33RXQ_00_20180701/33RXQ_00

In [None]:
%%capture
# Archive the merged .tif folder (zip log hidden by %%capture).
# NOTE(review): despite the 'clean' in the archive name, this runs before the
# clean-up section below, so the archive still holds every folder — confirm that is intended.
!zip -r landcovernet_full_clean_tif.zip landcovernet_full_tif

# Delete locs that are not present in the clean archive **landcovernet_final_png**

We now have the full raw data that includes cloudy images as well as badly labelled ones.

In [None]:
%%capture
# Extract the cleaned .png dataset into the current working directory.
# NOTE(review): later cells read 'landcovernet_final/inputs' and 'landcovernet_final/targets',
# presumably the folder this archive unpacks to — confirm.
!unzip 'drive/My Drive/Omdena/Data/landcovernet/landcovernet_final_png.zip'

In [None]:
# Names of the images kept in the clean .png data: the file name with its '_RGB.png'
# suffix removed. They are compared against the .tif folder names in the next cell.
locs = []
for pic in Path('landcovernet_final/inputs').ls() :
  found = re.findall(r'(.*)_RGB\.png', str(pic))
  # Entries that are not *_RGB.png images (e.g. a stray .ipynb_checkpoints folder)
  # are skipped instead of raising IndexError on the empty match list.
  if found : locs.append(found[0].split('/')[-1])

In [None]:
# Remove every .tif folder whose location name is absent from the clean .png data
clean_locs = set(locs)
for fold in Path('landcovernet_full_tif').ls() :
  if fold.name in clean_locs : continue # present in the clean data: keep it
  shutil.rmtree(fold)

In [None]:
# Show what is left after the clean-up; the '(#N)' prefix of the output is the number of folders
Path('landcovernet_full_tif').ls()

(#2512) [Path('landcovernet_full_tif/35KKP_29_20180702'),Path('landcovernet_full_tif/33KUT_07_20180103'),Path('landcovernet_full_tif/34NCG_24_20181022'),Path('landcovernet_full_tif/35NRD_21_20181015'),Path('landcovernet_full_tif/35LNF_22_20180108'),Path('landcovernet_full_tif/31NFH_28_20180106'),Path('landcovernet_full_tif/35JKL_21_20180405'),Path('landcovernet_full_tif/34NBP_27_20180103'),Path('landcovernet_full_tif/31NFH_13_20181202'),Path('landcovernet_full_tif/38KMA_11_20181001')...]

We end up with the same 2512 images, but now in .tif format.

Now, delete labels. This is done because we already have labels in the **targets** subfolder.

In [None]:
# Remove the label rasters (file names containing '_LC_10m') from every location folder
for fold in Path('landcovernet_full_tif').ls() :
  label_files = [pic for pic in fold.ls() if '_LC_10m' in pic.name]
  for label_file in label_files : label_file.unlink()

Copy the .tif folders into a new **inputs** folder, merge it with the **targets** folder from landcovernet_final_png inside a new parent folder, and save the result as a zip archive.

In [None]:
# Create the output folder together with its 'targets' subfolder; nothing happens if they already exist
os.makedirs('landcovernet_yes_baby/targets', exist_ok=True)

In [None]:
# Copy the cleaned .tif folders into the 'inputs' subfolder of the output folder
# (`gsutil rsync -r` used as a recursive copy between local directories)
!gsutil rsync -r 'landcovernet_full_tif' 'landcovernet_yes_baby/inputs'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying file://landcovernet_full_tif/35JQG_05_20181001/35JQG_05_20181001_B02_10m.tif...
Copying file://landcovernet_full_tif/35JQG_05_20181001/35JQG_05_20181001_B03_10m.tif...
Copying file://landcovernet_full_tif/35JQG_05_20181001/35JQG_05_20181001_B04_10m.tif...
Copying file://landcovernet_full_tif/35JQG_05_20181001/35JQG_05_20181001_B08_10m.tif...
Copying file://landcovernet_full_tif/35JQG_06_20180104/35JQG_06_20180104_B02_10m.tif...
Copying file://landcovernet_full_tif/35JQG_06_20180104/35JQG_06_20180104_B03_10m.tif...
Copying file://landcovernet_full_tif/35JQG_06_20180104/35JQG_06_20180104_B04_10m.tif...
Copying file://landcovernet_full_tif/35JQG_06_20180104/35JQG_06_20180104_B08_10m.tif...
Copying file://landcovernet_full_tif/35JQG_06_20180402/35JQG_06_20180402_B02_10m.tif...
Copying file://landcovernet_full_tif/35JQG_06_20180402/35JQG_06_20180402_B03_10m.tif...
Copying file://landcovernet_full_tif/35JQG_06_20180402/

In [None]:
# Copy the .png label images of the clean dataset into the 'targets' subfolder of the output folder
# (`gsutil rsync -r` used as a recursive copy between local directories)
!gsutil rsync -r 'landcovernet_final/targets' 'landcovernet_yes_baby/targets'

Building synchronization state...
Starting synchronization...
Copying file://landcovernet_final/targets/28QDE_00_2018_LC_10m.png...
/ [0 files][    0.0 B/  591.0 B]                                                / [1 files][  591.0 B/  591.0 B]                                                Copying file://landcovernet_final/targets/28QDE_01_2018_LC_10m.png...
/ [1 files][  591.0 B/  1.1 KiB]                                                / [2 files][  1.1 KiB/  1.1 KiB]                                                Copying file://landcovernet_final/targets/28QDE_02_2018_LC_10m.png...
Copying file://landcovernet_final/targets/28QDE_05_2018_LC_10m.png...
/ [4 files][  3.7 KiB/  3.7 KiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m rsync ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

In [None]:
# Archive the output folder: at this point 'inputs' holds the B02, B03, B04 and B08 bands
# of each location (see the zip log below), next to the 'targets' labels
!zip -r landcovernet_clean_RGBN.zip landcovernet_yes_baby

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: landcovernet_yes_baby/inputs/35JQG_29_20180409/35JQG_29_20180409_B08_10m.tif (deflated 27%)
  adding: landcovernet_yes_baby/inputs/35JQG_29_20180409/35JQG_29_20180409_B02_10m.tif (deflated 41%)
  adding: landcovernet_yes_baby/inputs/35JQG_29_20180409/35JQG_29_20180409_B03_10m.tif (deflated 35%)
  adding: landcovernet_yes_baby/inputs/35JQG_29_20180409/35JQG_29_20180409_B04_10m.tif (deflated 38%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/ (stored 0%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/35JNN_16_20180105_B02_10m.tif (deflated 38%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/35JNN_16_20180105_B08_10m.tif (deflated 38%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/35JNN_16_20180105_B03_10m.tif (deflated 36%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/35JNN_16_20180105_B04_10m.tif (deflated 33%)
  adding: landcovernet_yes_baby/inputs/36RVP_27_2

Create bands for NDVI and save them

In [None]:
def get_ndvi(red_path, nir_path) :
  """Compute the NDVI, (NIR - red) / (NIR + red), from two raster files.

  Args:
    red_path: path of the raster holding the red band.
    nir_path: path of the raster holding the near-infrared band.

  Returns:
    Float array with the shape rasterio's `read()` returns for the inputs
    (every band of each file is read). Pixels where red + NIR is zero come out
    as NaN or inf instead of raising.
  """
  # Context managers close each dataset as soon as its pixels are in memory
  with rio.open(red_path) as src : red = src.read().astype(float)
  with rio.open(nir_path) as src : nir = src.read().astype(float)

  # Allow division by zero for this expression only, leaving numpy's global
  # error settings untouched for the rest of the notebook
  with numpy.errstate(divide='ignore', invalid='ignore') :
    ndvi = (nir - red) / (red + nir)
  return ndvi

In [None]:
# Compute the NDVI of every location folder and save it next to the other bands
# as '<location>_NDVI_10m.tif'
for loc in Path('landcovernet_yes_baby/inputs').ls() :
  # Reset for every folder so a folder missing a band can never silently reuse
  # the file found in the folder processed before it
  red = nir = None
  for pic in loc.ls() :
    if '_B04_10m' in str(pic) : red = pic
    if '_B08_10m' in str(pic) : nir = pic
  if red is None or nir is None :
    print(f'Skipping {loc}: red (B04) or near-infrared (B08) band is missing')
    continue
  ndvi = get_ndvi(red_path=red, nir_path=nir)

  # Use the red band as the metadata template, rather than whichever file the
  # loop above happened to end on
  with rio.open(red) as src :
    kwargs = src.meta
  kwargs.update(dtype=ndvi.dtype) # NDVI is floating point, unlike the source bands' dtype

  # Path prefix up to and including the last '_' before the band name
  dest = re.findall(r'(.*_)B04_10m.tif', str(red))[0]
  with rio.open(dest + 'NDVI_10m.tif', 'w', **kwargs) as dst:
    dst.write(ndvi)

In [None]:
# Back up the RGB + NIR archive to Drive.
# NOTE(review): the archive was zipped before the NDVI files were written, so (when the
# cells run top to bottom) it should not contain them — confirm that is intended.
!cp landcovernet_clean_RGBN.zip 'drive/My Drive/Omdena/My work'

Delete B8 band

In [None]:
# Remove the B08 band from every location folder
for loc in Path('landcovernet_yes_baby/inputs').ls() :
  b08_files = [pic for pic in loc.ls() if '_B08_10m' in str(pic)]
  for b08_file in b08_files : os.remove(b08_file)

In [None]:
# Archive the output folder again: 'inputs' now holds B02, B03, B04 and the NDVI band
# (B08 was deleted in the previous cell), as the zip log below shows
!zip -r landcovernet_clean_RGB_NDVI.zip landcovernet_yes_baby

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: landcovernet_yes_baby/inputs/35JQG_29_20180409/35JQG_29_20180409_NDVI_10m.tif (deflated 16%)
  adding: landcovernet_yes_baby/inputs/35JQG_29_20180409/35JQG_29_20180409_B02_10m.tif (deflated 41%)
  adding: landcovernet_yes_baby/inputs/35JQG_29_20180409/35JQG_29_20180409_B03_10m.tif (deflated 35%)
  adding: landcovernet_yes_baby/inputs/35JQG_29_20180409/35JQG_29_20180409_B04_10m.tif (deflated 38%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/ (stored 0%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/35JNN_16_20180105_NDVI_10m.tif (deflated 13%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/35JNN_16_20180105_B02_10m.tif (deflated 38%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/35JNN_16_20180105_B03_10m.tif (deflated 36%)
  adding: landcovernet_yes_baby/inputs/35JNN_16_20180105/35JNN_16_20180105_B04_10m.tif (deflated 33%)
  adding: landcovernet_yes_baby/inputs/36RVP_27

In [None]:
# Back up the RGB + NDVI archive to Drive
!cp landcovernet_clean_RGB_NDVI.zip 'drive/My Drive/Omdena/My work'

In [None]:
def get_rgb(path_dict) :
  """Build an RGB image scaled to the range 0-255 from three single-band rasters.

  Args:
    path_dict: mapping with the keys 'red', 'green' and 'blue', each holding the
      path of the raster for that channel (B4, B3 and B2 respectively).

  Returns:
    Integer array of shape (height, width, 3). The min-max scaling uses the
    minimum and maximum over all three channels together. An image whose
    pixels all share one value comes back as all zeros.
  """
  channels = []
  for key in ('red', 'green', 'blue') : # B4, B3, B2
    # The context manager closes the dataset once its first band is in memory
    with rio.open(path_dict[key]) as src : channels.append(src.read(1))
  rgb = np.dstack(channels)

  lo, hi = rgb.min(), rgb.max()
  # A constant image would give 0/0 = NaN, which cannot be cast to int meaningfully
  if hi == lo : return np.zeros(rgb.shape, dtype=int)

  # normalize and convert to range 0-255
  rgb = ((rgb - lo) / (hi - lo) * 255).astype(int)
  return rgb

In [None]:
def get_labels(path):
  """Read the first band of the label raster (LC_10m.tif file) at `path`.

  Args:
    path: path of the label raster.

  Returns:
    2-D array holding band 1 of the file.
  """
  # The context manager closes the dataset once the band is in memory
  with rio.open(path) as src :
    labels = src.read(1)
  return labels