In [1]:
import os, sys

from jinja2 import Template
import yaml
import shutil

# OUTLINE

[General options](#General-options)  
[Job Config file for pulling pipeline code from repo](#Job-Config-file-for-pulling-pipeline-code-from-repo)  
[Job for data download](#Job-Config-file-for-data-download)  
[Job for data postprocessing](#Job-Config-file-for-data-postprocessing)  
[Job for image filtering and normalization](#Job-Config-file-for-image-filtering-and-normalization)  
[Job for creating labels](#Job-Config-file-for-creating-labels)  
[Job for generating chips](#Job-Config-file-for-generating-chips)  

# Prepare folders

In [2]:
# Make sure the pipeline root folder and one sub-folder per pipeline stage exist.
# Folders that are already present are left untouched and reported on stdout.
pipeline_root = 'canada_2019_sentinel_pipeline'

if not os.path.exists(pipeline_root):
    os.mkdir(pipeline_root)
else:
    print('canada_2019_sentinel_pipeline alredy exists')

# Stage folders, listed in the order the pipeline runs them.
for stage_dir in ('0.pull_repo',
                  '1.download',
                  '2.postprocessing',
                  '3.normalization',
                  '4.create_label',
                  '5.generate_chips'):
    stage_path = pipeline_root + '/' + stage_dir
    if not os.path.exists(stage_path):
        os.mkdir(stage_path)
    else:
        # For an existing stage folder only its path is printed.
        print(stage_path)
    

canada_2019_sentinel_pipeline alredy exists
canada_2019_sentinel_pipeline/0.pull_repo
canada_2019_sentinel_pipeline/1.download
canada_2019_sentinel_pipeline/2.postprocessing
canada_2019_sentinel_pipeline/3.normalization
canada_2019_sentinel_pipeline/4.create_label
canada_2019_sentinel_pipeline/5.generate_chips


# General options

In [3]:
# create dictionary that holds all the options for generating all the yml files 

In [4]:
# Settings shared by every manifest-generation step in this notebook.
sets = 5  # number of sets; the generation loops iterate range(sets) and name folders set1..setN
batches = [5,15,49,59,59]  # number of batches in each set, indexed by set position (0-based)
pvc_name = 'canada2019-3'  # rendered as the persistentVolumeClaim claimName in every template
mountpath = 'canada'  # rendered as the volume name and, with a leading '/', as the container mountPath

In [5]:
# Bundle the settings into a single dictionary; the generation loops read
# their parameters from here rather than from the individual variables.
general_options = dict(
    sets=sets,
    batches=batches,
    pvc_name=pvc_name,
    mountpath=mountpath,
)
general_options

{'sets': 5,
 'batches': [5, 15, 49, 59, 59],
 'pvc_name': 'canada2019-3',
 'mountpath': 'canada'}

# Job Config file for pulling pipeline code from repo

In [6]:
# description:
'''
these yml files are used to pull the repo containing the code into the appropriate folder
'''

'\nthese yml files are used to pull the repo containing the code into the appropriate folder\n'

## template

In [7]:
# Kubernetes batch/v1 Job manifest (jinja2 text) for the pull-repo stage.
# Placeholders filled at render time: set_idx, batch, mountpath, pvc_name.
# The alpine/git container clones the sentinel_download repository into
# /<mountpath>/data/set<set_idx>/batch_<batch> on the mounted volume.
# workingDir is built from {{ mountpath }} like every other path in the manifest,
# so changing the mount path keeps the working directory and clone target in sync.
# NOTE(review): `backoffLimit:` has no value (YAML null) - confirm the intended retry limit.
template1 ='''apiVersion: batch/v1
kind: Job
metadata:
  name: job-sentinel-pullrepo-{{ set_idx }}-{{ batch }}-3
spec:
  template:
    spec:
      containers:
      - name: pod-sentinel-pullrepo-{{ set_idx }}-{{ batch }}-3
        image: alpine/git
        workingDir: /{{ mountpath }}/data/set{{ set_idx }}/batch_{{ batch }}
        args:
          - clone
          - --single-branch
          - https://gitlab.nrp-nautilus.io/aomqc/sentinel_download
          - /{{ mountpath }}/data/set{{ set_idx }}/batch_{{ batch }}        
        volumeMounts:
        - name: {{ mountpath }}
          mountPath: /{{ mountpath }}
        resources:
            limits:
              memory: 1Gi
              cpu: "1"
            requests:
              memory: 1Gi
              cpu: "1"    
      volumes:
      - name: {{ mountpath }}
        persistentVolumeClaim:
            claimName: {{ pvc_name }}
      restartPolicy: OnFailure      
  backoffLimit: 
'''

In [8]:
# Wrap the pull-repo manifest text in a jinja2 Template so it can be rendered once per (set, batch).
j2_template1 = Template(template1)

## yml generation

In [9]:
# Generate one pull-repo Job manifest per (set, batch) pair.
# Output: canada_2019_sentinel_pipeline/0.pull_repo/set<N>/job-sentinelpullrepo-<N>-<batch>.yaml
for set_idx in range(general_options['sets']):
    set_number = set_idx + 1  # sets are numbered from 1 in folder and job names
    set_dir = 'canada_2019_sentinel_pipeline/0.pull_repo/set{}'.format(set_number)

    # Recreate the set folder from scratch so manifests from a previous run do not linger.
    if os.path.exists(set_dir):
        shutil.rmtree(set_dir)
    os.mkdir(set_dir)

    for idx in range(general_options['batches'][set_idx]):
        data = {'set_idx': set_number,
                'batch': idx,
                'pvc_name': general_options['pvc_name'],
                'mountpath': general_options['mountpath']}

        output_file = j2_template1.render(data)
        manifest_path = '{}/job-sentinelpullrepo-{}-{}.yaml'.format(set_dir, set_number, idx)
        # The context manager closes the file even if the write raises.
        with open(manifest_path, 'w') as fileout:
            fileout.write(output_file)

# Job Config file for data download

In [10]:
# description:
'''
these yml files are used to download images from sentinel hub into the specified folder
this operation will be repeated multiple times until all possible images are downloaded
'''

'\nthese yml files are used to download images from sentinel hub into the specified folder\nthis operation will be repeated multiple times until all possible images are downloaded\n'

## template

In [11]:
# Kubernetes batch/v1 Job manifest (jinja2 text) for the download stage.
# Placeholders filled at render time: set_idx, batch, mountpath, pvc_name, username, password.
# The container command makes dhusget.sh executable (chmod 777) and then runs
# sentinel_data_download.py with the credentials and the per-batch coordinates YAML as arguments.
# NOTE(review): username and password end up in plain text in the rendered manifest.
# NOTE(review): `backoffLimit:` has no value (YAML null) - confirm the intended retry limit.
template2 ='''apiVersion: batch/v1
kind: Job
metadata:
  name: job-sentinel-download-{{ set_idx }}-{{ batch }}-3
spec:
  template:
    spec:
      containers:
      - name: pod-sentinel-download-{{ set_idx }}-{{ batch }}-3
        image: gitlab-registry.nrp-nautilus.io/aomqc/deeplearning_pytorch:bc1aa4e7
        workingDir: /{{ mountpath }}/data/set{{ set_idx }}/batch_{{ batch }}
        command: ["/bin/sh","-c"]
        args:
        - chmod 777 dhusget.sh;
          python3 sentinel_data_download.py {{username}} {{password}} sentinel_coordinates/Canada_fir_2019_{{ set_idx }}_{{ batch }}.yaml
        volumeMounts:
        - name: {{ mountpath }}
          mountPath: /{{ mountpath }}  
        resources:
            limits:
              memory: 1Gi
              cpu: "1"
            requests:
              memory: 1Gi
              cpu: "1"               
      volumes:
      - name: {{ mountpath }} 
        persistentVolumeClaim:
            claimName: {{ pvc_name }}
      restartPolicy: OnFailure      
  backoffLimit: 

'''

In [12]:
# Wrap the download manifest text in a jinja2 Template so it can be rendered once per (set, batch).
j2_template2 = Template(template2)

## yml generation

In [13]:
# Generate one download Job manifest per (set, batch) pair.
# Output: canada_2019_sentinel_pipeline/1.download/set<N>/job-sentineldownload-<N>-<batch>.yaml
for set_idx in range(general_options['sets']):
    set_number = set_idx + 1  # sets are numbered from 1 in folder and job names
    set_dir = 'canada_2019_sentinel_pipeline/1.download/set{}'.format(set_number)

    # Recreate the set folder from scratch so manifests from a previous run do not linger.
    if os.path.exists(set_dir):
        shutil.rmtree(set_dir)
    os.mkdir(set_dir)

    for idx in range(general_options['batches'][set_idx]):
        data = {'set_idx': set_number,
                'batch': idx,
                'pvc_name': general_options['pvc_name'],
                'mountpath': general_options['mountpath'],
                # Placeholder credentials: template2 renders these verbatim into the
                # container args, so the generated YAML contains them in plain text.
                'username': 'username',
                'password': 'password',
               }

        output_file = j2_template2.render(data)
        manifest_path = '{}/job-sentineldownload-{}-{}.yaml'.format(set_dir, set_number, idx)
        # The context manager closes the file even if the write raises.
        with open(manifest_path, 'w') as fileout:
            fileout.write(output_file)

# Job Config file for data postprocessing

In [14]:
# description:
'''
these yml files are used to process the downloaded images by performing the following steps:
    unzip downloaded images in folder PRODUCT and place them in 
    Separate images based on type to 1C and 2A
    Create a folder for each image type
    Move each image type to its corresponding folder
'''

'\nthese yml files are used to process the downloaded images by performing the following steps:\n    unzip downloaded images in folder PRODUCT and place them in \n    Separate images based on type to 1C and 2A\n    Create a folder for each image type\n    Move each image type to its corresponding folder\n'

## template

In [15]:
# Kubernetes batch/v1 Job manifest (jinja2 text) for the post-processing stage.
# Placeholders filled at render time: set_idx, batch, mountpath, pvc_name.
# The container runs sentinel_data_postprocessing.py from the batch folder on the mounted volume.
# NOTE(review): `backoffLimit:` has no value (YAML null) - confirm the intended retry limit.
template3 ='''apiVersion: batch/v1
kind: Job
metadata:
  name: job-sentinel-postprocess-{{ set_idx }}-{{ batch }}-3
spec:
  template:
    spec:
      containers:
      - name: pod-sentinel-postprocess-{{ set_idx }}-{{ batch }}-3
        image: gitlab-registry.nrp-nautilus.io/aomqc/deeplearning_pytorch:bc1aa4e7
        workingDir: /{{ mountpath }}/data/set{{ set_idx }}/batch_{{ batch }}
        command: ["/bin/sh","-c"]
        args:
        - python3 sentinel_data_postprocessing.py
        volumeMounts:
        - name: {{ mountpath }}
          mountPath: /{{ mountpath }}  
        resources:
            limits:
              memory: 10Gi
              cpu: "2"
            requests:
              memory: 10Gi
              cpu: "2"               
      volumes:
      - name: {{ mountpath }} 
        persistentVolumeClaim:
            claimName: {{ pvc_name }}
      restartPolicy: OnFailure      
  backoffLimit: 

'''

In [16]:
# Wrap the post-processing manifest text in a jinja2 Template so it can be rendered once per (set, batch).
j2_template3 = Template(template3)

## yml generation

In [17]:
# Generate one post-processing Job manifest per (set, batch) pair.
# Output: canada_2019_sentinel_pipeline/2.postprocessing/set<N>/job-sentinelpostprocess-<N>-<batch>.yaml
for set_idx in range(general_options['sets']):
    set_number = set_idx + 1  # sets are numbered from 1 in folder and job names
    set_dir = 'canada_2019_sentinel_pipeline/2.postprocessing/set{}'.format(set_number)

    # Recreate the set folder from scratch so manifests from a previous run do not linger.
    if os.path.exists(set_dir):
        shutil.rmtree(set_dir)
    os.mkdir(set_dir)

    for idx in range(general_options['batches'][set_idx]):
        data = {'set_idx': set_number,
                'batch': idx,
                'pvc_name': general_options['pvc_name'],
                'mountpath': general_options['mountpath']}

        output_file = j2_template3.render(data)
        manifest_path = '{}/job-sentinelpostprocess-{}-{}.yaml'.format(set_dir, set_number, idx)
        # The context manager closes the file even if the write raises.
        with open(manifest_path, 'w') as fileout:
            fileout.write(output_file)

# Job Config file for image filtering and normalization

In [18]:
# description
'''
this code takes in tif files and performs the following:
    Filter out images with nodata covering more than 80% of the image
    Filter out images where cloud mask covers more than 25% of valid data
    Read each band as npy array
    normalize bands using one of these three methods:
        Method1:
            get *min* of each band
            select the minimum among the three as *global min*
            compute *25th* and *75th* percentiles for each band
            compute *IQR = 75th-25th* for each band
            compute *max = 1.5xIQR* for each band
            select the top max among all max values of each band as *global max*
            compute *global range = global max - global min* 
            normalize each band using * new_band = (band - global min)/global range*
        Method2:
        Method3:
At the end bands are stacked together as a single numpy array
    save numpy array as numpy file
    save numpy array as png file 
'''

'\nthis code takes in tif files and performs the following:\n    Filter out images with nodata covering more than 80% of the image\n    Filter out images where cloud mask covers more than 25% of valid data\n    Read each band as npy array\n    normalize bands using one of these three methods:\n        Method1:\n            get *min* of each band\n            select the minimum among the three as *global min*\n            compute *25th* and *75th* percentiles for each band\n            compute *IQR = 75th-25th* for each band\n            compute *max = 1.5xIQR* for each band\n            select the top max among all max values of each band as *global max*\n            compute *global range = global max - global min* \n            normalize each band using * new_band = (band - global min)/global range*\n        Method2:\n        Method3:\nAt the end bands are stacked together as a single numpy array\n    save numpy array as numpy file\n    save numpy array as png file \n'

## template

In [19]:
# Kubernetes batch/v1 Job manifest (jinja2 text) for the filtering/normalization stage.
# Placeholders filled at render time: set_idx, batch, mountpath, pvc_name.
# The container runs raster_normalize.py with fixed arguments
# (--nodata 0.8 --valid 0.25 --norm 2 --source 2A) and requests one nvidia.com/gpu.
# NOTE(review): `backoffLimit:` has no value (YAML null) - confirm the intended retry limit.
template5 ='''apiVersion: batch/v1
kind: Job
metadata:
  name: job-sentinel-normalize-{{ set_idx }}-{{ batch }}-3
spec:
  template:
    spec:
      containers:
      - name: pod-sentinel-normalize-{{ set_idx }}-{{ batch }}-3
        image: gitlab-registry.nrp-nautilus.io/aomqc/deeplearning_pytorch:bc1aa4e7
        workingDir: /{{ mountpath }}/data/set{{ set_idx }}/batch_{{ batch }}
        command: ["/bin/sh","-c"]
        args:
        - python3 raster_normalize.py --nodata 0.8 --valid 0.25 --norm 2 --source 2A
        volumeMounts:
        - name: {{ mountpath }}
          mountPath: /{{ mountpath }}  
        resources:
            limits:
              memory: 40Gi
              cpu: "2"
              nvidia.com/gpu: 1
            requests:
              memory: 40Gi
              cpu: "2"
              nvidia.com/gpu: 1
      volumes:
      - name: {{ mountpath }} 
        persistentVolumeClaim:
            claimName: {{ pvc_name }}
      restartPolicy: OnFailure      
  backoffLimit: 

'''

In [20]:
# Wrap the normalization manifest text in a jinja2 Template so it can be rendered once per (set, batch).
j2_template5 = Template(template5)

## yml generation

In [21]:
# Generate one normalization Job manifest per (set, batch) pair.
# Output: canada_2019_sentinel_pipeline/3.normalization/set<N>/job-sentinelnormalize-<N>-<batch>.yaml
for set_idx in range(general_options['sets']):
    set_number = set_idx + 1  # sets are numbered from 1 in folder and job names
    set_dir = 'canada_2019_sentinel_pipeline/3.normalization/set{}'.format(set_number)

    # Recreate the set folder from scratch so manifests from a previous run do not linger.
    if os.path.exists(set_dir):
        shutil.rmtree(set_dir)
    os.mkdir(set_dir)

    for idx in range(general_options['batches'][set_idx]):
        data = {'set_idx': set_number,
                'batch': idx,
                'pvc_name': general_options['pvc_name'],
                'mountpath': general_options['mountpath']}

        output_file = j2_template5.render(data)
        manifest_path = '{}/job-sentinelnormalize-{}-{}.yaml'.format(set_dir, set_number, idx)
        # The context manager closes the file even if the write raises.
        with open(manifest_path, 'w') as fileout:
            fileout.write(output_file)

# Job Config file for creating labels

In [22]:
# description
'''
This yml config runs the code that generates labels by doing the following:
    Read group of polygons representing burned area
    Read image tif file
    convert tif image into polygon using its bounds
    intersect burned area polygons with image polygons
    if the intersection generates polygons plot them on a matrix with the same dimension as the image (label array)
    save label array as npy
    save label array as png
'''

'\nThis yml config runs the code that generates labels by doing the following:\n    Read group of polygons representing burned area\n    Read image tif file\n    convert tif image into polygon using its bounds\n    intersect burned area polygons with image polygons\n    if the intersection generates polygons plot them on a matrix with the same dimension as the image (label array)\n    save label array as npy\n    save label array as png\n'

## template

In [23]:
# Kubernetes batch/v1 Job manifest (jinja2 text) for the label-creation stage.
# Placeholders filled at render time: set_idx, batch, mountpath, pvc_name.
# The container runs create_label.py with the per-set polygon shapefile
# (CanadaFull_firearea_polygons_set_<set_idx>.shp) and requests two nvidia.com/gpu.
# NOTE(review): `backoffLimit:` has no value (YAML null) - confirm the intended retry limit.
template7 ='''apiVersion: batch/v1
kind: Job
metadata:
  name: job-sentinel-createlabel-{{ set_idx }}-{{ batch }}-3
spec:
  template:
    spec:
      containers:
      - name: pod-sentinel-createlabel-{{ set_idx }}-{{ batch }}-3
        image: gitlab-registry.nrp-nautilus.io/aomqc/deeplearning_pytorch:bc1aa4e7
        workingDir: /{{ mountpath }}/data/set{{ set_idx }}/batch_{{ batch }}
        command: ["/bin/sh","-c"]
        args:
        - python3 create_label.py --poly CanadaFull_firearea_polygons_set_{{ set_idx }}.shp
        volumeMounts:
        - name: {{ mountpath }}
          mountPath: /{{ mountpath }}  
        resources:
            limits:
              memory: 40Gi
              cpu: "2"
              nvidia.com/gpu: 2
            requests:
              memory: 40Gi
              cpu: "2"
              nvidia.com/gpu: 2               
      volumes:
      - name: {{ mountpath }} 
        persistentVolumeClaim:
            claimName: {{ pvc_name }}
      restartPolicy: OnFailure      
  backoffLimit: 

'''

In [24]:
# Wrap the label-creation manifest text in a jinja2 Template so it can be rendered once per (set, batch).
j2_template7 = Template(template7)

## yml generation

In [25]:
# Generate one label-creation Job manifest per (set, batch) pair.
# Output: canada_2019_sentinel_pipeline/4.create_label/set<N>/job-sentinelcreatelabel-<N>-<batch>.yaml
for set_idx in range(general_options['sets']):
    set_number = set_idx + 1  # sets are numbered from 1 in folder and job names
    set_dir = 'canada_2019_sentinel_pipeline/4.create_label/set{}'.format(set_number)

    # Recreate the set folder from scratch so manifests from a previous run do not linger.
    if os.path.exists(set_dir):
        shutil.rmtree(set_dir)
    os.mkdir(set_dir)

    for idx in range(general_options['batches'][set_idx]):
        data = {'set_idx': set_number,
                'batch': idx,
                'pvc_name': general_options['pvc_name'],
                'mountpath': general_options['mountpath']}

        output_file = j2_template7.render(data)
        manifest_path = '{}/job-sentinelcreatelabel-{}-{}.yaml'.format(set_dir, set_number, idx)
        # The context manager closes the file even if the write raises.
        with open(manifest_path, 'w') as fileout:
            fileout.write(output_file)

# Job Config file for generating chips

In [26]:
# description
'''
'''

'\n'

## template

In [27]:
# Kubernetes batch/v1 Job manifest (jinja2 text) for the chip-generation stage.
# Placeholders filled at render time: set_idx, batch, mountpath, pvc_name.
# The container runs generate_chips.py with fixed arguments (--csize 256 --cratio 0.25)
# and requests two nvidia.com/gpu.
# NOTE(review): `backoffLimit:` has no value (YAML null) - confirm the intended retry limit.
template8 ='''apiVersion: batch/v1
kind: Job
metadata:
  name: job-sentinel-generatechips-{{ set_idx }}-{{ batch }}-3
spec:
  template:
    spec:
      containers:
      - name: pod-sentinel-generatechips-{{ set_idx }}-{{ batch }}-3
        image: gitlab-registry.nrp-nautilus.io/aomqc/deeplearning_pytorch:bc1aa4e7
        workingDir: /{{ mountpath }}/data/set{{ set_idx }}/batch_{{ batch }}
        command: ["/bin/sh","-c"]
        args:
        - python3 generate_chips.py --csize 256 --cratio 0.25
        volumeMounts:
        - name: {{ mountpath }}
          mountPath: /{{ mountpath }}  
        resources:
            limits:
              memory: 40Gi
              cpu: "2"
              nvidia.com/gpu: 2
            requests:
              memory: 40Gi
              cpu: "2"
              nvidia.com/gpu: 2
      volumes:
      - name: {{ mountpath }} 
        persistentVolumeClaim:
            claimName: {{ pvc_name }}
      restartPolicy: OnFailure      
  backoffLimit: 

'''

In [28]:
# Wrap the chip-generation manifest text in a jinja2 Template so it can be rendered once per (set, batch).
j2_template8 = Template(template8)

## yml generation

In [29]:
# Generate one chip-generation Job manifest per (set, batch) pair.
# Output: canada_2019_sentinel_pipeline/5.generate_chips/set<N>/job-sentinelgeneratechips-<N>-<batch>.yaml
for set_idx in range(general_options['sets']):
    set_number = set_idx + 1  # sets are numbered from 1 in folder and job names
    set_dir = 'canada_2019_sentinel_pipeline/5.generate_chips/set{}'.format(set_number)

    # Recreate the set folder from scratch so manifests from a previous run do not linger.
    if os.path.exists(set_dir):
        shutil.rmtree(set_dir)
    os.mkdir(set_dir)

    for idx in range(general_options['batches'][set_idx]):
        data = {'set_idx': set_number,
                'batch': idx,
                'pvc_name': general_options['pvc_name'],
                'mountpath': general_options['mountpath']}

        output_file = j2_template8.render(data)
        manifest_path = '{}/job-sentinelgeneratechips-{}-{}.yaml'.format(set_dir, set_number, idx)
        # The context manager closes the file even if the write raises.
        with open(manifest_path, 'w') as fileout:
            fileout.write(output_file)