In [1]:
#Import relevant Classes 
import descarteslabs as dl
import descarteslabs.workflows as wf
import numpy as np
import math
import pickle as pkl
import csv
import sys
import os
import random
sys.path.append("../")
import constants
from multiprocessing.pool import ThreadPool
from descarteslabs.workflows.models.exceptions import JobTimeoutError
from gzip import compress, decompress
from descarteslabs import Storage, Auth
sys.path.append("../UI_Labeling")
import config
from tqdm.notebook import tqdm
from descarteslabs.scenes import AOI

!{sys.executable} -m pip install opencv-contrib-python
import cv2

  serialization_context = pa.SerializationContext()
  pa.register_default_serialization_handlers(serialization_context)


Looking in indexes: https://pypi.org/simple, https://packagecloud.io/github/git-lfs/pypi/simple


## Editable Variables, change as needed
## Ensure the ../UI_Labeling/config.py file has the correct user information

In [23]:
# Base name of the labelled dataset; 'Confirmed' / 'Denied' is appended to form the Storage keys
datasetName = 'updatedModel_state_sampled_slender_gerbil'

# Output folder that receives the positives/ and negatives/ images plus metadata.csv
PATHNAME = 'Image_Datasets/int_0_updatedModel_state_sampled_pakistan_dl_dataset/'

# Index of the first batch to process (lower bound of the batch loop in the download functions)
STARTFROM = 0

# Number of tile keys handled per batch
DATASIZE = 2500

# Value passed as timeout= to every Workflows compute call (presumably seconds -- TODO confirm)
TIMEOUT = 600

In [24]:
def create_CSV_from_dict_keys(confirmed,denied,lrimage,start):
    """Save every labelled image as a .npy file and record it in metadata.csv.

    Args:
        confirmed: collection of tile keys labelled positive (label 1); only
            membership tests are performed on it.
        denied: collection of tile keys labelled negative (label 0); consulted
            only for keys that are not in ``confirmed``.
        lrimage: dict mapping tile key -> image array to save.
        start: integer used to name the first saved file; it is incremented by
            one for every image that is actually written.

    Keys found in neither collection are skipped. All files go below the
    module-level PATHNAME. metadata.csv is appended to when it already exists,
    otherwise it is created with a header row.
    """
    metadata_path = PATHNAME + 'metadata.csv'
    rows = []
    next_index = start

    # Walk the downloaded images and store each one in the folder matching its label
    print("Downloading Images to "+ PATHNAME +" folders!")
    for tile_key in tqdm(lrimage.keys()):
        if tile_key in confirmed:
            label, folder = 1, 'positives/'
        elif tile_key in denied:
            label, folder = 0, 'negatives/'
        else:
            # Unlabelled key: nothing to save
            continue
        tile = dl.scenes.DLTile.from_key(tile_key)
        relative_stem = folder + str(next_index)
        rows.append([relative_stem + '.npy', label, tile.geometry, tile_key])
        # np.save appends the .npy extension itself
        np.save(PATHNAME + relative_stem, lrimage[tile_key])
        next_index += 1

    # An existing metadata file is extended; a new one starts with the header row
    if os.path.exists(metadata_path):
        print("Adding to metadata.csv in "+PATHNAME+"!")
        mode, header = 'a', []
    else:
        print("Writing to metadata.csv in "+PATHNAME+"!")
        mode, header = 'w', [['Image','Label','Geometry','Key']]

    with open(metadata_path, mode, newline='') as file:
        csv.writer(file).writerows(header + rows)

In [25]:
def get_10_band_images(total_dt):
    """Download 10-band Sentinel-2 composites for the given tile keys, batch by batch.

    For every DLTile key a median composite of the "esa:sentinel-2:l2a:v1"
    images between 2018-11-01 and 2019-03-01 with cloud_fraction < 0.1 is
    computed, passed through scale_values(0,255), and collected. After each
    batch of DATASIZE keys the collected images are handed to
    create_CSV_from_dict_keys, which writes them to disk.

    Args:
        total_dt: sliceable sequence (e.g. a list) of DLTile keys.

    Relies on the module-level names PATHNAME, TIMEOUT, DATASIZE, STARTFROM and
    on the global ``confirmed`` / ``denied`` label collections. Tiles whose
    compute job times out are reported and skipped.
    """
    #Build the 10 band s2 median composite, filtered by cloud_fraction
    bands = "coastal-aerosol blue green red red-edge red-edge-3 red-edge-4 nir swir1 swir2"

    s2=wf.ImageCollection.from_id(
        "esa:sentinel-2:l2a:v1",
        start_datetime='2018-11-01',
        end_datetime='2019-03-01',
    )

    s2_bands = s2.pick_bands(bands)
    s2_bands = s2_bands.filter(lambda img: img.properties["cloud_fraction"]<0.1)
    s2_bands = s2_bands.median(axis="images")

    keyImageDict={}
    #PATHNAME is a nested path, so create missing parents too (os.mkdir would fail on them)
    for subfolder in ('positives','negatives'):
        os.makedirs(os.path.join(PATHNAME,subfolder),exist_ok=True)

    #Worker: compute one tile and store its array in keyImageDict
    def getimage(keyPair):
        tileKey2=keyPair
        tile_area2=dl.scenes.DLTile.from_key(tileKey2)
        try:
            img_data=s2_bands.scale_values(0,255).compute(tile_area2,progress_bar=False,timeout=TIMEOUT)
        except JobTimeoutError:
            print('Low Res Time Out, key:',tileKey2)
            return
        if img_data is not None and img_data.ndarray is not None:
            keyImageDict[tileKey2]=img_data.ndarray.data

    #Chunk out the images to pass to getimage to save as we go
    print('Getting Images!')
    steps=math.ceil(len(total_dt)/DATASIZE)
    for x in range(STARTFROM,steps):
        #Each batch starts from an empty dict so only its own images get saved
        keyImageDict.clear()

        start=x*DATASIZE
        end=min((x+1)*DATASIZE,len(total_dt))
        batch=total_dt[start:end]

        #Use threadpooling to run through each image
        print('Group#',x, 'of',steps-1)
        with tqdm(total=len(batch)) as pbar:
            with ThreadPool(processes=64) as pool:
                for _ in pool.imap_unordered(getimage,batch):
                    pbar.update()

        #Pass the finished key-image dict to our saving function
        print('Saving this batch')
        create_CSV_from_dict_keys(confirmed,denied,keyImageDict,start)
    print('Done!')
    

In [26]:
def get_3_band_images(total_dt):
    """Download red/green/blue Sentinel-2 composites on a resolution=2 AOI, batch by batch.

    For every DLTile key a median composite of the "esa:sentinel-2:l2a:v1"
    images between 2018-11-01 and 2019-03-01 with cloud_fraction < 0.1 is
    computed over an AOI built from the tile geometry and CRS with
    resolution=2, passed through scale_values(0,255), and cropped to the first
    320 entries along each of the last two axes. After each batch of DATASIZE
    keys the collected images are handed to create_CSV_from_dict_keys.

    Args:
        total_dt: sliceable sequence (e.g. a list) of DLTile keys.

    Relies on the module-level names PATHNAME, TIMEOUT, DATASIZE, STARTFROM and
    on the global ``confirmed`` / ``denied`` label collections. Tiles whose
    compute job times out are reported and skipped.
    """
    #Build the 3 band (RGB) s2 median composite, filtered by cloud_fraction
    bands = "red green blue"

    ab=wf.ImageCollection.from_id(
        "esa:sentinel-2:l2a:v1",
        start_datetime='2018-11-01',
        end_datetime='2019-03-01',
    )

    ab_bands = ab.pick_bands(bands)
    ab_bands = ab_bands.filter(lambda img: img.properties["cloud_fraction"]<0.1)
    ab_bands = ab_bands.median(axis="images")

    keyImageDict={}
    #PATHNAME is a nested path, so create missing parents too (os.mkdir would fail on them)
    for subfolder in ('positives','negatives'):
        os.makedirs(os.path.join(PATHNAME,subfolder),exist_ok=True)

    #Worker: compute one tile and store its array in keyImageDict
    def getimage(keyPair):
        tileKey2=keyPair
        tile_area2=dl.scenes.DLTile.from_key(tileKey2)
        #Same footprint and CRS as the tile, but requested at resolution=2
        aoi = AOI(tile_area2.geometry,resolution=2,crs=tile_area2.crs)
        try:
            img_data=ab_bands.scale_values(0,255).compute(aoi,progress_bar=False,timeout=TIMEOUT)
        except JobTimeoutError:
            print('Low Res Time Out, key:',tileKey2)
            return
        if img_data is not None and img_data.ndarray is not None:
            #Crop to at most 320x320 along the last two axes
            #(presumably so every saved array has the same size -- TODO confirm)
            keyImageDict[tileKey2]=img_data.ndarray.data[:,:320,:320]

    #Chunk out the images to pass to getimage to save as we go
    print('Getting Images!')
    steps=math.ceil(len(total_dt)/DATASIZE)
    for x in range(STARTFROM,steps):
        #Each batch starts from an empty dict so only its own images get saved
        keyImageDict.clear()

        start=x*DATASIZE
        end=min((x+1)*DATASIZE,len(total_dt))
        batch=total_dt[start:end]

        #Use threadpooling to run through each image
        print('Group#',x, 'of',steps-1)
        with tqdm(total=len(batch)) as pbar:
            with ThreadPool(processes=64) as pool:
                for _ in pool.imap_unordered(getimage,batch):
                    pbar.update()

        #Pass the finished key-image dict to our saving function
        print('Saving this batch')
        create_CSV_from_dict_keys(confirmed,denied,keyImageDict,start)
    print('Done!')
    

In [27]:
def get_3_band_low_images(total_dt):
    """Download red/green/blue Sentinel-2 composites and upsample them 4x with ESPCN.

    For every DLTile key a median composite of the "esa:sentinel-2:l2a:v1"
    images between 2018-11-01 and 2019-03-01 with cloud_fraction < 0.1 is
    computed on the tile itself. The array is min-max normalised to uint8 and
    upsampled by the OpenCV dnn_superres model loaded from "ESPCN_x4.pb"
    (resolved relative to the working directory). After each batch of DATASIZE
    keys the collected images are handed to create_CSV_from_dict_keys.

    Args:
        total_dt: sliceable sequence (e.g. a list) of DLTile keys.

    Relies on the module-level names PATHNAME, TIMEOUT, DATASIZE, STARTFROM and
    on the global ``confirmed`` / ``denied`` label collections. Tiles whose
    compute job times out are reported and skipped.
    """
    #Build the 3 band (RGB) s2 median composite, filtered by cloud_fraction
    bands = "red green blue"

    s2=wf.ImageCollection.from_id(
        "esa:sentinel-2:l2a:v1",
        start_datetime='2018-11-01',
        end_datetime='2019-03-01',
    )

    s2_bands = s2.pick_bands(bands)
    s2_bands = s2_bands.filter(lambda img: img.properties["cloud_fraction"]<0.1)
    s2_bands = s2_bands.median(axis="images")

    keyImageDict={}
    #PATHNAME is a nested path, so create missing parents too (os.mkdir would fail on them)
    for subfolder in ('positives','negatives'):
        os.makedirs(os.path.join(PATHNAME,subfolder),exist_ok=True)

    #Worker: compute one tile, upsample it and store the result in keyImageDict
    def getimage(keyPair):
        tileKey2=keyPair
        tile_area2=dl.scenes.DLTile.from_key(tileKey2)
        try:
            img_data=s2_bands.compute(tile_area2,progress_bar=False,timeout=TIMEOUT)
        except JobTimeoutError:
            print('Low Res Time Out, key:',tileKey2)
            return
        if img_data is not None and img_data.ndarray is not None:
            #A fresh model is loaded per tile; whether one instance could be shared
            #by the worker threads is not established here, so this is left as is
            sr=cv2.dnn_superres.DnnSuperResImpl_create()
            path="ESPCN_x4.pb"
            sr.readModel(path)
            sr.setModel('espcn',4)
            #np.transpose without axes reverses the axis order (bands end up last)
            #NOTE(review): this also swaps the two leading axes -- confirm that the
            #spatially transposed image is intended
            rgb=np.transpose(img_data.ndarray.data)
            #Min-max normalise to 0..255; a constant image has peak 0 and must not
            #be divided (that would put NaN into the uint8 cast), it stays all zeros
            rgb=rgb-np.min(rgb)
            peak=np.max(rgb)
            if peak>0:
                rgb=rgb/peak
            rgb = np.uint8(255 * rgb)
            result=sr.upsample(rgb)
            keyImageDict[tileKey2]=result

    #Chunk out the images to pass to getimage to save as we go
    print('Getting Images!')
    steps=math.ceil(len(total_dt)/DATASIZE)
    for x in range(STARTFROM,steps):
        #Each batch starts from an empty dict so only its own images get saved
        keyImageDict.clear()

        start=x*DATASIZE
        end=min((x+1)*DATASIZE,len(total_dt))
        batch=total_dt[start:end]

        #Use threadpooling to run through each image
        print('Group#',x, 'of',steps-1)
        with tqdm(total=len(batch)) as pbar:
            with ThreadPool(processes=64) as pool:
                for _ in pool.imap_unordered(getimage,batch):
                    pbar.update()

        #Pass the finished key-image dict to our saving function
        print('Saving this batch')
        create_CSV_from_dict_keys(confirmed,denied,keyImageDict,start)
    print('Done!')
    

## FOR STANDARD DATASET GENERATION

In [28]:
#"""
# Open Descartes Labs Storage with the credentials defined in config
storage = Storage(
    auth=Auth(client_id=config.ID, client_secret=config.SECRET)
)

# NOTE: unpickling executes arbitrary code -- only load Storage blobs you trust
confirmed = pkl.loads(decompress(storage.get(datasetName + 'Confirmed')))
denied = pkl.loads(decompress(storage.get(datasetName + 'Denied')))

# Merge both label sets; confirmed is applied last, so it wins on duplicate keys
total_dt = dict(denied)
total_dt.update(confirmed)
#"""

## FOR FULL COUNTRY ANALYSIS

In [29]:
"""
import pandas as pd
totalDataset=pd.read_csv('../Predicted_Datasets/'+datasetName+'.csv').to_dict()
confirmed=[]
denied=[]
for idx,key in enumerate(totalDataset['tile_key'].values()):
    if totalDataset['prediction'][idx]:
        confirmed.append(key)
    else:
        denied.append(key)
total_dt=confirmed+denied
"""

"\nimport pandas as pd\ntotalDataset=pd.read_csv('../Predicted_Datasets/'+datasetName+'.csv').to_dict()\nconfirmed=[]\ndenied=[]\nfor idx,key in enumerate(totalDataset['tile_key'].values()):\n    if totalDataset['prediction'][idx]:\n        confirmed.append(key)\n    else:\n        denied.append(key)\ntotal_dt=confirmed.extend(denied)\n"

In [30]:
# Sanity check: sizes of the confirmed set, the denied set and the merged collection, one per line
print(len(confirmed), len(denied), len(total_dt), sep='\n')

653
1739
2392


## GET THEM IMAGES

In [31]:
# Download the 10-band composites for every labelled tile key.
# list() is needed because the download function slices its argument into batches.
get_10_band_images(list(total_dt))

Getting Images!
Group# 0 of 0


  0%|          | 0/2392 [00:00<?, ?it/s]

Saving this batch
Downloading Images to Image_Datasets/int_0_updatedModel_state_sampled_pakistan_dl_dataset/ folders!


  0%|          | 0/2392 [00:00<?, ?it/s]

Writing to metadata.csv in Image_Datasets/int_0_updatedModel_state_sampled_pakistan_dl_dataset/!
Done!


In [None]:
# NOTE(review): looks like a leftover scratch cell with no role in the pipeline -- consider deleting
print('hi')