In [1]:
from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt
from osgeo import gdal
from rasterio import rio

In [2]:
import pandas as pd

In [3]:
from datetime import date

In [4]:
import requests

In [5]:
import threading
import multiprocessing

In [6]:
import os
import shutil
from glob import glob
import zipfile

In [7]:
def readEnvList(name, environ=None):
    """Return the comma-separated entries stored under ``name``.

    Parameters
    ----------
    name : str
        Name of the variable to read.
    environ : mapping, optional
        Mapping to read from; defaults to ``os.environ``.

    Returns
    -------
    list of str
        Entries with surrounding whitespace removed; empty entries are
        dropped. A missing variable yields an empty list.

    Note: values that themselves contain a comma cannot be represented.
    """
    source = os.environ if environ is None else environ
    raw = source.get(name, "")
    return [entry.strip() for entry in raw.split(",") if entry.strip()]


# Account credentials are read from the environment instead of being stored in
# the notebook. Export e.g. SENTINEL_USERNAMES="user1,user2" and
# SENTINEL_PASSWORDS="pass1,pass2" (same order) before starting the kernel.
# Credentials that were previously saved in this notebook should be treated as
# leaked and rotated.
USERNAME = readEnvList("SENTINEL_USERNAMES")
PASSWORD = readEnvList("SENTINEL_PASSWORDS")
if len(USERNAME) != len(PASSWORD):
    raise ValueError(
        "SENTINEL_USERNAMES and SENTINEL_PASSWORDS must contain the same number of entries"
    )
if not USERNAME:
    print("WARNING: no credentials configured; set SENTINEL_USERNAMES and SENTINEL_PASSWORDS")

API_SOURCE = "https://scihub.copernicus.eu/dhus"
# Substrings matched against the "processinglevel" column of the query results.
PROCESSING_LEVEL = ["Level-1", "Level-2"]
SATELLITE = "Sentinel-2"
# GeoJSON file describing the study area.
STUDY_AREA = "StudyArea.geojson"

In [8]:
# Read the study-area GeoJSON, then convert it to the WKT string passed to the queries.
study_area_geojson = read_geojson(STUDY_AREA)
footprint = geojson_to_wkt(study_area_geojson)

In [9]:
# One SentinelAPI client per configured account, in the same order as USERNAME.
api = [
    SentinelAPI(USERNAME[account], PASSWORD[account], API_SOURCE)
    for account in range(len(USERNAME))
]

In [10]:
# Number of days per month, as strings keyed by three-letter month name.
# "feb" is fixed at '28', so leap years are not accounted for.
DAYS = dict(
    jan='31', feb='28', mar='31', apr='30',
    may='31', jun='30', jul='31', aug='31',
    sep='30', oct='31', nov='30', dec='31',
)

# Zero-padded month number, as strings keyed by three-letter month name.
MONTHS = dict(
    jan='01', feb='02', mar='03', apr='04',
    may='05', jun='06', jul='07', aug='08',
    sep='09', oct='10', nov='11', dec='12',
)

In [11]:
# First and last day (YYYYMMDD strings) of each month from December 2018 to May 2019.
DEC_BEGIN, DEC_END = "20181201", "20181231"
JAN_BEGIN, JAN_END = "20190101", "20190131"
FEB_BEGIN, FEB_END = "20190201", "20190228"
MAR_BEGIN, MAR_END = "20190301", "20190331"
APR_BEGIN, APR_END = "20190401", "20190430"
MAY_BEGIN, MAY_END = "20190501", "20190531"

In [12]:
def queryForProducts(footprint, beg, end, sat):
    """Query the first API client for products over ``footprint``.

    ``beg`` and ``end`` are passed together as the ``date`` tuple and ``sat``
    as ``platformname``. The client's query result is returned unchanged.
    """
    client = api[0]
    date_window = (beg, end)
    return client.query(footprint, date=date_window, platformname=sat)

In [13]:
def getLevelURLs(df):
    """Split the product links in ``df`` by processing level.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "processinglevel" and a "link" column. An empty frame
        (even one without these columns) is accepted.

    Returns
    -------
    tuple of (list, list)
        Links whose processing level contains ``PROCESSING_LEVEL[0]`` and
        links whose level contains ``PROCESSING_LEVEL[1]``, in row order.
    """
    level1, level2 = [], []
    if df.shape[0] == 0:
        return level1, level2

    # Pair the two columns by row position. Indexing a column with an integer
    # (df[col][i]) is a label lookup and only works on a 0..n-1 index or through
    # pandas' deprecated positional fallback.
    levels = df["processinglevel"].tolist()
    links = df["link"].tolist()
    for level, link in zip(levels, links):
        if not isinstance(level, str):
            # Missing level (e.g. NaN): the row belongs to neither list.
            continue
        if PROCESSING_LEVEL[0] in level:
            level1.append(link)
        if PROCESSING_LEVEL[1] in level:
            level2.append(link)

    return level1, level2

In [14]:
def curateLevelProducts(products, df, level):
    """Return the products whose processing level contains ``level``.

    Parameters
    ----------
    products : dict
        Mapping of product id to product details. Its keys are assumed to be
        in the same order as the rows of ``df``.
    df : pandas.DataFrame
        Frame with a "processinglevel" column, one row per product.
    level : str
        Substring to look for in each row's processing level.

    Returns
    -------
    dict
        A shallow copy of ``products`` (same mapping type) without the entries
        whose level does not contain ``level``. ``products`` itself is left
        untouched, so the call can be repeated with the same inputs.

    Raises
    ------
    ValueError
        If ``products`` and ``df`` do not have the same number of entries.
    """
    curated = products.copy()
    if df.shape[0] == 0 and not curated:
        return curated

    keys = list(products.keys())
    if len(keys) != df.shape[0]:
        raise ValueError(
            "products has %d entries but df has %d rows" % (len(keys), df.shape[0])
        )

    # Read the column by position; df[col][i] would be a label lookup.
    levels = df["processinglevel"].tolist()
    for key, row_level in zip(keys, levels):
        if not (isinstance(row_level, str) and level in row_level):
            curated.pop(key)
    return curated

In [15]:
def downloadDataUsingThread(prod):
    """Download ``prod`` with the first API client on a worker thread.

    The call blocks until the worker has finished. A failure inside the
    download is reported with a printed message instead of being raised; a
    KeyboardInterrupt in the calling thread is not suppressed.

    Parameters
    ----------
    prod
        Products to download, passed unchanged to ``api[0].download_all``.

    Returns
    -------
    None
    """
    failures = []

    def _worker():
        # Exceptions raised on a worker thread do not reach the caller, so
        # they are collected here and reported after join().
        try:
            api[0].download_all(prod)
        except Exception as exc:
            failures.append(exc)

    # Pass the callable as ``target``. Calling download_all(...) inline would
    # run it on the current thread and hand its result to Thread as ``group``.
    worker = threading.Thread(target=_worker)
    worker.start()
    worker.join()

    if failures:
        print("Something went wrong: " + repr(failures[0]))
    else:
        print("Download of specified products done")

In [16]:
def createDataDirectory(month):
    """Create (if needed) and return the data folder for ``month``.

    The path is the current working directory followed by
    a ``data`` folder and a ``<month>_data`` folder, joined with backslashes.
    Missing parent folders are created as well, and an already existing
    folder is not an error.
    """
    data_dir = os.getcwd()+"\\data\\"+month+"_data"
    # makedirs also creates a missing "data" parent, which os.mkdir would not.
    os.makedirs(data_dir, exist_ok=True)
    return data_dir
def getDataDirectory(month):
    """Return the data folder path for ``month`` without creating anything.

    The path is built from the current working directory, a ``data`` folder
    and a ``<month>_data`` folder, joined with backslashes.
    """
    pieces = [os.getcwd(), "\\data\\", month, "_data"]
    return "".join(pieces)
def createBackupDirectory(month, root="D:\\"):
    """Create (if needed) and return the backup folder for ``month``.

    Parameters
    ----------
    month : str
        Month label; the folder is named ``<month>_data``.
    root : str, optional
        Prefix placed directly in front of the folder name, including its
        trailing separator. Defaults to the root of the D: drive.

    Returns
    -------
    str
        ``root + month + "_data"``. Missing parent folders are created and an
        already existing folder is not an error.
    """
    backup = root+month+"_data"
    os.makedirs(backup, exist_ok=True)
    return backup
def getBackupDirectory(month, root="D:\\"):
    """Return the backup folder path for ``month`` without creating anything.

    ``root`` is the prefix placed directly in front of ``<month>_data``,
    including its trailing separator; it defaults to the root of the D: drive.
    """
    return root+month+"_data"

In [17]:
def listDataFiles(data_dir, ext):
    """List the files in ``data_dir`` whose names end with ``ext``.

    Parameters
    ----------
    data_dir : str
        Folder to search (not recursive).
    ext : str
        File name suffix, e.g. ".zip".

    Returns
    -------
    tuple of (list, list)
        The matching paths in sorted order, and for each path the file name
        up to its first dot.
    """
    # Sort so the order does not depend on the file system.
    paths = sorted(glob(data_dir+"/*"+ext))
    # Take the name from the last path component only, so that dots or
    # separators in the folder part cannot corrupt it.
    fileNames = [os.path.basename(path).split('.')[0] for path in paths]
    return paths, fileNames

In [18]:
def checkAllFiles(data_det, fileNames):
    """Tell whether ``fileNames`` matches the titles in ``data_det`` exactly.

    ``data_det`` maps product ids to mappings that have a 'title' entry. The
    comparison ignores order but not duplicates.
    """
    expected_titles = [data_det[product_id]['title'] for product_id in data_det.keys()]
    expected_titles.sort()
    return expected_titles == sorted(fileNames)

In [19]:
def arrangeData(month, curr_path, data_det):
    """Back up and move the downloaded archives of ``month``.

    The ".zip" files found in ``curr_path`` must match the titles in
    ``data_det`` exactly; otherwise an ``Exception`` is raised and nothing is
    copied or moved. Each archive is first copied to the backup folder and
    then moved to the data folder of ``month``.

    Returns the data folder path.
    """
    zip_paths, zip_names = listDataFiles(curr_path, ".zip")
    all_present = checkAllFiles(data_det, zip_names)
    if not all_present:
        raise Exception("All files are not downloaded")
    print("All files  Downloaded successfully")

    dst_dir = createDataDirectory(month)
    backup = createBackupDirectory(month)
    for zip_path in zip_paths:
        # Copy before moving, so the backup is taken while the source still exists.
        print("Backup files to "+backup)
        shutil.copy2(zip_path, backup)
        print("Moving files to "+dst_dir)
        shutil.move(zip_path, dst_dir)
    return dst_dir

In [20]:
def extractFiles(data_dir):
    """Extract every ".zip" archive in ``data_dir`` into ``data_dir``.

    A progress line of the form "<position>/<total>  Extracting <name>..." is
    printed before each archive, with positions starting at 1.
    """
    paths, fileNames = listDataFiles(data_dir, ".zip")
    total = len(paths)
    # enumerate supplies the running position; a counter left at 0 would
    # print "0/<total>" for every archive.
    for position, (path, name) in enumerate(zip(paths, fileNames), start=1):
        print(str(position)+"/"+str(total)+"  Extracting "+name+"...")
        with zipfile.ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(data_dir)

In [21]:
def cleanup(data_dir):
    """Delete every ".zip" archive in ``data_dir``.

    A progress line of the form "<position>/<total>  Deleting <name>..." is
    printed before each deletion, with positions starting at 1.
    """
    paths, fileNames = listDataFiles(data_dir, ".zip")
    total = len(paths)
    # enumerate supplies the running position; a counter left at 0 would
    # print "0/<total>" for every archive.
    for position, (path, name) in enumerate(zip(paths, fileNames), start=1):
        print(str(position)+"/"+str(total)+"  Deleting "+name+"...")
        os.remove(path)

In [22]:
# One query per month over the study-area footprint, December 2018 to May 2019.
dec_products = queryForProducts(footprint, beg=DEC_BEGIN, end=DEC_END, sat=SATELLITE)
jan_products = queryForProducts(footprint, beg=JAN_BEGIN, end=JAN_END, sat=SATELLITE)
feb_products = queryForProducts(footprint, beg=FEB_BEGIN, end=FEB_END, sat=SATELLITE)
mar_products = queryForProducts(footprint, beg=MAR_BEGIN, end=MAR_END, sat=SATELLITE)
apr_products = queryForProducts(footprint, beg=APR_BEGIN, end=APR_END, sat=SATELLITE)
may_products = queryForProducts(footprint, beg=MAY_BEGIN, end=MAY_END, sat=SATELLITE)

In [23]:
# Convert each month's query result to a DataFrame with the first API client.
dec_df, jan_df, feb_df, mar_df, apr_df, may_df = [
    api[0].to_dataframe(monthly_products)
    for monthly_products in (
        dec_products,
        jan_products,
        feb_products,
        mar_products,
        apr_products,
        may_products,
    )
]

In [24]:
# Split each month's product links into (level 1, level 2) lists.
(
    (dec_level1, dec_level2),
    (jan_level1, jan_level2),
    (feb_level1, feb_level2),
    (mar_level1, mar_level2),
    (apr_level1, apr_level2),
    (may_level1, may_level2),
) = [
    getLevelURLs(monthly_df)
    for monthly_df in (dec_df, jan_df, feb_df, mar_df, apr_df, may_df)
]

In [25]:
# Keep only the products whose processing level matches PROCESSING_LEVEL[1].
dec_data_det = curateLevelProducts(products=dec_products, df=dec_df, level=PROCESSING_LEVEL[1])
jan_data_det = curateLevelProducts(products=jan_products, df=jan_df, level=PROCESSING_LEVEL[1])
feb_data_det = curateLevelProducts(products=feb_products, df=feb_df, level=PROCESSING_LEVEL[1])
mar_data_det = curateLevelProducts(products=mar_products, df=mar_df, level=PROCESSING_LEVEL[1])
apr_data_det = curateLevelProducts(products=apr_products, df=apr_df, level=PROCESSING_LEVEL[1])
may_data_det = curateLevelProducts(products=may_products, df=may_df, level=PROCESSING_LEVEL[1])

In [26]:
# Per-month summary: total rows, number of level-1 links, number of level-2 links.
months = ["Dec", "Jan", "Feb", "Mar", "Apr", "May"]
levelCount = [
    [monthly_df.shape[0], len(level1_links), len(level2_links)]
    for monthly_df, level1_links, level2_links in (
        (dec_df, dec_level1, dec_level2),
        (jan_df, jan_level1, jan_level2),
        (feb_df, feb_level1, feb_level2),
        (mar_df, mar_level1, mar_level2),
        (apr_df, apr_level1, apr_level2),
        (may_df, may_level1, may_level2),
    )
]

print("      Total  L1  L2")
for label, (total, level1_count, level2_count) in zip(months, levelCount):
    print(f"{label}   {total}     {level1_count}  {level2_count}")

      Total  L1  L2
Dec   65     42  23
Jan   90     45  45
Feb   74     37  37
Mar   84     42  42
Apr   84     42  42
May   84     42  42


In [None]:
# December: download, then back up and move the archives found in the working directory.
downloadDataUsingThread(prod=dec_data_det)
dec_data_dir = arrangeData(month="dec", curr_path=os.getcwd(), data_det=dec_data_det)

In [None]:
# January: download, then back up and move the archives found in the working directory.
downloadDataUsingThread(prod=jan_data_det)
jan_data_dir = arrangeData(month="jan", curr_path=os.getcwd(), data_det=jan_data_det)

In [28]:
# February: download, then back up and move the archives found in the working directory.
downloadDataUsingThread(prod=feb_data_det)
feb_data_dir = arrangeData(month="feb", curr_path=os.getcwd(), data_det=feb_data_det)

Downloading:  30%|███       | 342M/1.13G [50:57<1:57:50, 112kB/s]  
MD5 checksumming: 100%|██████████| 342M/342M [00:00<00:00, 531MB/s] 
Invalid checksum. The downloaded file for '8b3663da-902d-4199-83ec-59a165b20774' is corrupted.
Downloading:   9%|▉         | 102M/1.13G [43:41<7:22:34, 38.8kB/s]  

Something went wrong





Exception: All files are not downloaded

In [None]:
# March: download, then back up and move the archives found in the working directory.
downloadDataUsingThread(prod=mar_data_det)
mar_data_dir = arrangeData(month="mar", curr_path=os.getcwd(), data_det=mar_data_det)

In [None]:
# April: download, then back up and move the archives found in the working directory.
downloadDataUsingThread(prod=apr_data_det)
apr_data_dir = arrangeData(month="apr", curr_path=os.getcwd(), data_det=apr_data_det)

In [None]:
# May: download, then back up and move the archives found in the working directory.
downloadDataUsingThread(prod=may_data_det)
may_data_dir = arrangeData(month="may", curr_path=os.getcwd(), data_det=may_data_det)

In [None]:
# Extract the archives of every month, using the folders returned by arrangeData.
extractFiles(data_dir=dec_data_dir)
extractFiles(data_dir=jan_data_dir)
extractFiles(data_dir=feb_data_dir)
extractFiles(data_dir=mar_data_dir)
extractFiles(data_dir=apr_data_dir)
extractFiles(data_dir=may_data_dir)

In [None]:
# December: recompute the data folder path, extract its archives, then delete them.
dec_data_dir = getDataDirectory(month="dec")
extractFiles(data_dir=dec_data_dir)
cleanup(data_dir=dec_data_dir)

In [None]:
# January: recompute the data folder path, extract its archives, then delete them.
jan_data_dir = getDataDirectory(month="jan")
extractFiles(data_dir=jan_data_dir)
cleanup(data_dir=jan_data_dir)

In [None]:
# February: recompute the data folder path, extract its archives, then delete them.
feb_data_dir = getDataDirectory(month="feb")
extractFiles(data_dir=feb_data_dir)
cleanup(data_dir=feb_data_dir)

In [None]:
# March: recompute the data folder path, extract its archives, then delete them.
mar_data_dir = getDataDirectory(month="mar")
extractFiles(data_dir=mar_data_dir)
cleanup(data_dir=mar_data_dir)

In [None]:
# April: recompute the data folder path, extract its archives, then delete them.
apr_data_dir = getDataDirectory(month="apr")
extractFiles(data_dir=apr_data_dir)
cleanup(data_dir=apr_data_dir)

In [None]:
# May: recompute the data folder path, extract its archives, then delete them.
may_data_dir = getDataDirectory(month="may")
extractFiles(data_dir=may_data_dir)
cleanup(data_dir=may_data_dir)