In [1]:
import os
import sys
sys.path.append("../")
import settings
from datetime import date, datetime
import time
from IPython.display import clear_output
import traceback

from prediction_dataset import PredictionDataset
from process_sentinel3 import OLCIdataGenerator
from sentinelsat_download import SentinelsatProducts



In [2]:
# Shared configuration read from the project-level settings module.
data_path = settings.raw_data_path  # destination handed to download_products() in the cells below
footprint = settings.footprint      # passed as footprint= to every SentinelsatProducts query

### Download all data between two dates

In [None]:
date1 = date(2021, 1, 1)
date2 = date(2021, 12, 31)
RETRY_DELAY_SECONDS = 60 * 10  # pause before retrying after a failed attempt

# Query and download every matching product between date1 and date2.
# A failed attempt is reported (full traceback, like the other download cells)
# and retried after RETRY_DELAY_SECONDS. The loop ends as soon as one
# query + download pass completes without raising; previously it never ended
# and re-queried immediately after a successful pass.
# NOTE(review): if repeated passes are required even after a clean run
# (e.g. products that only become available later), re-run this cell.
while True:
    try:
        datos = SentinelsatProducts(date1, date2, footprint=footprint,
                                    platformname="Sentinel-3", instrument="SLSTR",
                                    level="L2", p_type="SL_2_LST___")
        # Optional extra filter, currently disabled:
        #datos.filter_products(instrument="SLSTR", level="L2", p_type="SL_2_LST___", timeliness="Non Time Critical")
        datos.download_products(data_path)
    except Exception:
        clear_output(wait=True)
        print("/"*20)
        print(traceback.format_exc())
        print("/"*20)
        time.sleep(RETRY_DELAY_SECONDS)
    else:
        break

### Download data from valid Sentinel-2 days

In [None]:
DATA_PATH = settings.data_path
DATE_FORMAT = '%Y-%m-%d'
START_DATE = '2016-12-21'
END_DATE = '2022-04-20'
RETRY_DELAY_SECONDS = 60 * 10  # pause before retrying after a failed attempt

# NOTE(review): DayDataGenerator is not imported in the notebook's import cell,
# so this line raises NameError on a fresh kernel. Confirm which project module
# defines it and add the import to the first cell.
data_generator = DayDataGenerator(START_DATE, END_DATE, DATE_FORMAT, DATA_PATH, skip_invalid=True)
valid_dates = [day.date.date() for day in data_generator]
print("Total days to download:", len(valid_dates))

# Derive the query bounds from START_DATE / END_DATE so the range is defined
# in exactly one place (previously duplicated as date(...) literals).
date1 = datetime.strptime(START_DATE, DATE_FORMAT).date()
date2 = datetime.strptime(END_DATE, DATE_FORMAT).date()

# Query the whole range, keep only the products on valid_dates, and download
# them. A failed attempt is reported and retried after RETRY_DELAY_SECONDS;
# the loop ends once a pass completes without raising (previously it never
# ended and re-queried immediately after a successful pass).
while True:
    try:
        datos = SentinelsatProducts(date1, date2, footprint=footprint,
                                    platformname="Sentinel-3", instrument="SLSTR",
                                    level="L2", p_type="SL_2_LST___")
        datos.filter_by_dates(valid_dates)
        datos.download_products(data_path)
    except Exception:
        clear_output(wait=True)
        print("/"*20)
        print(traceback.format_exc())
        print("/"*20)
        time.sleep(RETRY_DELAY_SECONDS)
    else:
        break

In [None]:
def get_folder_size(folder_path):
    """Return the combined size of all files under ``folder_path`` in MiB.

    The tree is traversed with ``os.walk`` and every entry reported as a file
    contributes ``os.path.getsize`` bytes; the total is divided by 1024 twice.

    Entries that no longer resolve when measured (a dangling symlink, or a
    file removed while the walk is in progress) are skipped instead of
    aborting the whole scan.

    Args:
        folder_path: Directory whose contents are measured.

    Returns:
        Total size in mebibytes (bytes / 1024 / 1024) as a float; ``0.0`` when
        no files are found.
    """
    total_bytes = 0
    for root, _subdirs, file_names in os.walk(folder_path):
        for file_name in file_names:
            try:
                total_bytes += os.path.getsize(os.path.join(root, file_name))
            except FileNotFoundError:
                # Dangling symlink or file deleted mid-walk: nothing to count.
                continue
    # Convert once at the end rather than per file.
    return total_bytes / 1024 / 1024

def get_sentinel_folder_datetime(folder_path):
    """Parse the first timestamp embedded in a product folder name.

    The folder's base name is split on ``"____"``; the piece after the first
    separator is cut at its first ``"_"`` and truncated to 15 characters,
    which are parsed as ``%Y%m%dT%H%M%S``.
    (Presumably this is the sensing start time of a Sentinel-3 product; confirm
    against the naming convention.)

    Args:
        folder_path: Path whose last component is the product folder name.

    Returns:
        A naive ``datetime`` built from the extracted timestamp.

    Raises:
        IndexError: If the base name contains no ``"____"`` separator.
        ValueError: If the extracted text does not match ``%Y%m%dT%H%M%S``.
    """
    product_name = os.path.basename(folder_path)
    after_separator = product_name.split("____")[1]
    first_field = after_separator.split("_")[0]
    timestamp_text = first_field[:15]
    return datetime.strptime(timestamp_text, "%Y%m%dT%H%M%S")
    

# Size, in the unit returned by get_folder_size (bytes / 1024 / 1024), that
# separates "small" from "big" SEN3 product folders.
SEN3_SIZE_THRESHOLD_MB = 1000

# Maps the path of each big SEN3 folder to its size, but only for day folders
# that also contain at least one small SEN3 folder. Nothing is deleted here;
# the dict is only a list of candidates.
dirs_to_delete = {}
for day_dir in os.listdir(DATA_PATH):
    day_path = os.path.join(DATA_PATH, day_dir)
    if not os.path.isdir(day_path):
        # Stray files directly under DATA_PATH would make os.listdir() raise.
        continue

    sen3_dirs = [os.path.join(day_path, sat_data) for sat_data in os.listdir(day_path) if sat_data.endswith("SEN3")]
    sen3_dir_size = [get_folder_size(s3_data) for s3_data in sen3_dirs]
    # NOTE(review): not used further in this cell; kept because later cells
    # may read it. It raises if a SEN3 folder name cannot be parsed.
    sen3_dir_dt = [get_sentinel_folder_datetime(s3_data) for s3_data in sen3_dirs]

    # A folder of exactly SEN3_SIZE_THRESHOLD_MB lands in neither list
    # (unchanged from the original strict comparisons).
    small_dirs = [[s3_path, dir_size] for s3_path, dir_size in zip(sen3_dirs, sen3_dir_size) if dir_size < SEN3_SIZE_THRESHOLD_MB]
    big_dirs = [[s3_path, dir_size] for s3_path, dir_size in zip(sen3_dirs, sen3_dir_size) if dir_size > SEN3_SIZE_THRESHOLD_MB]

    if small_dirs:
        for big_path, big_size in big_dirs:
            dirs_to_delete[big_path] = big_size

### Download data from relevant Sentinel-3 days

In [3]:
processed_data_path = settings.processed_data_path

DATE_FORMAT = '%Y-%m-%d'

# Every input file below lives under settings.final_data_path.
final_data_dir = settings.final_data_path

ndci_data_path = os.path.join(final_data_dir, "kmeans_3", "ndci_data.csv")
precipitation_data_path = os.path.join(final_data_dir, "precipitation", "precipitation_pdir.csv")
wind_data_path = os.path.join(final_data_dir, "wind_data.csv")
water_temperature_data_path = os.path.join(final_data_dir, "water_temperature.csv")
algae_gt_path = os.path.join(final_data_dir, "algae_gt.csv")
ta_ose_path = os.path.join(final_data_dir, "agua_open_data(1).csv")
s3_brrs_path = os.path.join(final_data_dir, "s3_brrs.json")

raw_cyano_gt_path = os.path.join(final_data_dir, "MUESTREOS_SEMANALES_2016-2017-2018-2019-2020.xls")

# Per-location thresholds, passed to PredictionDataset as bloom_thresholds.
# Every location uses 5000 except "TA", which uses 3000 (an earlier variant
# of this table, previously kept here as commented-out code, used 5000 for
# "TA" as well).
bloom_thresholds = dict.fromkeys(
    ["MALLORQUINA", "ARROYO SAUCE", "SAUCE NORTE", "SAUCE SUR", "CISNES", "POTRERO"],
    5000,
)
bloom_thresholds["TA"] = 3000

# Coordinate pairs for three of the locations above
# (presumably [latitude, longitude]; confirm).
sampling_points_coords = {
    "SAUCE NORTE": [-34.795398, -55.047355],
    "SAUCE SUR": [-34.843127, -55.064624],
    "TA": [-34.829670, -55.049758],
}

# Passed to PredictionDataset as pre_bloom_max_days.
pre_bloom_max_days = 7

In [4]:
# Build the prediction dataset from the input files configured in the previous cell.
dataset = PredictionDataset(
    wind_data_path,
    water_temperature_data_path,
    precipitation_data_path,
    ndci_data_path,
    algae_gt_path,
    s3_brrs_path,
    bloom_thresholds=bloom_thresholds,
    pre_bloom_max_days=pre_bloom_max_days,
)

# Unique calendar dates found in dataset.algae_gt.date.
gt_dates = {timestamp.date() for timestamp in dataset.algae_gt.date.tolist()}

# Unique calendar dates yielded by the OLCI generator (created with skip_invalid=True).
olci_data_generator = OLCIdataGenerator(processed_data_path, DATE_FORMAT, skip_invalid=True)
clear_olci_dates = {olci_day.date.date() for olci_day in olci_data_generator}

# Dates present in both sets, as an ascending list.
relevant_dates = sorted(gt_dates & clear_olci_dates)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  location_df['label'] = location_df['fico'].apply(lambda x: 'Bloom' if x > threshold else 'No Bloom')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  location_data_df["days until bloom"] = days_until_bloom


In [None]:
RETRY_DELAY_SECONDS = 60 * 10  # pause before retrying after a failed attempt

if not relevant_dates:
    # Indexing an empty list below would raise IndexError; there is simply
    # nothing to download in that case.
    print("No relevant dates to download.")
else:
    # relevant_dates is sorted by the cell that builds it, so its first and
    # last entries bound the query range. Computed once, outside the loop.
    date1 = relevant_dates[0]
    date2 = relevant_dates[-1]

    # Query the whole range, keep only the products on relevant_dates
    # (with filter_time_of_day=True), and download them. A failed attempt is
    # reported and retried after RETRY_DELAY_SECONDS; the loop ends once a
    # pass completes without raising (previously it never ended and
    # re-queried immediately after a successful pass).
    while True:
        try:
            datos = SentinelsatProducts(date1, date2, footprint=footprint,
                                        platformname="Sentinel-3", instrument="SLSTR",
                                        level="L2", p_type="SL_2_LST___")
            datos.filter_by_dates(relevant_dates, filter_time_of_day=True)
            datos.download_products(data_path)
        except Exception:
            clear_output(wait=True)
            print("/"*20)
            print(traceback.format_exc())
            print("/"*20)
            time.sleep(RETRY_DELAY_SECONDS)
        else:
            break