In [1]:
import settings

In [28]:
# Folder whose entries are listed and parsed as dates by the data generator.
DATA_PATH = settings.data_path
# strptime format shared by START_DATE/END_DATE and the per-day folder names.
DATE_FORMAT = "%Y-%m-%d"
# Date range to process; both ends are compared inclusively by the generator.
START_DATE, END_DATE = "2016-12-21", "2021-04-20"

### Define a class that loads the data of a single day

In [18]:
from osgeo import gdal
from datetime import datetime
import numpy as np
import utils
import matplotlib.pyplot as plt
import os
import cv2

# Band name -> wavelength (nm) as it appears in the raster file names, one
# table per Sentinel-2 platform.
# Sentinel-2 MSI has bands B1..B12 plus B8A (there is no "B13"); the bands at
# ~1375, ~1610 and ~2200 nm are B10, B11 and B12 respectively.
S2A_wavelengths = {"B1": 443, "B2": 492, "B3": 560, "B4": 665, "B5": 704, "B6": 740, "B7": 783, "B8": 833,
                   "B8A": 865, "B9": 945, "B10": 1373, "B11": 1614, "B12": 2202}

S2B_wavelengths = {"B1": 442, "B2": 492, "B3": 559, "B4": 665, "B5": 704, "B6": 739, "B7": 780, "B8": 833,
                   "B8A": 864, "B9": 943, "B10": 1377, "B11": 1610, "B12": 2186}

class day_data():
    """Data of a single acquisition day, loaded from an ACOLITE output folder.

    Attributes:
        data_path: directory the rasters are read from.
        date: ``datetime`` parsed from the first file name containing "MSI",
            or ``None`` when no such file exists.
        bands: dict mapping band name (only those in ``_relevant_bands``) to
            the array read from the first raster band of the matching file.
        rgb: array stacked depth-wise from the stretched B4, B3 and B2 bands.
        lat_lon: ``np.array([latitude, longitude])``, i.e. index 0 along the
            first axis is the latitude raster and index 1 the longitude raster.
    """

    def __init__(self, path):
        """Load everything eagerly from ``path``.

        Raises whatever the individual loaders raise (e.g. ``KeyError`` when a
        band needed for the RGB composite is missing, ``FileNotFoundError``
        when the lat/lon rasters are missing).
        """
        self._relevant_bands = ["B2", "B3", "B4", "B5", "B6", "B7"]
        # path to directory with acolite output
        self.data_path = path
        # datetime of captured data
        self.date = self._get_date()
        # get relevant bands as dictionary with keys as band name and values as np arrays
        self.bands = self._get_bands_data()
        # make display-ready rgb array from band data
        self.rgb = self._get_rgb_array()
        # make array with latitude and longitude for every pixel
        self.lat_lon = self._get_lat_lon()

    @staticmethod
    def _read_first_band(tif_path):
        """Return raster band 1 of ``tif_path`` as an array.

        Raises:
            RuntimeError: if GDAL returns no dataset for the path.
        """
        ds = gdal.Open(tif_path)
        if ds is None:
            raise RuntimeError("Could not open raster: %s" % tif_path)
        # The dataset handle is local, so it is released when this returns.
        return ds.GetRasterBand(1).ReadAsArray()

    def _get_date(self):
        """Parse the acquisition datetime from the first file containing "MSI".

        Characters 8..26 of the file name are expected to hold the timestamp
        in ``%Y_%m_%d_%H_%M_%S`` form. Returns ``None`` if no file matches.
        """
        for file_name in os.listdir(self.data_path):
            if "MSI" in file_name:
                return datetime.strptime(file_name[8:27], '%Y_%m_%d_%H_%M_%S')
        return None

    def _get_bands_data(self):
        """Read every relevant band raster found in ``data_path``.

        The wavelength is taken from the last "_"-separated token of the file
        name (up to its first "."); files where that token is not an integer,
        files of an unknown platform, and wavelengths that are not in the
        platform's table are skipped.

        Returns:
            dict of band name -> array.
        """
        # Reverse lookups (wavelength -> band name), one per platform, so the
        # S2B files are resolved against the S2B table and not the S2A one.
        s2a_names = {wavelength: name for name, wavelength in S2A_wavelengths.items()}
        s2b_names = {wavelength: name for name, wavelength in S2B_wavelengths.items()}

        output = {}
        for file_name in os.listdir(self.data_path):
            try:
                band_wavelength = int(file_name.split("_")[-1].split(".")[0])
            except ValueError:
                continue
            if "S2A" in file_name:
                band_name = s2a_names.get(band_wavelength)
            elif "S2B" in file_name:
                band_name = s2b_names.get(band_wavelength)
            else:
                continue
            # An unknown wavelength gives None, which is never a relevant band.
            if band_name in self._relevant_bands:
                output[band_name] = self._read_first_band(
                    os.path.join(self.data_path, file_name))
        return output

    def _get_rgb_array(self):
        """Build the RGB composite from bands B4 (red), B3 (green), B2 (blue).

        Each band is passed through ``utils.stretch_to_MinMax`` before being
        stacked depth-wise (presumably a min-max contrast stretch; see utils).

        Raises:
            KeyError: if any of the three bands was not loaded.
        """
        stretched = [utils.stretch_to_MinMax(self.bands[name])
                     for name in ("B4", "B3", "B2")]
        return np.dstack(stretched)

    def _get_lat_lon(self):
        """Read the ``*lat.tif`` and ``*lon.tif`` rasters.

        Returns:
            ``np.array([latitude, longitude])``.

        Raises:
            FileNotFoundError: if either raster is absent from ``data_path``.
        """
        latitude = None
        longitude = None
        for file_name in os.listdir(self.data_path):
            if file_name.endswith("lat.tif"):
                latitude = self._read_first_band(os.path.join(self.data_path, file_name))
            if file_name.endswith("lon.tif"):
                longitude = self._read_first_band(os.path.join(self.data_path, file_name))
        if latitude is None or longitude is None:
            raise FileNotFoundError(
                "Missing lat.tif and/or lon.tif in %s" % self.data_path)
        return np.array([latitude, longitude])

    def show_rgb(self):
        """Display the RGB composite in a 10x10 inch matplotlib figure."""
        plt.figure(figsize=(10,10))
        plt.imshow(self.rgb)
        plt.show()

    def dominant_color(self, n_colors=3):
        """Return the centre of the most populated k-means colour cluster.

        Args:
            n_colors: number of clusters requested from ``cv2.kmeans``.

        Returns:
            The row of the k-means palette whose cluster holds the most pixels.

        Note:
            Cluster centres are initialised randomly
            (``cv2.KMEANS_RANDOM_CENTERS``), so results can vary between runs.
        """
        pixels = np.float32(self.rgb.reshape(-1, 3))
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, .1)
        flags = cv2.KMEANS_RANDOM_CENTERS

        _, labels, palette = cv2.kmeans(pixels, n_colors, None, criteria, 10, flags)
        # Count per label id (not per *observed* label) so the argmax is a
        # valid palette index even if some cluster ends up empty.
        counts = np.bincount(labels.ravel(), minlength=n_colors)
        return palette[np.argmax(counts)]

    def get_pos_index(self, lat, lon):
        """Return the (row, col) of the pixel closest to ``(lat, lon)``.

        Closeness is the Euclidean distance between the requested pair and
        each pixel's (latitude, longitude) pair; NaN distances are ignored and
        the first minimum wins on ties.

        Args:
            lat: latitude to look up, in the units of ``lat_lon[0]``.
            lon: longitude to look up, in the units of ``lat_lon[1]``.

        Returns:
            Tuple ``(row, col)`` of Python ints.
        """
        # lat_lon is stacked as [latitude, longitude] along the first axis.
        latitude, longitude = self.lat_lon[0], self.lat_lon[1]
        distance = np.hypot(latitude - lat, longitude - lon)
        row, col = np.unravel_index(np.nanargmin(distance), distance.shape)
        return int(row), int(col)

    def paint_coords(self, coords, color):
        """Overwrite, in place, the RGB pixel nearest to each coordinate.

        Args:
            coords: iterable of ``(lat, lon)`` pairs.
            color: value assigned to ``self.rgb[row, col]`` for each pair.
        """
        for coord in coords:
            row, col = self.get_pos_index(coord[0], coord[1])
            self.rgb[row, col] = color

### Define a generator of `day_data` instances

In [32]:
def laguna_data_generator(start_date, end_date, date_format, data_path):
    """Yield one ``day_data`` per dated folder within a date range.

    Entries of ``data_path`` are visited in sorted name order. An entry is
    skipped when its name starts with ".", cannot be parsed with
    ``date_format``, falls outside ``[start_date, end_date]`` (both ends
    inclusive), or is not a directory. The name of every remaining folder is
    printed; folders without an "acolite_output" entry yield nothing.

    Args:
        start_date: first date to include, formatted as ``date_format``.
        end_date: last date to include, formatted as ``date_format``.
        date_format: ``strptime`` format of the dates and folder names.
        data_path: directory containing the per-day folders.

    Yields:
        A ``day_data`` instance, or ``None`` when building it raised (the
        error is printed), so callers must check for ``None``.

    Raises:
        ValueError: if ``start_date`` or ``end_date`` do not match
            ``date_format``.
    """
    start_datetime = datetime.strptime(start_date, date_format)
    end_datetime = datetime.strptime(end_date, date_format)

    for directory in sorted(os.listdir(data_path)):
        if directory.startswith("."):
            continue
        # Stray entries whose name is not a date must not abort the whole run.
        try:
            day = datetime.strptime(directory, date_format)
        except ValueError:
            continue
        if not (start_datetime <= day <= end_datetime):
            continue
        day_dir = os.path.join(data_path, directory)
        if not os.path.isdir(day_dir):
            continue

        print(directory)
        if "acolite_output" not in os.listdir(day_dir):
            continue
        try:
            instance = day_data(os.path.join(day_dir, "acolite_output"))
        except Exception as e:
            print("Error in %s: %s" % (directory, str(e)))
            instance = None
        # Yield outside the try so only loading errors are swallowed.
        yield instance

### Generate data from date range and keep dates of cloudy and non-cloudy days

In [33]:
# A day is labelled cloudy when the standard deviation across the channels of
# its dominant colour is below this value (presumably because a near-grey
# dominant colour indicates cloud cover -- TODO confirm the threshold choice).
CLOUDY_STD_THRESHOLD = 2

data_generator = laguna_data_generator(START_DATE, END_DATE, DATE_FORMAT, DATA_PATH)
clear_data = []   # dates of days classified as clear
cloudy_data = []  # dates of days classified as cloudy
for day in data_generator:
    # The generator yields None for days whose data could not be loaded.
    if day is None:
        continue
    if np.std(day.dominant_color()) < CLOUDY_STD_THRESHOLD:
        cloudy_data.append(day.date)
        print("Dia nublado")
    else:
        clear_data.append(day.date)
        print("Dia lindo")

2017-11-04
Dia lindo
2017-11-12
Dia nublado
2017-11-17
Dia lindo
2017-11-19
Dia lindo
2017-11-22
Dia lindo
2017-11-24
2017-11-29
Dia lindo
2017-12-02
Dia nublado
2017-12-07
Dia lindo
2017-12-09
Dia nublado
2017-12-12
Dia nublado
2017-12-14
2017-12-19
Dia nublado
2017-12-22
Dia lindo
2017-12-27
Dia nublado
2017-12-29
Dia lindo
2018-01-01
2018-01-03
2018-01-08
Dia nublado
2018-01-11
Dia lindo
2018-01-16
Dia nublado
2018-01-18
2018-01-21
Dia nublado
2018-01-23
Dia lindo
2018-01-28
Dia nublado
2018-01-31
Dia nublado
2018-02-07
Dia lindo
2018-02-10
Dia lindo
2018-02-12
Dia nublado
2018-02-15
Dia nublado
2018-02-17
Dia lindo
2018-02-20
Dia lindo
2018-02-22
Dia nublado
2018-02-25
Dia nublado
2018-02-27
2018-03-02
Dia lindo
2018-03-04
2018-03-07
Dia lindo
2018-03-09
Dia lindo
2018-03-12
Dia lindo
2018-03-14
Dia lindo
2018-03-17
Dia nublado
2018-03-19
Dia lindo
2018-03-22
Dia lindo
2018-03-24
Dia lindo
2018-03-27
Dia nublado
2018-03-29
Dia lindo
2018-04-01
Dia nublado
2018-04-03
Dia lindo
2018-

In [34]:
# Number of cloudy days, then number of clear days, one per line.
print(len(cloudy_data), len(clear_data), sep="\n")

129
125
