In [None]:
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
from matplotlib.patches import Rectangle

from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import fsspec

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = 12,8

# !pip install opencv-python-headless
import cv2
from matplotlib.patches import Rectangle
from typing import List, Tuple
import itertools

Error: Kernel is dead

In [176]:
"""
Complete pipeline to find heat events. Process by year.

Input: the np arrays produced by the first process, which runs before this pipeline. These are same-sized
        arrays that hold a heat-event flag per (iX, iY). Each location is unaware of the others, so this
        pipeline aggregates them along the x and y axes first, then along the time axis.
Output: 3D Bounding boxes of all heat events.
"""

def bounding_boxes(arr2d: np.ndarray) -> List[tuple]:
    """Return one bounding rectangle per connected flagged region of a 2D slice.

    Parameters
    ----------
    arr2d : np.ndarray
        2D array of flags. It is cast to uint8 and every value greater
        than zero is treated as "flagged".

    Returns
    -------
    List[tuple]
        One ``(x, y, w, h)`` tuple per external contour, as returned by
        ``cv2.boundingRect``.
    """
    mask = arr2d.astype(np.uint8)
    # Signature is cv2.threshold(src, thresh, maxval, type). The previous call
    # passed a literal 0 as the type and pushed cv2.THRESH_BINARY into the
    # optional `dst` slot; pass the flag where it belongs.
    _, binary = cv2.threshold(mask, 0, 1, cv2.THRESH_BINARY)
    # RETR_EXTERNAL: only outermost contours, so holes do not yield extra boxes.
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return [cv2.boundingRect(contour) for contour in contours]

def isoverlap(box1:tuple, box2:tuple) -> bool:
    """Tell whether two (x, y, w, h) windows intersect.

    Windows that merely touch along an edge or corner count as overlapping,
    because only strict inequalities separate them.
    """
    left1, top1, width1, height1 = box1
    left2, top2, width2, height2 = box2
    # The windows are disjoint as soon as they are separated along one axis.
    apart_in_x = left2 > left1 + width1 or left2 + width2 < left1
    apart_in_y = top2 > top1 + height1 or top2 + height2 < top1
    return not (apart_in_x or apart_in_y)

def outer(box1:tuple, box2:tuple) -> tuple:
    """Return the smallest (x, y, w, h) window that encloses both inputs."""
    (ax, ay, aw, ah), (bx, by, bw, bh) = box1, box2
    # Work with edge coordinates first, then convert back to width/height.
    left = min(ax, bx)
    top = min(ay, by)
    right = max(ax + aw, bx + bw)
    bottom = max(ay + ah, by + bh)
    return (left, top, right - left, bottom - top)

def istiny(box:tuple, min_area:int) -> bool:
    """Return True when the window's area (w * h) does not exceed min_area."""
    _, _, width, height = box
    area = width * height
    return area <= min_area

def filter_tiny_ones(boxes:List[tuple], min_area:int = 10) -> List[tuple]:
    """Drop windows whose area is at most ``min_area``.

    Parameters
    ----------
    boxes : List[tuple]
        Windows as ``(x, y, w, h)`` tuples.
    min_area : int, optional
        Largest area (w * h) still considered "tiny". Defaults to 10, the
        value that used to be hard-coded here.

    Returns
    -------
    List[tuple]
        A new list with the surviving windows, in their original order.
    """
    return [box for box in boxes if not istiny(box, min_area)]

def collapse(boxes:List[tuple]) -> List[tuple]:
    """Merge overlapping windows until no two remaining windows overlap.

    The first overlapping pair (in ``itertools.combinations`` order) is
    replaced by its enclosing window, and the scan restarts from the top.
    This repeats until a full scan finds no overlap.

    Parameters
    ----------
    boxes : List[tuple]
        Windows as ``(x, y, w, h)`` tuples. The input is not modified.

    Returns
    -------
    List[tuple]
        A new list of mutually non-overlapping windows, sorted by ``x``.
    """
    # Work on a copy: the earlier version removed/appended/sorted the
    # caller's list in place.
    merged = list(boxes)

    # Iterative restart instead of one recursive call per merge, so a long
    # chain of merges cannot hit the interpreter's recursion limit.
    while True:
        for box1, box2 in itertools.combinations(merged, 2):
            if isoverlap(box1, box2):
                merged.remove(box1)
                merged.remove(box2)
                merged.append(outer(box1, box2))
                break  # the list changed: restart the pair scan
        else:
            break  # a complete scan found no overlapping pair

    merged.sort(key=lambda box: box[0])
    return merged

def array2boxes(arr2d:np.array) -> List[tuple]:
    """Run the per-slice pipeline on one 2D time-slice.

    Steps, innermost first: extract bounding boxes, discard the tiny ones,
    then fuse the overlapping ones. Returns the resulting boxes.
    """
    return collapse(filter_tiny_ones(bounding_boxes(arr2d)))

def groupby_heat_events(arr3d:np.ndarray, dates=None) -> pd.DataFrame:
    """Group consecutive days that contain heat-event boxes into events.

    Each 2D slice ``arr3d[i]`` is turned into boxes with ``array2boxes``.
    A maximal run of consecutive days that each have at least one box forms
    one event; the boxes of all days in the run are pooled and fused with
    ``collapse``.

    Parameters
    ----------
    arr3d : np.ndarray
        Flag array indexed as ``arr3d[day, :, :]``.
    dates : sequence, optional
        ``dates[i]`` is the calendar date of day index ``i``. When omitted,
        the notebook-level variable ``dr`` is used, which keeps the old
        one-argument call working. Pass it explicitly to avoid relying on
        hidden notebook state.

    Returns
    -------
    pd.DataFrame
        One row per event with columns ``i1``/``i2`` (first/last day index,
        inclusive), ``boxes`` (fused ``(x, y, w, h)`` windows) and
        ``d1``/``d2`` (the matching entries of ``dates``). The frame is
        empty, but has these columns, when no day contains an event.
    """
    if dates is None:
        # Backward-compatible fallback to the global the old code read.
        dates = dr

    columns = ['i1', 'i2', 'boxes', 'd1', 'd2']
    events = []

    def _close_run(first_day, last_day, pooled_boxes):
        # Emit one event row for the finished run of consecutive event days.
        events.append(dict(
            i1=first_day,
            i2=last_day,
            boxes=collapse(pooled_boxes),
            d1=dates[first_day],
            d2=dates[last_day],
        ))

    run_start = None   # day index where the current run began, if any
    run_boxes = []     # boxes pooled over the current run, in day order

    num_days = arr3d.shape[0]
    for i in range(num_days):
        day_boxes = array2boxes(arr3d[i, :, :])
        if day_boxes:
            if run_start is None:
                run_start, run_boxes = i, []
            run_boxes.extend(day_boxes)
        elif run_start is not None:
            _close_run(run_start, i - 1, run_boxes)
            run_start = None

    # A run that is still open on the last day ends there.
    if run_start is not None:
        _close_run(run_start, num_days - 1, run_boxes)

    return pd.DataFrame(events, columns=columns)


# Smoke test: run the pipeline on a single year of flagged data.

year = 1971
arr3d = np.load(f'Koray/CMIP5_flagged/arr_heat3d-{year}.npy')
# Calendar dates for day indices 0..364 of `year`; groupby_heat_events reads
# this notebook-level variable to translate day indices into dates.
dr = pd.date_range(start=f'1/1/{year}', periods=365, freq='D').date

df_events = groupby_heat_events(arr3d)
df_events

Unnamed: 0,i1,i2,boxes,d1,d2
0,133,140,"[(56, 121, 90, 75)]",1971-05-14,1971-05-21
1,155,162,"[(134, 177, 5, 5), (216, 160, 86, 40)]",1971-06-05,1971-06-12
2,167,169,"[(158, 165, 12, 7)]",1971-06-17,1971-06-19
3,175,180,"[(70, 125, 31, 29), (85, 157, 5, 4), (91, 185,...",1971-06-25,1971-06-30
4,182,195,"[(104, 155, 58, 30), (129, 103, 78, 47)]",1971-07-02,1971-07-15
5,203,206,"[(219, 160, 43, 15)]",1971-07-23,1971-07-26
6,210,222,"[(64, 167, 15, 8), (148, 152, 96, 33), (251, 1...",1971-07-30,1971-08-11
7,224,226,"[(127, 101, 7, 2)]",1971-08-13,1971-08-15
8,228,247,"[(141, 116, 102, 67)]",1971-08-17,1971-09-05
9,249,253,"[(135, 102, 15, 24), (192, 72, 10, 5)]",1971-09-07,1971-09-11


In [None]:
############### INPUTS ######################
# Geographic window to plot, in degrees (used for the selection and the axes).
lat_min = 0
lat_max = 50
lon_min = 220
lon_max = 300
#############################################

area = dict(lat=slice(lat_min, lat_max), lon=slice(lon_min, lon_max))

# temp data
ds = xr.open_mfdataset('Koray/CMIP5/*.nc')
# The spatial subset is the same for every day, so select it once.
tasmax_area = ds['tasmax'].sel(**area)

# One figure per heat event, one panel per day of the event.
# `df_events` and `dr` come from the pipeline cell above.
for i1, i2, boxes in zip(df_events['i1'], df_events['i2'], df_events['boxes']):

    t_indexes = range(i1, i2 + 1)

    # squeeze=False keeps `axs` a 2D array even for a one-day event, where
    # plt.subplots would otherwise return a bare Axes without .ravel().
    fig, axs = plt.subplots(1, len(t_indexes), figsize=(24, 16), squeeze=False)

    for ax, idx in zip(axs.ravel(), t_indexes):

        day = dr[idx].strftime("%Y-%m-%d")
        # presumably tasmax is stored in Kelvin; the offset converts to Celsius
        tasmax = tasmax_area.sel(time=day) - 273.15
        frame = tasmax.values[0, :, :]  # materialise the day's 2D field once

        # Array shape is (rows, cols) = (latitude, longitude).
        nrows, ncols = frame.shape
        deg_per_col = (lon_max - lon_min) / ncols
        deg_per_row = (lat_max - lat_min) / nrows

        # origin='lower' puts row 0 at lat_min so the image agrees with
        # `extent`. The slice(lat_min, lat_max) selection implies ascending
        # latitudes; the default origin='upper' would draw it flipped.
        ax.imshow(frame, extent=[lon_min, lon_max, lat_min, lat_max], origin='lower')
        ax.set_title(day)
        ax.set_xlabel("longitude [degrees_east]")
        ax.set_ylabel("latitude [degrees_north]")

        # Boxes are in pixel units (x = column, y = row). Convert position AND
        # size to degrees. NOTE(review): assumes arr3d shares this grid and
        # row orientation with the selected tasmax window - confirm.
        for x, y, w, h in boxes:
            rect = Rectangle(
                (lon_min + x * deg_per_col, lat_min + y * deg_per_row),
                w * deg_per_col,
                h * deg_per_row,
                color='red', fill=False, linewidth=2,
            )
            ax.add_patch(rect)

    fig.tight_layout()
    plt.show()
    # Release the figure; large figures otherwise accumulate over the loop.
    plt.close(fig)



Error: Kernel is dead

In [215]:
# Scratch check: (rows, cols) shape of the 2D field in the `tasmax` variable left over from the plotting loop.
tasmax.values[0,:,:].shape

(200, 320)

In [231]:
# Scratch check: ten evenly spaced longitude tick values from lon_min to lon_max, cast to int64.
np.linspace(lon_min, lon_max, 10, dtype=np.int64)

array([220, 228, 237, 246, 255, 264, 273, 282, 291, 300])

In [232]:
# Scratch check: range(10) yields ten items.
len(range(10))

10