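**About**: This notebook prepares the data. It resamples the GOES-16 radiance files (`.nc`) onto a regular grid and saves the resulting false-color images as PNG files.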

In [None]:
# Enable the autoreload extension in mode 2, so that imported modules are reloaded before each cell execution.
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Initialization

### Imports

In [None]:
import os
import sys
import cv2
import glob
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.linear_model import *

# Pandas display settings: wider console output and up to 100 characters shown per column value.
pd.set_option('display.width', 500)
pd.set_option('max_colwidth', 100)

In [None]:
from util.plots import *
from params import *
from data.preparation import load_record, get_false_color_img

In [None]:
# pip install goes
# pip install pyresample

import GOES
import pyproj
import pyresample 

from pyresample.geometry import SwathDefinition
from pyresample.kd_tree import resample_nearest
from pyresample.bilinear import NumpyBilinearResampler

In [None]:
def resample(ds, domain=None, pix_resol=2.0, radius_of_influence=600000):
    """
    Resamples the 'Rad' field of a GOES dataset onto a regular equirectangular grid.

    The field is extracted over `domain` together with its pixel-center coordinates,
    then mapped with nearest-neighbour resampling onto the grid built by GOES.create_gridmap.

    Args:
        ds: Dataset as returned by GOES.open_dataset.
        domain (list or None, optional): Region forwarded to ds.image and GOES.create_gridmap,
            presumably [lon_min, lon_max, lat_min, lat_max] - TODO confirm against the GOES package.
            Defaults to None, which selects [-120, -70, 5, 40].
        pix_resol (float, optional): Value forwarded as PixResol to GOES.create_gridmap. Defaults to 2.0.
        radius_of_influence (float, optional): Search radius forwarded to resample_nearest. Defaults to 600000.

    Returns:
        Resampled data, as returned by pyresample.kd_tree.resample_nearest.
    """
    if domain is None:
        domain = [-120, -70, 5, 40]

    # Single definition of the target projection, shared by the pyproj transform and the area definition.
    proj4_args = '+proj=eqc +lat_ts=0 +lat_0=0 +lon_0=0 +x_0=0 +y_0=0 +a=6378.137 +b=6378.137 +units=km'
    area_id = 'cyl'

    CMI, LonCen, LatCen = ds.image('Rad', lonlat='center', domain=domain, up_level=True)

    # Target grid: pixel centers of the regular grid covering the domain.
    LonCenCyl, LatCenCyl = GOES.create_gridmap(domain, PixResol=pix_resol)

    # Project the grid's extreme center coordinates to get the area extent in projection units.
    Prj = pyproj.Proj(proj4_args)
    ny, nx = LonCenCyl.data.shape
    SW = Prj(LonCenCyl.data.min(), LatCenCyl.data.min())
    NE = Prj(LonCenCyl.data.max(), LatCenCyl.data.max())
    area_extent = [SW[0], SW[1], NE[0], NE[1]]

    AreaDef = pyresample.utils.get_area_def(area_id, area_id, area_id, proj4_args, nx, ny, area_extent)
    SwathDef = SwathDefinition(lons=LonCen.data, lats=LatCen.data)

    # Nearest-neighbour resampling is used; NumpyBilinearResampler was tried as an alternative but was slow.
    return resample_nearest(SwathDef, CMI.data, AreaDef, radius_of_influence=radius_of_influence)

## Data

In [None]:
# Root folder of the GOES-16 inputs; the processing loop reads BASE_DIR/<day>/<hour>/*.nc.
BASE_DIR = '../input/goes16/'

# PLOT: display the first false-color image of each hour folder.
# SAVE: write every false-color image to SAVE_FOLDER as a PNG file.
PLOT = False
SAVE = True

In [None]:
# Destination folder for the generated PNG images; created here if it does not exist yet.
SAVE_FOLDER = "../output/goes16_false_color_3/"
os.makedirs(SAVE_FOLDER, exist_ok=True)

In [None]:
# Day folders to process: every sub-directory found under BASE_DIR, in sorted order.
# Stray files (e.g. hidden system files) are skipped, since each entry is later listed as a directory.
# To process only a subset, replace this with an explicit list of folder names,
# e.g. DAYS = ['207', '208', '209', '210', '211'].
DAYS = sorted(
    entry for entry in os.listdir(BASE_DIR)
    if os.path.isdir(os.path.join(BASE_DIR, entry))
)

In [None]:
# For every day/hour folder: resample each band file, group the bands by acquisition
# timestamp, build one false-color image per timestamp, then optionally plot / save it.
for day in DAYS:
    for hour in tqdm(sorted(os.listdir(BASE_DIR + day))):
        root = BASE_DIR + f"{day}/{hour}/"
        # Sorted so the processing order (and the frame shown when PLOT is set) is reproducible.
        files = sorted(glob.glob(root + "*.nc"))

        # imgs[date][band] -> resampled band image
        imgs = {}
        for file in files:
            # Band number and timestamp are parsed from fixed positions in the file name.
            name = os.path.basename(file)
            band = int(name.split('-')[3][3:5])
            date = name.split('_')[3][1:]

            ds = GOES.open_dataset(file)
            imgs.setdefault(date, {})[band] = resample(ds)

        for i, k in enumerate(imgs):
            try:
                img = get_false_color_img(imgs[k])
            except Exception as e:
                # Best effort: skip timestamps that cannot be converted (presumably because a
                # required band is missing), but report them instead of failing silently.
                tqdm.write(f"Skipping {day}/{hour}/{k}: {type(e).__name__}: {e}")
                continue

            if PLOT and (i == 0):
                plt.figure(figsize=(15, 15))
                plt.axis(False)
                plt.imshow(img)
                plt.show()

            if SAVE:
                # NOTE(review): cv2.imwrite stores channels in BGR order and the cast assumes
                # values in [0, 1] - confirm both against get_false_color_img.
                cv2.imwrite(SAVE_FOLDER + f"{day}_{hour}_{k}.png", (img * 255).astype(np.uint8))

In [None]:
# bands = imgs['20231210040203']
# patches = bands
# img = get_false_color_img(patches)

In [None]:
# %%time
# ds = GOES.open_dataset(file)
# img = resample(ds)

In [None]:
# plt.figure(figsize=(15, 15))
# plt.axis(False)
# plt.imshow(img, cmap="gray")
# plt.show()

## Loop

Done ! 