# Download and Import Libraries 

In [None]:
!pip install cdflib
!pip install xarray

import glob
import os
from datetime import datetime, timedelta

import cdflib
import numpy as np
import pandas as pd
import regex as re
import xarray as xr
from PIL import Image, ImageOps


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m22.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m22.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Helper Functions

In [None]:
def preprocess_image(images):
    """Normalize each frame to an 8-bit intensity array.

    For every frame the 1st percentile is subtracted, the result is divided
    by the frame's 99th percentile, clipped to [0, 1] and scaled to 0-255.

    Args:
        images: iterable of numeric array-likes, one entry per frame.

    Returns:
        list of ``np.uint8`` arrays, one per input frame, each with the same
        shape as the corresponding input.
    """
    normalized_images = []
    for image in images:
        image = np.asarray(image)
        low = np.percentile(image, 1)     # 1st percentile, used as the black level
        scale = np.percentile(image, 99)  # 99th percentile, used as the divisor

        if scale == 0:
            # Dividing by zero would yield NaN/inf, which has no defined
            # uint8 value; emit a black frame instead.
            new_image = np.zeros(image.shape)
        else:
            # NOTE(review): the divisor is the raw 99th percentile, not
            # (p99 - p1), so a pixel at p99 maps slightly below 1. Kept as-is
            # so previously generated images stay reproducible.
            new_image = (image - low) / scale

        # Clamp to [0, 1] before scaling to the 8-bit range.
        new_image = np.clip(new_image, 0.0, 1.0)
        normalized_images.append((new_image * 255).astype(np.uint8))
    return normalized_images

def convert_cdf_to_png(images, times, dest_dir):
    """Write each frame to ``dest_dir`` as a grayscale PNG.

    The frames are first passed through ``preprocess_image``; each result is
    then paired with the entry of ``times`` at the same position and saved as
    ``image_<time>.png``.

    Args:
        images: frames to convert, forwarded unchanged to ``preprocess_image``.
        times: one label per frame, interpolated into the output file name.
        dest_dir: directory the PNG files are written into.
    """
    normalized_frames = preprocess_image(images)

    for frame, stamp in zip(normalized_frames, times):
        # Build the PIL image and run it through the grayscale conversion.
        picture = ImageOps.grayscale(Image.fromarray(frame))

        # One file per frame, named after its time label.
        out_path = f'{dest_dir}/image_{stamp}.png'
        picture.save(out_path)

# Load Image Datasets

This notebook expects the image data to be stored under `data/cdfs/images/fsmi_image/`, with one folder per date named in the format `YYYYMMDD`. The loading code below only recognises folders from February 2008 (`200802DD`) and indexes them by day of month. Each CDF file contains all-sky images taken once every 3 seconds. The images and their timestamps are stored in the variables `thg_asf_fsmi` and `thg_asf_fsmi_epoch`, respectively.

In [None]:
# day of month -> {cdf path -> (image array, timestamp strings)}
datasets = {}
# day of month -> list of cdf paths loaded for that day
filenames = {}
# days of month, in the order they were loaded
days = []

# glob returns entries in arbitrary order; sort so re-runs behave the same.
for day_dir in sorted(glob.glob('data/cdfs/images/fsmi_image/*')):

    # Only folders named 200802DD are recognised. Matching on the folder name
    # (instead of the full path) keeps this independent of the path separator,
    # and unrecognised entries are skipped rather than raising an IndexError.
    day_match = re.match(r'200802([\d]{2})', os.path.basename(day_dir))
    if day_match is None:
        print(f'skipping unrecognised entry {day_dir}')
        continue
    day = int(day_match.group(1))

    day_datasets = {}
    day_filenames = []

    for cdf_path in sorted(glob.glob(day_dir + '/*')):
        day_filenames.append(cdf_path)
        xarr = cdflib.cdf_to_xarray(cdf_path, to_datetime=True)
        images = xarr['thg_asf_fsmi'].to_numpy()
        # Timestamps are formatted as YYYYMMDDHHMMSS strings.
        times = xarr['thg_asf_fsmi_epoch'].dt.strftime('%Y%m%d%H%M%S').to_numpy()
        day_datasets[cdf_path] = (images, times)

    print(f'complete loading day {day}')

    days.append(day)
    datasets[day] = day_datasets
    filenames[day] = day_filenames


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m22.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3[0m[39;49m -> [0m[32;49m22.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
complete loading day 16
complete loading day 11
complete loading day 18
complete loading day 20
complete loading day 21
complete loading day 19
complete loading day 10
complete loading day 17
complete loading day 28
complete loading day 4
complete loading day 3
complete loading day 2
complete loading day 5
complete loading day 12
complete loading day 15
complete loading day 23
complete loading day 24
complete loading day 25
complete loading day 22
complete loading day 14
complete loading day 13
comp

# Save Images as png

Images are preprocessed and saved as PNG files under `data/images/fsmi/YYYYMMDD/` (relative to the working directory).

In [None]:
for day in datasets:
    # A day without any loaded file has nothing to save and no path to
    # derive the destination folder from.
    if not filenames[day]:
        print(f'no files loaded for day {day}, skipping')
        continue

    # The destination folder is named after the first 8 digits of the source
    # folder that holds this day's files.
    source_dir = os.path.basename(os.path.dirname(filenames[day][0]))
    date_dir = re.match(r'([\d]{8})', source_dir).group(1)
    dest_path = 'data/images/fsmi/' + date_dir

    # exist_ok=True lets this cell be re-run after the folders were created.
    os.makedirs(dest_path, exist_ok=True)

    # Each value is an (images, times) pair; the file-path key is not needed.
    for frames, stamps in datasets[day].values():
        convert_cdf_to_png(frames, stamps, dest_path)

    print(f'complete saving images of day {day}')

complete saving images of day 16
complete saving images of day 11
complete saving images of day 18
complete saving images of day 20
complete saving images of day 21
complete saving images of day 19
complete saving images of day 10
complete saving images of day 17
complete saving images of day 28
complete saving images of day 4
complete saving images of day 3
complete saving images of day 2
complete saving images of day 5
complete saving images of day 12
complete saving images of day 15
complete saving images of day 23
complete saving images of day 24
complete saving images of day 25
complete saving images of day 22
complete saving images of day 14
complete saving images of day 13
complete saving images of day 1
complete saving images of day 8
