In [25]:
import os
import numpy
import bz2
from datetime import datetime, timedelta

In [26]:
# Directory the per-site CSV files are written to: the process's current
# working directory at the time this cell runs.
WORKSPACE_FOLDER = os.getcwd()
# Root folder of the AMATERASS archive. The extraction loop looks files up as
# <root>/<YYYYMM>/<YYYYMMDD>/<YYYYMMDDHHMM>.tsfc.msm.1km.bin.bz2
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
AMATERASS_RSD_FOLDER = '/data01/people/beichen/workspace/AMATERASS_Ta'

In [27]:
# Processing window. These values are treated as local time: the extraction
# loop subtracts UTC_OFFSET hours from them to obtain the UTC timestamps used
# in the archive's folder and file names.
# NOTE(review): a trailing 'Z' conventionally means UTC; here it is only part
# of the parse/print format string and does NOT mean the values are UTC.
START_TIME = '2018-01-01T00:00:00Z' # local time (inclusive start of the window)
END_TIME = '2019-12-31T23:59:59Z' # local time (loop runs while the timestamp is strictly before this)

UTC_OFFSET = 9 # hours that local time is ahead of UTC
time_internal = 10  # minutes between consecutive file timestamps (loop step)

pixel_size = 0.01 # grid spacing in degrees
# AMATERASS extent in degrees; indices 0 and 3 (left longitude, top latitude)
# are the origin used when converting a site location to grid indices.
amaterass_extent = (120.0, 150.0, 22.4, 47.6) # l_lon, r_lon, b_lat, t_lat

In [28]:
# Site coordinates in degrees, stored as (latitude, longitude) -- note this is
# the opposite order from amaterass_extent, which lists longitudes first.
tky_site_loc = (36.146, 137.423) # lat, lon
crk_site_loc = (38.201, 127.251) # lat, lon
gck_site_loc = (37.748, 127.162) # lat, lon
gdk_site_loc = (37.749, 127.149) # lat, lon
# site_names and site_locs are parallel lists: entry i of one describes entry i
# of the other. The names are used as prefixes of the output CSV file names.
site_names = ['TKY','CRK', 'GCK', 'GDK']
site_locs = [tky_site_loc, crk_site_loc, gck_site_loc, gdk_site_loc]

In [29]:
def read_AMATERASS_data(bz2_filename, site_locs, extent=None, cell_size=None,
                        grid_shape=(2521, 3001)):
    """Extract the grid value at each site from one bz2-compressed grid file.

    The decompressed bytes are interpreted as big-endian 32-bit floats and
    reshaped to ``grid_shape`` (rows, columns). The index arithmetic treats
    row 0 as the top latitude of ``extent`` and column 0 as its left longitude.

    Parameters
    ----------
    bz2_filename : str
        Path of the ``.bz2`` file to read.
    site_locs : sequence of (lat, lon)
        Site coordinates in degrees.
    extent : (l_lon, r_lon, b_lat, t_lat), optional
        Grid extent in degrees. Defaults to the module-level
        ``amaterass_extent``.
    cell_size : float, optional
        Grid spacing in degrees. Defaults to the module-level ``pixel_size``.
    grid_shape : (rows, cols), optional
        Shape of the decoded grid. Defaults to ``(2521, 3001)``.

    Returns
    -------
    numpy.ndarray
        One value per entry of ``site_locs``, in the same order. On success
        the array is float32, with NaN for any site that falls outside the
        grid. If the file does not exist, or cannot be read or decoded, an
        all-NaN float64 array of the same length is returned; read/decode
        failures additionally print the file name and the error.
    """
    # One NaN per requested site (this used to be hard-coded to 4 entries).
    # numpy.nan is used because the numpy.NaN alias was removed in NumPy 2.0.
    missing = numpy.full(len(site_locs), numpy.nan)

    # A missing file is an expected gap in the archive: stay silent.
    if not os.path.exists(bz2_filename):
        return missing

    if extent is None:
        extent = amaterass_extent
    if cell_size is None:
        cell_size = pixel_size

    try:
        # The context manager guarantees the file handle is closed.
        with bz2.open(bz2_filename, 'rb') as bz2_file:
            bz2_data = bz2_file.read()
        jp_data = numpy.frombuffer(bz2_data, dtype='>f4').reshape(grid_shape)
        n_rows, n_cols = jp_data.shape

        # float32 keeps the values (and their string form) identical to what
        # indexing the decoded grid yields.
        site_values = numpy.full(len(site_locs), numpy.nan, dtype=numpy.float32)
        for i, site_loc in enumerate(site_locs):
            lat_pos = (extent[3] - site_loc[0]) / cell_size
            lon_pos = (site_loc[1] - extent[0]) / cell_size
            # Sites north/west of the grid would give negative indices, which
            # numpy silently wraps to the opposite edge: leave them as NaN.
            if lat_pos < 0 or lon_pos < 0:
                continue
            # NOTE(review): int() truncates, so the cell to the north-west of
            # the site is chosen rather than the nearest grid point -- confirm
            # this matches the dataset's grid registration before changing it.
            lat_idx = int(lat_pos)
            lon_idx = int(lon_pos)
            if lat_idx >= n_rows or lon_idx >= n_cols:
                continue
            site_values[i] = jp_data[lat_idx, lon_idx]
        return site_values
    except Exception as e:
        # Best effort: a corrupt or truncated file must not abort a long
        # extraction run, so report it and return NaN for every site.
        print(bz2_filename)
        print(e)
        return missing

In [30]:
# Shift the configured local-time window to UTC, since the archive's folders
# and files are looked up by the UTC timestamp.
time_format = "%Y-%m-%dT%H:%M:%SZ"
utc_shift = timedelta(hours=UTC_OFFSET)
utc_start_date = datetime.strptime(START_TIME, time_format) - utc_shift
utc_end_date = datetime.strptime(END_TIME, time_format) - utc_shift

# One list of [timestamp, value] rows per site, in site_names order.
site_record = [[] for _ in site_names]

time_step = timedelta(minutes=time_internal)
temp_date = utc_start_date
while temp_date < utc_end_date:
    # <root>/<YYYYMM>/<YYYYMMDD>/<YYYYMMDDHHMM>.tsfc.msm.1km.bin.bz2
    file_path = '/'.join([
        AMATERASS_RSD_FOLDER,
        temp_date.strftime("%Y%m"),
        temp_date.strftime("%Y%m%d"),
        temp_date.strftime("%Y%m%d%H%M") + '.tsfc.msm.1km.bin.bz2',
    ])
    site_vs = read_AMATERASS_data(file_path, site_locs)

    # Rows are labelled with the local time corresponding to this UTC file.
    this_day_time = (temp_date + utc_shift).strftime(time_format)
    for site_idx, site_rows in enumerate(site_record):
        site_rows.append([this_day_time, str(site_vs[site_idx])])

    temp_date += time_step

In [31]:
# Write one two-column CSV (timestamp, value) per site into WORKSPACE_FOLDER.
for site_name, site_rows in zip(site_names, site_record):
    csv_path = os.path.join(WORKSPACE_FOLDER, site_name + '_AMATERASS_Ta' + '.csv')
    # fmt='%s' because both columns are stored as strings.
    numpy.savetxt(csv_path, numpy.array(site_rows), delimiter=",", fmt='%s')