In [10]:
import os
import numpy
import re
import random
from scipy.stats import gaussian_kde, pearsonr
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import math
import matplotlib.pyplot as plt

WORK_SPACE = os.getcwd()
RAA_LOC_FOLDER = '/disk1/workspace/20230901'

PIXEL_PAIRS_MAX = 600

MEAN_VALUE_COUNT = 500

MONTH_LABEL = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def identifer(data):
    down,up = numpy.nanpercentile(data,[0,75])
    IQR = up-down
    lower_limit = down - 1.5*IQR
    upper_limit = up + 1.5*IQR
    result = numpy.where(data > upper_limit,numpy.nan, data)
    result = numpy.where(result < lower_limit,numpy.nan, result)
    return result


def mapping_scatter(Y, X, figure_title='demo', band_name='band3', axis_min=0.0, axis_max=0.8):
    # filter

    if band_name == 'band3':
        axis_max = 0.6

    lim_x = numpy.copy(X)
    lim_y = numpy.copy(Y)

    up4_x = numpy.quantile(lim_x, 0.98, interpolation='higher')
    up4_y = numpy.quantile(lim_y, 0.98, interpolation='higher')
    lim_x[lim_x > up4_x] = numpy.nan
    lim_y[lim_y > up4_y] = numpy.nan

    lim_mask = (lim_x*lim_y)*0+1
    temp_x = lim_x*lim_mask
    temp_y = lim_y*lim_mask
    temp_x = temp_x[~numpy.isnan(temp_x)]
    temp_y = temp_y[~numpy.isnan(temp_y)]

    diff_array = abs(numpy.array(temp_y)-numpy.array(temp_x))/abs(numpy.minimum(numpy.array(temp_x), numpy.array(temp_y)))
    diff_array_filtered = numpy.array(identifer(diff_array))
    show_x = (diff_array_filtered*0+1)*temp_x
    show_y = (diff_array_filtered*0+1)*temp_y
    X = show_x[~numpy.isnan(show_x)]
    Y = show_y[~numpy.isnan(show_y)]
    
    # Calculate the point density
    xy = numpy.vstack([X, Y])
    z = gaussian_kde(xy)(xy)
    idx = z.argsort()
    X, Y, z = X[idx], Y[idx], z[idx]
    # main value
    max_n_idx = idx[-MEAN_VALUE_COUNT:]
    sr = (X[max_n_idx] + Y[max_n_idx])*0.5
#     sr = Y[max_n_idx]
    
    model = LinearRegression()
    x = X.reshape(-1, 1)
    model.fit(x, Y)
    y_pred = model.predict(x)
    k = model.coef_[0]
    b = model.intercept_
    rmse = math.sqrt(mean_squared_error(Y, y_pred))
    r_, p = pearsonr(X, Y)

    # slope r RMSE
    return sr, k, r_, rmse, b

In [13]:
if __name__ == "__main__":

    folder_type_list = ['Ray']
    folder_l1_list = ['26']
    folder_l2_list = ['0']
    
    lc_idx = '7'

    for folder_type in folder_type_list:
        print(folder_type)
        folder_type_path = os.path.join('/disk1/workspace/20230907/Ray_RAA_10', folder_type)
        for folder_l1 in folder_l1_list:
            folder_l1_path = os.path.join(folder_type_path, folder_l1)
            month_sr_b3 = numpy.zeros((12,MEAN_VALUE_COUNT))
            month_slope_b3 = numpy.zeros((12,))
            month_r_b3 = numpy.zeros((12,))
            month_rmse_b3 = numpy.zeros((12,))
            month_offset_b3 = numpy.zeros((12,))
            month_sr_b4 = numpy.zeros((12,MEAN_VALUE_COUNT))
            month_slope_b4 = numpy.zeros((12,))
            month_r_b4 = numpy.zeros((12,))
            month_rmse_b4 = numpy.zeros((12,))
            month_offset_b4 = numpy.zeros((12,))
            for month_idx in range(len(MONTH_LABEL)):
                raa_loc_array = numpy.loadtxt(os.path.join(RAA_LOC_FOLDER, 'RAA_month' + str(month_idx) + '.txt'), delimiter=',')
                month = MONTH_LABEL[month_idx]
                # each png
                misr_SR_band3_item_list = []
                ahi_SR_band3_item_list = []
                misr_SR_band4_item_list = []
                ahi_SR_band4_item_list = []
                for folder_l2 in folder_l2_list:
                    folder_l2_path = os.path.join(folder_l1_path, folder_l2)
                    roi_folder_list = os.listdir(folder_l2_path)
                    for roi_folder in roi_folder_list:
                        roi_infos = roi_folder.split('_')
                        roi_lc = roi_infos[1]
                        roi_lat = float(roi_infos[2])
                        roi_lon = float(roi_infos[3])
                        
                        if roi_lc == lc_idx and roi_lat < 0:
                            roi_loc = [roi_lon, roi_lat]
                            if roi_loc in raa_loc_array:                            
                                roi_folder_path = os.path.join(folder_l2_path, roi_folder)
                                roi_file_list = os.listdir(roi_folder_path)
                                roi_misr_SR_band3_list = []
                                roi_ahi_SR_band3_list = []
                                roi_misr_SR_band4_list = []
                                roi_ahi_SR_band4_list = []
                                for roi_file in roi_file_list:
                                    matchObj = re.search(r'(\d+)_band(\d+)_(\d+).npy', str(roi_file))
                                    if matchObj:
                                        ahi_time_str = matchObj.group(1)
                                        band_str = matchObj.group(2)
                                        # camera_idx_str = matchObj.group(3)

                                        obs_month = ahi_time_str[4:6]
                                        obs_month_idx = int(obs_month) - 1

                                        if obs_month_idx == month_idx:
                                            SR_npy_path = os.path.join(roi_folder_path, roi_file)
                                            ROI_SR_pair = numpy.load(SR_npy_path, allow_pickle=True)[0]
                                            misr_sr = ROI_SR_pair['misr_v3']
                                            ahi_sr = ROI_SR_pair['ahi_sr2misr']
                                            x_3Darray_np_1d = misr_sr.flatten()
                                            x_3Darray_np_1d = x_3Darray_np_1d[~numpy.isnan(x_3Darray_np_1d)]
                                            y_3Darray_np_1d = ahi_sr.flatten()
                                            y_3Darray_np_1d = y_3Darray_np_1d[~numpy.isnan(y_3Darray_np_1d)]
                                            if band_str == '3':
                                                roi_misr_SR_band3_list.extend(x_3Darray_np_1d)
                                                roi_ahi_SR_band3_list.extend(y_3Darray_np_1d)
                                            if band_str == '4':
                                                roi_misr_SR_band4_list.extend(x_3Darray_np_1d)
                                                roi_ahi_SR_band4_list.extend(y_3Darray_np_1d)
                                # keep pixel count same
                                if len(roi_misr_SR_band3_list) == len(roi_misr_SR_band4_list):
                                    misr_SR_band3_item_list.extend(roi_misr_SR_band3_list)
                                    ahi_SR_band3_item_list.extend(roi_ahi_SR_band3_list)
                                    misr_SR_band4_item_list.extend(roi_misr_SR_band4_list)
                                    ahi_SR_band4_item_list.extend(roi_ahi_SR_band4_list)

#                 print('Random NO.:', PIXEL_PAIRS_MAX)
#                 print(folder_l1)
#                 print('MISR_SR_Band3_NO.', 'AHI_SR_Band3_NO.', 'MISR_SR_Band4_NO.', 'AHI_SR_Band4_NO.')
#                 print(len(misr_SR_band3_item_list), len(ahi_SR_band3_item_list), len(misr_SR_band4_item_list), len(ahi_SR_band4_item_list))

                if len(misr_SR_band3_item_list) > PIXEL_PAIRS_MAX:
                    # random pairs mapping
                    index_array = random.sample([idx for idx in range(len(misr_SR_band3_item_list))], PIXEL_PAIRS_MAX)
                    index_array = numpy.sort(index_array).tolist()

                    misr_SR_band3_pts = numpy.array(misr_SR_band3_item_list)
                    show_misr_sr_b3 = misr_SR_band3_pts[index_array]
                    ahi_SR_band3_pts = numpy.array(ahi_SR_band3_item_list)
                    show_ahi_sr_b3 = ahi_SR_band3_pts[index_array]
                    figure_title = folder_l1 + '_b3' + '_' + str(month_idx) + month + '_' + str(PIXEL_PAIRS_MAX)
                    sr_b3, slope_b3, r_b3, rmse_b3, offset_b3 = mapping_scatter(show_ahi_sr_b3, show_misr_sr_b3, figure_title, 'band3')
                    month_sr_b3[month_idx] = sr_b3[:]
                    month_slope_b3[month_idx] = round(slope_b3, 2)
                    month_r_b3[month_idx] = round(r_b3, 2)
                    month_rmse_b3[month_idx] = round(rmse_b3, 3)
                    month_offset_b3[month_idx] = round(offset_b3, 2)

                    misr_SR_band4_pts = numpy.array(misr_SR_band4_item_list)
                    show_misr_sr_b4 = misr_SR_band4_pts[index_array]
                    ahi_SR_band4_pts = numpy.array(ahi_SR_band4_item_list)
                    show_ahi_sr_b4 = ahi_SR_band4_pts[index_array]
                    figure_title = folder_l1 + '_b4' + '_' + str(month_idx) + month + '_' + str(PIXEL_PAIRS_MAX)
                    sr_b4, slope_b4, r_b4, rmse_b4, offset_b4 = mapping_scatter(show_ahi_sr_b4, show_misr_sr_b4, figure_title, 'band4')
                    month_sr_b4[month_idx] = sr_b4[:]
                    month_slope_b4[month_idx] = round(slope_b4, 2)
                    month_r_b4[month_idx] = round(r_b4, 2)
                    month_rmse_b4[month_idx] = round(rmse_b4, 3)
                    month_offset_b4[month_idx] = round(offset_b4, 2)
                
#                 else:
#                     # all pairs mapping
#                     pairs_no = len(misr_SR_band3_item_list)
#                     if pairs_no > 3:

#                         misr_SR_band3_pts = numpy.array(misr_SR_band3_item_list)
#                         ahi_SR_band3_pts = numpy.array(ahi_SR_band3_item_list)
#                         figure_title = folder_l1 + '_b3' + '_' + str(month_idx) + month + '_' + str(pairs_no)
#                         sr_b3, slope_b3, r_b3, rmse_b3, offset_b3 = mapping_scatter(ahi_SR_band3_pts, misr_SR_band3_pts, figure_title, 'band3')
#                         month_sr_b3[month_idx] = sr_b3
#                         month_slope_b3[month_idx] = round(slope_b3, 2)
#                         month_r_b3[month_idx] = round(r_b3, 2)
#                         month_rmse_b3[month_idx] = round(rmse_b3, 3)
#                         month_offset_b3[month_idx] = round(offset_b3, 2)

#                         misr_SR_band4_pts = numpy.array(misr_SR_band4_item_list)
#                         ahi_SR_band4_pts = numpy.array(ahi_SR_band4_item_list)
#                         figure_title = folder_l1 + '_b4' + '_' + str(month_idx) + month + '_' + str(pairs_no)
#                         sr_b4, slope_b4, r_b4, rmse_b4, offset_b4 = mapping_scatter(ahi_SR_band4_pts, misr_SR_band4_pts, figure_title, 'band4')
#                         month_sr_b4[month_idx] = sr_b4
#                         month_slope_b4[month_idx] = round(slope_b4, 2)
#                         month_r_b4[month_idx] = round(r_b4, 2)
#                         month_rmse_b4[month_idx] = round(rmse_b4, 3)
#                         month_offset_b4[month_idx] = round(offset_b4, 2)
            print('SR, Slope, r, RMSE, Offset')
            print('Band3')

            print(month_sr_b3)
            print(month_slope_b3)
            print(month_r_b3)
            print(month_rmse_b3)
            print(month_offset_b3)

            print('Band4')
            print(month_sr_b4)
            print(month_slope_b4)
            print(month_r_b4)
            print(month_rmse_b4)
            print(month_offset_b4)

            sr_list = [month_sr_b3, month_sr_b4]
            slope_list = [month_slope_b3, month_slope_b4]
            r_list = [month_r_b3, month_r_b4]
            rmse_list = [month_rmse_b3, month_rmse_b4]
            offest_list = [month_offset_b3, month_offset_b4]

            numpy.save(os.path.join(WORK_SPACE, folder_l1 + '_' + folder_type + '_sr_slope_r_rmse_offset.npy'), [{
                'sr_list':sr_list, 
                'slope_list':slope_list, 
                'r_list':r_list, 
                'rmse_list':rmse_list, 
                'offest_list':offest_list}])

Ray
SR, Slope, r, RMSE, Offset
Band3
[[0.16742079 0.19311726 0.19276851 ... 0.25729027 0.11301721 0.09683883]
 [0.20637213 0.23982844 0.14619615 ... 0.21881083 0.24185583 0.16621141]
 [0.19199313 0.25260408 0.19572297 ... 0.16252979 0.21715015 0.18330849]
 ...
 [0.26703469 0.20402001 0.1862752  ... 0.11933884 0.22681851 0.16995847]
 [0.20382627 0.26494149 0.24114798 ... 0.17511247 0.20931015 0.20043281]
 [0.25845542 0.17101827 0.17808709 ... 0.22525861 0.18923691 0.1767515 ]]
[0.99 1.02 0.96 0.96 0.99 0.96 0.97 0.95 0.93 0.95 0.99 0.99]
[0.94 0.97 0.97 0.97 0.98 0.97 0.98 0.97 0.96 0.94 0.95 0.95]
[0.016 0.012 0.01  0.01  0.009 0.008 0.007 0.008 0.01  0.014 0.012 0.012]
[0.02 0.01 0.03 0.02 0.01 0.01 0.01 0.02 0.02 0.03 0.02 0.02]
Band4
[[0.25276059 0.25432093 0.2466465  ... 0.27362006 0.27313506 0.31354629]
 [0.35950828 0.29532943 0.28627037 ... 0.34908151 0.2830928  0.33184891]
 [0.27489411 0.25618978 0.27916919 ... 0.27985078 0.34162223 0.30044138]
 ...
 [0.23649092 0.33211739 0.304