In [1]:
'''
Creates training data for F17 channel predictor
'''


import numpy as np
import xarray as xr
import glob
from tqdm import tqdm
from util_funcs.L1C import scantime2datetime
from util_funcs import data2xarray, array_funcs
import geography

import sensor_info
import local_functions

satellite = sensor_info.satellite
sensor = sensor_info.sensor

datapath = f'/pdata4/archive/GPM/1Base_{sensor}_V7'

In [2]:
'''
Satellite: AMSR2

Get data and quality check for training.

nscans: generally around 3900
npixs: 243 and 486

    
Tb array will be set up as follows:
    Tbs =  [m x n], where m is the number of samples and n is the
           number of channels (features)

'''

#Set up a random seed for reproducibility
np.random.seed(12)

#Get a random list of files
file_list = glob.glob(f'{datapath}/*/*/1Base.{satellite}.{sensor}.*.HDF5'); file_list.sort()

#Everything looks good before about 04/2016:
good_files = [ifile for ifile in file_list]

flist = np.random.choice(good_files, size=100)

#Loop through files and get good quality data.
for i, ifile in enumerate(tqdm(flist, desc="Processing Files")):

    #print(f'{i+1} of {len(flist)}, {ifile}')

    data = local_functions.read_amsr2_l1b(ifile)

    #Level 1B doesn't screen for sunglint:
    with xr.open_dataset(ifile, group='S1', decode_timedelta=False) as f:
        glintang = f.Sun_Glint_Angle.values

    lat = data['lat']
    lon = data['lon']
    scantime = data['scantime']
    Tbs = data['Tbs']
    qual = data['qual']

    #Get only good quality data and reshape:
    goodqual = np.all(qual == 0, axis=2)
    all_bad = np.all(goodqual == False)
    if all_bad:
        print('all were bad.')
        continue
    lat = lat[goodqual]
    lon = lon[goodqual]
    scantime = scantime[np.where(goodqual)[0]]
    Tbs = Tbs[goodqual]
    glintang = glintang[goodqual]

    
    #Check for NaNs (shouldn't be any if all good, but I've seen some)
    nonans = local_functions.find_nan_rows(Tbs, return_good=True)
    Tbs = Tbs[nonans]
    lat = lat[nonans]
    lon = lon[nonans]
    scantime = scantime[nonans]
    glintang = glintang[nonans]

    #Final check for sunglint:
    maybeglint = np.logical_and(glintang > 0., glintang < 10.).reshape(-1)
    noglint = ~maybeglint
    Tbs = Tbs[noglint]
    lat = lat[noglint]
    lon = lon[noglint]
    scantime = scantime[noglint]

    #Attach GPROF surface map data to each pixel
    sfctype = local_functions.attach_gpm_sfctype(lat, lon, scantime, sensor=sensor)

    npixs = lat.size

    #Output as NetCDF
    dset = data2xarray(data_vars = [lat, lon, scantime, sfctype, Tbs],
                       var_names = ['latitude', 'longitude', 'scantime', 'sfctype', 'Tbs'],
                       dims = [npixs, sensor_info.nfeatures],
                       dim_names = ['pixels', 'channels'])


    if i == 0:
        training_dataset = dset
    else:
        training_dataset = xr.concat((training_dataset, dset), dim='pixels')

training_dataset.to_netcdf(f'training_data/{satellite}_training_data.nc', engine='netcdf4')



  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
Processing Files:  24%|████████████████████████▍        

all were bad.


  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::2]
  qual[:,:,0] = f.Pixel_Data_Quality_6_to_36.values[:,::

In [3]:
with xr.open_dataset(f'training_data/{satellite}_training_data.nc') as f:
    sfctype = f.sfctype.values

for i in range(19):
    print(f'Number of pixels with surface type {i}: {np.sum(sfctype == i)}')

print(f'Total number of ocean pixels: {np.sum(sfctype == 1)}')
print(f'Total number of nonocean pixels: {np.sum(sfctype > 1)}')

Number of pixels with surface type 0: 0
Number of pixels with surface type 1: 49181233
Number of pixels with surface type 2: 9588561
Number of pixels with surface type 3: 2815932
Number of pixels with surface type 4: 1478108
Number of pixels with surface type 5: 1289208
Number of pixels with surface type 6: 807957
Number of pixels with surface type 7: 1922185
Number of pixels with surface type 8: 126717
Number of pixels with surface type 9: 252573
Number of pixels with surface type 10: 1727101
Number of pixels with surface type 11: 8812431
Number of pixels with surface type 12: 1133003
Number of pixels with surface type 13: 2227065
Number of pixels with surface type 14: 1823080
Number of pixels with surface type 15: 1500855
Number of pixels with surface type 16: 502277
Number of pixels with surface type 17: 2985376
Number of pixels with surface type 18: 1240411
Total number of ocean pixels: 49181233
Total number of nonocean pixels: 40232840
