In [1]:
'''
Creates training data for F17 channel predictor
'''


import numpy as np
import xarray as xr
import glob
from tqdm import tqdm
from util_funcs.L1C import scantime2datetime
from util_funcs import data2xarray, array_funcs
import geography

import sensor_info
import local_functions

satellite = sensor_info.satellite
sensor = sensor_info.sensor

datapath = f'/edata1/archive/GPM/1C_{satellite}_V7/'

In [4]:
'''
Satellite: F13

Get data and quality check for training.

nscans: generally around 3220
npixs: 90

    
Tb array will be set up as follows:
    Tbs =  [m x n], where m is the number of samples and n is the
           number of channels (features)
    1-2:   19V and H
    3:     22V
    4-5:   37V and H
    6-9:   92V and H (high-frequency channels are double sampled)
    10-11: 150H
    12-13: 183+-1 H
    14-15: 183+-3 H
    16-17: 183+-7 H
'''

#Set up a random seed for reproducibility
np.random.seed(12)

#Get a random list of files
file_list = glob.glob(f'{datapath}*/*/1C.{satellite}.{sensor}.*.HDF5'); file_list.sort()

#Everything looks good before about 04/2016:
good_files = [ifile for ifile in file_list if int(ifile.split('/')[5]) <= 1604]

flist = np.random.choice(good_files, size=200)

raise

#Loop through files and get good quality data.
for i, ifile in enumerate(tqdm(flist, desc="Processing Files")):

    #print(f'{i+1} of {len(flist)}, {ifile}')

    data = local_functions.read_ssmis_l1c(ifile)

    lat = data['lat']
    lon = data['lon']
    scantime = data['scantime']
    Tbs = data['Tbs']
    qual = data['qual']

    #Get only good quality data and reshape:
    goodqual = np.all(qual == 0, axis=2)
    all_bad = np.all(goodqual == False)
    if all_bad:
        print('all were bad.')
        continue
    lat = lat[goodqual]
    lon = lon[goodqual]
    scantime = scantime[np.where(goodqual)[0]]
    Tbs = Tbs[goodqual]

    #Check for NaNs (shouldn't be any if all good, but I've seen some)
    nonans = local_functions.find_nan_rows(Tbs, return_good=True)
    Tbs = Tbs[nonans]
    lat = lat[nonans]
    lon = lon[nonans]
    scantime = scantime[nonans]
    

    #Attach GPROF surface map data to each pixel
    sfctype = local_functions.attach_gpm_sfctype(lat, lon, scantime, sensor=sensor)

    npixs = lat.size

    #Output as NetCDF
    dset = data2xarray(data_vars = [lat, lon, scantime, sfctype, Tbs],
                       var_names = ['latitude', 'longitude', 'scantime', 'sfctype', 'Tbs'],
                       dims = [npixs, sensor_info.nfeatures],
                       dim_names = ['pixels', 'channels'])


    if i == 0:
        training_dataset = dset
    else:
        training_dataset = xr.concat((training_dataset, dset), dim='pixels')

training_dataset.to_netcdf(f'training_data/{satellite}_training_data.nc', engine='netcdf4')



RuntimeError: No active exception to reraise

In [2]:
with xr.open_dataset(f'training_data/{satellite}_training_data.nc') as f:
    sfctype = f.sfctype.values

for i in range(19):
    print(f'Number of pixels with surface type {i}: {np.sum(sfctype == i)}')

print(f'Total number of ocean pixels: {np.sum(sfctype == 1)}')
print(f'Total number of nonocean pixels: {np.sum(sfctype > 1)}')

Number of pixels with surface type 0: 0
Number of pixels with surface type 1: 28123167
Number of pixels with surface type 2: 6121504
Number of pixels with surface type 3: 1823154
Number of pixels with surface type 4: 1096168
Number of pixels with surface type 5: 959364
Number of pixels with surface type 6: 496573
Number of pixels with surface type 7: 890662
Number of pixels with surface type 8: 242927
Number of pixels with surface type 9: 313284
Number of pixels with surface type 10: 1572325
Number of pixels with surface type 11: 5467484
Number of pixels with surface type 12: 817676
Number of pixels with surface type 13: 1262277
Number of pixels with surface type 14: 1407041
Number of pixels with surface type 15: 944236
Number of pixels with surface type 16: 283542
Number of pixels with surface type 17: 2053833
Number of pixels with surface type 18: 1119462
Total number of ocean pixels: 28123167
Total number of nonocean pixels: 26871512


In [6]:
flist

array(['/edata1/archive/GPM/1C_F17_V7/0905/090503/1C.F17.SSMIS.XCAL2021-V.20090503-S025528-E043723.012863.V07A.HDF5',
       '/edata1/archive/GPM/1C_F17_V7/0811/081109/1C.F17.SSMIS.XCAL2021-V.20081109-S194618-E212813.010401.V07A.HDF5',
       '/edata1/archive/GPM/1C_F17_V7/0908/090825/1C.F17.SSMIS.XCAL2021-V.20090825-S225013-E003209.014485.V07A.HDF5',
       '/edata1/archive/GPM/1C_F17_V7/0812/081207/1C.F17.SSMIS.XCAL2021-V.20081207-S084311-E102507.010790.V07A.HDF5',
       '/edata1/archive/GPM/1C_F17_V7/0804/080408/1C.F17.SSMIS.XCAL2021-V.20080408-S023555-E041751.007354.V07A.HDF5',
       '/edata1/archive/GPM/1C_F17_V7/0910/091023/1C.F17.SSMIS.XCAL2021-V.20091023-S202830-E221025.015317.V07A.HDF5',
       '/edata1/archive/GPM/1C_F17_V7/0812/081208/1C.F17.SSMIS.XCAL2021-V.20081208-S032435-E050631.010801.V07A.HDF5',
       '/edata1/archive/GPM/1C_F17_V7/0902/090205/1C.F17.SSMIS.XCAL2021-V.20090205-S212635-E230830.011645.V07A.HDF5',
       '/edata1/archive/GPM/1C_F17_V7/0911/091118/1C.F17