In [1]:
import xarray as xr
import xradar as xd
import glob
import pyart
import pandas as pd

import matplotlib.pyplot as plt
import cartopy.crs as ccrs

import gc
import csv
from datetime import datetime
import os
import re

# Parameters
lat = 38.956158 
lon = -106.987854
level = 1
data_dir = '/gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi'
output_csv = f'radar_data_lev{level}.csv'

def open_csv(output_csv, keys_to_extract):
    with open(output_csv, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=['time'] + keys_to_extract)
        writer.writeheader()

def process_single_file(file, loc, keys_to_extract):
    radar = pyart.io.read_cfradial(file)
    radar.time['units'] = 'seconds since 1970-01-01'
    radar.time['data'] = radar.time['data'] / 1000

    guc_df = pyart.util.columnsect.get_field_location(radar, loc[0], loc[1])

    radar_time = int(radar.time['data'][0])
    radar_time_str = datetime.utcfromtimestamp(radar_time).strftime('%Y-%m-%d %H:%M:%S')

    row_data = {'time': radar_time_str}
    for key in keys_to_extract:
        row_data[key] = guc_df[key].values[level] if key in guc_df else None
    
    del radar
    del guc_df

    gc.collect()
    
    return row_data

def append_to_csv(output_csv, row_data):
    with open(output_csv, mode='a', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=row_data.keys())
        writer.writerow(row_data)

def get_last_timestamp(output_csv):
    try:
        df = pd.read_csv(output_csv)
        if not df.empty:
            last_timestamp = df['time'].max()
            return datetime.strptime(last_timestamp, '%Y-%m-%d %H:%M:%S')
    except FileNotFoundError:
        pass
    return None

def filter_files_by_timestamp(files, last_timestamp):
    filtered_files = []
    for file in files:
        match = re.search(r"(\d{8}\.\d{6})", file)
        if match:
            file_time_str = match.group(1).replace('.', '')
            file_time = datetime.strptime(file_time_str, '%Y%m%d%H%M%S')
            if last_timestamp is None or file_time > last_timestamp:
                filtered_files.append(file)
    return filtered_files

def process_radar_files(files, loc, output_csv):
    keys_to_extract = ['DBZ', 'VEL', 'WIDTH', 'ZDR', 'PHIDP', 'RHOHV', 'NCP', 'DBZhv', 
                       'cbb_flag', 'sounding_temperature', 'height', 'signal_to_noise_ratio', 
                       'velocity_texture', 'gate_id', 'simulated_velocity', 'corrected_velocity', 
                       'unfolded_differential_phase', 'corrected_differential_phase', 
                       'filtered_corrected_differential_phase', 'corrected_specific_diff_phase', 
                       'filtered_corrected_specific_diff_phase', 'corrected_differential_reflectivity', 
                       'corrected_reflectivity', 'height_over_iso0', 'specific_attenuation', 
                       'path_integrated_attenuation', 'specific_differential_attenuation', 
                       'path_integrated_differential_attenuation', 'rain_rate_A', 'snow_rate_ws2012', 
                       'snow_rate_ws88diw', 'snow_rate_m2009_1', 'snow_rate_m2009_2']
    
    if not os.path.exists(output_csv):
        open_csv(output_csv, keys_to_extract)
        print("csv created")

    last_timestamp = get_last_timestamp(output_csv)
    files = filter_files_by_timestamp(files, last_timestamp)

    for i, file in enumerate(files):
        try:
            print(f"Processing file {i + 1}/{len(files)}: {file}")
            row_data = process_single_file(file, loc, keys_to_extract)
            append_to_csv(output_csv, row_data)
        except Exception as e:
            print(f"Error processing file {file}: {e}")

if __name__ == "__main__":
    year = '2022'
    month = '08'
    files = glob.glob(f'{data_dir}/{year}{month}/gucxprecipradarcmacS2.c1.{year}{month}*')
    print(files[0])
    files.sort()
    process_radar_files(files, (lat, lon), output_csv)



## You are using the Python ARM Radar Toolkit (Py-ART), an open source
## library for working with weather radar data. Py-ART is partly
## supported by the U.S. Department of Energy as part of the Atmospheric
## Radiation Measurement (ARM) Climate Research Facility, an Office of
## Science user facility.
##
## If you use this software to prepare a publication, please cite:
##
##     JJ Helmus and SM Collis, JORS 2016, doi: 10.5334/jors.119



ERROR 1: PROJ: proj_create_from_database: Open of /ccsopen/home/braut/analysis-env2/share/proj failed
  from .autonotebook import tqdm as notebook_tqdm


/gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi/202208/gucxprecipradarcmacS2.c1.20220818.073342.nc
csv created
Processing file 1/7408: /gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi/202208/gucxprecipradarcmacS2.c1.20220801.001324.nc


cannot be safely cast to variable data type
  data = self.ncvar[:]
cannot be safely cast to variable data type
  data = self.ncvar[:]


Processing file 2/7408: /gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi/202208/gucxprecipradarcmacS2.c1.20220801.003123.nc
Processing file 3/7408: /gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi/202208/gucxprecipradarcmacS2.c1.20220801.003723.nc
Processing file 4/7408: /gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi/202208/gucxprecipradarcmacS2.c1.20220801.004323.nc
Processing file 5/7408: /gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi/202208/gucxprecipradarcmacS2.c1.20220801.004923.nc
Processing file 6/7408: /gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi/202208/gucxprecipradarcmacS2.c1.20220801.005523.nc


In [2]:
len(files)

7408