![USGS Logo](https://logowik.com/content/uploads/images/usgs2667.logowik.com.webp)
# Notebook 2 of 3: Subset Precipitation Data
### Set up software environment and file locations

In [22]:
import os
import glob
# Move into the data directory. The guard makes this cell safe to re-run:
# calling os.chdir('USGSdata') a second time from inside USGSdata would look
# for USGSdata/USGSdata (this is why a manual os.chdir('..') used to be needed).
if os.path.basename(os.getcwd()) != 'USGSdata':
    os.chdir('USGSdata')

In [23]:
# Record the absolute path of the current working directory; the yearly input
# files are opened from this location later in the notebook.
path_to_save_directory = os.path.abspath(os.curdir)
path_to_save_directory

'/home/jovyan/USGSdata'

In [24]:
import xarray as xr 
import numpy as np 
import pandas as pd 

In [26]:
# Collect the precipitation (pr) NetCDF files in the working directory.
# Restricting the pattern to '*.nc' keeps stray 'pr_*' files (notes, partial
# downloads, ...) out of the list that later cells parse years from.
prfiles = glob.glob('pr_*.nc')


def last_7chars(x):
    """Return the last 7 characters of ``x`` (e.g. '1979.nc' for 'pr_1979.nc')."""
    return x[-7:]


# Sort on the trailing 'YYYY.nc' so the list runs from earliest to latest year.
files = sorted(prfiles, key=last_7chars)

In [27]:
# File names follow 'pr_YYYY.nc', so the four-digit year occupies the
# 0-indexed characters 3..6, i.e. the slice [3:7].
if not files:
    # Fail with a clear message instead of an opaque IndexError below.
    raise FileNotFoundError(
        "No precipitation files (pr_YYYY.nc) found in " + os.getcwd()
    )

# First year in the sorted file list
start_numbers_str = files[0][3:7]
start_parsed_number = int(start_numbers_str)
print("Start Year:", start_parsed_number)

# Last year in the sorted file list
end_numbers_str = files[-1][3:7]
end_parsed_number = int(end_numbers_str)
print("End Year:", end_parsed_number)

Start Year: 1979
End Year: 2024


### Set Subset Space and Time Variables

In [28]:
# Bounding box of the subset region, each given as [min, max] in the
# dataset's lat / lon coordinate values.
lats = [30, 31]
lons = [-88, -87]

# Time range of the subset; both years are included by the main loop.
start = start_parsed_number
#end = end_parsed_number   # Option 1: Use last file in catalogue
end = 2021  # Option 2: Set the last year explicitly (here 2021)

In [29]:
# Preview the input/output file names for the first year of the subset period.
year = start
file_in, file_out = 'pr_%s.nc' % year, 'basin_prcp_%s.csv' % year
print('Start: Input netcdf data to subset:', file_in)
print('Output daily timeseries data:', file_out)

Start: Input netcdf data to subset: pr_1979.nc
Output daily timeseries data: basin_prcp_1979.csv


In [30]:
# Preview the input/output file names for the last year of the subset period.
year = end
file_in, file_out = 'pr_%s.nc' % year, 'basin_prcp_%s.csv' % year
print('End: Input netcdf data to subset:', file_in)
print('Output daily timeseries data:', file_out)

End: Input netcdf data to subset: pr_2021.nc
Output daily timeseries data: basin_prcp_2021.csv


### Explore the NetCDF dataset using the first data file in the list

In [31]:
# Open the first file in the sorted list to inspect the dataset's structure.
nc = xr.open_dataset(files[0])

In [32]:
# Show the dataset summary as this cell's output.
nc

## MAIN: Subset all files [Start year, End year]

In [33]:
# For every year in [start, end]: open that year's NetCDF file, cut out the
# lat/lon bounding box, average over the box for each day, and write the
# resulting daily series to 'basin_prcp_<year>.csv' in the working directory.
for year in range(start, end + 1):
    file = 'pr_%s.nc' % year
    file_out = 'basin_prcp_%s.csv' % year

    # Open inside a context manager so each year's file handle is released
    # before the next file is opened (previously every dataset stayed open).
    with xr.open_dataset(os.path.join(path_to_save_directory, file)) as nc:
        # Label-based selection of the bounding box. The latitude slice runs
        # from the larger to the smaller value -- this assumes the files store
        # lat in descending order; TODO confirm against the dataset.
        data = nc['precipitation_amount'].sel(
            lon=slice(lons[0], lons[1]),
            lat=slice(lats[1], lats[0]),
        )

        # Average over the spatial dimensions by name rather than by axis
        # position, leaving one value per 'day'.
        mean_precip = data.mean(dim=['lat', 'lon'])

        # .values pulls the numbers into memory while the file is still open.
        out = pd.DataFrame({'day': mean_precip['day'].values, 'prcp': mean_precip.values})

    out.to_csv(file_out, index=False)

## Check software requirements used in this Notebook
### Not necessary if running in Binder. Requirements.txt is already set up.

In [16]:
# Optional: uncomment to list the installed versions of the packages used here.
# NOTE: `findstr` is a Windows command; on Linux/macOS replace it with `grep`.
#!pip freeze | findstr numpy
#!pip freeze | findstr pandas
#!pip freeze | findstr xarray


## Land Acknowledgement
The U.S. Geological Survey (USGS) recognizes the importance of Native knowledge and living in harmony with nature. 

This work was conducted on the land of the Coast Salish peoples, the land which touches the shared waters of all tribes and bands within the Suquamish, Tulalip and Muckleshoot nations.


## Go to Notebook 3 of 3: Combine_USGS_precip.ipynb