In [1]:
import pydap.client
import numpy as np
import pandas as pd
import datetime
from tqdm.notebook import tqdm
# Inclusive range of calendar years to download. The download loop walks
# from End_Year back down to Start_Year.
Start_Year = 2000
End_Year = 2019

In [2]:
# Maps a short variable label to the OPeNDAP URL prefix of that variable.
# A concrete file URL is formed elsewhere by appending "<year>.nc" to the prefix.
# Insertion order is the order in which the variables are downloaded.
url_dict = {
    'Precip': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/pr/pr_',
    'Rhumiditymax': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/rmax/rmax_',
    'Rhumiditymin': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/rmin/rmin_',
    'Shumidit': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/sph/sph_',
    'Sradiation': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/srad/srad_',
    'WindFromDir': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/th/th_',
    'WindVelocity': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/vs/vs_',
    'Tempmax': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/tmmx/tmmx_',
    'Tempmin': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/tmmn/tmmn_',
    'BurningIndex': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/bi/bi_',
    'FuelMoist100': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/fm100/fm100_',
    'FuelMoist1000': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/fm1000/fm1000_',
    'EnergyRelease': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/erc/erc_',
    # Disabled source, kept for reference:
    # 'DroughtSeverity': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/pdsi/pdsi_',
    'EvapoTranspiration': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/etr/etr_',
    'EvapoTranspir_G': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/pet/pet_',
    'VaporPressDeficit': 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/vpd/vpd_',
}

In [3]:
# Open the End_Year precipitation file. The following cell reads the lat/lon
# axes from this dataset, and base_url_part is reused by the download loop.
base_url_part = 'http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/pr/pr_'
url = '{}{}.nc'.format(base_url_part, End_Year)
dataset = pydap.client.open_url(url)

In [4]:
# Select the grid indices that fall inside the bounding box of interest.
# Bounds are in degrees and are strict (exclusive) on both sides.
# NOTE(review): the SD_county_* names suggest San Diego County — confirm.
LAT_MIN, LAT_MAX = 32.4, 33.6
LON_MIN, LON_MAX = -118.0, -116.0

# Pull the full coordinate axes of the reference dataset into local arrays.
lon_ = dataset['lon']
lon = lon_[:].data
lat_ = dataset['lat']
lat = lat_[:].data

# numpy is already imported (as np) in the first cell, so no re-import here.
# np.where(...)[0] yields the positions along each axis that satisfy the bounds.
SD_county_lats = np.where((lat > LAT_MIN) & (lat < LAT_MAX))[0]
SD_county_lons = np.where((lon > LON_MIN) & (lon < LON_MAX))[0]

lat.shape, lon.shape

((585,), (1386,))

In [5]:
# First and last matching grid positions along each axis. The *_end values
# are the last matching indices themselves (i.e. inclusive bounds).
lat_begin, lon_begin = SD_county_lats[0], SD_county_lons[0]
lat_end, lon_end = SD_county_lats[-1], SD_county_lons[-1]
lat_begin, lat_end, lon_begin, lon_end

(380, 408, 163, 210)

In [None]:
# Download every variable in url_dict for each year from End_Year down to
# Start_Year, restricted to the bounding box, and collect the result in
# final_df (indexed by date / latitude / longitude, one column per variable).
flag = 0  # not read in the visible cells; kept in case a later cell relies on it

# Reference date that the datasets' 'day' offsets are added to.
base_date = pd.to_datetime("19000101", format="%Y%m%d")
# Dataset entries that are coordinates/metadata rather than measurements.
non_data_vars = ('lat', 'lon', 'crs', 'day')

# One frame per year; concatenated once after the loop instead of re-copying
# the ever-growing result on every iteration.
yearly_frames = []
for Year in tqdm(range(End_Year, Start_Year - 1, -1)):
    print("Downloading data for year {}".format(Year))
    url = base_url_part + str(Year) + '.nc'
    dataset = pydap.client.open_url(url)
    # Create the base DataFrame indexed by every (date, latitude, longitude)
    # combination inside the bounding box.
    days = base_date + pd.to_timedelta(dataset['day'].data[:], unit='days')
    index = pd.MultiIndex.from_product([days.values,
                                        lat[SD_county_lats].astype('f8'),
                                        lon[SD_county_lons].astype('f8')],
                                       names=['date', 'latitude', 'longitude'])
    base_df = pd.DataFrame(index=index)
    # Load each attribute below.
    # Iterate over (label, url prefix) pairs: the label is needed to tell
    # apart sources that expose the same variable name (e.g. Tempmax/Tempmin).
    for name, item in tqdm(url_dict.items()):
        print("Processing for", item)
        url = item + str(Year) + '.nc'
        dataset = pydap.client.open_url(url)
        for column in dataset.keys():
            if column in non_data_vars:
                continue
            # Here is where the data download happens
            attrs = dataset[column].attributes
            missing_value, scale, bias = attrs['missing_value'], attrs['scale_factor'], attrs['add_offset']
            print("'{}': missing_values: {}, scale: {}, bias: {}".format(column, missing_value, scale, bias))
            # lat_end / lon_end are the last matching indices, and a slice's
            # stop is exclusive, so add 1 to include the final row/column that
            # the index above already contains.
            raw_data = dataset[column][:, lat_begin:lat_end + 1, lon_begin:lon_end + 1]
            values, days, lats, lons = raw_data.data
            print("Data downloaded")
            days = base_date + pd.to_timedelta(days, unit="days")
            col_name = column + "_" + dataset[column].units
            if name == 'Tempmax':
                col_name = "max_" + col_name
            elif name == 'Tempmin':
                col_name = "min_" + col_name
            if col_name in base_df.columns:
                # Another source already produced this variable/units label;
                # prefix with the url_dict label instead of overwriting it.
                col_name = name + "_" + col_name
            # Unpack the stored values: mask the missing-value marker first,
            # then apply the scale factor and offset.
            values = values.astype('d')
            values[values == missing_value] = np.nan
            values = values * scale + bias
            base_df.loc[(days, lats, lons), col_name] = values.ravel()
    display(base_df.head(3))
    yearly_frames.append(base_df)

# pd.concat rejects an empty list, so fall back to an empty frame when the
# year range is empty (Start_Year > End_Year).
final_df = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

In [None]:
# Summary statistics for every downloaded column.
final_df.describe()

In [None]:
# Column dtypes, non-null counts and memory footprint of the combined frame.
final_df.info()

In [None]:
%%time
# Persist the combined frame to disk as Parquet with compression='gzip'.
# NOTE(review): the '.gz' suffix presumably only signals the codec; the output
# is a Parquet file, not a gzip archive — confirm downstream readers expect that.
final_df.to_parquet("gridMet.parquet.gz", compression='gzip')