In [None]:
import glob
import os
import subprocess

import netCDF4
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup


## This cell navigates to the CUDEM data catalog for the 1/9 arcsecond data and extracts the names of all downloadable files. It then iterates over each name, appends it to a base URL, and downloads the file to the input directory with wget.

In [None]:
# Download CUDEM files

# Folder that receives the downloaded .nc tiles. It becomes the working
# directory because the download loop saves into the current directory and the
# conversion cell lists os.getcwd() to find the tiles.
nc_folder = '/home/will/Desktop/USGS/CUDEM/Nc'
# Create the folder on first use so a fresh run does not stop with FileNotFoundError.
os.makedirs(nc_folder, exist_ok=True)
os.chdir(nc_folder)
#Extract link to file names from 1/9 arc second resolution server


def get_url_paths(url, ext='', params=None):
    """Return the links found on an HTML page, each prefixed with ``url``.

    Parameters
    ----------
    url : str
        Page to fetch. It is also prepended verbatim (plain string
        concatenation, not URL resolution) to every matching href.
    ext : str, optional
        Only hrefs ending with this suffix are kept. The default ``''``
        keeps every href.
    params : dict, optional
        Query parameters forwarded to ``requests.get``. ``None`` (the
        default) sends an empty dict.

    Returns
    -------
    list of str
        ``url + href`` for every ``<a>`` element whose href ends with ``ext``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    response = requests.get(url, params={} if params is None else params)
    # No-op for a successful response; raises for an error status.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    hrefs = (node.get('href') for node in soup.find_all('a'))
    # Anchors without an href attribute yield None and are skipped instead of
    # crashing on None.endswith().
    return [url + href for href in hrefs if href is not None and href.endswith(ext)]

url = 'https://www.ngdc.noaa.gov/thredds/catalog/tiles/tiled_19as/catalog.html'
ext = 'nc'
result = get_url_paths(url, ext)

# Make links into df (explicit object dtype so .str works even with no links)
link_df = pd.DataFrame({'link': pd.Series(result, dtype='object')})

# The file name is taken as whatever follows the last '/' of each link, so the
# extraction does not depend on how many '/' the catalog link contains.
# NOTE(review): assumes the catalog hrefs end in ".../<file name>" - confirm.
link_df['filename'] = link_df['link'].str.rsplit('/', n=1).str[-1]

# Download each file by name into the current working directory
base_url = 'https://www.ngdc.noaa.gov/thredds/fileServer/tiles/tiled_19as/'  # base url for download
for file_name in link_df['filename']:
    # Skip tiles that are already present so re-running the cell does not
    # fetch everything again.
    if os.path.exists(file_name):
        continue
    dwnld_link = base_url + file_name
    # Download under a temporary name and rename on success, so an
    # interrupted transfer is never mistaken for a complete tile.
    part_name = file_name + '.part'
    # Argument list (no shell): text scraped from the catalog page can never
    # be interpreted as shell syntax. check=True stops on a failed download.
    subprocess.run(['wget', '-O', part_name, dwnld_link], check=True)
    os.replace(part_name, file_name)
    



## This cell converts all of the downloaded CUDEM NetCDF (.nc) files into .csv files with fields for latitude, longitude, depth, and crs.

In [None]:
# Convert downloaded nc files to csv
csv_folder = '/home/will/Desktop/USGS/CUDEM/'  # enter path for csv folder


def grid_to_frame(lat, lon, band, crs):
    """Build a table with latitude, longitude, depth and crs columns.

    Parameters
    ----------
    lat, lon : array-like
        1-D coordinate values.
    band : array-like
        Either 1-D (used as-is next to ``lat`` and ``lon``) or 2-D with shape
        ``(len(lat), len(lon))``, in which case it is expanded to one row per
        grid cell.
        NOTE(review): a 2-D band is assumed to be indexed (lat, lon) - confirm
        against the files' dimension order.
    crs : scalar
        Value stored unchanged in every row of the ``crs`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``latitude``, ``longitude``, ``depth``, ``crs``. Masked
        entries become NaN.

    Raises
    ------
    ValueError
        If a 2-D ``band`` does not have shape ``(len(lat), len(lon))``.
    """
    def as_plain(values):
        # Masked entries -> NaN; non-float data is cast so NaN is representable.
        arr = np.ma.asarray(values)
        if arr.dtype.kind != 'f':
            arr = arr.astype(float)
        return arr.filled(np.nan)

    lat, lon, band = as_plain(lat), as_plain(lon), as_plain(band)
    if band.ndim == 2:
        if band.shape != (lat.size, lon.size):
            raise ValueError(
                'band shape {} does not match (len(lat), len(lon)) = ({}, {})'.format(
                    band.shape, lat.size, lon.size))
        # Row-major flattening: each latitude is repeated for every longitude.
        latitude = np.repeat(lat, lon.size)
        longitude = np.tile(lon, lat.size)
        band = band.ravel()
    else:
        latitude, longitude = lat, lon
    frame = pd.DataFrame({'latitude': latitude, 'longitude': longitude, 'depth': band})
    frame['crs'] = crs
    return frame


nc_dir = os.getcwd()
for filename in sorted(os.listdir(nc_dir)):
    if not filename.endswith(".nc"):
        continue
    file_name_no_ext = os.path.splitext(filename)[0]
    out_name = os.path.join(csv_folder, file_name_no_ext + '.csv')
    # The context manager closes the dataset even if the conversion fails.
    with netCDF4.Dataset(os.path.join(nc_dir, filename), mode='r') as nc:
        df = grid_to_frame(
            nc.variables['lat'][:],
            nc.variables['lon'][:],
            nc.variables['Band1'][:],
            nc.variables['crs'][:],
        )
    # NOTE(review): a gridded tile produces one row per cell, so the csv can
    # be very large.
    # index=False keeps the row index out of the file, so reading it back
    # does not create an extra "Unnamed: 0" column.
    df.to_csv(out_name, index=False)

## This cell combines all of the CSVs into one dataframe

In [None]:
# merge csv's into one df

os.chdir(csv_folder)

# create list of CUDEM csv files in folder
extension = 'csv'
# The sample dataset is saved into this same folder as 'sample_data.csv'; it
# must not be mixed into the CUDEM table when the notebook is run again.
sample_csv_name = 'sample_data.csv'
all_filenames = sorted(
    f for f in glob.glob('*.{}'.format(extension)) if f != sample_csv_name
)

# combine all files in the list (one continuous row index for the result)
combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames], ignore_index=True)

# Drop "Unnamed: N" columns, which read_csv creates for a row index that was
# written to the file without a header; they carry no data and would otherwise
# count as shared columns in a later merge.
is_index_column = combined_csv.columns.str.startswith('Unnamed')
combined_csv = combined_csv.loc[:, ~is_index_column].copy()

cudem_df = combined_csv

# round lat lon to 3rd decimal place
cudem_df['latitude'] = cudem_df['latitude'].round(decimals=3)
cudem_df['longitude'] = cudem_df['longitude'].round(decimals=3)

# create lat lon combined column
# NOTE(review): after rounding, several rows may share one latlon value -
# confirm whether they should be aggregated before they are used as a lookup.
cudem_df['latlon'] = cudem_df['latitude'].map(str) + ',' + cudem_df['longitude'].map(str)

## This cell downloads your sample data and loads it into a dataframe

In [None]:
#call in sample dataset

# from https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url

def download_file_from_google_drive(id, destination):
    """Download a Google Drive file to a local path.

    A first request is sent with the file id. If ``get_confirm_token`` finds
    a confirmation token in that response, the request is repeated with the
    token added. The final response body is written to ``destination`` by
    ``save_response_content``.

    Parameters
    ----------
    id : str
        Google Drive file id.
    destination : str
        Path of the file to write.

    Raises
    ------
    requests.HTTPError
        If the final response has an error status, so an error page is never
        saved as if it were the requested file.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its connections) on exit.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        response.raise_for_status()
        # The body is streamed, so it must be saved while the session is open.
        save_response_content(response, destination)

def get_confirm_token(response):
    """Return the value of the first cookie whose name starts with
    'download_warning', or None when the response has no such cookie."""
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)

def save_response_content(response, destination):
    """
    Write the body of a response to a file.

    response = object whose iter_content(chunk_size) yields the body in chunks
    destination = filename for output (opened in binary write mode)
    """
    chunk_size = 32768

    with open(destination, "wb") as out_file:
        # filter(None, ...) drops empty keep-alive chunks
        out_file.writelines(filter(None, response.iter_content(chunk_size)))

DATASET_ID = '1G9fuC_TjtwTr3JWA85gW7228_Ffxsw0G'

# os.path.join gives the same path whether or not csv_folder ends with a separator.
destination = os.path.join(csv_folder, 'sample_data.csv')
download_file_from_google_drive(DATASET_ID, destination)
sample_df = pd.read_csv(destination)

# Fail here, with a clear message, if the download is not the expected table:
# the merge step reads the 'latitude' and 'longitude' columns of sample_df.
missing_columns = {'latitude', 'longitude'} - set(sample_df.columns)
if missing_columns:
    raise ValueError(
        'sample data at {} is missing column(s): {}'.format(
            destination, ', '.join(sorted(missing_columns))))

## This cell merges both output dataframes, effectively adding a depth field to each sample that matches a CUDEM lat/lon point

In [None]:
# round sample lat lon to 3rd decimal place (the same precision applied to cudem_df)
sample_df['latitude'] = sample_df['latitude'].round(decimals=3)
sample_df['longitude'] = sample_df['longitude'].round(decimals=3)

# create lat lon combined column
sample_df['latlon'] = sample_df['latitude'].map(str) + ',' + sample_df['longitude'].map(str)

# merge dataframes on latlon column
# Without an explicit key pd.merge joins on every column name the two frames
# share, so any unrelated shared column would silently become part of the key.
# The CUDEM latitude/longitude columns are dropped first because latlon already
# encodes them and the sample keeps its own copies.
cudem_lookup = cudem_df.drop(columns=['latitude', 'longitude'])
# Inner join: only samples with a matching CUDEM point are kept.
# NOTE(review): if several CUDEM rows share one latlon value, a sample is
# repeated once per matching row - confirm that this is intended.
merged_df = pd.merge(sample_df, cudem_lookup, on='latlon', how='inner')