# Adding SNOTEL meta-data and moving meta-data to new bucket

We will use these utility to add the SNOTEL station information into the meta-data for USGS sites. Then we will transfer the meta-data file to a new GCP project called `hydro-earthnet-db` into a bucket called `flow_hydro_2_data`.

In [4]:
import json
import requests
from math import radians, sin, cos, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
  """
  Calculate the great circle distance between two points
  on the earth (specified in decimal degrees). In kilometers
  """
  # convert decimal degrees to radians
  lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

  # haversine formula
  dlon = lon2 - lon1
  dlat = lat2 - lat1
  a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
  c = 2 * asin(sqrt(a))
  r = 6371 # Radius of earth in kilometers. Use 3956 for miles
  return c * r

def add_snotel_station_meta(meta_data_path, threshold = 200):
  """
  meta_data_path:
  threshold: is max distance to associate SNOTEL site in kilometers
  """
  with open(meta_data_path) as f:
    meta_data_file = json.load(f)
  lat = meta_data_file["latitude"]
  long1 = meta_data_file["logitude"]
  base_url = "https://powderlines.kellysoftware.org/api/closest_stations?lat={}&lng={}"
  response = requests.get(base_url.format(lat, long1)) # https://powderlines.kellysoftware.org/api/closest_stations?lat=39.7392&lng=-104.9903&count=3&data=true&days=3
  sno_response_json = json.loads(response.text)
  lat_close_station = sno_response_json[0]["station_information"]["location"]["lat"]
  print(sno_response_json)
  long_close_station = sno_response_json[0]["station_information"]["location"]["lng"]
  dis = haversine(lat, long1, lat_close_station, long_close_station)
  print(dis)
  if dis > 330:
    print("No close SNOTEL site distance is: " + str(dis))
  else:
    meta_data_file["snotel"] = sno_response_json[0]["station_information"]
  return meta_data_file

Lets copy the existing meta data files

In [1]:
from google.colab import auth
from datetime import datetime
auth.authenticate_user()
!gsutil -m cp -r gs://predict_cfs/gage_revised/final_metadata .

Copying gs://predict_cfs/gage_revised/final_metadata/01010000.json...
Copying gs://predict_cfs/gage_revised/final_metadata/01010070.json...
/ [0 files][    0.0 B/  4.1 MiB]                                                / [0 files][    0.0 B/  4.1 MiB]                                                Copying gs://predict_cfs/gage_revised/final_metadata/01010500.json...
/ [0 files][    0.0 B/  4.1 MiB]                                                Copying gs://predict_cfs/gage_revised/final_metadata/01012960.json...
Copying gs://predict_cfs/gage_revised/final_metadata/01013500.json...
Copying gs://predict_cfs/gage_revised/final_metadata/01011000.json...
Copying gs://predict_cfs/gage_revised/final_metadata/01014000.json...
/ [0 files][    0.0 B/  8.5 MiB]                                                / [0 files][    0.0 B/  8.5 MiB]                                                Copying gs://predict_cfs/gage_revised/final_metadata/01015800.json...
Copying gs://predict_cfs/gage_revis

In [None]:
!mkdir result_meta

In [None]:
import os
def upload_file(bucket_name:str, file_name:str, upload_name:str, client):
    print(upload_name)
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob(file_name)
    blob.upload_from_filename(upload_name)

def add_snotel_data(data_dir1):
  for f_n in os.listdir(data_dir1):
    print(f_n)
    f = os.path.join(data_dir1, f_n)
    # checking if it is a file
    meta_data = add_snotel_station_meta(f)
    with open(os.path.join("result_meta", f_n), "w") as outfile:
      json.dump(meta_data, outfile)
    # upload_file("flow_hydro_2_data", "meta_data", file_name, )

In [None]:
add_snotel_data("final_metadata")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
07103977.json
[{'station_information': {'name': 'Glen Cove', 'triplet': '1057:CO:SNTL', 'elevation': 11391, 'location': {'lat': 38.87602, 'lng': -105.07605}}}, {'station_information': {'name': 'Echo Lake', 'triplet': '936:CO:SNTL', 'elevation': 10694, 'location': {'lat': 39.65539, 'lng': -105.59358}}}, {'station_information': {'name': 'Michigan Creek', 'triplet': '937:CO:SNTL', 'elevation': 10702, 'location': {'lat': 39.43579, 'lng': -105.91072}}}]
40.36127877058243
07349650.json
[{'station_information': {'name': 'Sierra Blanca', 'triplet': '1034:NM:SNTL', 'elevation': 10268, 'location': {'lat': 33.40682, 'lng': -105.79467}}}, {'station_information': {'name': 'Tolby', 'triplet': '934:NM:SNTL', 'elevation': 10220, 'location': {'lat': 36.47498, 'lng': -105.19534}}}, {'station_information': {'name': 'Wesner Springs', 'triplet': '854:NM:SNTL', 'elevation': 11151, 'location': {'lat': 35.77584, 'lng': -105.54337}}}]
1365.980428

In [None]:
!gsutil -m cp  -r result_meta gs://flow_hydro_2_data/meta_data

## Test case/sanity check

In [3]:
!gsutil cp gs://flow_hydro_2_data/meta_data/result_meta/11189500.json .

Copying gs://flow_hydro_2_data/meta_data/result_meta/11189500.json...
/ [1 files][ 45.0 KiB/ 45.0 KiB]                                                
Operation completed over 1 objects/45.0 KiB.                                     


In [6]:
with open("/content/11189500.json") as f:
  kern = json.load(f)

In [8]:
kern["snotel"]

{'name': 'Bristlecone Trail',
 'triplet': '1111:NV:SNTL',
 'elevation': 8890,
 'location': {'lat': 36.31575, 'lng': -115.69543}}

Issue with selecting closest SNOTEL site based on distance for forecasting. Here Bristlcone is the closest to Kern Gage but most of its drainage would come from the Sierra Nevada snowpack further north.