# Adding Soil Moisture Information (SCAN) to Meta-Data

This notebook performs an ETL step that adds the closest SCAN (soil moisture) sites to each USGS gage meta-data JSON file.

In [None]:
from google.colab import auth
from datetime import datetime
import math
import pandas as pd
# Authenticate the Colab user; presumably required so gsutil can access the bucket below.
auth.authenticate_user()
# Recursively copy the gage meta-data directory from GCS into the current
# working directory (later cells read it from /content/sentinel_11_03).
!gsutil -m cp -r gs://flow_hydro_2_data/meta_data/sentinel_11_03 .

In [None]:
# Load the table of SCAN sites.
# NOTE(review): scan1.csv is not fetched anywhere in this notebook; presumably
# it is uploaded to /content by hand before running -- confirm.
scan_df = pd.read_csv("/content/scan1.csv")

In [None]:
# Display the SCAN site table for a visual sanity check.
scan_df

Unnamed: 0,ntwk,state,site_name,ts,start,lat,lon,elev,county,huc
0,SCAN,AK,Checkers Creek (2213),,2014-September,65.40,-164.71,326,Nome,Quartz Creek (190501050503)
1,SCAN,AK,Kanuti Lake (2212),,2014-August,66.18,-151.74,524,Yukon-koyukuk,Old Dummy Lake-Kanuti River (190901040707)
2,SCAN,AK,Moose Inc (2062),,2002-May,59.68,-151.39,120,Kenai Peninsula,Swift Creek-Frontal Kachemak Bay (190203010806)
3,SCAN,AK,Schor Garden (2063),,2002-May,59.69,-151.39,340,Kenai Peninsula,Swift Creek-Frontal Kachemak Bay (190203010806)
4,SCAN,AL,AAMU-JTG (2057),,2002-February,34.78,-86.55,860,Madison,Acuff Spring-Flint River (060300020403)
...,...,...,...,...,...,...,...,...,...,...
207,SCAN,WA,Cook Farm Field D (2198),,2013-July,46.78,-117.08,2727,Whitman,Missouri Flat Creek (170601080203)
208,SCAN,WA,Lind #1 (2021),,1993-September,47.00,-118.57,1640,Adams,Town of Lind-Lind Coulee (170200150807)
209,SCAN,WI,UW Platteville (2196),,2013-September,42.71,-90.39,1075,Lafayette,Pats Creek-Galena River (070600050302)
210,SCAN,WI,Wabeno #1 (2003),,1993-October,45.47,-88.58,1580,Forest,Otter Creek (040301050401)


In [None]:
def _scan_id_from_name(site_name):
  """Return the text between the first "(" and the first ")" of a site name,
  e.g. "Checkers Creek (2213)" -> "2213"."""
  start = site_name.find("(") + 1
  stop = site_name.find(")")
  return site_name[start:stop]

# Derive the SCAN station id from the site name and store it in its own column.
scan_df["scan_id"] = scan_df["site_name"].map(_scan_id_from_name)

In [None]:
# Index the table by SCAN station id, so the JSON built from its rows in
# add_scan_station_meta is keyed by station id instead of row number.
# NOTE: not re-runnable as-is -- after the first run "scan_id" is the index,
# no longer a column, so a second run raises KeyError.
scan_df = scan_df.set_index('scan_id')

In [None]:
import json
import requests
import pandas as pd

def add_scan_station_meta(meta_data_path, scan_df, threshold = 200):
  """
  Attach the closest SCAN sites to one gage meta-data file.

  The gage coordinates are read from the JSON file, the great-circle distance
  (kilometers, via `haversine`) to every row of `scan_df` is computed, and the
  three nearest sites are stored under the "closest_scan_id" key as a mapping
  of `scan_df` index label -> distance.

  meta_data_path: path to a gage meta-data JSON file. It must contain the keys
    "latitude" and "logitude" (spelled this way in the lookup below;
    presumably it matches the files -- verify before "fixing" it).
  scan_df: DataFrame of SCAN sites with "lat" and "lon" columns in decimal
    degrees. The caller's frame is NOT modified.
  threshold: max distance to associate a SCAN site, in kilometers.
    NOTE(review): accepted for backward compatibility but currently not
    applied -- the three nearest sites are returned however far away they are.

  Returns a tuple (meta_data, ranked_scan_df): the loaded meta-data dict with
  "closest_scan_id" added, and a copy of `scan_df` with a "distance" column,
  sorted from nearest to farthest.
  """
  with open(meta_data_path) as f:
    meta_data_file = json.load(f)
  lat = meta_data_file["latitude"]
  long1 = meta_data_file["logitude"]
  distances = scan_df.apply(lambda site: haversine(long1, lat, site.lon, site.lat), axis=1)
  # assign() returns a new frame, so the shared scan_df is not mutated between calls.
  ranked = scan_df.assign(distance=distances).sort_values(by="distance")
  # Round-trip through to_json so index labels become JSON-safe string keys
  # (and floats get pandas' JSON formatting) before the dict is dumped later.
  meta_data_file["closest_scan_id"] = json.loads(ranked.head(3).to_json())["distance"]
  return meta_data_file, ranked

In [None]:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2, radius=6371):
    """
    Calculate the great circle distance between two points on a sphere
    (specified in decimal degrees) using the haversine formula.

    lon1, lat1: longitude and latitude of the first point, in decimal degrees.
    lon2, lat2: longitude and latitude of the second point, in decimal degrees.
    radius: sphere radius; determines the unit of the result. The default
      6371 is the Earth's radius in kilometers (use 3956 for miles).

    Returns the distance along the surface, in the unit of `radius`.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Floating-point rounding can push `a` a hair above 1 for (near-)antipodal
    # points, which would make asin() raise "math domain error"; clamp it.
    a = min(1.0, a)
    c = 2 * asin(sqrt(a))
    return c * radius

In [None]:
import json
# Try the function on a single gage file; `meta_file` is the enriched
# meta-data dict and `scan` is the SCAN table sorted by distance to that gage.
meta_file, scan = add_scan_station_meta("/content/sentinel_11_03/01010000.json", scan_df)

In [None]:
!mkdir scan_1_12_2024

In [None]:
import os

# Enrich every gage meta-data file with its closest SCAN sites and write the
# result, under the same file name, into the local output directory.
input_dir = "/content/sentinel_11_03"
output_dir = "scan_1_12_2024"
# Do not rely on a separate mkdir cell having been run first.
os.makedirs(output_dir, exist_ok=True)

for file_name in sorted(os.listdir(input_dir)):
  # Skip anything that is not a meta-data JSON file (e.g. a hidden checkpoint
  # directory), which would otherwise break open()/json.load().
  if not file_name.endswith(".json"):
    continue
  enriched_meta, _ = add_scan_station_meta(os.path.join(input_dir, file_name), scan_df)
  with open(os.path.join(output_dir, file_name), "w") as outfile:
    json.dump(enriched_meta, outfile)

Copying the output files to GCS.

In [None]:
# Recursively upload the enriched meta-data directory to the GCS bucket.
!gsutil -m cp -r /content/scan_1_12_2024 gs://flow_hydro_2_data/scan

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying file:///content/scan_1_12_2024/02481400.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/07124300.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/10255890.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/15129120.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/01585219.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/443409068471801.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/10296500.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/07173300.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/07375650.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/02455000.json [Content-Type=application/json]...
Copying file:///content/scan_1_12_2024/11143010.