# Modules

In [None]:
import geopandas as gpd
import shapely
import pandas as pd
import numpy as np
from math import pi,cos,radians
import pyproj
from shapely.ops import cascaded_union
from shapely.geometry import Point
import shapely.geometry as shpg
import salem
%matplotlib inline

# Paths

In [None]:
# Input files (absolute Windows paths on the author's machine)
# Shapefile with the LeBris/Paul elevation-change polygons
lp_p = 'C:\\Users\\jlandman\\Desktop\\LeBris_Paul\\ElevationChange_GlaciersSupp1km2_LessThan20%DataVoid.shp' # LeBris/Paul shape
# WGMS Fluctuations of Glaciers (FoG) release 2015-11, table A "GENERAL INFORMATION"
foga_p = 'C:\\Users\\jlandman\\Desktop\\DOI-WGMS-FoG-2015-11\\WGMS-FoG-2015-11-A-GENERAL-INFORMATION.csv'

# Read files

In [None]:
# LeBris/Paul polygons as a GeoDataFrame
lp = gpd.read_file(lp_p)
# IDs are cast to int; they are compared against the integer IDs in assign_dict further down
lp['ID'] = lp['ID'].astype(int)
# FoG table A; the CSV is read as ISO-8859-1
foga = pd.read_csv(foga_p, encoding='iso-8859-1')

# Make an empty FoG-D DataFrame

In [None]:
# Empty FoG table D ("CHANGE") template: no rows, one column per FoG-D field.
# The field names have to be passed as ``columns``: the second positional
# argument of pd.DataFrame is the *index*, which would produce 21 index rows
# and no columns at all.
fogd_templ = pd.DataFrame(columns=['POLITICAL_UNIT', 'GLACIER_NAME', 'WGMS_ID', 'YEAR', 'LOWER_BOUND', 'UPPER_BOUND',
                                   'AREA_SURVEY_YEAR', 'AREA_CHANGE', 'AREA_CHANGE_UNCERTAINTY', 'THICKNESS_CHANGE',
                                   'THICKNESS_CHANGE_UNCERTAINTY', 'VOLUME_CHANGE', 'VOLUME_CHANGE_UNCERTAINTY', 'SURVEY_DATE',
                                   'SURVEY_DATE_PLATFORM_METHOD', 'REFERENCE_DATE', 'REFERENCE_DATE_PLATFORM_METHOD',
                                   'INVESTIGATOR', 'SPONSORING_AGENCY', 'REFERENCE', 'REMARKS'])

# Determine uncertainty for given FoG coordinate

In [None]:
def _n_decimals(value, name):
    """Return the number of decimal places in the printed form of ``value``.

    Integers have zero decimals. Floats are inspected through ``str(value)``;
    scientific notation (e.g. ``1e-05``) is resolved via its exponent instead
    of being misread as "no decimal point found".
    """
    if isinstance(value, (int, np.integer)):
        return 0
    if not isinstance(value, (float, np.floating)):
        raise TypeError('Given {} must be float or integer.'.format(name))

    mantissa, _, exponent = str(value).partition('e')
    decimals = len(mantissa.partition('.')[2])
    if exponent:
        # '1.5e-07' has 1 mantissa decimal and 7 more from the exponent
        decimals -= int(exponent)
    # large exponents (or 'nan'/'inf') leave no decimals at all
    return max(decimals, 0)


def coord_unc(x,y):
    """ Determine the uncertainty in meters for a given FoG coordinate at its lat/lon (assuming earth was a sphere)

    The coordinate is taken to be exact up to its last printed decimal, so the
    maximum error is one unit of that decimal (in degrees), converted to
    meters on a sphere with a radius of 6378 km.

    Parameters
    -----------
    x: longitude in degrees (int or float)
    y: latitude in degrees (int or float)

    Returns
    --------
    dx, dy: uncertainty in meters (east-west, north-south)

    Raises
    -------
    TypeError: if x or y is neither an integer nor a float
    """
    # number of decimals given for each coordinate
    xd = _n_decimals(x, 'x')
    yd = _n_decimals(y, 'y')

    # maximum error in degrees
    xe = 10**(-xd)
    ye = 10**(-yd)

    # maximum error in meters; a degree of longitude shrinks with cos(latitude)
    dy = ye * ((2.* pi * 6378000.) / 360.)
    dx = xe * (2. * pi * cos(radians(y)) * 6378000.) / 360.

    return dx, dy

In [None]:
def transform_coords(x,y,in_proj=None,out_proj=None):
    """Transform coordinates from one EPSG projection into another.

    Parameters
    -----------
    x, y: coordinate(s) in the input projection, handed to pyproj.transform as given
    in_proj: EPSG code of the input projection (anything str() turns into the code)
    out_proj: EPSG code of the output projection

    Returns
    --------
    The result of pyproj.transform for x, y.

    Raises
    -------
    AssertionError: if in_proj or out_proj is None
    """
    assert in_proj is not None,'Input projection may not be None.'
    assert out_proj is not None,'Output projection may not be None.'

    # build source and target Proj objects from their EPSG codes
    source, target = [pyproj.Proj(init='epsg:'+str(code)) for code in (in_proj, out_proj)]
    return pyproj.transform(source, target, x, y)

# Build a buffer around all FoG points in the area, determined by the uncertainty of the coordinates

#### We define the two Projs we are dealing with

In [None]:
# NOTE(review): despite its name, `utm7n` holds EPSG:32605, i.e. WGS84 / UTM zone 5N
utm7n = pyproj.Proj(init='epsg:32605') # WGS84 UTM 5N -> LeBris/Paul
latlon = pyproj.Proj(init='epsg:4326') # WGS84 lat/lon -> FoG

#### Select the FoG points within the extent rectangle of the LeBris/Paul data

In [None]:
# bounding box of all LeBris/Paul polygons; indices 0/2 are used as x and 1/3 as y below
lp_extent = lp.total_bounds

In [None]:
# convert UTM5N to WGS84 lat/lon points
# x_lp / y_lp hold the two opposite corners of the LeBris/Paul extent
x_lp = [lp_extent[0], lp_extent[2]]
y_lp = [lp_extent[1], lp_extent[3]]
# NOTE(review): x1, y1 are not read by any cell visible here and are overwritten further down;
# this call looks like a leftover - confirm before removing
x1,y1 = utm7n(x_lp, y_lp)
# corner coordinates of the extent in lat/lon, used to pre-select the FoG points
x_lpll, y_lpll = pyproj.transform(utm7n,latlon,x_lp,y_lp)

In [None]:
# keep only the FoG glaciers whose coordinates lie inside the lat/lon extent
# of the LeBris/Paul data (bounds included)
foga = foga.loc[
    foga.LONGITUDE.between(x_lpll[0], x_lpll[1])
    & foga.LATITUDE.between(y_lpll[0], y_lpll[1])
]

In [None]:
# number of FoG glaciers left after the extent filter
len(foga)

#### Create a buffer around them, determined by the uncertainty of the coordinates

In [None]:
# convert WGS84 lat/lon points to UTM5N
xs = foga.LONGITUDE.values
ys = foga.LATITUDE.values
# NOTE(review): x1, y1 are not read by any cell visible here - looks like a leftover; confirm before removing
x1,y1 = latlon(xs, ys)
# xm, ym: the FoG points in the projection of the LeBris/Paul data (EPSG:32605)
xm, ym = pyproj.transform(latlon,utm7n,xs,ys)

In [None]:
# Buffer every FoG point (UTM coordinates, metres) by the north-south uncertainty
# of its coordinate. The uncertainty has to be derived from the FoG lat/lon values
# as given (xs, ys): coordinates transformed to UTM and back carry full float
# precision, so coord_unc() would report a near-zero uncertainty for them.
buffers = [Point(x_utm, y_utm).buffer(coord_unc(lon, lat)[1])
           for x_utm, y_utm, lon, lat in zip(xm, ym, xs, ys)]

In [None]:
# make the buffer one geometry
# (merges all per-point buffers so that a single disjoint test per polygon suffices)
buffer_union = cascaded_union(buffers)

In [None]:
# display the merged buffer geometry
buffer_union

In [None]:
# Boolean series over the LeBris/Paul polygons: True where a polygon does not intersect the FoG buffer.
# Those polygons are candidates for a direct import into FoG (there WILL be mistakes) !!!
disjoint = lp.disjoint(buffer_union)

In [None]:
# split the polygon index by the outcome of the disjoint test
disjoint_ix = disjoint.loc[disjoint].index     # no FoG buffer nearby
intersect_ix = disjoint.loc[~disjoint].index   # touches a FoG buffer

In [None]:
# inspect both index sets (a notebook cell only displays the value of its last expression)
disjoint_ix
intersect_ix

In [None]:
# get the real geometries
fast_to_FoG = lp[lp.index.isin(disjoint_ix)]
check_closely = lp[lp.index.isin(intersect_ix)]

In [None]:
# sanity check: every polygon ended up in exactly one of the two groups
assert len(fast_to_FoG)+len(check_closely)==len(lp)

In [None]:
# number of polygons that do not intersect the FoG buffer
len(fast_to_FoG)

In [None]:
# preview the first rows of the disjoint polygons
fast_to_FoG.head()

In [None]:
# polygons intersecting the FoG buffer
check_closely

In [None]:
# Map of both groups: green = disjoint from the FoG buffer, red = intersecting it.
# The second layer is drawn by GeoDataFrame.plot onto the axes of the first one;
# matplotlib's Axes.plot() cannot render a GeoDataFrame.
p = fast_to_FoG.plot(color='g')
check_closely.plot(ax=p, color='r')

In [None]:
# load the three sheets of the FoG submission workbook (A, B and D) as DataFrames
template_a, template_b, template_d = [
    pd.read_excel('C:\\users\\jlandman\\Desktop\\FoG_Subm_for_pandas.xls', sheetname=sheet)
    for sheet in ('A - GENERAL INFORMATION', 'B - STATE', 'D - CHANGE')
]

In [None]:
# Manual assignment between FoG glaciers and LeBris/Paul (LP) polygons.
#   key:   (WGMS_ID, glacier name) of the FoG entry
#   value: ID of the matching LP polygon, or np.nan when no polygon could be assigned
assign_dict = {(3373, 'AIALIK'): np.nan,    # no equivalent 
               (3543, 'ALLEN'): np.nan,     # no equivalent
              (92, 'APPLEGATE'): 119,
              (102, 'BAKER'): 280,
              (105, 'BALTIMORE'): 214,
              (165, 'BARNARD'): 184,
              (168, 'BARRY'): 971,
              (1390, 'BARTLETT'): np.nan,  # no equ
              (3372, 'BEAR'): np.nan,      # no equ
              (3376, 'BEAR LAKE'): np.nan, # no eq 
              (97, 'BELOIT'): 302,
              (3377, 'BENCH'): np.nan,
              (98,'BLACKSTONE'): 980,
              (157,'BRILLIANT'): np.nan,
              (162,'BRYN MAWR'): 977,
              (169,'CASCADE'): 973,
              (100,'CATARACT'): 33,
              (180,'CHENEGA'): np.nan,
              (3379,'CHERNOF'): np.nan,
              (152,'CHILDS'): np.nan,
              (176,'CLAREMONT NORTH'): 311,
              (177,'CLAREMONT WEST'): 312,
              (3544,'COLONY'): 203,
              (156,'COLUMBIA (627)'): 976,
              (178,'CONTACT'): 325,   # is okay as only front variation present in FoG
              (167,'COXE'): 978,
              (3381,'DESERTED'): np.nan,
              (101,'DETACHED'): np.nan,
              (3382,'DINGLESTADT'): 364,
              (3383,'DOUBLE'): 774,   # ATTENTION: Name is different: DOUBLE in FoG, Big RIver Glacier in LP and Big River Lobe Double Glac in GLIMS
              (85,'EKLUTNA'): np.nan,
              (3790,'EXCELSIOR'): 981,
              (86,'EXIT'): np.nan,
              (91,'FALLING'): np.nan,
              (3386,'FORK TLIKAKILA'): 777,
              (3791,'GREWINGK'): np.nan,
              (172,'HARRIMAN'): 975,
              (160,'HARVARD'): np.nan,
              (3390,'HOLGATE'): 982,
              (166,'HOLYOKE'): 185,
              (3391,'KACHEMAK'): np.nan,
              (3310,'KNIK'): 204,
              (4333,'KNIK NORTH'): np.nan,  # part of KNIK (only one polygon in LP)
              (4332,'KNIK SOUTH'): np.nan,  # part of KNIK (only one polygon in LP)
              (95,'LAWRENCE'): 298,
              (173,'LEARNARD'): 100,
              (3394,'LITTLE DINGLESTADT'): 360,
              (3545,'MARCUS BAKER'): 177,
              (96,'MARQUETTE'): 301,
              (3546,'MATANUSKA E'): np.nan,  # one polygon together with MATANUSKA W in LP
              (3547,'MATANUSKA W'): np.nan,  # one polygon together with MATANUSKA E in LP
              (3396,'MC CARTY'): np.nan,
              (158,'MEARES'): np.nan,
              (3548,'NELCHINA'): np.nan,
              (179,'NELLIE JUAN'): np.nan,
              (3414,'NORTH FORK TLIKAKILA'): 769,
              (3399,'NORTHEASTERN'): 366,    # not absolutely sure if this is the one
              (3793,'NORTHWESTERN'): np.nan,
              (3794,'NUKA'): np.nan,         # VERY DIFFICULT CASE
              (103,'PENNIMAN EAST'): np.nan, # not really clear where the transition is
              (104,'PENNIMAN WEST'): 279,    # probably true but not 100% clear 
              (174,'PORTAGE'): 292,
              (3335,'RED'): np.nan,           
              (99,'ROARING'): np.nan,
              (108,'SADDLEBAG'): np.nan,
              (4334,'SCOTT'): np.nan,
              (170,'SERPENTINE'): 979,
              (3413,'SHAMROCK'): 781,
              (151,'SHERIDAN'): np.nan,
              (107,'SHERMAN'): np.nan,
              (155,'SHOUP'): 970,
              (3401,'SKILAK'): np.nan,
              (161,'SMITH'): np.nan,
              (3549,'SOUTH FORK TSINA'): 179,  # very strange naming, but this is it
              (1391,'SPENCER'): np.nan,        # FoG coordinate was wrong
              (171,'SURPRISE'): 974,
              (3403,'TANAINA'): 792,
              (93,'TAYLOR US'): 310,
              (3405,'TAZLINA'): np.nan,
              (175,'TEBENKOF'): np.nan,
              (3550,'TONSINA'): 195,
              (1389,'TRAIL'): 307,
              (3551,'TSINA'): 178,
              (3407,'TURQUOISE'): 787,
              (3408,'TUSTUMENA'): np.nan,
              (3409,'TUXEDNI'): 928, 
              (1387,'UNNAMED US0623'): np.nan,    # seems to be part of ID 976 in LP as B table says only 4km long
              (106,'UNNAMED US624'): np.nan,      # unclear which is meant; probably no equivalent
              (154,'VALDEZ'): 211,
              (163,'VASSAR'): 215,
              (164,'WELLESLEY'): 972,
              (94,'WOLVERINE'): np.nan,
              (3580,'WOODWORTH'): np.nan,
              (153,'WORTHINGTON'): 181,
              (3412,'WORTHMANNS'): np.nan,
              (159,'YALE'): np.nan,
              (3797,'YALIK'):409}

In [None]:
# constants: values written unchanged into every row of the FoG submission tables
political_unit = 'US'
geogr_loc = 'Western Alaska Range'
year = 2006
# presumably YYYYMMDD with 99 standing for an unknown month/day - confirm against the FoG submission format
survey_date = 20069999
ref_date = 19509999
# platform/method codes of the survey and the reference data set
surv_plat_meth = 'sP'
ref_plat_meth = 'aM'
remarks_b = 'Outlines derived by manual/automated digitizing based on large-scale 7.5 min topographic quadrangle maps. Some corrected manually.'
# spelling fixed ("topographic"); this text ends up in the submitted D table
remarks_d = '1950s: Outlines and elevation derived from topographic maps, 2006: elevation derived from SPOT5 HRS (SPIRIT)'
investigator = 'Raymond LE BRIS and Frank PAUL'
# spelling fixed ("Switzerland"); this text ends up in the submitted B and D tables
spons_agenc = 'Dept. of Geography, University of Zurich, Winterthurerstrasse 190, 8057 Zurich, Switzerland'
ref = 'Le Bris, R. and Paul, F. (2015); Annals of Glaciology, 56(70), pp.184-192.'

In [None]:
# WGMS IDs to hand out to LeBris/Paul glaciers that have no FoG entry yet
new_ids = range(10000, 11000, 1)

# Invert the manual assignment once: LeBris/Paul ID -> (WGMS_ID, FoG glacier name).
# NaN values (no equivalent) are skipped; setdefault keeps the first key per ID,
# which is what the former list(...).index() lookup returned.
lp_to_fog = {}
for fog_key, lp_id in assign_dict.items():
    if not pd.isnull(lp_id):
        lp_to_fog.setdefault(lp_id, fog_key)

for ind, row in lp.iterrows():

    fog_match = lp_to_fog.get(row.ID)

    if fog_match is None:
        # glacier unknown to FoG: take the next free ID and create its table A entry
        wgms_id = new_ids[0]
        new_ids = new_ids[1:]
        gname = row.glacier_na

        # column name fixed: it was misspelled 'POLITCAL_UNIT', which created a stray column
        template_a.loc[ind, 'POLITICAL_UNIT'] = political_unit
        template_a.loc[ind, 'WGMS_ID'] = wgms_id
        template_a.loc[ind, 'GLACIER_NAME'] = gname
        template_a.loc[ind, 'GEOGRAPHICAL_LOCATION_GENERAL']  = geogr_loc

        # a point guaranteed to lie inside the polygon serves as glacier coordinate
        repres_point = row.geometry.representative_point()
        repres_point_ll = transform_coords(repres_point.x, repres_point.y,32605,4326)

        template_a.loc[ind, 'LATITUDE'] = round(repres_point_ll[1], 6)
        template_a.loc[ind, 'LONGITUDE'] = round(repres_point_ll[0], 6)
        template_a.loc[ind, 'REMARKS'] =  ''
    else:
        # glacier already in FoG: reuse its WGMS ID and its FoG name in both tables
        # (table D used to get the LeBris/Paul name while table B got the FoG name)
        wgms_id, gname = fog_match

    # table B (state) - identical for new and already known glaciers
    template_b.loc[ind, 'POLITICAL_UNIT'] = political_unit
    template_b.loc[ind, 'GLACIER_NAME'] = gname
    template_b.loc[ind, 'WGMS_ID'] = wgms_id
    template_b.loc[ind, 'YEAR'] = year
    template_b.loc[ind, 'AREA'] = row.AREA
    template_b.loc[ind, 'SURVEY_DATE'] = survey_date
    template_b.loc[ind, 'SURVEY_PLATFORM_METHOD'] = surv_plat_meth
    template_b.loc[ind, 'INVESTIGATOR'] = investigator
    template_b.loc[ind, 'SPONSORING_AGENCY'] = spons_agenc
    template_b.loc[ind, 'REFERENCE'] = ref
    template_b.loc[ind, 'REMARKS'] = remarks_b

    # table D (change) - identical for new and already known glaciers
    template_d.loc[ind, 'POLITICAL_UNIT'] = political_unit
    template_d.loc[ind, 'GLACIER_NAME'] = gname
    template_d.loc[ind, 'WGMS_ID'] = wgms_id
    template_d.loc[ind, 'YEAR'] = year
    template_d.loc[ind, 'LOWER_BOUND'] = np.nan
    template_d.loc[ind, 'UPPER_BOUND'] = np.nan
    template_d.loc[ind, 'AREA_SURVEY_YEAR'] = year
    template_d.loc[ind, 'THICKNESS_CHANGE'] = row.dh_mean * 1000.  # unit: mm
    template_d.loc[ind, 'VOLUME_CHANGE'] = row.dh_mean * row.AREA * 1000.  # unit: m*km2*1000 = 1000m3
    template_d.loc[ind, 'SURVEY_DATE'] = survey_date
    template_d.loc[ind, 'SURVEY_DATE_PLATFORM_METHOD'] = surv_plat_meth
    template_d.loc[ind, 'REFERENCE_DATE'] = ref_date
    template_d.loc[ind, 'REFERENCE_DATE_PLATFORM_METHOD'] = ref_plat_meth
    template_d.loc[ind, 'INVESTIGATOR'] = investigator
    template_d.loc[ind, 'SPONSORING_AGENCY'] = spons_agenc
    template_d.loc[ind, 'REFERENCE'] = ref
    template_d.loc[ind, 'REMARKS'] = remarks_d

template_d.head()

In [None]:
# write the three filled tables to separate Excel files, without the DataFrame index
template_a.to_excel('C:\\users\\jlandman\\Desktop\\RL_FP_to_FoG_automatic_A.xls', index=False)
template_b.to_excel('C:\\users\\jlandman\\Desktop\\RL_FP_to_FoG_automatic_B.xls', index=False)
template_d.to_excel('C:\\users\\jlandman\\Desktop\\RL_FP_to_FoG_automatic_D.xls', index=False)

In [None]:
# number of rows in table A
len(template_a)

In [None]:
# number of rows in table B
len(template_b)

In [None]:
# number of rows in table D
len(template_d)

In [None]:
# number of assign_dict entries whose value is not NaN, i.e. FoG glaciers with an assigned LeBris/Paul ID
np.count_nonzero(~np.isnan(list(assign_dict.values())))

In [None]:
# preview the first rows of table A
template_a.head()

# Assign GLIMS IDs

In [None]:
# GLIMS polygons read via salem (file name suggests the Alaska subset of the 2016-04-29 database)
glims = salem.utils.read_shapefile('C:\\Users\\jlandman\\Desktop\\glims_db_20160429\\glims_polygons_alaska.shp')

In [None]:
# For every glacier in table A, look up the GLIMS polygon that contains its coordinate.
# NOTE(review): assumes the GLIMS geometries are in lat/lon like table A - confirm.
glims_id_success = {}   # WGMS_ID -> glac_id of a GLIMS polygon containing the point
glims_id_fail = {}      # WGMS_ID -> NaN when no GLIMS polygon contains the point

ct=0
for k, row in template_a.iterrows():
    gp = shpg.Point(row.LONGITUDE, row.LATITUDE)
    # 0.2 x 0.2 degree search window centred on the point. The former hand-written
    # vertex list repeated one corner and therefore only covered a triangle.
    rectangle = shpg.box(row.LONGITUDE-0.1, row.LATITUDE-0.1, row.LONGITUDE+0.1, row.LATITUDE+0.1)
    subset = glims[glims.intersects(rectangle)]
    for i, r in subset.iterrows():
        ct+=1
        if r.geometry.contains(gp):
            glims_id_success[row.WGMS_ID] = r['glac_id']
        print(ct)
    # a glacier only counts as failed when none of the candidates contained the point
    # (before, a single non-matching neighbour was enough to flag it)
    if row.WGMS_ID not in glims_id_success:
        glims_id_fail[row.WGMS_ID] = np.nan

In [None]:
# number of glaciers with a GLIMS ID; `glims_id` is not defined anywhere in the cells shown,
# the dictionary filled by the lookup is `glims_id_success`
len(glims_id_success)

In [None]:
# number of rows in table A (presumably to compare with the number of GLIMS matches)
len(template_a)