# Explore GLATHIDA

In [None]:
import pandas as pd
import geopandas as gpd
import os
import numpy as np
import shapely.geometry as shpg
import matplotlib.pyplot as plt
import shapely.ops
from salem import datasets
from cleo import Map
%pylab inline

## Files

In [None]:
# Input/output locations, relative to the notebook's working directory.
# Every path is assembled with os.path.join so that it resolves on any
# operating system (a literal '..\\name' is only a relative path on Windows).
gtd_dir = os.path.join('..', 'GlaThiDa_2014')  # folder holding T.csv
rgi_dir = os.path.join('..', 'rgi50', '11_rgi50_CentralEurope')  # folder holding the RGI shapefile
output_dir = os.path.join('..', 'GLATHIDA_LINKS')  # PDFs and CSVs are written here
f_T = os.path.join(gtd_dir, 'T.csv')
f_rgi = os.path.join(rgi_dir, '11_rgi50_CentralEurope.shp')
# CSV with the manually checked links, read further down
corr_file = os.path.join('..', 'Manual_links_working_version_20151214.csv')

## Params 

In [None]:
# Bounding box used to select the candidate entries:
# [min, max] longitude and [min, max] latitude.
lon_range, lat_range = [0, 20], [40, 50]

## Distance function

In [None]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Great-circle distance in meters between points given in decimal degrees.

    All arguments go through NumPy, so each may be a scalar or an array and
    the usual broadcasting rules apply (e.g. one point against an array of
    points).

    Parameters
    ----------
    lon1, lat1 : float or array-like
        Longitude and latitude of the first point(s), in decimal degrees.
    lon2, lat2 : float or array-like
        Longitude and latitude of the second point(s), in decimal degrees.

    Returns
    -------
    float or numpy.ndarray
        Distance(s) in meters on a sphere of radius 6371000 m.
    """

    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Rounding errors can push `a` marginally outside [0, 1] (e.g. for
    # antipodal points), which would turn the arcsin below into NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371000  # Radius of earth in meters
    return c * r

## Select IDS that are candidates

In [None]:
# Load the GlaThiDa table: semicolon-separated, ISO-8859-15 encoded, with
# the column names on the first row.
pda = pd.read_csv(
    f_T,
    sep=';',
    header=0,
    encoding='iso8859_15',
    low_memory=False,
)

In [None]:
# Discard the entries lacking a mean thickness or an area; the row count is
# printed before and after to show how many were removed.
print(len(pda))
pda = pda[pda[['MEAN_THICKNESS', 'AREA']].notnull().all(axis=1)]
print(len(pda))
# IDs of all remaining entries (no regional selection applied yet)
all_ids = pda['GlaThiDa_ID'].values

In [None]:
# Store the IDs as integers. The builtin `int` is used because the `np.int`
# alias (which was just another name for it) was removed in NumPy 1.24.
pda['GlaThiDa_ID'] = pda.GlaThiDa_ID.values.astype(int)

In [None]:
# IDs of the entries whose coordinates fall inside the lon/lat bounding box
alps_ids = pda[(pda.LON >= lon_range[0]) & (pda.LON <= lon_range[1]) &
               (pda.LAT >= lat_range[0]) & (pda.LAT <= lat_range[1])].GlaThiDa_ID.values
# print() call: works on Python 2 and 3 (the bare statement is a SyntaxError
# on Python 3) and matches the other cells
print(len(alps_ids))

Read RGI

In [None]:
# Load the RGI glacier outlines from the shapefile
pdrgi = gpd.read_file(f_rgi)

# Create an overview plot with ALL glaciers

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# Overview PDF: one page per candidate GlaThiDa entry, showing the five RGI
# outlines with the closest centroids on two Google map backgrounds.
curr = 1  # current plot
total = len(alps_ids)  # total number of plots


with PdfPages(os.path.join(output_dir, 'Glathida_Glaciers_Alps_ALL.pdf')) as pdf:
    for gid in alps_ids:
        glacier = pda[pda.GlaThiDa_ID == gid].iloc[0]
        lon, lat = glacier.LON, glacier.LAT
        pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
        # sort_values replaces DataFrame.sort(columns=...), removed in pandas 0.20
        sortrgi = pdrgi.sort_values(by='DIST')

        # For GoogleMap we need a lon lat range to generate the map
        mmlon = [lon, lon]
        mmlat = [lat, lat]

        for i in range(5):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we account for this here:
            if rgig.geometry.geom_type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.geom_type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                # (.geoms: multi-part geometries are not iterable in Shapely 2)
                allparts = [p.buffer(0) for p in rgig.geometry.geoms]
                rgig.geometry = shapely.ops.unary_union(allparts)
                x, y = rgig.geometry.exterior.xy

            # Grow the map extent so that it contains this outline
            mmlon = [np.min(np.append(mmlon, x)), np.max(np.append(mmlon, x))]
            mmlat = [np.min(np.append(mmlat, y)), np.max(np.append(mmlat, y))]

        # Make a local map where to plot the polygons
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat)  # also possible:  maptype='terrain'
        local_map = Map(local.grid, countries=False, nx=640)
        local_map.set_lonlat_countours()

        # Prepare the figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
        ax1.set_title('{}: '.format(gid) + glacier.POLITICAL_UNIT + '-' + glacier.GLACIER_NAME +
                      ' {:.2f}km2'.format(glacier.AREA))

        # Plot glaciers
        colors = ['red', 'orange', 'green', 'blue', 'purple', 'magenta']
        for i in range(5):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we (again) account for this here:
            if rgig.geometry.geom_type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.geom_type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                allparts = [p.buffer(0) for p in rgig.geometry.geoms]
                rgig.geometry = shapely.ops.unary_union(allparts)
                x, y = rgig.geometry.exterior.xy

            # Mark the centroid of the closest RGI glacier
            if i == 0:
                local_map.set_geometry(shpg.Point(rgig.CenLon, rgig.CenLat), edgecolor='k', marker='x', linewidth=4, markersize=100, zorder=50, text='RGI')

            # RGI polygon label: ID and area, plus the name when there is one
            if rgig.Name is None:
                plabel = str(rgig.RGIId)+'\n'+str(rgig.Area)+'km2'
            else:
                plabel = str(rgig.RGIId)+'\n'+str(rgig.Area)+'km2\n'+str(rgig.Name)

            local_map.set_geometry(rgig.geometry.exterior, color=colors[i], linewidth=3, label=plabel)  # adjusted for RGI 5.0
        # NOTE(review): `rgig` is the last (fifth closest) polygon of the loop
        # above at this point - confirm that this centroid is the intended one.
        local_map.set_geometry(shpg.Point(rgig.CenLon, rgig.CenLat), c='k', marker='x', markersize=30, zorder=51)  # again adjusted for RGI 5.0
        # Plot the GlaThiDa point
        local_map.set_geometry(shpg.Point(lon, lat), color='g', marker='x', linewidth=4, markersize=100, zorder=50, text='GlaT')

        # Left panel: default map type
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax1, addcbar=False)

        # Right panel: same extent on the terrain map
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat, maptype='terrain')
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax2, addcbar=False)
        plt.subplots_adjust(left=0.04, right=0.80, top=0.94, bottom=0.07)
        plt.legend(bbox_to_anchor=(1.02, 1.), fontsize=18, loc=2, borderaxespad=0, frameon=False, numpoints=1, scatterpoints=1)
        pdf.savefig(fig)
        plt.close()

        # Progress report; print() works on Python 2 and 3
        if curr % 5 == 0:
            print("%s / %s plots done." % (curr, total))
        curr += 1

# Try an automated remark that checks if the found RGI polygon corresponds to the GlaThiDa glacier

Check whether:

+ the area of the first found RGI polygon is closer to the GlaThiDa area than the others
+ the name of the RGI polygon (if exists) corresponds to the GlaThiDa name

In [None]:
# Grade the link between every candidate GlaThiDa entry and its closest RGI
# polygon: 'A' = area and name both agree, 'B' = exactly one of them agrees,
# 'C' = neither agrees.
# Object dtype, so that the string grades can be stored next to the NaN default.
pda['AutoRemark'] = pd.Series(np.nan, index=pda.index, dtype=object)

for gid in alps_ids:
    glacier = pda[pda.GlaThiDa_ID == gid].iloc[0]
    lon, lat = glacier.LON, glacier.LAT
    pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
    # sort_values replaces DataFrame.sort(columns=...), removed in pandas 0.20
    sortrgi = pdrgi.sort_values(by='DIST')
    closest = sortrgi.iloc[0]

    # Absolute area difference between GlaThiDa and each of the five closest polygons
    areas = [abs(sortrgi.iloc[i].Area - glacier.AREA) for i in range(5)]

    # The areas agree when the closest polygon has a strictly smaller area
    # difference than the second, third etc.
    area_same = all(x > areas[0] for x in areas[1:])

    # The names agree when one contains the other (case-insensitive). The
    # comparison uses the CLOSEST polygon; it used to read the variable left
    # over from the area loop, i.e. the fifth closest polygon.
    if closest.Name is not None:
        rgi_name = closest.Name.lower()
        gtd_name = glacier.GLACIER_NAME.lower()
        name_same = rgi_name in gtd_name or gtd_name in rgi_name
    else:
        name_same = False

    # print() works on Python 2 and 3; same "<bool> <bool>" output as before
    print('%s %s' % (area_same, name_same))

    if area_same and name_same:
        remark = 'A'
    elif area_same or name_same:
        remark = 'B'
    else:
        remark = 'C'
    # .loc writes into `pda` itself instead of relying on chained assignment
    pda.loc[pda.GlaThiDa_ID == gid, 'AutoRemark'] = remark


In [None]:
# Export the candidate entries, indexed by GlaThiDa ID, together with the
# automated remark and an empty column for the manual one.
pda_out = pda[pda.GlaThiDa_ID.isin(alps_ids)].set_index(keys='GlaThiDa_ID')
pda_out = pda_out[[
    'POLITICAL_UNIT', 'GLACIER_NAME', 'SOURCE_ID', 'ID',
    'LAT', 'LON', 'REMARKS', 'AutoRemark',
]]
pda_out['ManualRemark'] = np.nan
pda_out.to_csv(
    'Automated_links_GlaThiDa_to_RGI_ALPS.csv',
    encoding='iso8859_15',
)

# Read the file with the manual corrections and linkings

In [None]:
# Read the manually checked links and split them into safe equivalents,
# duplicates and problem cases.
corr = pd.read_csv(corr_file)
# select all 'safe' equivalents ("RGI_equivalent" is also NaN when glacier is a duplicate)
# .copy(): the area columns added below must land in an independent frame
# rather than in a slice of `corr` (avoids pandas' SettingWithCopy trap)
corr_safe_equivalents = corr[~pd.isnull(corr.RGI_equivalent)].copy()
# Attach the GlaThiDa area of every linked entry ...
for gla_id in corr_safe_equivalents.GlaThiDa_ID.values:
    corr_safe_equivalents.loc[corr_safe_equivalents['GlaThiDa_ID'] == gla_id, 'GlaThiDa_area'] = pda[pda.GlaThiDa_ID == gla_id].AREA.values[0]
# ... and the area of the RGI polygon it is linked to
for rgi_id in corr_safe_equivalents.RGI_equivalent.values:
    corr_safe_equivalents.loc[corr_safe_equivalents['RGI_equivalent'] == rgi_id, 'RGI_area'] = pdrgi[pdrgi.RGIId == rgi_id].Area.values[0]

# select all duplicates
corr_duplicates = corr[corr.Duplicate != 'False']
corr_duplicates = corr_duplicates[~pd.isnull(corr_duplicates.RGI_equivalent)].copy()
corr_duplicates['Duplicate'] = corr_duplicates.Duplicate.values.astype(np.float64)

# problem cases: no RGI equivalent although the entry is not flagged as duplicate
problems = (corr[pd.isnull(corr.RGI_equivalent)])
problems = problems[problems.Duplicate == 'False']
# (bare expression in the middle of a cell: evaluated, but nothing is shown)
len(problems)


problems.to_csv(os.path.join(output_dir,'Problems_finding_equivalents.csv'))

In [None]:
# Number of safe links (shown as the cell output)
len(corr_safe_equivalents)

### Write out the result files for Fabien: 

+ One with safe linkings of GlaThiDa ID, RGI ID and only the comment (if there are duplicates, take the most recent version of GlaThiDa entry => this has been checked manually)
+ One with RGI ID linked to all duplicates in GlaThiDa
+ One with safe linkings of RGI area and GlaThiDa area

In [None]:
# File 1: GlaThiDa ID, glacier name and linked RGI ID of the safe equivalents
corr_safe_equivalents[['GlaThiDa_ID','GLACIER_NAME','RGI_equivalent']].to_csv(os.path.join(output_dir,'Safe_equivalents_RGI_GlaThiDa.csv'))

In [None]:
# File 2: give every duplicate the RGI ID of the safe entry referenced by its
# 'Duplicate' value, then write the table out.
# .copy(): the .loc assignment below must act on an independent frame
corr_duplicates = corr_duplicates[['RGI_equivalent','GlaThiDa_ID','GLACIER_NAME','Duplicate']].copy()

#print corr_safe_equivalents
df_out = pd.DataFrame([])

for dup_number in corr_duplicates.Duplicate.values:
    try:
        RGI_Id = corr_safe_equivalents[corr_safe_equivalents.GlaThiDa_ID == dup_number].RGI_equivalent.values[0]
    except IndexError:
        # No safe entry carries this ID: report it and leave the rows as they are.
        # print() works on Python 2 and 3 (the bare statement is a SyntaxError on 3).
        print('GlaThiDa ID %s (%s) has no RGI equivalent' % (dup_number, corr_duplicates.loc[corr_duplicates.GlaThiDa_ID == dup_number, 'GLACIER_NAME']))
        continue
    corr_duplicates.loc[corr_duplicates['Duplicate'] == dup_number, 'RGI_equivalent'] = RGI_Id

corr_duplicates.to_csv(os.path.join(output_dir,'Duplicates_RGI_GlaThiDa.csv'))


In [None]:
# File 3: safe equivalents together with the GlaThiDa area and the RGI area
corr_safe_equivalents[['GlaThiDa_ID','GLACIER_NAME','RGI_equivalent','GlaThiDa_area', 'RGI_area']].to_csv(os.path.join(output_dir,'Area_comparison_RGI_GlaThiDa.csv'))

## See if these data follow a volume area scaling

In [None]:
# From here on only the candidate entries are kept. An explicit copy is
# taken because a column is added to this frame afterwards; assigning into a
# plain filtered slice triggers pandas' SettingWithCopyWarning.
pda = pda[pda.GlaThiDa_ID.isin(alps_ids)].copy()

In [None]:
# Volume as area times mean thickness.
# NOTE(review): the 1e-3 factor presumably converts km2 * m into km3 -
# confirm the unit of MEAN_THICKNESS.
pda['VOLUME'] = pda['AREA'] * pda['MEAN_THICKNESS'] * 1e-3

In [None]:
# Volume against area on log-log axes
pda.plot(x='AREA', y='VOLUME', kind='scatter', logx=True, logy=True)
plt.show();

## Check the number of duplicates 

In [None]:
# Print, separated by an empty line, every group of entries that share the
# same glacier name.
gn = pda.groupby('GLACIER_NAME')
duplicates = {}
for n, d in gn:
    if len(d) <= 1:
        continue
    print()
    shown = d.set_index('GlaThiDa_ID')
    print(shown[['GLACIER_NAME', 'AREA', 'MEAN_THICKNESS', 'VOLUME', 'SURVEY_DATE']])

In [None]:
# Refresh the ID list from the (now filtered) table and display it
alps_ids = pda.GlaThiDa_ID.values
alps_ids

## For all IDs, see if we find a RGI equivalent

See if the GlaThiDa point lies inside one of the closest RGI polygons (the loop below tests the eleven nearest centroids, positions 0 to 10):

In [None]:
# For every GlaThiDa point, look for an RGI polygon that intersects it,
# testing the polygons in order of increasing centroid distance.
not_found = []
found = dict()
for gid in alps_ids:
    glacier = pda[pda.GlaThiDa_ID == gid].iloc[0]
    lon, lat = glacier.LON, glacier.LAT
    pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
    # sort_values replaces DataFrame.sort(columns=...), removed in pandas 0.20
    sortrgi = pdrgi.sort_values(by='DIST')
    shpp = shpg.Point(lon, lat)
    # Positions 0..10 are tested, i.e. eleven polygons (the same range as the
    # former `how_long > 10` stop condition).
    for how_long in range(11):
        if sortrgi.iloc[how_long].geometry.intersects(shpp):
            found[gid] = sortrgi.iloc[how_long].RGIId
            break
    else:
        # none of the tested polygons intersects the point
        not_found.append(gid)

In [None]:
# Number of linked and of unlinked entries (shown as the cell output)
len(found), len(not_found)

29 glaciers could not be attributed to any of their eleven closest RGI polygons.

## For those not found, no simple rule... Have a look 

For those that were not found with the above method: check whether the distance to the closest RGI centroid is larger than 2000 m.
# Why only 2000m?

In [None]:
# Split the unlinked entries by the distance to the closest RGI centroid:
# more than 2000 m away -> given up on, otherwise kept for a closer look.
still_not_found = []
final_not_found = []
for gid in not_found:
    glacier = pda[pda.GlaThiDa_ID == gid].iloc[0]
    lon, lat = glacier.LON, glacier.LAT
    pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
    # sort_values replaces DataFrame.sort(columns=...), removed in pandas 0.20
    sortrgi = pdrgi.sort_values(by='DIST')
    if sortrgi['DIST'].iloc[0] > 2000:
        # Extreme case
        print(sortrgi['DIST'].iloc[0], glacier.POLITICAL_UNIT, glacier.GLACIER_NAME)
        final_not_found.append(gid)
    else:
        # Keep the others for the closer look
        still_not_found.append(gid)

OK, so for three glaciers it's clear we can't do much. And what about the others?

In [None]:
# Show the main attributes of the entries kept for the closer look
pda_left = pda[pda.GlaThiDa_ID.isin(still_not_found)]
pda_left[['POLITICAL_UNIT','GLACIER_NAME','LON','LAT', 'REMARKS']]

## For those left they are close to shapes. Make a plot

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# One PDF page per entry that is close to an RGI centroid but inside none of
# the tested polygons, showing the five RGI outlines with the closest
# centroids on two Google map backgrounds.
curr = 1  # current plot
total = len(still_not_found)  # total number of plots

with PdfPages(os.path.join(output_dir, 'explore_glathida_notfound_glaciers_Alps.pdf')) as pdf:
    for gid in still_not_found:
        glacier = pda[pda.GlaThiDa_ID == gid].iloc[0]
        lon, lat = glacier.LON, glacier.LAT
        pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
        # sort_values replaces DataFrame.sort(columns=...), removed in pandas 0.20
        sortrgi = pdrgi.sort_values(by='DIST')

        # For GoogleMap we need a lon lat range to generate the map
        mmlon = [lon, lon]
        mmlat = [lat, lat]

        for i in range(5):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we account for this here:
            if rgig.geometry.geom_type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.geom_type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                # (.geoms: multi-part geometries are not iterable in Shapely 2)
                allparts = [p.buffer(0) for p in rgig.geometry.geoms]
                rgig.geometry = shapely.ops.unary_union(allparts)
                x, y = rgig.geometry.exterior.xy

            # Grow the map extent so that it contains this outline
            mmlon = [np.min(np.append(mmlon, x)), np.max(np.append(mmlon, x))]
            mmlat = [np.min(np.append(mmlat, y)), np.max(np.append(mmlat, y))]

        # Make a local map where to plot the polygons
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat)  # also possible:  maptype='terrain'
        local_map = Map(local.grid, countries=False, nx=640)
        local_map.set_lonlat_countours()

        # Prepare the figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
        ax1.set_title(glacier.GLACIER_NAME)

        # Plot glaciers
        colors = ['red', 'orange', 'green', 'blue', 'purple', 'magenta']
        for i in range(5):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we (again) account for this here:
            if rgig.geometry.geom_type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.geom_type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                allparts = [p.buffer(0) for p in rgig.geometry.geoms]
                rgig.geometry = shapely.ops.unary_union(allparts)
                x, y = rgig.geometry.exterior.xy

            # Mark the centroid of the closest RGI glacier
            if i == 0:
                local_map.set_geometry(shpg.Point(rgig.CenLon, rgig.CenLat), edgecolor='k', marker='x', linewidth=4, markersize=100, zorder=50, text='RGI')

            local_map.set_geometry(rgig.geometry.exterior, color=colors[i], linewidth=3, label=rgig.RGIId)  # adjusted for RGI 5.0
        # NOTE(review): `rgig` is the last (fifth closest) polygon of the loop
        # above at this point - confirm that this centroid is the intended one.
        local_map.set_geometry(shpg.Point(rgig.CenLon, rgig.CenLat), c='k', markersize=30, zorder=51)  # again adjusted for RGI 5.0
        # Plot the GlaThiDa point
        local_map.set_geometry(shpg.Point(lon, lat), edgecolor='g', marker='x', linewidth=4, markersize=100, zorder=50, text='GlaT')

        # Left panel: default map type
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax1, addcbar=False)

        # Right panel: same extent on the terrain map
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat, maptype='terrain')
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax2, addcbar=False)
        plt.subplots_adjust(left=0.04, right=0.80, top=0.94, bottom=0.07)
        plt.legend(bbox_to_anchor=(1.02, 1.), fontsize=18, loc=2, borderaxespad=0, frameon=False, numpoints=1, scatterpoints=1)
        pdf.savefig(fig)
        plt.close()

        # Progress report; print() works on Python 2 and 3
        if curr % 5 == 0:
            print("%s / %s plots done." % (curr, total))
        curr += 1

### Based on these plots and some googling, we can link some glaciers to their geometries 

In [None]:
# Display the IDs that are still without a link
still_not_found

In [None]:
# Manual links: GlaThiDa ID -> position of the chosen polygon in the
# distance-sorted, area-filtered RGI table built in the next cell
links = {95:0, 108:0, 238:0, 254:0, 280:0, 289:0, 291:0, 355:0, 356:2, 497:0, 518:0, 557:0}

In [None]:
# Apply the manual links; entries without one are given up on.
for gid in still_not_found:
    glacier = pda[pda.GlaThiDa_ID == gid].iloc[0]
    lon, lat = glacier.LON, glacier.LAT
    pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
    # sort_values replaces DataFrame.sort(columns=...), removed in pandas 0.20
    sortrgi = pdrgi.sort_values(by='DIST')
    # Keep only the polygons whose area is within a factor 3 of the GlaThiDa area
    sortrgi = sortrgi[(sortrgi.Area >= glacier.AREA/3.) & (sortrgi.Area <= glacier.AREA*3.)]
    if gid in links:
        # links[gid] is a position in this sorted and filtered table
        found[gid] = sortrgi.iloc[links[gid]].RGIId
    else:
        # No manual link available for this entry
        final_not_found.append(gid)

In [None]:
# Number of linked entries and of entries finally given up on
print(len(found), len(final_not_found))

## Exclude the glaciers with extreme difference in area 

In [None]:
# Table of all links found so far, with the GlaThiDa and RGI attributes
# needed to compare them, followed by a scatter plot of the two areas.
# list(...): dictionary views are not sequences on Python 3, so pandas is
# handed plain lists (same content and order as before).
gtd_ids = list(found.keys())
rgi_ids = list(found.values())
# Look every GlaThiDa entry up once instead of once per column
gtd_rows = [pda[pda.GlaThiDa_ID == gid].iloc[0] for gid in gtd_ids]

odf = pd.DataFrame()
odf['GlaThiDa_ID'] = gtd_ids
odf['RGI_ID'] = rgi_ids
odf['NAME'] = [row.GLACIER_NAME for row in gtd_rows]
odf['GTD_AREA'] = [row.AREA for row in gtd_rows]
odf['MEAN_THICKNESS'] = [row.MEAN_THICKNESS for row in gtd_rows]
odf['VOLUME'] = [row.VOLUME for row in gtd_rows]
odf['SURVEY_DATE'] = [row.SURVEY_DATE for row in gtd_rows]
odf['RGI_AREA'] = [pdrgi[pdrgi.RGIId == rgi_id].iloc[0].Area for rgi_id in rgi_ids]
fig = plt.figure(figsize=(5, 5), dpi=200)
ax = fig.add_subplot(111)
odf.plot(x='GTD_AREA', y='RGI_AREA', kind='scatter', ax=ax);
ax.set_xlim([0, 25]);
ax.set_ylim([0, 25]);

In [None]:
# Flag the links whose RGI area matches the GlaThiDa area within
# rtol=0.15 / atol=0.4, then plot both classes in different colours.
odf['isclose'] = np.isclose(
    odf['RGI_AREA'], odf['GTD_AREA'], rtol=0.15, atol=0.4)
fig = plt.figure(figsize=(5, 5), dpi=200)
ax = fig.add_subplot(111)
groups = odf.groupby('isclose').groups
matching = odf.iloc[groups[True]]
matching.plot(x='GTD_AREA', y='RGI_AREA', kind='scatter', ax=ax, color='DarkBlue', label='True');
differing = odf.iloc[groups[False]]
differing.plot(x='GTD_AREA', y='RGI_AREA', kind='scatter', ax=ax, color='DarkRed', label='False');
ax.set_xlim([0, 25]);
ax.set_ylim([0, 25]);

In [None]:
# Keep only the links flagged as close in area and plot them again
odf = odf.iloc[groups[True]]
fig, ax = plt.subplots(figsize=(5, 5), dpi=200)
odf.plot(x='GTD_AREA', y='RGI_AREA', kind='scatter', ax=ax);
ax.set_xlim([0, 25]);
ax.set_ylim([0, 25]);

In [None]:
# Number of links left (shown as the cell output)
len(odf)

In [None]:
# Keep one entry per glacier name: by default the one whose GlaThiDa area is
# closest to the RGI area; for a few names the entry is picked manually by
# its survey date instead.
manual_dates = {
    'KLEINFLEISSKEES': 20049999,
    'SCHLADMINGER GLETSCHER': 20079999,
    'SCHLATENKEES': 20019999,
}
gn = odf.groupby('NAME')
kept = []
for n, d in gn:
    subd = d
    if len(d) > 1:
        if n in manual_dates:
            subd = d[d.SURVEY_DATE.isin([manual_dates[n]])]
        else:
            area_diff = np.abs(d.RGI_AREA.values - d.GTD_AREA.values)
            subd = d.iloc[[np.argmin(area_diff)]]
    kept.append(subd.GlaThiDa_ID.values[0])

In [None]:
# Keep the selected entries; show the number of rows, of duplicated names
# and of duplicated RGI IDs that remain
odf = odf[odf.GlaThiDa_ID.isin(kept)]
len(odf), np.sum(odf.duplicated('NAME')), np.sum(odf.duplicated('RGI_ID'))

### There are still some RGI duplicates....

In [None]:
# Print, separated by an empty line, every group of entries that is linked
# to the same RGI ID.
gn = odf.groupby('RGI_ID')
kept = []
for n, d in gn:
    if len(d) <= 1:
        continue
    print()
    shown = d.set_index('GlaThiDa_ID')
    print(shown[['NAME', 'GTD_AREA', 'RGI_AREA', 'VOLUME', 'SURVEY_DATE']])

Ah OK, it's because the names are not unique either.

In [None]:
# Keep one entry per RGI ID: when several entries share a polygon, take the
# one with the largest SURVEY_DATE value, i.e. the newest.
gn = odf.groupby('RGI_ID')
kept = []
for n, d in gn:
    if len(d) > 1:
        newest = np.argmax(d.SURVEY_DATE.values)
        d = d.iloc[[newest]]
    kept.append(d.GlaThiDa_ID.values[0])

In [None]:
# Keep the selected entries; show the number of rows, of duplicated names
# and of duplicated RGI IDs that remain
odf = odf[odf.GlaThiDa_ID.isin(kept)]
len(odf), np.sum(odf.duplicated('NAME')), np.sum(odf.duplicated('RGI_ID'))

In [None]:
# Index the final table by GlaThiDa ID, in ascending order
odf = odf.set_index('GlaThiDa_ID').sort_index()

## Final plot of found

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# One PDF page per final link: the five RGI outlines with the closest
# centroids, the centroid of the linked polygon and the GlaThiDa point, on
# two Google map backgrounds.
curr = 1  # current plot
total = len(odf.index.values)  # total number of plots

with PdfPages(os.path.join(output_dir, 'final_glathida_glaciers_found.pdf')) as pdf:
    for gid, rgiid in zip(odf.index.values, odf.RGI_ID.values):
        glacier = pda[pda.GlaThiDa_ID == gid].iloc[0]
        lon, lat = glacier.LON, glacier.LAT
        pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
        # sort_values replaces DataFrame.sort(columns=...), removed in pandas 0.20
        sortrgi = pdrgi.sort_values(by='DIST')

        # For GoogleMap we need a lon lat range to generate the map
        mmlon = [lon, lon]
        mmlat = [lat, lat]

        for i in range(5):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we account for this here:
            if rgig.geometry.geom_type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.geom_type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                # (.geoms: multi-part geometries are not iterable in Shapely 2)
                allparts = [p.buffer(0) for p in rgig.geometry.geoms]
                rgig.geometry = shapely.ops.unary_union(allparts)
                x, y = rgig.geometry.exterior.xy

            # Grow the map extent so that it contains this outline
            mmlon = [np.min(np.append(mmlon, x)), np.max(np.append(mmlon, x))]
            mmlat = [np.min(np.append(mmlat, y)), np.max(np.append(mmlat, y))]

        # Prepare the figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
        ax1.set_title('{}: '.format(gid) + glacier.POLITICAL_UNIT + '-' + glacier.GLACIER_NAME +
                      ' {:.2f}km2'.format(glacier.AREA))

        # Make a local map where to plot the polygons
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat)
        local_map = Map(local.grid, countries=False, nx=640)
        local_map.set_lonlat_countours()

        # Plot glaciers
        colors = ['red', 'orange', 'green', 'blue', 'purple', 'magenta']
        for i in range(5):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we (again) account for this here:
            if rgig.geometry.geom_type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.geom_type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                allparts = [p.buffer(0) for p in rgig.geometry.geoms]
                rgig.geometry = shapely.ops.unary_union(allparts)
                x, y = rgig.geometry.exterior.xy

            # Mark the centroid of the closest RGI glacier
            if i == 0:
                local_map.set_geometry(shpg.Point(rgig.CenLon, rgig.CenLat), edgecolor='k', marker='x', linewidth=4, markersize=100, zorder=50, text='matching')

            # Centroid (black) and outline (coloured) of this polygon
            px, py = rgig.CenLon, rgig.CenLat
            local_map.set_geometry(shpg.Point(px, py), markersize=6, linewidth=0, color='black')
            local_map.set_geometry(rgig.geometry.exterior, color=colors[i], linewidth=3, label=rgig.RGIId)

        # Plot selected glacier
        rgig = pdrgi[pdrgi.RGIId == rgiid].iloc[0]
        px, py = rgig.CenLon, rgig.CenLat
        local_map.set_geometry(shpg.Point(px, py), marker='x', markersize=6, linewidth=0, color='red')

        # Plot the point
        px, py = lon, lat  # local.transform(lon, lat)
        local_map.set_geometry(shpg.Point(px, py), marker='o', markersize=7, linewidth=0, color='red')

        # Left panel: default map type
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax1, addcbar=False)

        # Right panel: same extent on the terrain map
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat, maptype='terrain')
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax2, addcbar=False)
        plt.subplots_adjust(left=0.04, right=0.80, top=0.94, bottom=0.07)
        plt.legend(bbox_to_anchor=(1.02, 1.), fontsize=18, loc=2, borderaxespad=0, frameon=False, numpoints=1,
                   scatterpoints=1)
        pdf.savefig(fig)
        plt.close()

        # Progress report; print() works on Python 2 and 3
        if curr % 5 == 0:
            print("%s / %s plots done." % (curr, total))
        curr += 1

## Final plot of not found 

Not really useful, because the duplicates and similar leftover entries are still included.

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# One PDF page per entry without a final link, showing the five RGI outlines
# with the closest centroids on two Google map backgrounds.
# Only IDs that are still present in `pda` can be plotted: `all_ids` was
# collected before `pda` was filtered, so looking up any other ID would
# fail with an IndexError on `.iloc[0]`.
linked_ids = set(odf.index.values)
known_ids = set(pda.GlaThiDa_ID.values)
todo_ids = [gid for gid in all_ids if gid in known_ids and gid not in linked_ids]

curr = 1  # current plot
total = len(todo_ids)  # total number of plots

with PdfPages(os.path.join(output_dir, 'final_glathida_glaciers_not_found.pdf')) as pdf:
    for gid in todo_ids:
        glacier = pda[pda.GlaThiDa_ID == gid].iloc[0]
        lon, lat = glacier.LON, glacier.LAT
        pdrgi['DIST'] = haversine(lon, lat, pdrgi.CenLon.values, pdrgi.CenLat.values)
        # sort_values replaces DataFrame.sort(columns=...), removed in pandas 0.20
        sortrgi = pdrgi.sort_values(by='DIST')

        # For GoogleMap we need a lon lat range to generate the map
        mmlon = [lon, lon]
        mmlat = [lat, lat]

        for i in range(5):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we account for this here:
            if rgig.geometry.geom_type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.geom_type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                # (.geoms: multi-part geometries are not iterable in Shapely 2)
                allparts = [p.buffer(0) for p in rgig.geometry.geoms]
                rgig.geometry = shapely.ops.unary_union(allparts)
                x, y = rgig.geometry.exterior.xy

            # Grow the map extent so that it contains this outline
            mmlon = [np.min(np.append(mmlon, x)), np.max(np.append(mmlon, x))]
            mmlat = [np.min(np.append(mmlat, y)), np.max(np.append(mmlat, y))]

        # Make a local map where to plot the polygons
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat)  # also possible:  maptype='terrain'
        local_map = Map(local.grid, countries=False, nx=640)
        local_map.set_lonlat_countours()

        # Prepare the figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
        ax1.set_title(glacier.GLACIER_NAME)

        # Plot glaciers
        colors = ['red', 'orange', 'green', 'blue', 'purple', 'magenta']
        for i in range(5):
            rgig = sortrgi.iloc[i]
            # In case the glacier is a MultiPolygon we (again) account for this here:
            if rgig.geometry.geom_type == 'Polygon':
                x, y = rgig.geometry.exterior.xy
            elif rgig.geometry.geom_type == 'MultiPolygon':
                # buffer is necessary as some multi-polygons are self-intersecting
                allparts = [p.buffer(0) for p in rgig.geometry.geoms]
                rgig.geometry = shapely.ops.unary_union(allparts)
                x, y = rgig.geometry.exterior.xy

            # Mark the centroid of the closest RGI glacier
            if i == 0:
                local_map.set_geometry(shpg.Point(rgig.CenLon, rgig.CenLat), edgecolor='k', marker='x', linewidth=4, markersize=100, zorder=50, text='RGI')

            local_map.set_geometry(rgig.geometry.exterior, color=colors[i], linewidth=3, label=rgig.RGIId)  # adjusted for RGI 5.0
        # NOTE(review): `rgig` is the last (fifth closest) polygon of the loop
        # above at this point - confirm that this centroid is the intended one.
        local_map.set_geometry(shpg.Point(rgig.CenLon, rgig.CenLat), c='k', markersize=30, zorder=51)  # again adjusted for RGI 5.0
        # Plot the GlaThiDa point
        local_map.set_geometry(shpg.Point(lon, lat), edgecolor='g', marker='x', linewidth=4, markersize=100, zorder=50, text='GlaT')

        # Left panel: default map type
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax1, addcbar=False)

        # Right panel: same extent on the terrain map
        local = datasets.GoogleVisibleMap(x=mmlon, y=mmlat, maptype='terrain')
        local_map.set_rgb(local.get_vardata())
        local_map.visualize(ax=ax2, addcbar=False)
        plt.subplots_adjust(left=0.04, right=0.80, top=0.94, bottom=0.07)
        plt.legend(bbox_to_anchor=(1.02, 1.), fontsize=18, loc=2, borderaxespad=0, frameon=False, numpoints=1,
                   scatterpoints=1)
        pdf.savefig(fig)
        plt.close()

        # Progress report; print() works on Python 2 and 3
        if curr % 5 == 0:
            print("%s / %s plots done." % (curr, total))
        curr += 1

## Write out the data

In [None]:
# Write the final link table to the output folder
odf.to_csv(os.path.join(output_dir, 'GLATHIDA_to_RGI_Alps.csv'))