# File Setup

Notes:
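* All the shoreline change estimates are measured relative to the earliest shoreline available for each islet (nominally the year 2003; in the run recorded below the earliest shoreline is dated 2002.375, i.e. May 2002)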

In [1]:
import os
os.environ['OMP_NUM_THREADS'] = '1'
import pandas as pd
import numpy as np
import math
import geopandas as gpd
import datetime
import itertools
import shapely 
from shapely.geometry import LineString, shape
from scipy import interpolate
import pyproj
import xarray as xr
import os
import pickle
from shapely.geometry import LineString

# Transformers from a projected CRS to geographic WGS84 (EPSG:4326).
# ``always_xy`` is not passed, so pyproj keeps each CRS's own axis order; the
# code that uses ``transformer2`` further down reads the result as (lat, lon).
transformer = pyproj.Transformer.from_crs(
    pyproj.CRS("EPSG:32760"),
    pyproj.CRS("EPSG:4326"),
)

transformer2 = pyproj.Transformer.from_crs(
    pyproj.CRS("EPSG:28356"),
    pyproj.CRS("EPSG:4326"),
)


def multipoint_to_linestring(multipoint):
    '''
        Build a LineString that visits the points of a multi-point geometry in order.

        Parameters
        ----------
        multipoint : a multi-part point geometry (anything exposing ``.geoms``),
            or any other input that ``LineString`` accepts directly (a coordinate
            sequence, another LineString, ...).

        Returns
        -------
        shapely.geometry.LineString
    '''
    parts = getattr(multipoint, 'geoms', None)
    if parts is None:
        # Not a multi-part geometry: let LineString interpret the input itself
        return LineString(multipoint)

    # Read the member points through ``.geoms``. Iterating the multi-part geometry
    # itself is deprecated in Shapely 1.8 and no longer possible in Shapely 2.0.
    return LineString([point.coords[0] for point in parts])

# Load CoastSat data

In [2]:
# Read the shorelines from the preprocessed GeoJSON file
geojson_file = "Preprocessed_datasets/target_file.geojson"
gdf_nanumea = gpd.read_file(geojson_file)

# Give every shoreline a 'Nanumea_TOB_<year>' layer name (the year is the text
# before the first '-' of its date), a single islet id, and a LineString
# geometry in place of the point collection that was read from file.
gdf_nanumea['layer'] = [
    'Nanumea_TOB_{}'.format(date_string.split('-')[0])
    for date_string in gdf_nanumea['date']
]
gdf_nanumea['id'] = 1
gdf_nanumea['geometry'] = gdf_nanumea['geometry'].apply(multipoint_to_linestring)

  ret = geos_linestring_from_py(coordinates)


In [3]:
# EPSG:3832 32660
transformer3 = pyproj.Transformer.from_crs(
    pyproj.CRS("EPSG:2102"),
    pyproj.CRS("EPSG:4326"),
)

# Two trial re-projections of every shoreline that differ only in the order in
# which the two coordinates are handed to the transformer.
# NOTE(review): neither column is read again in the visible part of the
# notebook - confirm whether they are still needed.
gdf_nanumea['geometry_2'] = [
    LineString([transformer3.transform(second, first) for first, second in line.coords])
    for line in gdf_nanumea.geometry
]
gdf_nanumea['geometry_3'] = [
    LineString([transformer3.transform(first, second) for first, second in line.coords])
    for line in gdf_nanumea.geometry
]

  values = construct_1d_object_array_from_listlike(values)


In [30]:
# Map each decimal year (year + (month - 0.5)/12) to the calendar date it was
# derived from. Dates that fall in the same month share a key, so the last one
# listed in gdf_nanumea.date is the one that is kept.
dates_dict = {
    (int(year_str)+(int(month_str)-0.5)/12): datetime.datetime(
        int(year_str),
        int(month_str),
        int(day_str.split('T')[0]),
    )
    for year_str, month_str, day_str in (
        stamp.split('-')[:3] for stamp in gdf_nanumea.date
    )
}
with open('dates_dict.pkl', 'wb') as f:
    pickle.dump(dates_dict, f)

# Load the shoreline data

In [4]:
# Configuration of the atoll / proxy combinations to process, and the lookup
# from each combination to its geopandas dataframe

base_dir = r"/home/shannonb/Tuvalu_shoreline_change"

# Shoreline proxies to process (the commented-out entries are switched off)
proxies = [
    # r'TOB',
    # r'VL',
    r'WM'
]

atolls = ['Nanumea']

locations_dict = {'Nanumea': [-5.667723, 176.094928]}

combinations = list(itertools.product(atolls, proxies))

# Define the years the shoreline change datafile is for. This is atoll specific
years_dict = {'Nanumea': '2003_2021'}

# There is one shape file per proxy, per year, per atoll. The shapefile read is
# currently disabled, so every combination points at the dataframe loaded above:
#   year = years_dict[atoll]
#   gpd.read_file(base_dir+'/Preprocessed_datasets/Shoreline_shapefiles/{}/{}_{}_{}.shp'.format(proxy,atoll,proxy,year)).rename(columns={'Layer':'layer','Area':'area','Perimeter':'perimeter'})
geopandas_dict = {}
for atoll, proxy in combinations:
    geopandas_dict[(atoll, proxy)] = gdf_nanumea



# Format Shoreline data

In [5]:
# Convert the geopandas dataframes into pandas dataframes of shoreline points.
# For every (atoll, proxy) combination this stores, in df_xy_dict:
#   'df_xy'    - one row per resampled shoreline point (x, y, lon, lat, id, year)
#   'gdf_test' - the cleaned geodataframe with one resampled LineString per shoreline

df_xy_dict = {}

for combination in combinations:
    atoll = combination[0]
    proxy = combination[1]

    gdf_test = geopandas_dict[atoll,proxy].copy()

    # # There are some typos in the raw data which need to be corrected/accounted for
    # gdf_test.loc[gdf_test.layer=='Nanumea_WM_2015_','layer'] = 'Nanumea_WM_2015'

    gdf_test['layer'] = gdf_test['layer'].fillna(value=1)
    gdf_test['id'] = gdf_test['id'].fillna(value=1)
    gdf_test['id'] = gdf_test.id.astype(int)

    gdf_test = gdf_test[~gdf_test[['layer','geometry']].isna().any(axis=1)]

    # Group the shorelines by the year encoded in their layer name
    gdf_right_years_dict = {}

    for layer,group in gdf_test.groupby('layer'):
        if (proxy=='VL')&(atoll!='Nanumaga'):
            year = int(layer.split('_')[1])
        else:
            year = int(layer.split('_')[-1])

        gdf_right_years_dict.update({
            year:group
        })

    # Concatenating the dict puts the year in index level 0; turn it into a column
    gdf_test = pd.concat(gdf_right_years_dict)
    gdf_test.reset_index(drop=False,inplace=True)

    gdf_test['level_0'] = gdf_test.level_0.astype(int)
    gdf_test['id'] = gdf_test.id.astype(int)
    gdf_test = gdf_test.rename(columns={'level_0':'year'}).drop('level_1',axis=1)

    # NB: ids 1 and 3 are the wrong way round for one year in the raw data
    if (atoll=='Nanumea')&(proxy=='TOB'):
        gdf_test.loc[gdf_test.year==2009,'id'] = gdf_test.loc[gdf_test.year==2009,'id'].replace(1,1000)
        gdf_test.loc[gdf_test.year==2009,'id'] = gdf_test.loc[gdf_test.year==2009,'id'].replace(3,1)
        gdf_test.loc[gdf_test.year==2009,'id'] = gdf_test.loc[gdf_test.year==2009,'id'].replace(1000,3)

    # Convert polygons to linestrings if there are any (some inconsistent formatting)
    if isinstance(gdf_test.loc[0,'geometry'], shapely.geometry.Polygon):
        gdf_test['geometry'] = [geom.boundary for geom in gdf_test.geometry]

    # Length of the first shoreline, summed over its straight segments. Only the
    # commented-out, length-based choice of the number of points below needs it.
    # (The geometry is deliberately not stored in a variable called ``shape``:
    # that would overwrite the ``shape`` function imported from shapely.geometry.)
    first_shoreline = gdf_test.loc[0,'geometry']
    list_of_line_segments = list(first_shoreline.coords)
    total_seg_length = 0
    for seg_1,seg_2 in zip(list_of_line_segments[:-1],list_of_line_segments[1:]):
        total_seg_length+=((seg_1[0]-seg_2[0])**2+(seg_1[1]-seg_2[1])**2)**0.5

    line_segment_length_after_interp = 35
    total_number_of_line_segments = 200#int(np.round(total_seg_length/35))

    # Resample every shoreline at the same normalised positions j/N for
    # j = 1 .. N-1, so that all shorelines end up with the same number of points
    for i in np.arange(0,len(gdf_test),1):
        gdf_test.loc[i,'geometry'] = \
            shapely.geometry.linestring.LineString(
                [
                    gdf_test.loc[i,'geometry'].interpolate(
                        (j/total_number_of_line_segments), normalized=True
                    )
                    for j in range(1, total_number_of_line_segments)
                ]
            )

    dict_of_df_xy = {}
    # Format the coordinates, and put into a dict
    for idx,row in gdf_test.iterrows():
        linestring = row.geometry
        XY_list = []

        if isinstance(linestring, shapely.geometry.linestring.LineString):
            XY_list = XY_list+[(x,y) for x,y in linestring.coords]
        elif isinstance(linestring, shapely.geometry.multilinestring.MultiLineString):
            # Members of a multi-part geometry are reached through ``.geoms``;
            # indexing the geometry directly is not possible in Shapely 2.0
            XY_list = XY_list+[(x,y) for x,y in linestring.geoms[0].coords]

        df_xy = pd.DataFrame(XY_list)
        df_xy.columns = ['lon','lat']
        df_xy['id'] = int(row.id)
        # The year column is a decimal year: year + (month - 0.5)/12
        df_xy['year'] = row.year+(int(row.date.split('-')[1])-0.5)/12

        dict_of_df_xy.update({
            idx:df_xy
        })

    df_xy = pd.concat(dict_of_df_xy)

    df_xy = df_xy.reset_index(drop=True)
    # Keep the original coordinates as x/y; lon/lat are overwritten below
    df_xy['x'] = df_xy.lon
    df_xy['y'] = df_xy.lat

    # The transformer output is read as (lat, lon): element 0 -> lat, element 1 -> lon.
    # NOTE(review): transformer2 is built from EPSG:28356 - confirm that this is
    # the CRS the Nanumea shoreline coordinates are actually stored in.
    df_xy[('lon,lat')] = [transformer2.transform(x,y) for x,y in zip(df_xy.lon,df_xy.lat)]
    df_xy['lon'] = [pair[1] for pair in df_xy[('lon,lat')]]
    df_xy['lat'] = [pair[0] for pair in df_xy[('lon,lat')]]

    print(np.unique(df_xy.id))
    print(np.unique(gdf_test.id))

    df_xy_dict.update({
        (atoll,proxy):{
            'df_xy':df_xy,
            'gdf_test':gdf_test
        }
    })
    

[1]
[1]


# Creating Transects

In [6]:
# This section is for creating shoreline transects, it's mostly trig

In [7]:
def create_shoreline_df(df_xy, transect_length=1000):
    '''
        Build one shoreline segment, with a perpendicular transect, per shoreline point.

        Each point is paired with the next point in ``df_xy`` (the last point is
        paired with the first, so the outline is treated as a closed loop). Segments
        that are exactly vertical or exactly horizontal, or that contain missing
        values, are dropped because their gradient or its inverse is undefined.

        Parameters
        ----------
        df_xy : DataFrame with columns ``x``, ``y``, ``lon`` and ``lat``, one row
            per shoreline point, in order along the shoreline.
        transect_length : distance from the segment midpoint to the transect end
            point, in the units of ``x``/``y``. Defaults to 1000.

        Returns
        -------
        DataFrame indexed like ``df_xy`` (minus dropped rows) with columns
        x, x+n, y, y+n, lon, lat, m_shoreline, m_transect, x_avg, y_avg,
        c_shoreline, c_transect, delta_y, delta_x, x_new, y_new.
        (x_avg, y_avg) is the segment midpoint and (x_new, y_new) the transect end.
    '''
    # Pair every point with its successor; np.roll wraps the first point round to
    # the end. (DataFrame.append, used previously, no longer exists in pandas 2.)
    df_shoreline = pd.DataFrame({'x':df_xy.x,
                                 'x+n':np.roll(df_xy.x.values, -1),
                                 'y':df_xy.y,
                                 'y+n':np.roll(df_xy.y.values, -1),
                                 'lon':df_xy.lon,
                                 'lat':df_xy.lat})

    # Calculate the gradient of the line between the two shoreline points
    df_shoreline['m_shoreline'] = (df_shoreline['y']-df_shoreline['y+n'])/(df_shoreline['x']-df_shoreline['x+n'])
    df_shoreline = df_shoreline.dropna(axis=0)

    # Drop vertical segments (infinite gradient) and horizontal segments (zero
    # gradient, whose perpendicular would have an infinite gradient)
    is_vertical = (df_shoreline['x']-df_shoreline['x+n'])==0
    is_horizontal = df_shoreline['m_shoreline']==0
    df_shoreline = df_shoreline[~is_vertical & ~is_horizontal].copy()

    # Find the inverse of the gradient (because we are wanting the line that is perpendicular to the shoreline)
    df_shoreline['m_transect'] = -(df_shoreline['m_shoreline']**-1)

    # Midpoint of each segment
    df_shoreline['x_avg'] = (df_shoreline['x']+df_shoreline['x+n'])/2
    df_shoreline['y_avg'] = (df_shoreline['y']+df_shoreline['y+n'])/2

    # Intercepts of the shoreline segment and of its transect (both pass through the midpoint)
    df_shoreline['c_shoreline'] = df_shoreline['y_avg']-df_shoreline['m_shoreline']*df_shoreline['x_avg']
    df_shoreline['c_transect'] = df_shoreline['y_avg']-df_shoreline['m_transect']*df_shoreline['x_avg']

    # Absolute x/y offsets of the transect end point from the midpoint
    transect_inclination = np.arctan(df_shoreline['m_transect'])
    df_shoreline['delta_y'] = np.abs(transect_length*np.sin(transect_inclination))
    df_shoreline['delta_x'] = np.abs(transect_length*np.cos(transect_inclination))

    # Choose the side of the shoreline the transect points to from the direction
    # of travel along the segment:
    #   y increasing -> x_new = x_avg - delta_x, otherwise x_avg + delta_x
    #   y_new = y_avg + delta_y when "y increasing" and "gradient > 0" are both
    #   true or both false, otherwise y_avg - delta_y
    # After the filters above y != y+n and the gradient is non-zero for every
    # remaining row, so these two tests cover all cases.
    y_increasing = df_shoreline['y']<df_shoreline['y+n']
    gradient_positive = df_shoreline['m_shoreline']>0
    x_sign = np.where(y_increasing, -1.0, 1.0)
    y_sign = np.where(y_increasing==gradient_positive, 1.0, -1.0)

    df_shoreline['x_new'] = df_shoreline['x_avg']+x_sign*df_shoreline['delta_x']
    df_shoreline['y_new'] = df_shoreline['y_avg']+y_sign*df_shoreline['delta_y']

    return(df_shoreline)


def calc_shoreline_change(df_shoreline_1,df_shoreline_2):
    '''
        Function for finding shoreline change between two years.

        For every transect of ``df_shoreline_2`` the transect line is intersected
        with every segment of ``df_shoreline_1``; the nearest intersection that
        lies strictly inside a segment gives the change for that transect.

        Parameters
        ----------
        df_shoreline_1 : reference shoreline with columns x, x+n, y, y+n,
            m_shoreline and c_shoreline (one row per segment).
        df_shoreline_2 : shoreline whose transects are used, with columns
            m_transect, c_transect, x_avg, y_avg, x_new and y_new.

        Returns
        -------
        A copy of ``df_shoreline_2`` with two extra columns:
        ``intersect_distance`` - distance from the transect midpoint to the nearest
            intersection; negative when the intersection lies between the midpoint
            and the transect end point (x_new), and 0 when no segment is intersected;
        ``transect_angle`` - inclination of the transect in degrees.
    '''
    # Reference shoreline segments as plain arrays, extracted once
    seg_x = np.asarray(df_shoreline_1['x'], dtype=float)
    seg_x_next = np.asarray(df_shoreline_1['x+n'], dtype=float)
    seg_y = np.asarray(df_shoreline_1['y'], dtype=float)
    seg_y_next = np.asarray(df_shoreline_1['y+n'], dtype=float)
    seg_m = np.asarray(df_shoreline_1['m_shoreline'], dtype=float)
    seg_c = np.asarray(df_shoreline_1['c_shoreline'], dtype=float)

    transects = zip(
        np.asarray(df_shoreline_2['m_transect'], dtype=float),
        np.asarray(df_shoreline_2['c_transect'], dtype=float),
        np.asarray(df_shoreline_2['x_avg'], dtype=float),
        np.asarray(df_shoreline_2['y_avg'], dtype=float),
        np.asarray(df_shoreline_2['x_new'], dtype=float),
    )

    intersect_distances = []

    # A transect parallel to a segment divides by zero below; the resulting
    # inf/nan can never pass the "strictly inside the segment" tests, so the
    # floating point warnings are silenced rather than treated as errors.
    with np.errstate(divide='ignore', invalid='ignore', over='ignore'):
        # loop over each transect
        for m_transect,c_transect,x_avg,y_avg,x_new in transects:
            # Where the transect line meets the (infinite) line through each segment
            x_hit = (seg_c-c_transect)/(m_transect-seg_m)
            y_hit = m_transect*x_hit+c_transect
            distance = np.sqrt((y_hit-y_avg)**2+(x_hit-x_avg)**2)

            # Keep only intersections strictly between the two ends of the segment
            inside_x = ((seg_x<x_hit)&(seg_x_next>x_hit))|((seg_x>x_hit)&(seg_x_next<x_hit))
            inside_y = ((seg_y<y_hit)&(seg_y_next>y_hit))|((seg_y>y_hit)&(seg_y_next<y_hit))
            candidates = np.flatnonzero(inside_x&inside_y&~np.isnan(distance))

            if candidates.size==0:
                # The transect does not cross the reference shoreline
                intersect_distances.append(0.0)
                continue

            # There may be multiple, so take the closest one (the first one on a tie)
            nearest = candidates[np.argmin(distance[candidates])]
            x_nearest = x_hit[nearest]

            # Negative sign when the crossing lies between the midpoint and x_new
            outside = ((x_nearest<x_avg) and (x_new<x_nearest)) or \
                ((x_nearest>x_avg) and (x_new>x_nearest))
            signed_distance = float(distance[nearest])
            intersect_distances.append(-signed_distance if outside else signed_distance)

    # Work on a copy so the caller's dataframe is left untouched
    df_shoreline_2 = df_shoreline_2.copy()
    df_shoreline_2['intersect_distance'] = intersect_distances

    df_shoreline_2['transect_angle'] = \
        np.arctan((df_shoreline_2.y_new-df_shoreline_2.y_avg)/(df_shoreline_2.x_new-df_shoreline_2.x_avg))*180/np.pi

    return(df_shoreline_2)


def calc_shoreline_slope_change(df_shoreline):
    '''
        Add the local change in shoreline gradient and the direction of the transect.

        Two columns are added:
        ``avg_slope_change`` - mean of the angles (degrees) between the gradient
            of a segment and the gradients of the segments two rows after and
            two rows before it;
        ``shoreline_direction`` - angle in degrees, between 0 and 360, of the
            vector from (x_avg, y_avg) to (x_new, y_new), measured from the
            positive y direction towards the positive x direction.

        The returned frame has a fresh 0..n-1 index and exact duplicate rows removed.
    '''
    def _angle_between(m_a, m_b):
        # Angle in degrees between two lines with gradients m_a and m_b
        return (180*np.arctan((m_a-m_b)/(1+m_a*m_b))/np.pi)

    # Stack the frame on top of itself so that the window "two rows either side"
    # also exists for the rows at the start and the end of the shoreline
    doubled = pd.concat([df_shoreline,df_shoreline]).reset_index(drop=True)
    indexed_rows = list(doubled.iterrows())

    rows_with_change = {}
    for position in range(len(indexed_rows)-4):
        _, two_behind = indexed_rows[position]
        centre_idx, centre = indexed_rows[position+2]
        _, two_ahead = indexed_rows[position+4]

        centre['avg_slope_change'] = np.mean([
            _angle_between(two_ahead.m_shoreline, centre.m_shoreline),
            _angle_between(centre.m_shoreline, two_behind.m_shoreline)
        ])
        rows_with_change[centre_idx] = centre

    result = pd.DataFrame.from_dict(rows_with_change,orient='index')

    # The doubled frame produced some rows twice: drop the copies
    result = result.drop_duplicates().reset_index(drop=True)

    #### Now also add in the angle (degrees) that the shoreline is facing
    to_east = result.x_new>result.x_avg
    to_west = result.x_new<result.x_avg
    to_north = result.y_new>result.y_avg
    to_south = result.y_new<result.y_avg

    # (rows in the quadrant, angle at which the quadrant starts,
    #  True -> the angle inside the quadrant is atan(|dx|/|dy|), False -> atan(|dy|/|dx|))
    quadrants = [
        (to_east&to_north, None, True),    # Quadrant 1
        (to_east&to_south, 90, False),     # Quadrant 2
        (to_west&to_south, 180, True),     # Quadrant 3
        (to_west&to_north, 270, False),    # Quadrant 4
    ]

    for in_quadrant, start_angle, x_over_y in quadrants:
        directions = []
        for selected,y_new,y_avg,x_new,x_avg in zip(
                in_quadrant,
                result.y_new,
                result.y_avg,
                result.x_new,
                result.x_avg):
            if not selected:
                continue
            if x_over_y:
                ratio = np.abs(x_new-x_avg)/np.abs(y_new-y_avg)
            else:
                ratio = np.abs(y_new-y_avg)/np.abs(x_new-x_avg)
            angle = 180*math.atan(ratio)/np.pi
            directions.append(angle if start_angle is None else start_angle+angle)

        result.loc[in_quadrant,'shoreline_direction'] = directions

    return(result)

In [8]:
# For each year, proxy and islet combination, find the shoreline change
shorelines_dict = {}

# Each combination of atoll and proxy combination
for key,item in df_xy_dict.items():
    df_xy = item['df_xy']
    gdf_xy = item['gdf_test']
    
    # Now get all the ids for this atoll
    ids = np.unique(df_xy.id)
    
    for ID in ids:
        df_xy_islet = df_xy[df_xy.id==ID]
        gdf_xy_islet = gdf_xy[gdf_xy.id==ID]
            
        # Get all the years
        years = np.sort(np.unique(df_xy_islet.year))
        years_beginning = years[:-1]
        years_end = years[1:]
        
        # Loop over the years, defining the beginning and the end year
        for year_end in years[1:]:
            
            df_xy_islet_beginning = df_xy_islet[(df_xy_islet.year==years[0])]
            df_xy_islet_end = df_xy_islet[(df_xy_islet.year==year_end)]
            
            df_shoreline_1 = create_shoreline_df(df_xy_islet_beginning).reset_index(drop=True)
            df_shoreline_2 = create_shoreline_df(df_xy_islet_end).reset_index(drop=True)

            df_shoreline_2 = calc_shoreline_change(df_shoreline_1,df_shoreline_2)

            df_shoreline_2 = calc_shoreline_slope_change(df_shoreline_2)
            
            # Adding in the final year of the shoreline change dictionary
            df_shoreline_2['year1'] = years[0]
            df_shoreline_2['year2'] = year_end
            
            # Filter out the anomalies of anything greater than 100 m change
            df_shoreline_2 = df_shoreline_2[np.abs(df_shoreline_2.intersect_distance)<100]
            
            shorelines_dict.update({
                (key[0],key[1],ID,years[0],year_end):df_shoreline_2.to_dict()
            })
            print((key[0],key[1],ID,years[0],year_end))

            

('Nanumea', 'WM', 1, 2002.375, 2003.0416666666667)
('Nanumea', 'WM', 1, 2002.375, 2013.9583333333333)
('Nanumea', 'WM', 1, 2002.375, 2014.0416666666667)
('Nanumea', 'WM', 1, 2002.375, 2014.2083333333333)
('Nanumea', 'WM', 1, 2002.375, 2014.2916666666667)
('Nanumea', 'WM', 1, 2002.375, 2014.375)
('Nanumea', 'WM', 1, 2002.375, 2014.7083333333333)
('Nanumea', 'WM', 1, 2002.375, 2014.875)
('Nanumea', 'WM', 1, 2002.375, 2014.9583333333333)
('Nanumea', 'WM', 1, 2002.375, 2015.125)
('Nanumea', 'WM', 1, 2002.375, 2015.2916666666667)
('Nanumea', 'WM', 1, 2002.375, 2015.5416666666667)
('Nanumea', 'WM', 1, 2002.375, 2015.625)
('Nanumea', 'WM', 1, 2002.375, 2015.7916666666667)
('Nanumea', 'WM', 1, 2002.375, 2015.9583333333333)
('Nanumea', 'WM', 1, 2002.375, 2016.0416666666667)
('Nanumea', 'WM', 1, 2002.375, 2016.375)
('Nanumea', 'WM', 1, 2002.375, 2016.7916666666667)
('Nanumea', 'WM', 1, 2002.375, 2016.875)
('Nanumea', 'WM', 1, 2002.375, 2017.0416666666667)
('Nanumea', 'WM', 1, 2002.375, 2017.2916

# Save the processed data

In [12]:
# # Correct the wrong dates in the TOB, year 2008 should be 2007
# for islet_id in [1,2,3]:
#     df = shorelines_dict[('Nanumea', 'TOB', islet_id, 2003, 2008)].copy()
#     df = pd.DataFrame.from_dict(df,orient='index').T
#     df['year2'] = [2007]*len(df)
#     del shorelines_dict[('Nanumea', 'TOB', islet_id, 2003, 2008)]
#     shorelines_dict[('Nanumea', 'TOB', islet_id, 2003, 2007)] = df.to_dict()

In [13]:
# NOTE: despite the ``.json`` extension this file is written with pickle (it is
# opened in binary mode and the dict keys are tuples), so it has to be read back
# with ``pickle.load`` and not with a JSON parser.
with open('Processed_data_and_output/shorelines_dict.json','wb') as fp:
    pickle.dump(shorelines_dict,fp)