The purpose of this notebook is to load the shoreline files, create shoreline segements and transects, and save the data for further processing in nb2

The data is output as a json file (dictionary), where the key is (location, proxy, islet_id, start_date, end_date), where the shoreline change is calculated as a change between the end and start dates.
Dates are represented in decimal years.

All the shoreline change values are calculated with respect to the first year in the dataset

# File Setup

In [1]:
import os
os.environ['OMP_NUM_THREADS'] = '1'
import pandas as pd
import numpy as np
import math
import geopandas as gpd
import datetime
import itertools
import shapely 
from shapely.geometry import LineString, shape
# from scipy import interpolate
import pyproj
import xarray as xr
import pickle
from shapely.geometry import LineString

# Load Dataset and Transform Coordinates

In [2]:
atoll_name = 'Nanumea'
proxy = 'WM'
intial_epsg_code = 'EPSG:32760'

In [3]:
# Load file (designed for geojsons produced by CoastSat)
geojson_file = "Preprocessed_datasets/target_file.geojson"
gdf_shoreline = gpd.read_file(geojson_file)

# Formatting
gdf_shoreline['layer'] = ['Nanumea_TOB_'+str(x).split('-')[0] for x in gdf_shoreline['date']]

# Tuvalu data from SPC has id numbers for different atolls.
# If dataset has more than one islet, you can manually define which rows pertain to which islet,
# then the code will treat them separately thereafter
gdf_shoreline['id'] = 1

gdf_shoreline['year'] = [x.year for x in gdf_shoreline.date]

def multipoint_to_linestring(multipoint):
    ''' Helper function for converting multipoints (output from CoastSat) to a linestring '''
    return LineString(multipoint.geoms)
    
gdf_shoreline['geometry'] = gdf_shoreline['geometry'].apply(multipoint_to_linestring)

In [4]:
# convert coordinates
transformer = \
    pyproj.Transformer.from_crs(pyproj.CRS("EPSG:32760"),pyproj.CRS("EPSG:4326")) 

gdf_shoreline['geometry'] = [LineString([transformer.transform(y,x) for x,y in geo.coords]) for geo in gdf_shoreline.geometry]

In [5]:
# Check the values in the 'geometry' column look coorect for WGS 84
gdf_shoreline

Unnamed: 0,date,satname,geoaccuracy,cloud_cover,geometry,layer,id,year
0,2002-05-19 22:10:40,L7,5.065,0.022809,"LINESTRING (-65.239 150.088, -65.239 150.087, ...",Nanumea_TOB_2002,1,2002
1,2003-01-30 22:10:30,L7,5.787,0.000000,"LINESTRING (-65.239 150.087, -65.239 150.087, ...",Nanumea_TOB_2003,1,2003
2,2013-12-03 22:23:36,L8,7.148,0.095328,"LINESTRING (-65.234 150.076, -65.234 150.076, ...",Nanumea_TOB_2013,1,2013
3,2014-01-20 22:23:07,L8,7.566,0.000000,"LINESTRING (-65.211 150.003, -65.211 150.003, ...",Nanumea_TOB_2014,1,2014
4,2014-03-09 22:22:32,L8,8.448,0.035028,"LINESTRING (-65.239 150.087, -65.239 150.087, ...",Nanumea_TOB_2014,1,2014
...,...,...,...,...,...,...,...,...
180,2023-12-05 22:37:43,S2,PASSED,0.002276,"LINESTRING (-65.210 150.003, -65.210 150.003, ...",Nanumea_TOB_2023,1,2023
181,2023-12-07 22:22:11,L9,7.724,0.027839,"LINESTRING (-65.239 150.087, -65.239 150.087, ...",Nanumea_TOB_2023,1,2023
182,2023-12-15 22:37:42,S2,PASSED,0.117506,"LINESTRING (-65.235 150.065, -65.235 150.065, ...",Nanumea_TOB_2023,1,2023
183,2023-12-23 22:22:19,L9,6.601,0.000000,"LINESTRING (-65.239 150.087, -65.239 150.087, ...",Nanumea_TOB_2023,1,2023


In [6]:
# Create a dictionary of dates for future reference
dates_dict = {(int(x.year)+(int(x.month)-0.5)/12):datetime.datetime(int(x.year),int(x.month),int(x.day)) for x in gdf_shoreline.date}
with open('dates_dict.pkl', 'wb') as f:
    pickle.dump(dates_dict, f)

geopandas_dict = {
        (atoll_name,proxy):gdf_shoreline
    }

combinations = list(geopandas_dict.keys())

# Load the Tuvalu shoreline data

In [7]:
# Section 3 is only needed if you're reading the SPC satellite image data 

In [8]:
# # Load the data using geopandas
# base_dir = os.getcwd()

# proxies = [
#     # r'TOB',
#     # r'VL',
#     r'WM'
# ]

# atolls = [
#     'Nanumea',
#     # 'Nanumanga'
# ]

# combinations = list(itertools.product(atolls,proxies))

# # Define the years the shoreline change datafile is for. This is atoll specific
# years_dict = {
#     'Nanumea':'2003_2021'
# }

# geopandas_dict = {}

# # There is one shape file per proxy, per year, per atoll
# for combination in combinations:
#     atoll = combination[0]
#     proxy = combination[1]
#     year = years_dict[combination[0]]
#     gdf_shoreline = gpd.read_file(base_dir+'/Preprocessed_datasets/Shoreline_shapefiles/{}/{}_{}_{}.shp'.format(proxy,atoll,proxy,year)).rename(columns={'Layer':'layer','Area':'area','Perimeter':'perimeter'})
    
#     geopandas_dict.update({
#         (atoll,proxy):gdf_shoreline
#     })



# Format Shoreline data

In [9]:
# converting the geopandas dataframes into a pandas dataframe
# reformat spc satellite images (formatting inconsistencies)
### uncomment code for reformatting spc data
# Convert gis file formats to shoreline segments

# Number of sgements to divide the island into (equal length)
total_number_of_line_segments = 200

segmented_shoreline_dict = {}

# Loop over all dictionary of all geopandas dataframes
for key,item in geopandas_dict.items():
    atoll = key[0]
    proxy = key[1]
    
    gdf_shoreline = item.copy()

    gdf_shoreline['layer'] = gdf_shoreline['layer'].fillna(value=1)
    gdf_shoreline['id'] = gdf_shoreline['id'].fillna(value=1)
    gdf_shoreline['id'] = gdf_shoreline.id.astype(int)
    gdf_shoreline = gdf_shoreline[~gdf_shoreline[['layer','geometry']].isna().any(axis=1)]
    
    # # There are some typos/inconsistencies in the Tuvalu data which need to be corrected/accounted for
    # gdf_shoreline.loc[gdf_shoreline.layer=='Nanumea_WM_2015_','layer'] = 'Nanumea_WM_2015'

    # # Some years are formatted differently
    # gdf_right_years_dict = {}
    # for layer,group in gdf_shoreline.groupby('layer'):
    #     if (proxy=='VL')&(atoll!='Nanumaga'):
    #         year = int(layer.split('_')[1])
    #     else:
    #         year = int(layer.split('_')[-1])

    #     gdf_right_years_dict.update({
    #         year:group
    #     })

    # gdf_shoreline = pd.concat(gdf_right_years_dict)
    # gdf_shoreline.reset_index(drop=False,inplace=True)
    
    # gdf_shoreline['level_0'] = gdf_shoreline.level_0.astype(int)
    # gdf_shoreline['id'] = gdf_shoreline.id.astype(int)
    # gdf_shoreline = gdf_shoreline.rename(columns={'level_0':'year'}).drop('level_1',axis=1)
    
    # # NB: ids 1 and 3 in the wrong way for one year in raw data
    # if (atoll=='Nanumea')&(proxy=='TOB'):
    #     gdf_shoreline.loc[gdf_shoreline.year==2009,'id'] = gdf_shoreline.loc[gdf_shoreline.year==2009,'id'].replace(1,1000)
    #     gdf_shoreline.loc[gdf_shoreline.year==2009,'id'] = gdf_shoreline.loc[gdf_shoreline.year==2009,'id'].replace(3,1)
    #     gdf_shoreline.loc[gdf_shoreline.year==2009,'id'] = gdf_shoreline.loc[gdf_shoreline.year==2009,'id'].replace(1000,3)
        

    # Convert polygons to linestrings if there are any (for Tuvalu satellite data from spc)
    # if type(gdf_shoreline.loc[0,'geometry'])==shapely.geometry.Polygon:
    #     gdf_shoreline['geometry'] = [x.boundary for x in gdf_shoreline.geometry]
        
    # Interpolate the linestrings so that they are all of the same length (x)
    x = total_number_of_line_segments #number of points to interpolate
    for i in np.arange(0,len(gdf_shoreline),1):
        gdf_shoreline.loc[i,'geometry'] = \
            shapely.geometry.linestring.LineString(
                [gdf_shoreline.loc[i,'geometry'].interpolate((j/x), normalized=True) for j in range(1, x)]
            )

    dict_of_df_xy = {}
    
    # Format the coordinates, and put into a pandas dataframe
    for idx,row in gdf_shoreline.iterrows():
        linestring = row.geometry
        XY_list = []
        
        if type(linestring)==shapely.geometry.linestring.LineString:
            XY_list = XY_list+[(x,y) for x,y in linestring.coords]
        elif type(linestring)==shapely.geometry.multilinestring.MultiLineString:
            XY_list = XY_list+[(x,y) for x,y in linestring[0].coords]
                
        df_xy = pd.DataFrame(XY_list)
        df_xy.columns = ['lon','lat']
        df_xy['id'] = int(row.id)
        df_xy['year'] = row.year+(int(row.date.month)-0.5)/12#int(row.year) #year column now is a decimal
        
        dict_of_df_xy.update({
            idx:df_xy
        })

    df_xy = pd.concat(dict_of_df_xy)

    # Format new pandas dataframe
    df_xy = df_xy.reset_index(drop=True)
    df_xy['x'] = df_xy.lon
    df_xy['y'] = df_xy.lat
    
    df_xy[('lon,lat')] = [transformer.transform(x,y) for x,y in zip(df_xy.lon,df_xy.lat)]
    df_xy['lon'] = [x[1] for x in df_xy[('lon,lat')]]
    df_xy['lat'] = [x[0] for x in df_xy[('lon,lat')]]
    
    segmented_shoreline_dict.update({
        (atoll,proxy):{
            'df_xy':df_xy,
            'gdf_shoreline':gdf_shoreline
        }
    })
    

# Creating Transects

In [10]:
def create_shoreline_df(df_xy):
    ''' Define x and y points at each side the the straight shoreline segment'''
    df_temp = pd.concat([df_xy[~df_xy.index.isin(list(df_xy.head(1).index))],df_xy[df_xy.index.isin(list(df_xy.head(1).index))]])

    df_shoreline = pd.DataFrame({'x':df_xy.x,
                                 'x+n':list(df_temp.x),
                                 'y':df_xy.y,
                                 'y+n':list(df_temp.y),
                                 'lon':df_xy.lon,
                                 'lat':df_xy.lat})

    # Calculate the gradient of the line between the two shoreline points
    df_shoreline['m_shoreline'] = (df_shoreline['y']-df_shoreline['y+n'])/(df_shoreline['x']-df_shoreline['x+n'])
    df_shoreline.dropna(axis=0,inplace=True)
    df_shoreline = df_shoreline[(df_shoreline['x']-df_shoreline['x+n'])!=0]

    # Find the inverse of the gradient (because we are wanting the line that is perpendicular to the shoreline)
    df_shoreline['m_transect'] = -df_shoreline['m_shoreline']**-1

    df_shoreline['x_avg'] = [x/2 for x in (df_shoreline['x']+df_shoreline['x+n'])]
    df_shoreline['y_avg'] = [y/2 for y in (df_shoreline['y']+df_shoreline['y+n'])]

    df_shoreline['c_shoreline'] = df_shoreline['y_avg']-df_shoreline['m_shoreline']*df_shoreline['x_avg']
    df_shoreline['c_transect'] = df_shoreline['y_avg']-df_shoreline['m_transect']*df_shoreline['x_avg']

    H = 1000 # length of the transect line
    df_shoreline['delta_y'] = [abs(H*math.sin(math.atan(m))) for m in df_shoreline['m_transect']]
    df_shoreline['delta_x'] = [abs(H*math.cos(math.atan(m))) for m in df_shoreline['m_transect']]
    
    df_shoreline = df_shoreline[df_shoreline.m_shoreline!=0]
    
    coords_dict = {}
    for index,row in df_shoreline.iterrows():
        
        if (row.y<row['y+n'])&(row.m_shoreline>0):
            coords_dict.update({
                index:{
                'x_new':row.x_avg-row.delta_x,
                'y_new':row.y_avg+row.delta_y
                }
            })
        elif (row.y>row['y+n'])&(row.m_shoreline>0):
            coords_dict.update({
                index:{
                'x_new':row.x_avg+row.delta_x,
                'y_new':row.y_avg-row.delta_y
                }
            })
        elif (row.y>row['y+n'])&(row.m_shoreline<0):
            coords_dict.update({
                index:{
                'x_new':row.x_avg+row.delta_x,
                'y_new':row.y_avg+row.delta_y
                }
            })
        elif (row.y<row['y+n'])&(row.m_shoreline<0):
            coords_dict.update({
                index:{
                'x_new':row.x_avg-row.delta_x,
                'y_new':row.y_avg-row.delta_y
                }
            })
        else:
            asdf
        
    df_coords = pd.DataFrame.from_dict(coords_dict,orient='index')
    df_shoreline = df_shoreline.join(df_coords)

    return(df_shoreline)


def calc_shoreline_change(df_shoreline_1,df_shoreline_2):
    '''
        Function for finding shoreline change between two years
    '''
    ### Now you need to find the distance between the two shorelines using the transects
    shoreline_2_updated_dict = {}

    # loop over each transect
    for idx,row in df_shoreline_2.iterrows():
        
        df_intersection = df_shoreline_1.copy()
        
        df_intersection['x_intersect_location'] = (df_intersection.c_shoreline-row.c_transect)/(row.m_transect-df_intersection.m_shoreline)
        df_intersection['y_intersect_location'] = row['m_transect']*df_intersection['x_intersect_location']+row['c_transect']
        df_intersection['intersect_distance'] = np.sqrt((df_intersection['y_intersect_location']-row['y_avg'])**2+\
        (df_intersection['x_intersect_location']-row['x_avg'])**2)

        # Find which shoreline segments would intersect with the shoreline
        df_intersection = df_intersection[[(x<x_int)&(x_n>x_int)|(x>x_int)&(x_n<x_int)for x,x_int,x_n in zip(df_intersection.x,df_intersection.x_intersect_location,df_intersection['x+n'])]]
        df_intersection = df_intersection[[(y<y_int)&(y_n>y_int)|(y>y_int)&(y_n<y_int) for y,y_int,y_n in zip(df_intersection.y,df_intersection.y_intersect_location,df_intersection['y+n'])]]
#         There may be multiple, so find the closest one (that *should* be the right one)
        
        try:
            df_intersection = df_intersection[(df_intersection['intersect_distance']==np.min(df_intersection['intersect_distance']))]
            intersect_distance = df_intersection.intersect_distance.reset_index(drop=True)[0]
            outside = (((df_intersection.x_intersect_location<row.x_avg)&(row.x_new<df_intersection.x_intersect_location))|\
((df_intersection.x_intersect_location>row.x_avg)&(row.x_new>df_intersection.x_intersect_location))).reset_index(drop=True)[0]
            if outside:
                intersect_distance = -intersect_distance
        except:
            intersect_distance = 0
            
        row['intersect_distance'] = intersect_distance
        
        shoreline_2_updated_dict.update({
            idx:row
        })

    df_shoreline_2 = pd.DataFrame.from_dict(shoreline_2_updated_dict,orient='index')

    df_shoreline_2['transect_angle'] = \
        np.arctan((df_shoreline_2.y_new-df_shoreline_2.y_avg)/(df_shoreline_2.x_new-df_shoreline_2.x_avg))*180/np.pi

    return(df_shoreline_2)


def calc_shoreline_slope_change(df_shoreline):
    '''
        Calc the direction the shoreline is facing
    '''
    
    df_shoreline = pd.concat([df_shoreline,df_shoreline])

    df_shoreline = df_shoreline.reset_index(drop=True)

    df_shoreline_dict = {}

    for (idx_1,row_1),(idx_2,row_2),(idx_3,row_3) in zip(
        df_shoreline[4:].iterrows(),
        df_shoreline[2:-2].iterrows(),
        df_shoreline[:-4].iterrows()):
        
        row_2['avg_slope_change'] = np.mean([
            (180*np.arctan((row_1.m_shoreline-row_2.m_shoreline)/(1+row_1.m_shoreline*row_2.m_shoreline))/np.pi),
            (180*np.arctan((row_2.m_shoreline-row_3.m_shoreline)/(1+row_2.m_shoreline*row_3.m_shoreline))/np.pi)
        ])

        df_shoreline_dict.update({
            idx_2:row_2
        })

    # only look at points where there is a significant change
    df_shoreline = pd.DataFrame.from_dict(df_shoreline_dict,orient='index')
    
    # Drop the duplicates
    df_shoreline = df_shoreline.drop_duplicates()
    df_shoreline.reset_index(drop=True,inplace=True)
    
    #### Now also add in the angle (degrees) that the shoreline is facing
    # Quadrant 1
    df_shoreline.loc[(df_shoreline.x_new>df_shoreline.x_avg)&(df_shoreline.y_new>df_shoreline.y_avg),'shoreline_direction']=\
        [180*math.atan(np.abs(x_new-x_avg)/np.abs(y_new-y_avg))/np.pi for y_new,y_avg,x_new,x_avg in zip(
                df_shoreline.y_new,
                df_shoreline.y_avg,
                df_shoreline.x_new,
                df_shoreline.x_avg
                ) if (x_new>x_avg)&(y_new>y_avg)]

    # Quadrant 2
    df_shoreline.loc[(df_shoreline.x_new>df_shoreline.x_avg)&(df_shoreline.y_new<df_shoreline.y_avg),'shoreline_direction']=\
        [90+180*math.atan(np.abs(y_new-y_avg)/np.abs(x_new-x_avg))/np.pi for y_new,y_avg,x_new,x_avg in zip(
                df_shoreline.y_new,
                df_shoreline.y_avg,
                df_shoreline.x_new,
                df_shoreline.x_avg
                ) if (x_new>x_avg)&(y_new<y_avg)]

    # Quadrant 3
    df_shoreline.loc[(df_shoreline.x_new<df_shoreline.x_avg)&(df_shoreline.y_new<df_shoreline.y_avg),'shoreline_direction']=\
        [180+180*math.atan(np.abs(x_new-x_avg)/np.abs(y_new-y_avg))/np.pi for y_new,y_avg,x_new,x_avg in zip(
                df_shoreline.y_new,
                df_shoreline.y_avg,
                df_shoreline.x_new,
                df_shoreline.x_avg
                ) if (x_new<x_avg)&(y_new<y_avg)]

    # Quadrant 4
    df_shoreline.loc[(df_shoreline.x_new<df_shoreline.x_avg)&(df_shoreline.y_new>df_shoreline.y_avg),'shoreline_direction']=\
        [270+180*math.atan(np.abs(y_new-y_avg)/np.abs(x_new-x_avg))/np.pi for y_new,y_avg,x_new,x_avg in zip(
                df_shoreline.y_new,
                df_shoreline.y_avg,
                df_shoreline.x_new,
                df_shoreline.x_avg
                ) if (x_new<x_avg)&(y_new>y_avg)]

    return(df_shoreline)

In [11]:
# For each year, proxy and islet combination, run functions above to calc shoreline change
shorelines_dict = {}

# Each combination of atoll and proxy combination
for key,item in segmented_shoreline_dict.items():
    df_xy = item['df_xy']
    gdf_xy = item['gdf_shoreline']
    
    # Now get all the ids for this atoll
    ids = np.unique(df_xy.id)
    
    for ID in ids:
        df_xy_islet = df_xy[df_xy.id==ID]
        gdf_xy_islet = gdf_xy[gdf_xy.id==ID]
            
        # Get all the years
        years = np.sort(np.unique(df_xy_islet.year))
        years_beginning = years[:-1]
        years_end = years[1:]
        
        # Loop over the years, defining the beginning and the end year
        for year_end in years[1:]:
            
            df_xy_islet_beginning = df_xy_islet[(df_xy_islet.year==years[0])]
            df_xy_islet_end = df_xy_islet[(df_xy_islet.year==year_end)]
            
            df_shoreline_1 = create_shoreline_df(df_xy_islet_beginning).reset_index(drop=True)
            df_shoreline_2 = create_shoreline_df(df_xy_islet_end).reset_index(drop=True)

            df_shoreline_2 = calc_shoreline_change(df_shoreline_1,df_shoreline_2)

            df_shoreline_2 = calc_shoreline_slope_change(df_shoreline_2)
            
            # Adding in the final year of the shoreline change dictionary
            df_shoreline_2['year1'] = years[0]
            df_shoreline_2['year2'] = year_end
            
            # Filter out the anomalies of anything greater than 100 m change
            df_shoreline_2 = df_shoreline_2[np.abs(df_shoreline_2.intersect_distance)<100]
            
            shorelines_dict.update({
                (key[0],key[1],ID,years[0],year_end):df_shoreline_2.to_dict()
            })
            print((key[0],key[1],ID,years[0],year_end))
            

('Nanumea', 'WM', 1, 2002.375, 2003.0416666666667)
('Nanumea', 'WM', 1, 2002.375, 2013.9583333333333)
('Nanumea', 'WM', 1, 2002.375, 2014.0416666666667)
('Nanumea', 'WM', 1, 2002.375, 2014.2083333333333)
('Nanumea', 'WM', 1, 2002.375, 2014.2916666666667)
('Nanumea', 'WM', 1, 2002.375, 2014.375)
('Nanumea', 'WM', 1, 2002.375, 2014.7083333333333)
('Nanumea', 'WM', 1, 2002.375, 2014.875)
('Nanumea', 'WM', 1, 2002.375, 2014.9583333333333)
('Nanumea', 'WM', 1, 2002.375, 2015.125)
('Nanumea', 'WM', 1, 2002.375, 2015.2916666666667)
('Nanumea', 'WM', 1, 2002.375, 2015.5416666666667)
('Nanumea', 'WM', 1, 2002.375, 2015.625)
('Nanumea', 'WM', 1, 2002.375, 2015.7916666666667)
('Nanumea', 'WM', 1, 2002.375, 2015.9583333333333)
('Nanumea', 'WM', 1, 2002.375, 2016.0416666666667)
('Nanumea', 'WM', 1, 2002.375, 2016.375)
('Nanumea', 'WM', 1, 2002.375, 2016.7916666666667)
('Nanumea', 'WM', 1, 2002.375, 2016.875)
('Nanumea', 'WM', 1, 2002.375, 2017.0416666666667)
('Nanumea', 'WM', 1, 2002.375, 2017.2916

# Saved the Processed data

In [12]:
with open('Processed_data_and_output/shorelines_dict.json','wb') as fp:
    pickle.dump(shorelines_dict,fp)