In [1]:
#Setting up the code and installing required packages 
from util import *
from glob import glob
import matplotlib.pyplot as plt
from shapely import wkt
from shapely.geometry import LineString, Polygon
pd.set_option("display.max_columns", None)

In [2]:
#Navigate to intersects shapefile and separates out the year, finds how many years from 1800 and how many years to 2100
gdf = gpd.read_file(f"Data/Merged Intersects_UniqueID/NorthIsland_Intersects.shp")
gdf["Date"] = pd.to_datetime(gdf.ShorelineI, dayfirst=True, format='mixed')
gdf["Year"] = gdf.Date.dt.year
gdf["YearsSinceBase"] = (gdf.Date - pd.Timestamp(1800, 1, 1)).dt.days / 365.25
gdf["YearsUntilFuture"] = (
    pd.Timestamp(2100, 1, 1) - gdf.Date
    ).dt.days / 365.25
gdf.Date = gdf.Date.astype(str)
gdf["TransectID"] = gdf.Unique_ID.astype(np.int64)
gdf

Unnamed: 0,ShorelineI,BaselineID,Distance,IntersectX,IntersectY,Uncertaint,Unique_ID,Date,geometry,Year,YearsSinceBase,YearsUntilFuture,TransectID
0,01/03/2011,0.0,-37.16,1.730591e+06,5.908710e+06,8.59,1.006327e+11,2011-03-01,POINT Z (1730590.982 5908710.362 0.000),2011,211.156742,88.837782,100632729920
1,08/19/1960,0.0,-51.70,1.730600e+06,5.908699e+06,3.76,1.006327e+11,1960-08-19,POINT Z (1730600.339 5908699.234 0.000),1960,160.626968,139.367556,100632729920
2,01/04/2017,0.0,-32.09,1.730588e+06,5.908714e+06,2.94,1.006327e+11,2017-04-01,POINT Z (1730587.720 5908714.241 0.000),2017,217.242984,82.751540,100632729920
3,01/02/2004,0.0,-48.27,1.730598e+06,5.908702e+06,10.05,1.006327e+11,2004-02-01,POINT Z (1730598.132 5908701.858 0.000),2004,204.079398,95.915127,100632729920
4,01/04/2022,0.0,-23.18,1.730582e+06,5.908721e+06,2.07,1.006327e+11,2022-04-01,POINT Z (1730581.983 5908721.063 0.000),2022,222.242300,77.752225,100632729920
...,...,...,...,...,...,...,...,...,...,...,...,...,...
914496,02/21/2021,0.0,-104.61,1.800540e+06,5.395670e+06,1.02,1.004660e+11,2021-02-21,POINT Z (1800539.730 5395670.271 0.000),2021,221.136208,78.858316,100466049969
914497,09/24/1995,0.0,-69.89,1.800538e+06,5.395636e+06,5.64,1.004660e+11,1995-09-24,POINT Z (1800538.365 5395635.582 0.000),1995,195.723477,104.271047,100466049969
914498,04/12/1973,0.0,-46.92,1.800527e+06,5.395613e+06,4.73,1.004661e+11,1973-12-04,POINT Z (1800527.469 5395613.018 0.000),1973,173.919233,126.075291,100466050980
914499,02/21/2021,0.0,-104.80,1.800530e+06,5.395671e+06,1.02,1.004661e+11,2021-02-21,POINT Z (1800529.745 5395670.850 0.000),2021,221.136208,78.858316,100466050980


In [3]:
def get_transects(intersects):
  p1 = intersects.geometry[intersects.Distance.idxmin()].coords[0]
  p2 = intersects.geometry[intersects.Distance.idxmax()].coords[0]
  azimuth = math.degrees(math.atan2(p1[0]-p2[0], p1[1]-p2[1]))
  if azimuth < 0:
      azimuth += 360
  return pd.Series({"Azimuth": azimuth, "geometry": LineString([p1, p2])})

lines = gdf.groupby("TransectID")[["geometry", "Distance"]].apply(get_transects)
lines.crs = gdf.crs
lines

Unnamed: 0_level_0,Azimuth,geometry
TransectID,Unnamed: 1_level_1,Unnamed: 2_level_1
100000185990,280.491478,"LINESTRING Z (1571642.421 6190470.978 0.000, 1..."
100000186642,271.468801,"LINESTRING Z (1571640.943 6190457.642 0.000, 1..."
100000186861,253.739795,"LINESTRING Z (1571638.826 6190440.101 0.000, 1..."
100000207668,213.055823,"LINESTRING Z (1571707.731 6190268.688 0.000, 1..."
100000208692,213.055823,"LINESTRING Z (1571716.675 6190264.098 0.000, 1..."
...,...,...
100792445903,124.911831,"LINESTRING Z (1570490.029 6190785.870 0.000, 1..."
100792446903,124.911831,"LINESTRING Z (1570496.406 6190793.614 0.000, 1..."
100792447904,124.911831,"LINESTRING Z (1570503.353 6190800.960 0.000, 1..."
100792448907,124.911831,"LINESTRING Z (1570509.491 6190808.871 0.000, 1..."


In [4]:
lines["dist_to_neighbour"] = lines.distance(lines.shift(-1))
breakpoints = lines.dist_to_neighbour[lines.dist_to_neighbour > 105]
lines["group"] = pd.Series(range(len(breakpoints)), index=breakpoints.index)
lines["group"] = lines.group.bfill().fillna(len(breakpoints)).astype(int)
transect_metadata = lines[["Azimuth", "group"]].to_dict(orient="index")

In [5]:
#Linear regression is run here. See util.py for the breakdown on linear_models
linear_models = fit(gdf, transect_metadata)
linear_models

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse
0,100000185990,-0.174749,16.121851,0,0.988985,0.483579,0.270081,0.519693
1,100000186642,-0.152778,10.912255,0,0.999725,0.066454,0.005100,0.071417
2,100000186861,-0.212911,20.574028,0,0.985677,0.673000,0.523105,0.723260
3,100000207668,-0.052468,-16.374386,1,0.373987,1.779999,3.659311,1.912933
4,100000208692,-0.065189,-12.810327,1,0.600416,1.394473,2.245846,1.498615
...,...,...,...,...,...,...,...,...
139133,100792445903,-0.078854,-9.157746,815,0.296304,3.389827,14.491849,3.806816
139134,100792446903,-0.090686,-7.312214,815,0.314161,3.620688,17.618604,4.197452
139135,100792447904,-0.124058,-1.742677,815,0.425647,3.624603,20.380086,4.514431
139136,100792448907,-0.131000,-1.856677,815,0.650471,2.443211,9.049437,3.008228


In [None]:
#Only run if rolling average is needed. Otherwise SKIP THIS
#linear_models = fit(gdf, transect_metadata)
#rolled_slopes = linear_models.groupby("group").slope.rolling(10, min_periods=1).mean().dropna().reset_index(level=0)
#linear_models.slope = rolled_slopes.slope
#linear_models.dropna(inplace=True)
#linear_models

In [6]:
#Coordinates of the projected shoreline are plotted here

#Changed coordinate function by making old_x and old_y negative 
def calculate_new_coordinates(old_x, old_y, bearing, distance):
    bearing_radians = math.radians(bearing)
    new_x = old_x + (distance * math.sin(bearing_radians))
    new_y = old_y + (distance * math.cos(bearing_radians))
    point = Point(new_x, new_y)
    assert not point.is_empty
    return point

#Removed other model equations and changed Azimuth addtion from 180 to 360 deg
def predict(
    df: pd.DataFrame,
    linear_models: pd.DataFrame,
    transect_metadata: dict,
):
    """_summary_

    Args:
        df (pd.DataFrame): dataframe with columns: TransectID, Date, Distance, YearsSinceBase
        linear_models (pd.DataFrame): dataframe with columns: TransectID, slope, intercept
        transect_metadata (dict): dict lookup of TransectID to Azimuth & group
        
    Returns:
        pd.DataFrame: resulting prediction points for the year 2100
    """
    results = []
    for i, row in linear_models.iterrows():
        transect_ID = row.TransectID
        transect_df = df[df.TransectID == transect_ID]
        latest_row = transect_df[transect_df.Date == transect_df["Date"].max()].iloc[0]
        future_year = int(row.get("FUTURE_YEAR", FUTURE_YEAR))
        result = row.to_dict()
        result.update({
            "TransectID": transect_ID,
            "BaselineID": latest_row.BaselineID,
            "group": row.group,
            "Year": future_year,
            "ocean_point": calculate_new_coordinates(
                latest_row.geometry.x,
                latest_row.geometry.y,
                transect_metadata[transect_ID]["Azimuth"] + 180,
                500,
            ),
        })
        
        model = "linear"
        slope = row.slope
        intercept = row.intercept

        predicted_distance = slope * (future_year - 1800) + intercept
        distance_difference = latest_row.Distance - predicted_distance
        result[f"{model}_model_point"] = calculate_new_coordinates(
            latest_row.geometry.x,
            latest_row.geometry.y,
            transect_metadata[transect_ID]["Azimuth"],
            distance_difference,
        )
        result[f"{model}_model_predicted_distance"] = predicted_distance
        result[f"{model}_model_distance"] = distance_difference
        results.append(result)
    results = gpd.GeoDataFrame(results)
    return results

In [7]:
#Projection file is created here with the stats and coordinate points in table format
results = predict(gdf, linear_models, transect_metadata)
results

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse,BaselineID,Year,ocean_point,linear_model_point,linear_model_predicted_distance,linear_model_distance
0,1.000002e+11,-0.174749,16.121851,0.0,0.988985,0.483579,0.270081,0.519693,3.0,2100,POINT (1572134.0619555558 6190379.93384971),POINT (1571628.1604695972 6190473.619315173),-36.302945,14.502945
1,1.000002e+11,-0.152778,10.912255,0.0,0.999725,0.066454,0.005100,0.071417,3.0,2100,POINT (1572140.778315408 6190444.825603218),POINT (1571628.075590545 6190457.971828776),-34.921239,12.871239
2,1.000002e+11,-0.212911,20.574028,0.0,0.985677,0.673000,0.523105,0.723260,3.0,2100,POINT (1572118.8258997381 6190580.100564536),POINT (1571621.2106361897 6190434.962778325),-43.299233,18.349233
3,1.000002e+11,-0.052468,-16.374386,1.0,0.373987,1.779999,3.659311,1.912933,4.0,2100,POINT (1571980.4593803054 6190687.758016147),POINT (1571705.836069961 6190265.775861311),-32.114827,3.474827
4,1.000002e+11,-0.065189,-12.810327,1.0,0.600416,1.394473,2.245846,1.498615,4.0,2100,POINT (1571989.4032340937 6190683.167728195),POINT (1571714.0805994999 6190260.110996108),-32.366923,4.756923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139133,1.007924e+11,-0.078854,-9.157746,815.0,0.296304,3.389827,14.491849,3.806816,2.0,2100,POINT (1570080.012465478 6191072.027326234),POINT (1570492.7059926013 6190784.001631035),-32.814094,3.264094
139134,1.007924e+11,-0.090686,-7.312214,815.0,0.314161,3.620688,17.618604,4.197452,2.0,2100,POINT (1570086.3896089632 6191079.771231939),POINT (1570499.8324995565 6190791.222543467),-34.517914,4.177914
139135,1.007924e+11,-0.124058,-1.742677,815.0,0.425647,3.624603,20.380086,4.514431,2.0,2100,POINT (1570093.3357900744 6191087.117996782),POINT (1570509.1996709246 6190796.879658838),-38.960220,7.130220
139136,1.007924e+11,-0.131000,-1.856677,815.0,0.650471,2.443211,9.049437,3.008228,2.0,2100,POINT (1570099.4739040367 6191095.02872516),POINT (1570516.7207107306 6190803.825220215),-41.156646,8.816646


In [8]:
#Spatial reference added to the results
results.set_geometry("linear_model_point", inplace=True, crs=2193)
results

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse,BaselineID,Year,ocean_point,linear_model_point,linear_model_predicted_distance,linear_model_distance
0,1.000002e+11,-0.174749,16.121851,0.0,0.988985,0.483579,0.270081,0.519693,3.0,2100,POINT (1572134.0619555558 6190379.93384971),POINT (1571628.160 6190473.619),-36.302945,14.502945
1,1.000002e+11,-0.152778,10.912255,0.0,0.999725,0.066454,0.005100,0.071417,3.0,2100,POINT (1572140.778315408 6190444.825603218),POINT (1571628.076 6190457.972),-34.921239,12.871239
2,1.000002e+11,-0.212911,20.574028,0.0,0.985677,0.673000,0.523105,0.723260,3.0,2100,POINT (1572118.8258997381 6190580.100564536),POINT (1571621.211 6190434.963),-43.299233,18.349233
3,1.000002e+11,-0.052468,-16.374386,1.0,0.373987,1.779999,3.659311,1.912933,4.0,2100,POINT (1571980.4593803054 6190687.758016147),POINT (1571705.836 6190265.776),-32.114827,3.474827
4,1.000002e+11,-0.065189,-12.810327,1.0,0.600416,1.394473,2.245846,1.498615,4.0,2100,POINT (1571989.4032340937 6190683.167728195),POINT (1571714.081 6190260.111),-32.366923,4.756923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139133,1.007924e+11,-0.078854,-9.157746,815.0,0.296304,3.389827,14.491849,3.806816,2.0,2100,POINT (1570080.012465478 6191072.027326234),POINT (1570492.706 6190784.002),-32.814094,3.264094
139134,1.007924e+11,-0.090686,-7.312214,815.0,0.314161,3.620688,17.618604,4.197452,2.0,2100,POINT (1570086.3896089632 6191079.771231939),POINT (1570499.832 6190791.223),-34.517914,4.177914
139135,1.007924e+11,-0.124058,-1.742677,815.0,0.425647,3.624603,20.380086,4.514431,2.0,2100,POINT (1570093.3357900744 6191087.117996782),POINT (1570509.200 6190796.880),-38.960220,7.130220
139136,1.007924e+11,-0.131000,-1.856677,815.0,0.650471,2.443211,9.049437,3.008228,2.0,2100,POINT (1570099.4739040367 6191095.02872516),POINT (1570516.721 6190803.825),-41.156646,8.816646


In [9]:
#Line and polygon shapefiles are created here 
def prediction_results_to_line_polygon(results: gpd.GeoDataFrame):
    lines = []
    polygons = []
    for group_name, group_data in results.groupby(["BaselineID", "group"]):
        if len(group_data) > 1:
            # Convert the points to LineString
            line = LineString(list(group_data.geometry))
            lines.append(line)
            # Convert the points to a closed Polygon
            polygon = Polygon(list(group_data.geometry) + list(group_data.ocean_point)[::-1])
            polygons.append(polygon)
    lines = gpd.GeoSeries(lines, crs=2193)
    polygons = gpd.GeoSeries(polygons, crs=2193)
    return lines, polygons

In [10]:
lines, poly = prediction_results_to_line_polygon(results)

In [None]:
#Saving line and polygon projection file to Z drive. Change file location accordingly  
lines, poly = prediction_results_to_line_polygon(results)
lines.to_file("Z:\Lalita\RNC Cont\......\BigBay_projection_output_lines.shp")
poly.to_file("Z:\Lalita\RNC Cont\.......\BigBay_projection_output_polygon.shp")

In [11]:
#Saving line and polygon projection file to folder in VS Code. Change file location accordingly
lines, poly = prediction_results_to_line_polygon(results)
lines.to_file("Projections\NorthIsland_projection_output_lines.shp")
poly.to_file("Projections\NorthIsland_projection_output_polygon.shp")

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 11-12: malformed \N character escape (3394349576.py, line 3)

In [None]:
#Quick visualisation of projected polygon and historic shorelines 
m = poly.explore(tiles="Esri.WorldImagery")
gpd.GeoDataFrame(results.drop(columns=["ocean_point", "linear_model_point"]), geometry=results.linear_model_point).explore(m=m)
gdf.explore("Year", legend=True, m=m)