In [60]:
#Setting up the code and installing required packages 
from util import *
from glob import glob
import matplotlib.pyplot as plt
from shapely import wkt
from shapely.geometry import LineString, Polygon
pd.set_option("display.max_columns", None)

In [49]:
#Navigate to intersects shapefile and separates out the year, finds how many years from 1800 and how many years to 2100
gdf = gpd.read_file(f"Data/Merged Intersects_UniqueID/SouthIsland_Intersects.shp")
gdf["Date"] = pd.to_datetime(gdf.ShorelineI, dayfirst=True, format='mixed')
gdf["Year"] = gdf.Date.dt.year
gdf["YearsSinceBase"] = (gdf.Date - pd.Timestamp(1800, 1, 1)).dt.days / 365.25
gdf["YearsUntilFuture"] = (
    pd.Timestamp(2100, 1, 1) - gdf.Date
    ).dt.days / 365.25
gdf.Date = gdf.Date.astype(str)
gdf["TransectID"] = gdf.Unique_ID.astype(np.int64)
gdf

Unnamed: 0,ShorelineI,BaselineID,Distance,IntersectX,IntersectY,Uncertaint,Unique_ID,Date,geometry,Year,YearsSinceBase,YearsUntilFuture,TransectID
0,02/10/1958,0.0,-87.22,1.197815e+06,4.775669e+06,5.38,2.030544e+11,1958-10-02,POINT Z (1197814.973 4775669.042 0.000),1958,158.746064,141.248460,203054437032
1,02/25/1978,0.0,-82.80,1.197813e+06,4.775673e+06,3.78,2.030544e+11,1978-02-25,POINT Z (1197813.318 4775673.144 0.000),1978,178.146475,121.848049,203054437032
2,02/05/2007,0.0,-59.09,1.197804e+06,4.775695e+06,2.20,2.030544e+11,2007-05-02,POINT Z (1197804.453 4775695.130 0.000),2007,207.326489,92.668036,203054437032
3,02/10/1958,0.0,-88.83,1.197825e+06,4.775671e+06,5.38,2.030544e+11,1958-10-02,POINT Z (1197824.850 4775671.287 0.000),1958,158.746064,141.248460,203054438164
4,02/25/1978,0.0,-84.02,1.197823e+06,4.775676e+06,3.78,2.030544e+11,1978-02-25,POINT Z (1197823.049 4775675.754 0.000),1978,178.146475,121.848049,203054438164
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6896,02/14/2020,6.0,-38.68,1.201482e+06,4.793256e+06,2.10,2.030574e+11,2020-02-14,POINT Z (1201482.411 4793256.346 0.000),2020,220.114990,79.879535,203057355976
6897,12/10/1958,6.0,-36.92,1.201481e+06,4.793256e+06,10.08,2.030574e+11,1958-10-12,POINT Z (1201480.664 4793256.153 0.000),1958,158.773443,141.221081,203057355976
6898,02/25/1978,6.0,-27.44,1.201470e+06,4.793265e+06,4.21,2.030574e+11,1978-02-25,POINT Z (1201470.141 4793265.053 0.000),1978,178.146475,121.848049,203057356977
6899,02/14/2020,6.0,-39.16,1.201482e+06,4.793266e+06,2.10,2.030574e+11,2020-02-14,POINT Z (1201481.786 4793266.338 0.000),2020,220.114990,79.879535,203057356977


In [50]:
def get_transects(intersects):
  p1 = intersects.geometry[intersects.Distance.idxmin()].coords[0]
  p2 = intersects.geometry[intersects.Distance.idxmax()].coords[0]
  azimuth = math.degrees(math.atan2(p1[0]-p2[0], p1[1]-p2[1]))
  if azimuth < 0:
      azimuth += 360
  return pd.Series({"Azimuth": azimuth, "geometry": LineString([p1, p2])})

lines = gdf.groupby("TransectID")[["geometry", "Distance"]].apply(get_transects)
lines.crs = gdf.crs
lines

Unnamed: 0_level_0,Azimuth,geometry
TransectID,Unnamed: 1_level_1,Unnamed: 2_level_1
203054437032,158.039438,"LINESTRING Z (1197814.973 4775669.042 0.000, 1..."
203054438164,158.039436,"LINESTRING Z (1197824.850 4775671.287 0.000, 1..."
203054439252,158.039436,"LINESTRING Z (1197834.638 4775673.751 0.000, 1..."
203054440300,158.039436,"LINESTRING Z (1197844.143 4775676.919 0.000, 1..."
203054441325,158.039438,"LINESTRING Z (1197853.924 4775679.404 0.000, 1..."
...,...,...
203057472029,68.790774,"LINESTRING Z (1201217.640 4794384.631 0.000, 1..."
203057473033,68.790774,"LINESTRING Z (1201212.688 4794393.436 0.000, 1..."
203057474037,68.790773,"LINESTRING Z (1201207.735 4794402.241 0.000, 1..."
203057475043,68.790774,"LINESTRING Z (1201202.783 4794411.045 0.000, 1..."


In [51]:
lines["dist_to_neighbour"] = lines.distance(lines.shift(-1))
breakpoints = lines.dist_to_neighbour[lines.dist_to_neighbour > 105]
lines["group"] = pd.Series(range(len(breakpoints)), index=breakpoints.index)
lines["group"] = lines.group.bfill().fillna(len(breakpoints)).astype(int)
transect_metadata = lines[["Azimuth", "group"]].to_dict(orient="index")

In [52]:
#Linear regression is run here. See util.py for the breakdown on linear_models
linear_models = fit(gdf, transect_metadata)
linear_models

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse
0,203054437032,0.597612,-184.780564,0,0.933275,2.987910,10.179226,3.190490
1,203054438164,0.628408,-191.357185,0,0.935932,3.074316,10.776474,3.282754
2,203054439252,0.646472,-195.000842,0,0.961956,2.403946,6.589148,2.566934
3,203054440300,0.646293,-195.198284,0,0.973917,1.977670,4.459511,2.111755
4,203054441325,0.661851,-198.717390,0,0.982892,1.632746,3.039603,1.743446
...,...,...,...,...,...,...,...,...
1498,203057472029,0.232514,-78.970734,7,0.768937,2.549772,8.506127,2.916526
1499,203057473033,0.213640,-74.644851,7,0.762832,2.282120,7.429935,2.725791
1500,203057474037,0.192542,-70.082153,7,0.752276,2.092911,6.391972,2.528235
1501,203057475043,0.167560,-64.831441,7,0.715088,2.061489,5.857144,2.420154


In [None]:
#Only run if rolling average is needed. Otherwise SKIP THIS
#linear_models = fit(gdf, transect_metadata)
#rolled_slopes = linear_models.groupby("group").slope.rolling(10, min_periods=1).mean().dropna().reset_index(level=0)
#linear_models.slope = rolled_slopes.slope
#linear_models.dropna(inplace=True)
#linear_models

In [53]:
#Coordinates of the projected shoreline are plotted here

#Changed coordinate function by making old_x and old_y negative 
def calculate_new_coordinates(old_x, old_y, bearing, distance):
    bearing_radians = math.radians(bearing)
    new_x = old_x + (distance * math.sin(bearing_radians))
    new_y = old_y + (distance * math.cos(bearing_radians))
    point = Point(new_x, new_y)
    assert not point.is_empty
    return point

#Removed other model equations and changed Azimuth addtion from 180 to 360 deg
def predict(
    df: pd.DataFrame,
    linear_models: pd.DataFrame,
    transect_metadata: dict,
):
    """_summary_

    Args:
        df (pd.DataFrame): dataframe with columns: TransectID, Date, Distance, YearsSinceBase
        linear_models (pd.DataFrame): dataframe with columns: TransectID, slope, intercept
        transect_metadata (dict): dict lookup of TransectID to Azimuth & group
        
    Returns:
        pd.DataFrame: resulting prediction points for the year 2100
    """
    results = []
    for i, row in linear_models.iterrows():
        transect_ID = row.TransectID
        transect_df = df[df.TransectID == transect_ID]
        latest_row = transect_df[transect_df.Date == transect_df["Date"].max()].iloc[0]
        future_year = int(row.get("FUTURE_YEAR", FUTURE_YEAR))
        result = row.to_dict()
        result.update({
            "TransectID": transect_ID,
            "BaselineID": latest_row.BaselineID,
            "group": row.group,
            "Year": future_year,
            "ocean_point": calculate_new_coordinates(
                latest_row.geometry.x,
                latest_row.geometry.y,
                transect_metadata[transect_ID]["Azimuth"] + 180,
                500,
            ),
        })
        
        model = "linear"
        slope = row.slope
        intercept = row.intercept

        predicted_distance = slope * (future_year - 1800) + intercept
        distance_difference = latest_row.Distance - predicted_distance
        result[f"{model}_model_point"] = calculate_new_coordinates(
            latest_row.geometry.x,
            latest_row.geometry.y,
            transect_metadata[transect_ID]["Azimuth"],
            distance_difference,
        )
        result[f"{model}_model_predicted_distance"] = predicted_distance
        result[f"{model}_model_distance"] = distance_difference
        results.append(result)
    results = gpd.GeoDataFrame(results)
    return results

In [54]:
#Projection file is created here with the stats and coordinate points in table format
results = predict(gdf, linear_models, transect_metadata)
results

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse,BaselineID,Year,ocean_point,linear_model_point,linear_model_predicted_distance,linear_model_distance
0,2.030544e+11,0.597612,-184.780564,0.0,0.933275,2.987910,10.179226,3.190490,0.0,2100,POINT (1197617.4689350585 4776158.850773319),POINT (1197784.4110155823 4775744.834368717),-5.497036,-53.592964
1,2.030544e+11,0.628408,-191.357185,0.0,0.935932,3.074316,10.776474,3.282754,0.0,2100,POINT (1197626.7964959862 4776162.4586031595),POINT (1197792.6906487774 4775751.041110848),-2.834805,-56.395195
2,2.030544e+11,0.646472,-195.000842,0.0,0.961956,2.403946,6.589148,2.566934,0.0,2100,POINT (1197636.1799686044 4776165.927822924),POINT (1197801.2978931735 4775756.435376581),-1.059153,-58.470847
3,2.030544e+11,0.646293,-195.198284,0.0,0.973917,1.977670,4.459511,2.111755,0.0,2100,POINT (1197645.6403981252 4776169.2061895905),POINT (1197810.6690507247 4775759.93513772),-1.310437,-58.709563
4,2.030544e+11,0.661851,-198.717390,0.0,0.982892,1.632746,3.039603,1.743446,0.0,2100,POINT (1197655.1008452002 4776172.484563475),POINT (1197819.5129964252 4775764.742384209),-0.161945,-60.358055
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1498,2.030575e+11,0.232514,-78.970734,7.0,0.768937,2.549772,8.506127,2.916526,7.0,2100,POINT (1200737.5790139248 4794198.338771458),POINT (1201183.9817034388 4794371.56966614),-9.216406,-21.163594
1499,2.030575e+11,0.213640,-74.644851,7.0,0.762832,2.282120,7.429935,2.725791,7.0,2100,POINT (1200733.4050568482 4794207.445587327),POINT (1201181.6036941495 4794381.373413877),-10.552839,-19.237161
1500,2.030575e+11,0.192542,-70.082153,7.0,0.752276,2.092911,6.391972,2.528235,7.0,2100,POINT (1200729.8510224356 4794216.792958671),POINT (1201179.6314197409 4794391.334611249),-12.319526,-17.540474
1501,2.030575e+11,0.167560,-64.831441,7.0,0.715088,2.061489,5.857144,2.420154,7.0,2100,POINT (1200726.651303354 4794226.277839477),POINT (1201178.113386446 4794401.472080938),-14.563394,-15.736606


In [55]:
#Spatial reference added to the results
results.set_geometry("linear_model_point", inplace=True, crs=2193)
results

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse,BaselineID,Year,ocean_point,linear_model_point,linear_model_predicted_distance,linear_model_distance
0,2.030544e+11,0.597612,-184.780564,0.0,0.933275,2.987910,10.179226,3.190490,0.0,2100,POINT (1197617.4689350585 4776158.850773319),POINT (1197784.411 4775744.834),-5.497036,-53.592964
1,2.030544e+11,0.628408,-191.357185,0.0,0.935932,3.074316,10.776474,3.282754,0.0,2100,POINT (1197626.7964959862 4776162.4586031595),POINT (1197792.691 4775751.041),-2.834805,-56.395195
2,2.030544e+11,0.646472,-195.000842,0.0,0.961956,2.403946,6.589148,2.566934,0.0,2100,POINT (1197636.1799686044 4776165.927822924),POINT (1197801.298 4775756.435),-1.059153,-58.470847
3,2.030544e+11,0.646293,-195.198284,0.0,0.973917,1.977670,4.459511,2.111755,0.0,2100,POINT (1197645.6403981252 4776169.2061895905),POINT (1197810.669 4775759.935),-1.310437,-58.709563
4,2.030544e+11,0.661851,-198.717390,0.0,0.982892,1.632746,3.039603,1.743446,0.0,2100,POINT (1197655.1008452002 4776172.484563475),POINT (1197819.513 4775764.742),-0.161945,-60.358055
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1498,2.030575e+11,0.232514,-78.970734,7.0,0.768937,2.549772,8.506127,2.916526,7.0,2100,POINT (1200737.5790139248 4794198.338771458),POINT (1201183.982 4794371.570),-9.216406,-21.163594
1499,2.030575e+11,0.213640,-74.644851,7.0,0.762832,2.282120,7.429935,2.725791,7.0,2100,POINT (1200733.4050568482 4794207.445587327),POINT (1201181.604 4794381.373),-10.552839,-19.237161
1500,2.030575e+11,0.192542,-70.082153,7.0,0.752276,2.092911,6.391972,2.528235,7.0,2100,POINT (1200729.8510224356 4794216.792958671),POINT (1201179.631 4794391.335),-12.319526,-17.540474
1501,2.030575e+11,0.167560,-64.831441,7.0,0.715088,2.061489,5.857144,2.420154,7.0,2100,POINT (1200726.651303354 4794226.277839477),POINT (1201178.113 4794401.472),-14.563394,-15.736606


In [56]:
#Line and polygon shapefiles are created here 
def prediction_results_to_line_polygon(results: gpd.GeoDataFrame):
    lines = []
    polygons = []
    for group_name, group_data in results.groupby(["BaselineID", "group"]):
        if len(group_data) > 1:
            # Convert the points to LineString
            line = LineString(list(group_data.geometry))
            lines.append(line)
            # Convert the points to a closed Polygon
            polygon = Polygon(list(group_data.geometry) + list(group_data.ocean_point)[::-1])
            polygons.append(polygon)
    lines = gpd.GeoSeries(lines, crs=2193)
    polygons = gpd.GeoSeries(polygons, crs=2193)
    return lines, polygons

In [57]:
lines, poly = prediction_results_to_line_polygon(results)

In [None]:
#Saving line and polygon projection file to Z drive. Change file location accordingly  
lines, poly = prediction_results_to_line_polygon(results)
lines.to_file("Z:\Lalita\RNC Cont\......\BigBay_projection_output_lines.shp")
poly.to_file("Z:\Lalita\RNC Cont\.......\BigBay_projection_output_polygon.shp")

In [59]:
#Saving line and polygon projection file to folder in VS Code. Change file location accordingly
lines, poly = prediction_results_to_line_polygon(results)
lines.to_file("Projections\RakiuraStewartIsland_projection_output_lines.shp")
poly.to_file("Projections\RakiuraStewartIsland_projection_output_polygon.shp")

In [58]:
#Quick visualisation of projected polygon and historic shorelines 
m = poly.explore(tiles="Esri.WorldImagery")
gpd.GeoDataFrame(results.drop(columns=["ocean_point", "linear_model_point"]), geometry=results.linear_model_point).explore(m=m)
gdf.explore("Year", legend=True, m=m)