In [36]:
#Setting up the code and installing required packages 
from util import *
from glob import glob
import matplotlib.pyplot as plt
from shapely import wkt
from shapely.geometry import LineString, Polygon
pd.set_option("display.max_columns", None)

In [37]:
#Navigate to intersects shapefile and separates out the year, finds how many years from 1800 and how many years to 2100
gdf = gpd.read_file(f"Data/Merged Intersects_UniqueID/MoturoaRabbitIsland_Intersects.shp")
gdf["Date"] = pd.to_datetime(gdf.ShorelineI, dayfirst=True, format='mixed')
gdf["Year"] = gdf.Date.dt.year
gdf["YearsSinceBase"] = (gdf.Date - pd.Timestamp(1800, 1, 1)).dt.days / 365.25
gdf["YearsUntilFuture"] = (
    pd.Timestamp(2100, 1, 1) - gdf.Date
    ).dt.days / 365.25
gdf.Date = gdf.Date.astype(str)
gdf["TransectID"] = gdf.Unique_ID.astype(np.int64)
gdf

Unnamed: 0,ShorelineI,BaselineID,Distance,IntersectX,IntersectY,Uncertaint,Unique_ID,Date,geometry,Year,YearsSinceBase,YearsUntilFuture,TransectID
0,02/22/1940,0.0,-38.23,1.608786e+06,5.432696e+06,3.77,2.020019e+11,1940-02-22,POINT Z (1608786.396 5432695.864 0.000),1940,140.136893,159.857632,202001935832
1,12/14/2006,0.0,-32.06,1.608782e+06,5.432692e+06,2.23,2.020019e+11,2006-12-14,POINT Z (1608781.748 5432691.812 0.000),2006,206.945927,93.048597,202001935832
2,02/17/2018,0.0,-44.52,1.608791e+06,5.432700e+06,2.30,2.020019e+11,2018-02-17,POINT Z (1608791.138 5432699.998 0.000),2018,218.124572,81.869952,202001935832
3,03/18/2021,0.0,-44.14,1.608791e+06,5.432700e+06,2.30,2.020019e+11,2021-03-18,POINT Z (1608790.853 5432699.750 0.000),2021,221.204654,78.789870,202001935832
4,12/06/1946,0.0,-47.10,1.608793e+06,5.432702e+06,3.78,2.020019e+11,1946-06-12,POINT Z (1608793.088 5432701.698 0.000),1946,146.439425,153.555099,202001935832
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8913,03/18/2021,5.0,-30.04,1.616425e+06,5.429557e+06,2.30,2.020008e+11,2021-03-18,POINT Z (1616425.334 5429556.586 0.000),2021,221.204654,78.789870,202000813532
8914,02/19/2010,5.0,-68.15,1.616417e+06,5.429594e+06,2.23,2.020008e+11,2010-02-19,POINT Z (1616416.770 5429593.716 0.000),2010,210.130048,89.864476,202000813532
8915,12/23/2014,5.0,-60.84,1.616418e+06,5.429587e+06,2.21,2.020008e+11,2014-12-23,POINT Z (1616418.412 5429586.597 0.000),2014,214.970568,85.023956,202000813532
8916,09/22/1969,5.0,-85.79,1.616413e+06,5.429611e+06,4.32,2.020008e+11,1969-09-22,POINT Z (1616412.804 5429610.912 0.000),1969,169.719370,130.275154,202000813532


In [38]:
def get_transects(intersects):
  p1 = intersects.geometry[intersects.Distance.idxmin()].coords[0]
  p2 = intersects.geometry[intersects.Distance.idxmax()].coords[0]
  azimuth = math.degrees(math.atan2(p1[0]-p2[0], p1[1]-p2[1]))
  if azimuth < 0:
      azimuth += 360
  return pd.Series({"Azimuth": azimuth, "geometry": LineString([p1, p2])})

lines = gdf.groupby("TransectID")[["geometry", "Distance"]].apply(get_transects)
lines.crs = gdf.crs
lines

Unnamed: 0_level_0,Azimuth,geometry
TransectID,Unnamed: 1_level_1,Unnamed: 2_level_1
202000000383,178.322359,"LINESTRING Z (1609363.708 5433240.643 0.000, 1..."
202000001396,178.322359,"LINESTRING Z (1609373.709 5433240.754 0.000, 1..."
202000002405,178.322362,"LINESTRING Z (1609383.742 5433239.770 0.000, 1..."
202000003410,178.322359,"LINESTRING Z (1609393.785 5433238.431 0.000, 1..."
202000004413,178.322362,"LINESTRING Z (1609403.853 5433236.259 0.000, 1..."
...,...,...
202002024858,164.341654,"LINESTRING Z (1609348.682 5433239.814 0.000, 1..."
202002025861,164.341654,"LINESTRING Z (1609358.899 5433240.414 0.000, 1..."
202002026862,164.341654,"LINESTRING Z (1609369.189 5433240.754 0.000, 1..."
202002027863,164.341654,"LINESTRING Z (1609379.699 5433240.309 0.000, 1..."


In [39]:
lines["dist_to_neighbour"] = lines.distance(lines.shift(-1))
breakpoints = lines.dist_to_neighbour[lines.dist_to_neighbour > 105]
lines["group"] = pd.Series(range(len(breakpoints)), index=breakpoints.index)
lines["group"] = lines.group.bfill().fillna(len(breakpoints)).astype(int)
transect_metadata = lines[["Azimuth", "group"]].to_dict(orient="index")

In [40]:
#Linear regression is run here. See util.py for the breakdown on linear_models
linear_models = fit(gdf, transect_metadata)
linear_models

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse
0,202000000383,1.208532,-309.389473,0,0.825520,13.725727,245.687167,15.674411
1,202000001396,1.212739,-308.860643,0,0.822146,14.052535,253.218385,15.912837
2,202000002405,1.227370,-310.774640,0,0.822093,14.343592,259.460119,16.107766
3,202000003410,1.251225,-314.763557,0,0.827195,14.424609,260.294873,16.133657
4,202000004413,1.282725,-320.682448,0,0.834436,14.421382,259.829008,16.119212
...,...,...,...,...,...,...,...,...
1017,202002024858,1.115506,-289.812127,4,0.824275,12.476494,211.131346,14.530359
1018,202002025861,1.145357,-295.592459,4,0.827967,12.714646,216.933904,14.728676
1019,202002026862,1.175564,-301.420394,4,0.829307,13.195759,226.380538,15.045948
1020,202002027863,1.204117,-307.278683,4,0.825332,13.880520,244.213369,15.627328


In [None]:
#Only run if rolling average is needed. Otherwise SKIP THIS
#linear_models = fit(gdf, transect_metadata)
#rolled_slopes = linear_models.groupby("group").slope.rolling(10, min_periods=1).mean().dropna().reset_index(level=0)
#linear_models.slope = rolled_slopes.slope
#linear_models.dropna(inplace=True)
#linear_models

In [41]:
#Coordinates of the projected shoreline are plotted here

#Changed coordinate function by making old_x and old_y negative 
def calculate_new_coordinates(old_x, old_y, bearing, distance):
    bearing_radians = math.radians(bearing)
    new_x = old_x + (distance * math.sin(bearing_radians))
    new_y = old_y + (distance * math.cos(bearing_radians))
    point = Point(new_x, new_y)
    assert not point.is_empty
    return point

#Removed other model equations and changed Azimuth addtion from 180 to 360 deg
def predict(
    df: pd.DataFrame,
    linear_models: pd.DataFrame,
    transect_metadata: dict,
):
    """_summary_

    Args:
        df (pd.DataFrame): dataframe with columns: TransectID, Date, Distance, YearsSinceBase
        linear_models (pd.DataFrame): dataframe with columns: TransectID, slope, intercept
        transect_metadata (dict): dict lookup of TransectID to Azimuth & group
        
    Returns:
        pd.DataFrame: resulting prediction points for the year 2100
    """
    results = []
    for i, row in linear_models.iterrows():
        transect_ID = row.TransectID
        transect_df = df[df.TransectID == transect_ID]
        latest_row = transect_df[transect_df.Date == transect_df["Date"].max()].iloc[0]
        future_year = int(row.get("FUTURE_YEAR", FUTURE_YEAR))
        result = row.to_dict()
        result.update({
            "TransectID": transect_ID,
            "BaselineID": latest_row.BaselineID,
            "group": row.group,
            "Year": future_year,
            "ocean_point": calculate_new_coordinates(
                latest_row.geometry.x,
                latest_row.geometry.y,
                transect_metadata[transect_ID]["Azimuth"] + 180,
                500,
            ),
        })
        
        model = "linear"
        slope = row.slope
        intercept = row.intercept

        predicted_distance = slope * (future_year - 1800) + intercept
        distance_difference = latest_row.Distance - predicted_distance
        result[f"{model}_model_point"] = calculate_new_coordinates(
            latest_row.geometry.x,
            latest_row.geometry.y,
            transect_metadata[transect_ID]["Azimuth"],
            distance_difference,
        )
        result[f"{model}_model_predicted_distance"] = predicted_distance
        result[f"{model}_model_distance"] = distance_difference
        results.append(result)
    results = gpd.GeoDataFrame(results)
    return results

In [42]:
#Projection file is created here with the stats and coordinate points in table format
results = predict(gdf, linear_models, transect_metadata)
results

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse,BaselineID,Year,ocean_point,linear_model_point,linear_model_predicted_distance,linear_model_distance
0,2.020000e+11,1.208532,-309.389473,0.0,0.825520,13.725727,245.687167,15.674411,0.0,2100,POINT (1609346.6188321272 5433824.100738599),POINT (1609357.881075016 5433439.575891781),53.170260,-115.310260
1,2.020000e+11,1.212739,-308.860643,0.0,0.822146,14.052535,253.218385,15.912837,0.0,2100,POINT (1609356.5987620691 5433824.9323994275),POINT (1609367.8243889469 5433441.657726857),54.960971,-116.560971
2,2.020000e+11,1.227370,-310.774640,0.0,0.822093,14.343592,259.460119,16.107766,0.0,2100,POINT (1609366.5719553519 5433825.994767111),POINT (1609377.747633378 5433444.424882774),57.436490,-118.266490
3,2.020000e+11,1.251225,-314.763557,0.0,0.827195,14.424609,260.294873,16.133657,0.0,2100,POINT (1609376.5215832847 5433827.8603260275),POINT (1609387.6508039904 5433447.877233024),60.603962,-119.853962
4,2.020000e+11,1.282725,-320.682448,0.0,0.834436,14.421382,259.829008,16.119212,0.0,2100,POINT (1609386.480434556 5433829.412375073),POINT (1609397.543147072 5433451.699459117),64.135113,-122.125113
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1017,2.020020e+11,1.115506,-289.812127,4.0,0.824275,12.476494,211.131346,14.530359,0.0,2100,POINT (1609192.812522151 5433795.889006715),POINT (1609299.1021333737 5433416.693651319),44.839641,-106.189641
1018,2.020020e+11,1.145357,-295.592459,4.0,0.827967,12.714646,216.933904,14.728676,0.0,2100,POINT (1609202.2866675449 5433799.140045626),POINT (1609307.8732030354 5433422.452960316),48.014585,-108.794585
1019,2.020020e+11,1.175564,-301.420394,4.0,0.829307,13.195759,226.380538,15.045948,0.0,2100,POINT (1609211.6990100113 5433802.611570648),POINT (1609316.6285120966 5433428.26849632),51.248939,-111.228939
1020,2.020020e+11,1.204117,-307.278683,4.0,0.825332,13.880520,244.213369,15.627328,0.0,2100,POINT (1609221.4591104372 5433804.842450825),POINT (1609325.5256009016 5433433.578224156),53.956453,-114.426453


In [43]:
#Spatial reference added to the results
results.set_geometry("linear_model_point", inplace=True, crs=2193)
results

Unnamed: 0,TransectID,slope,intercept,group,r2_score,mae,mse,rmse,BaselineID,Year,ocean_point,linear_model_point,linear_model_predicted_distance,linear_model_distance
0,2.020000e+11,1.208532,-309.389473,0.0,0.825520,13.725727,245.687167,15.674411,0.0,2100,POINT (1609346.6188321272 5433824.100738599),POINT (1609357.881 5433439.576),53.170260,-115.310260
1,2.020000e+11,1.212739,-308.860643,0.0,0.822146,14.052535,253.218385,15.912837,0.0,2100,POINT (1609356.5987620691 5433824.9323994275),POINT (1609367.824 5433441.658),54.960971,-116.560971
2,2.020000e+11,1.227370,-310.774640,0.0,0.822093,14.343592,259.460119,16.107766,0.0,2100,POINT (1609366.5719553519 5433825.994767111),POINT (1609377.748 5433444.425),57.436490,-118.266490
3,2.020000e+11,1.251225,-314.763557,0.0,0.827195,14.424609,260.294873,16.133657,0.0,2100,POINT (1609376.5215832847 5433827.8603260275),POINT (1609387.651 5433447.877),60.603962,-119.853962
4,2.020000e+11,1.282725,-320.682448,0.0,0.834436,14.421382,259.829008,16.119212,0.0,2100,POINT (1609386.480434556 5433829.412375073),POINT (1609397.543 5433451.699),64.135113,-122.125113
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1017,2.020020e+11,1.115506,-289.812127,4.0,0.824275,12.476494,211.131346,14.530359,0.0,2100,POINT (1609192.812522151 5433795.889006715),POINT (1609299.102 5433416.694),44.839641,-106.189641
1018,2.020020e+11,1.145357,-295.592459,4.0,0.827967,12.714646,216.933904,14.728676,0.0,2100,POINT (1609202.2866675449 5433799.140045626),POINT (1609307.873 5433422.453),48.014585,-108.794585
1019,2.020020e+11,1.175564,-301.420394,4.0,0.829307,13.195759,226.380538,15.045948,0.0,2100,POINT (1609211.6990100113 5433802.611570648),POINT (1609316.629 5433428.268),51.248939,-111.228939
1020,2.020020e+11,1.204117,-307.278683,4.0,0.825332,13.880520,244.213369,15.627328,0.0,2100,POINT (1609221.4591104372 5433804.842450825),POINT (1609325.526 5433433.578),53.956453,-114.426453


In [44]:
#Line and polygon shapefiles are created here 
def prediction_results_to_line_polygon(results: gpd.GeoDataFrame):
    lines = []
    polygons = []
    for group_name, group_data in results.groupby(["BaselineID", "group"]):
        if len(group_data) > 1:
            # Convert the points to LineString
            line = LineString(list(group_data.geometry))
            lines.append(line)
            # Convert the points to a closed Polygon
            polygon = Polygon(list(group_data.geometry) + list(group_data.ocean_point)[::-1])
            polygons.append(polygon)
    lines = gpd.GeoSeries(lines, crs=2193)
    polygons = gpd.GeoSeries(polygons, crs=2193)
    return lines, polygons

In [45]:
lines, poly = prediction_results_to_line_polygon(results)

In [None]:
#Saving line and polygon projection file to Z drive. Change file location accordingly  
lines, poly = prediction_results_to_line_polygon(results)
lines.to_file("Z:\Lalita\RNC Cont\......\BigBay_projection_output_lines.shp")
poly.to_file("Z:\Lalita\RNC Cont\.......\BigBay_projection_output_polygon.shp")

In [47]:
#Saving line and polygon projection file to folder in VS Code. Change file location accordingly
lines, poly = prediction_results_to_line_polygon(results)
lines.to_file("Projections\MoturoaRabbitIsland_projection_output_lines.shp")
poly.to_file("Projections\MoturoaRabbitIsland_projection_output_polygon.shp")

In [46]:
#Quick visualisation of projected polygon and historic shorelines 
m = poly.explore(tiles="Esri.WorldImagery")
gpd.GeoDataFrame(results.drop(columns=["ocean_point", "linear_model_point"]), geometry=results.linear_model_point).explore(m=m)
gdf.explore("Year", legend=True, m=m)