# Sample_Assign_Onshore

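## This notebook shows how each sample was estimated to be onshore or not, by testing whether its location falls inside Natural Earth land polygons clipped to the East Coast (TODO: expand on description)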


In [None]:
import ogr, os
import pandas as pd
import geopandas as gpd
import shapefile
import json 
import ipywidgets
from cartopy.feature import NaturalEarthFeature
import cartopy.io.shapereader as shpreader

import numpy as np
# Dataset picker: the entry selected here is read by the following cell
# (via `zen.value`) to decide which file to download.
dataset_choices = [
    'Entire Dataset',
    'Estimated Onshore Data',
    'Verified Onshore Data',
    'Verified Onshore Post 2012 Data',
]

zen = ipywidgets.Select(
    options=dataset_choices,
    value=dataset_choices[0],  # start on 'Entire Dataset'
    description='Dataset:',
    disabled=False,
)

display(zen)


In [None]:
# Download the dataset chosen in the widget and turn it into a GeoDataFrame.
#
# Reads:  zen.value (label selected in the dataset widget)
# Writes: filename (CSV name), url (full download URL),
#         df (pandas DataFrame of the CSV), gdf (GeoDataFrame of df in EPSG:4326)
url = 'https://zenodo.org/record/5874231/files/'

# Keys must match the widget's option labels exactly; a label without a match
# would otherwise leave `filename` undefined (or stale from a previous run).
dataset_files = {
    'Entire Dataset': 'dataset_10kmcoast.csv',
    'Estimated Onshore Data': 'Data_EstimatedOnshore.csv',
    'Verified Onshore Data': 'Data_VerifiedOnshore.csv',
    'Verified Onshore Post 2012 Data': 'Data_Post2012_VerifiedOnshore.csv',
}
if zen.value not in dataset_files:
    raise ValueError(
        'No file is registered for dataset selection {!r}; expected one of {}'.format(
            zen.value, list(dataset_files)
        )
    )
filename = dataset_files[zen.value]

url = url + filename
print('Retrieving Data, Please Wait')
# retrieve data
df = pd.read_csv(url)
print('Sediment Data Retrieved!')
# Build one point per row from the longitude/latitude columns, then tag the CRS as EPSG:4326
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))
gdf = gdf.set_crs(epsg=4326)
print('Sediment Data Converted to a GeoDataFrame, Next cell retrieves Natural Earth Data')



In [None]:
# Ask cartopy for the Natural Earth 10m physical "land" layer, then open the
# returned shapefile reference with geopandas.
land_source = shpreader.natural_earth(
    resolution='10m',
    category='physical',
    name='land',
)
land = gpd.read_file(land_source)

print('Natural Earth Data retrieved and converted to a GeoDataFrame')

In [None]:
# Bounding region for the East Coast, used later to clip the coastal land polygons.
# Stored as GeoJSON text: a FeatureCollection with a single Polygon feature whose
# ring starts and ends on the same vertex. Coordinates follow the GeoJSON
# [longitude, latitude] order.
eastcoast_geojson_text = """
     {"type": "FeatureCollection", "features": [{
      "type": "Feature",
      "properties": {},
      "geometry": {
        "type": "Polygon",
        "coordinates": [
          [
            [
              -97.294921875,
              24.766784522874453
            ],
            [
              -94.130859375,
              22.51255695405145
            ],
            [
              -86.30859375,
              21.94304553343818
            ],
            [
              -80.244140625,
              16.130262012034756
            ],
            [
              -65.0390625,
              15.792253570362446
            ],
            [
              -66.610107421875,
              44.66083904265621
            ],
            [
              -66.7694091796875,
              44.8500274926005
            ],
            [
              -66.8902587890625,
              44.779885502772736
            ],
            [
              -67.5,
              47.69497434186282
            ],
            [
              -69.60937499999999,
              47.754097979680026
            ],
            [
              -71.630859375,
              45.521743896993634
            ],
            [
              -101.25,
              30.221101852485987
            ],
            [
              -97.294921875,
              24.766784522874453
            ]
          ]
        ]
      }
    }]}
"""

# Parse the text into a plain Python dict (GeoJSON structure)
eastcoast = json.loads(eastcoast_geojson_text)



# Turn the East Coast GeoJSON into a GeoDataFrame tagged as EPSG:4326
eastcoast = gpd.GeoDataFrame.from_features(eastcoast).set_crs(epsg=4326)

# Keep only the Natural Earth land that falls inside the East Coast polygon
eastcoast_bounds = gpd.clip(land, eastcoast)

# Renumber both frames from 0 before clipping the sample points
eastcoast = eastcoast.reset_index(drop=True)
gdf = gdf.reset_index(drop=True)

# Samples that survive the clip against the bounded land are the onshore ones
gdf = gpd.clip(gdf, eastcoast_bounds)

# Plain pandas table of the onshore samples, reduced to sample id + 'y' flag
df_os = pd.DataFrame(gdf)
df_os['Onshore'] = 'y'
df_os = df_os.loc[:, ['Unnamed: 0', 'Onshore']].copy()

# Left-join the flag onto the full dataset; samples without a match are marked 'n'
df1 = df.merge(df_os, on='Unnamed: 0', how='left')
df1['Onshore'] = df1['Onshore'].fillna('n')



In [None]:
# Save the dataset, including its Onshore column, as CSV at the relative path below
output_csv = '../data.csv'
df1.to_csv(output_csv)
