# Clipping a csv on the Seine River Basin

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point

In [None]:
# Function to clip a csv on the Seine River Basin

def clipping(df, coord_x, coord_y, path_basin=None):
    """Keep only the rows of ``df`` whose point lies within the basin layer.

    The basin layer is read from ``path_basin`` and reprojected to EPSG:2154
    (RGF93 / Lambert-93). The ``coord_x`` / ``coord_y`` columns of ``df`` are
    labelled as EPSG:2154 coordinates, not reprojected, so they must already
    be expressed in that CRS.

    A quick-look map (basin in red, retained points in blue) is displayed as
    a side effect.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one point per row.
    coord_x, coord_y : str
        Names of the columns of ``df`` holding the x and y coordinates.
    path_basin : str, optional
        Path of the basin layer, in any format ``geopandas.read_file``
        accepts. When omitted, a machine-specific default path is used.

    Returns
    -------
    geopandas.GeoDataFrame
        The rows of ``df`` falling within the basin, with a ``geometry``
        column plus the columns contributed by the spatial join with the
        basin layer.
    """
    if path_basin is None:
        # NOTE: machine-specific fallback kept for backward compatibility;
        # pass ``path_basin`` explicitly to run this on another machine.
        path_basin = '/Users/mahlia/Desktop/SeineProject/bassin/bassin_outlet/Subcatchement_12.shx'

    polygon_df = gpd.read_file(path_basin).to_crs(epsg=2154)

    # Build the point geometries in one vectorised call. The coordinates are
    # declared to be in EPSG:2154 (Lambert-93), matching the basin layer.
    points = gpd.points_from_xy(df[coord_x], df[coord_y])
    # Work on a copy so the caller's DataFrame is never handed a geometry
    # column, whatever the installed geopandas version does internally.
    gdf = gpd.GeoDataFrame(df.copy(), geometry=points, crs="EPSG:2154")

    # ``predicate`` is the current name of the former ``op`` keyword.
    clipped = gpd.sjoin(gdf, polygon_df, predicate='within')

    # Visual check: basin outline with the retained points on top.
    fig, ax = plt.subplots()
    polygon_df.plot(ax=ax, color='red')
    clipped.plot(ax=ax, color='blue', markersize=5)
    plt.show()

    return clipped

# Import needed files

In [None]:
# importing necessary functions from other files

import os
import sys

# to facilitate the use of notebooks
%load_ext autoreload
%autoreload 2

# Start from the notebook's working directory and climb two levels up
current_dir = os.getcwd()
parent_dir = os.path.abspath(
    os.path.join(current_dir, *([os.pardir] * 2))
)

In [None]:
# Build the path of the file to clip, then read it.
# Change the path components below to point at another input file.

file_dir = os.path.join(parent_dir, "biol_data", "stations", "metadata_biolstat.xlsx")
print(file_dir)

# Choose the reader from the file extension: running both readers on the same
# file makes the CSV parser choke on Excel workbooks (and vice versa).

if file_dir.lower().endswith((".xls", ".xlsx")):
    # sheet_name=1 selects the second sheet of the workbook (0-based index)
    df = pd.read_excel(file_dir, sheet_name=1)
else:
    df = pd.read_csv(file_dir, delimiter=";")

In [None]:
# Locate the basin layer inside the project tree, show the path, then load it

polygon_dir = os.path.join(
    parent_dir,
    "Qgis",
    "bassin_outlet",
    "Subcatchement_12.shx",
)
print(polygon_dir)
polygon_df = gpd.read_file(polygon_dir)

In [None]:
# List the column names to find the ones holding the coordinates
df.columns

# Clipping

In [None]:
# Names of the columns holding the x / y coordinates; change them if the input
# file uses other names (see the output of df.keys()).

coord_x = 'coordonnee_x'
coord_y = 'coordonnee_y'

# Clip against the basin layer located through polygon_dir, so the notebook
# does not depend on the machine-specific default path built into clipping().
clipped_df = clipping(df, coord_x=coord_x, coord_y=coord_y, path_basin=polygon_dir)

In [None]:
# Sanity check: shape of the clipped table next to the shape of the input table
(clipped_df.shape, df.shape)

# Export files

In [None]:
# Export the clipped table as a semicolon-separated CSV file.
# Change name_clipped to choose the base name of the output file
# (e.g. include "clippedpy" to remember it was clipped using Python).

name_clipped = 'stations_biol'

clipped_df.to_csv(name_clipped + '.csv', sep=";", index=False)

In [None]:
# Save to a GeoJSON file
# to facilitate visualisation in QGIS

# clipped_df.to_file(f"{name_clipped}+.geojson", driver="GeoJSON", geometry="geometry")