In [1]:
%pip install geopandas shapely

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os

import geopandas as gpd

# Load the mangrove polygon shapefile.
# The location can be overridden with the MANGROVE_SHAPEFILE_PATH environment
# variable so the notebook is not tied to one machine's mount point; when the
# variable is unset the original absolute path below is used unchanged.
SHAPEFILE_PATH = os.environ.get(
    'MANGROVE_SHAPEFILE_PATH',
    '/media/mrugesh/58300FD9300FBCCE/Repos/hack-the-box-mangrove-project/ml-backend/data/gmw_v3_2020_vec.shp',
)
mangrove_gdf = gpd.read_file(SHAPEFILE_PATH)

In [4]:
from shapely.geometry import Point

def is_in_mangrove_region(latitude, longitude, mangrove_gdf):
    """
    Checks if a given GPS coordinate is within a mangrove region.

    The lookup goes through the GeoDataFrame's spatial index instead of
    evaluating ``contains`` against every row, which keeps repeated lookups
    cheap on large polygon layers.

    Args:
        latitude (float): The latitude of the point.
        longitude (float): The longitude of the point.
        mangrove_gdf (GeoDataFrame): The GeoDataFrame containing mangrove polygons.
            Assumed to be in a longitude/latitude CRS (e.g. EPSG:4326), since
            the point is built from raw degrees -- TODO confirm against the data.

    Returns:
        bool: True if the point is in a mangrove region, False otherwise.
            Always a built-in ``bool`` (never ``numpy.bool_``), so the result
            can be serialised or compared with ``is True`` safely.
    """
    import math

    # A NaN/inf coordinate (e.g. a missing GPS fix) cannot lie inside any
    # polygon, so answer directly without building a geometry.
    if not (math.isfinite(latitude) and math.isfinite(longitude)):
        return False

    # Shapely points are (x, y), i.e. (longitude, latitude).
    point = Point(longitude, latitude)

    # The index predicate is evaluated as predicate(point, polygon), so
    # "within" is the exact inverse of polygon.contains(point): points lying
    # exactly on a polygon boundary are still reported as outside.
    matches = mangrove_gdf.sindex.query(point, predicate="within")
    return len(matches) > 0

In [5]:
# Sample coordinate to classify (swap in the real input here)
gps_latitude, gps_longitude = 21.9497, 88.7995

# Look the coordinate up against the loaded mangrove polygons
is_mangrove = is_in_mangrove_region(gps_latitude, gps_longitude, mangrove_gdf)

# Report the classification
print("Output: Mangrove region" if is_mangrove else "Output: Non-mangrove region")

Output: Mangrove region


In [6]:
%pip install geopandas pyarrow

Note: you may need to restart the kernel to use updated packages.


In [7]:
import geopandas
import os
from shapely.geometry import Point

# --- 1. Setup: Define paths relative to the current folder ---
# The cache is written to a 'data' subfolder of the current working directory.
OUTPUT_FOLDER = 'data'
os.makedirs(OUTPUT_FOLDER, exist_ok=True)  # no-op when the folder already exists
processed_file_path = os.path.join(OUTPUT_FOLDER, 'mangrove_data.parquet')

print(f"Will check for processed data at: {processed_file_path}")


# --- 2. Main Logic: Check if the processed file already exists ---
# NOTE(review): both branches rebind `mangrove_gdf`, replacing whatever an
# earlier cell stored under that name. With the placeholder below the result
# is a single dummy Point row, not real mangrove polygons.
if os.path.exists(processed_file_path):
    # --- FAST PATH: Load the pre-saved data ---
    print("\nProcessed file found! Loading from local Parquet file...")
    mangrove_gdf = geopandas.read_parquet(processed_file_path)
    print("Loading complete.")

else:
    # --- SLOW PATH: Run this only the very first time ---
    print("\nProcessed file not found. Running the full data processing pipeline...")

    # --- TODO: REPLACE THIS SECTION WITH YOUR ACTUAL DATA LOADING ---
    print("   - (Example) Creating dummy raw data...")
    d = {'col1': ['mangrove_A', 'mangrove_B'], 'area_sqkm': [10.5, 22.1], 'geometry': [Point(72.8, 19.0), Point(73.0, 19.1)]}
    raw_gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
    print("   - Raw data loaded.")

    print("   - (Example) Processing data...")
    # Keep only rows larger than 20 sq km; .copy() detaches the result from raw_gdf.
    mangrove_gdf = raw_gdf[raw_gdf['area_sqkm'] > 20].copy()
    print("   - Data processing complete.")
    # --- END OF SECTION TO REPLACE ---


    # 3. Save the final, processed GeoDataFrame for all future runs.
    print(f"   - Saving processed data to {processed_file_path}...")
    # Write to a temporary sibling file and rename it into place. The fast path
    # above only checks that the cache file exists, so an interrupted direct
    # write would leave a truncated file that every later run tries to load.
    tmp_file_path = processed_file_path + '.tmp'
    try:
        mangrove_gdf.to_parquet(tmp_file_path)
        os.replace(tmp_file_path, processed_file_path)
    finally:
        # Only present here if the write or rename failed part-way.
        if os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)
    print("   - Save complete.")


# --- 3. Continue your analysis below ---
print("\n--- Analysis can now proceed ---")
print("Final GeoDataFrame Info:")
mangrove_gdf.info()
print("\nFirst 5 rows:")
print(mangrove_gdf.head())

Will check for processed data at: data/mangrove_data.parquet

Processed file not found. Running the full data processing pipeline...
   - (Example) Creating dummy raw data...
   - Raw data loaded.
   - (Example) Processing data...
   - Data processing complete.
   - Saving processed data to data/mangrove_data.parquet...
   - Save complete.

--- Analysis can now proceed ---
Final GeoDataFrame Info:
<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 1 entries, 1 to 1
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   col1       1 non-null      object  
 1   area_sqkm  1 non-null      float64 
 2   geometry   1 non-null      geometry
dtypes: float64(1), geometry(1), object(1)
memory usage: 32.0+ bytes

First 5 rows:
         col1  area_sqkm         geometry
1  mangrove_B       22.1  POINT (73 19.1)


In [8]:
%pip install "fastapi[all]" geopandas pyarrow

Note: you may need to restart the kernel to use updated packages.


In [9]:
import geopandas as gpd
import os
from shapely.geometry import Point

# --- 1. SETUP: Define your file paths ---
# Source shapefile. Set the MANGROVE_SHAPEFILE_PATH environment variable to point
# at your own copy; when it is unset the original absolute path below is used.
SHAPEFILE_PATH = os.environ.get(
    'MANGROVE_SHAPEFILE_PATH',
    '/media/mrugesh/58300FD9300FBCCE/Repos/hack-the-box-mangrove-project/ml-backend/data/gmw_v3_2020_vec.shp',
)

# Define a folder to store the fast-loading processed data.
OUTPUT_FOLDER = 'processed_data'
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
PROCESSED_FILE_PATH = os.path.join(OUTPUT_FOLDER, 'mangrove_data.parquet')

# --- 2. DATA LOADING: Load fast file if available, otherwise process the slow one ---
if os.path.exists(PROCESSED_FILE_PATH):
    # FAST PATH: If we've processed this before, load the quick Parquet file.
    print(f"Loading from fast Parquet file: {PROCESSED_FILE_PATH}")
    mangrove_gdf = gpd.read_parquet(PROCESSED_FILE_PATH)
else:
    # SLOW PATH: First time running. Load the large shapefile.
    print(f"Processed file not found. Loading from shapefile: {SHAPEFILE_PATH}")
    mangrove_gdf = gpd.read_file(SHAPEFILE_PATH)

    # Optional: You can add any data cleaning or processing steps here.
    # For example, simplifying geometries or removing unnecessary columns.

    # Save the processed data to a Parquet file for future runs.
    print(f"Saving to Parquet file for faster loading next time: {PROCESSED_FILE_PATH}")
    # Write to a temporary sibling file and rename it into place. The fast path
    # only checks that the cache file exists, so an interrupted direct write
    # would leave a truncated file that every later run tries (and fails) to load.
    tmp_file_path = PROCESSED_FILE_PATH + '.tmp'
    try:
        mangrove_gdf.to_parquet(tmp_file_path)
        os.replace(tmp_file_path, PROCESSED_FILE_PATH)
    finally:
        # Only present here if the write or rename failed part-way.
        if os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)

print("--- Mangrove data loaded successfully! ---")


# --- 3. ANALYSIS FUNCTION ---
# NOTE(review): a function with this name is also defined in an earlier cell;
# this definition replaces it once the cell runs.
def is_in_mangrove_region(latitude, longitude, gdf):
    """
    Checks if a given GPS coordinate is within a mangrove region.

    The lookup goes through the GeoDataFrame's spatial index instead of
    evaluating ``contains`` against every row, which keeps repeated lookups
    cheap on large polygon layers.

    Args:
        latitude (float): The latitude of the point.
        longitude (float): The longitude of the point.
        gdf (GeoDataFrame): The GeoDataFrame containing mangrove polygons.
            Assumed to be in a longitude/latitude CRS (e.g. EPSG:4326), since
            the point is built from raw degrees -- TODO confirm against the data.

    Returns:
        bool: True if the point is in a mangrove region, False otherwise.
            Always a built-in ``bool`` (never ``numpy.bool_``).
    """
    import math

    # A NaN/inf coordinate (e.g. a missing GPS fix) cannot lie inside any
    # polygon, so answer directly without building a geometry.
    if not (math.isfinite(latitude) and math.isfinite(longitude)):
        return False

    # Shapely points are (x, y), i.e. (longitude, latitude).
    point_to_check = Point(longitude, latitude)

    # The index predicate is evaluated as predicate(point, polygon), so
    # "within" is the exact inverse of polygon.contains(point): points lying
    # exactly on a polygon boundary are still reported as outside.
    matches = gdf.sindex.query(point_to_check, predicate="within")
    return len(matches) > 0


# --- 4. EXAMPLE USAGE: Test a coordinate ---
# NOTE: this point (about 23.16 N, 72.68 E) lies inland in western India, not
# in the Sundarbans (roughly 21.9 N, 88.8 E), so a non-mangrove result is the
# expected outcome for it.
gps_latitude = 23.1568
gps_longitude = 72.6757

print(f"\nChecking coordinate: (Lat: {gps_latitude}, Lon: {gps_longitude})")

# Check if the coordinates are in a mangrove region
is_mangrove = is_in_mangrove_region(gps_latitude, gps_longitude, mangrove_gdf)

# Output the result
if is_mangrove:
    print("✅ Result: Mangrove region")
else:
    print("❌ Result: Non-mangrove region")

Processed file not found. Loading from shapefile: /media/mrugesh/58300FD9300FBCCE/Repos/hack-the-box-mangrove-project/ml-backend/data/gmw_v3_2020_vec.shp
Saving to Parquet file for faster loading next time: processed_data/mangrove_data.parquet
--- Mangrove data loaded successfully! ---

Checking coordinate: (Lat: 23.1568, Lon: 72.6757)
❌ Result: Non-mangrove region
