In [49]:

import pandas as pd
import numpy as np
import glob
import netCDF4 as nc
from shapely.geometry import LineString, Polygon
import geopandas as gpd

def _filled_float(values):
    """Return ``values`` as a float64 ndarray with masked entries set to NaN."""
    return np.ma.filled(np.ma.asarray(values).astype(np.float64), np.nan)


def process_file(nc_path, df_list, step=5, scale_factor=0.05):
    """Append one wind-vector record per subsampled grid cell of a NetCDF file.

    Reads the ``lat``, ``lon``, ``wind_speed`` and ``wind_dir`` variables,
    wraps longitudes into [-180, 180), converts speed to knots, derives U/V
    components and appends a dict per valid cell to ``df_list``.

    Parameters
    ----------
    nc_path : str
        Path of the NetCDF file to read.
    df_list : list
        List that is extended in place with one dict per wind vector. Keys:
        ``datetime``, ``lat_start``, ``lon_start``, ``lat_end``, ``lon_end``,
        ``wind_speed_knots``, ``wind_direction_deg``, ``u_component_knots``,
        ``v_component_knots``.
    step : int, optional
        Keep every ``step``-th cell along both grid axes (default 5).
    scale_factor : float, optional
        Multiplier turning a component in knots into an offset in degrees for
        the vector end point (default 0.05). Visualisation only.

    Returns
    -------
    None

    Notes
    -----
    * All four variables are sliced as 2-D grids (``[::step, ::step]``);
      presumably they share the same shape -- TODO confirm against the data.
    * Cells whose position or wind components are missing (masked or NaN)
      are skipped, because no line geometry can be built for them.
    """
    # The context manager guarantees the file handle is released, even when a
    # variable is missing and a KeyError propagates.
    with nc.Dataset(nc_path) as dataset:
        start_date = getattr(dataset, "start_date", None)
        start_time = getattr(dataset, "start_time", None)

        # Slicing with [:] loads the data into memory, so the arrays stay
        # usable after the dataset is closed.
        lat = dataset.variables["lat"][:]
        lon = dataset.variables["lon"][:]
        wind_speed = dataset.variables["wind_speed"][:]  # m/s
        wind_dir = dataset.variables["wind_dir"][:]  # degrees

    # ISO 8601 timestamp built from the global attributes, when both exist.
    if start_date and start_time:
        time_str = f"{start_date}T{start_time}Z"
    else:
        time_str = None

    # Wrap longitudes from [0, 360) into [-180, 180).
    lon = (lon + 180) % 360 - 180

    # m/s -> knots
    wind_speed_knots = wind_speed * 1.94384

    # U/V components. The leading minus signs imply wind_dir is the direction
    # the wind blows FROM (meteorological convention) -- TODO confirm.
    direction_rad = np.radians(wind_dir)
    u_knots = -wind_speed_knots * np.sin(direction_rad)
    v_knots = -wind_speed_knots * np.cos(direction_rad)

    def subsample(grid):
        # Row-major flattening keeps the (i, j) order of a nested loop.
        # Masked cells become NaN in one step instead of being converted
        # element by element (which emits a warning per masked value).
        return _filled_float(grid[::step, ::step]).ravel()

    lat_start = subsample(lat)
    lon_start = subsample(lon)
    u = subsample(u_knots)
    v = subsample(v_knots)
    speed = subsample(wind_speed_knots)
    direction = subsample(wind_dir)

    # End point of the (scaled) vector.
    lon_end = lon_start + u * scale_factor
    lat_end = lat_start + v * scale_factor

    # A record is only meaningful when both end points can be computed.
    keep = ~(
        np.isnan(lon_start) | np.isnan(lat_start) | np.isnan(u) | np.isnan(v)
    )

    def none_if_nan(value):
        # GeoJSON has no NaN; represent a missing attribute as null.
        return None if value != value else value

    # .tolist() yields plain Python floats, which serialise cleanly.
    columns = [
        arr[keep].tolist()
        for arr in (lat_start, lon_start, lat_end, lon_end, speed, direction, u, v)
    ]
    df_list.extend(
        {
            "datetime": time_str,
            "lat_start": lat_s,
            "lon_start": lon_s,
            "lat_end": lat_e,
            "lon_end": lon_e,
            "wind_speed_knots": none_if_nan(spd),
            "wind_direction_deg": none_if_nan(deg),
            "u_component_knots": u_k,
            "v_component_knots": v_k,
        }
        for lat_s, lon_s, lat_e, lon_e, spd, deg, u_k, v_k in zip(*columns)
    )

# Input folder with the NetCDF wind files and destination of the GeoJSON.
input_folder = "wind/input"
output_path = "wind/wind_vectors_cyclone_beryl.geojson"

# Accumulate one record per wind vector across all input files. The paths are
# sorted because glob.glob returns them in arbitrary order, which would make
# the row order of the output differ between runs.
df_list = []
for nc_path in sorted(glob.glob(f"{input_folder}/*")):
    process_file(nc_path, df_list)

# Fail with a clear message instead of an opaque KeyError further down when
# the folder is empty or no file produced a valid vector.
if not df_list:
    raise RuntimeError(f"No wind vectors were produced from files in {input_folder!r}")

# Create a pandas DataFrame from the list of features
df = pd.DataFrame(df_list)

# Define the area of interest (AOI) as a closed lon/lat rectangle.
aoi_coords = [[-102.8148701375, 6.1943456775], [-13.3448605043, 6.1943456775],
              [-13.3448605043, 49.6429910636], [-102.8148701375, 49.6429910636],
              [-102.8148701375, 6.1943456775]]
aoi_polygon = Polygon(aoi_coords)
aoi_gdf = gpd.GeoDataFrame({'geometry': [aoi_polygon]}, crs="EPSG:4326")

# Build one two-point LineString per row. Zipping the four coordinate columns
# avoids df.iterrows(), which materialises a Series per row and is very slow
# for hundreds of thousands of vectors.
geometries = [
    LineString([(lon_s, lat_s), (lon_e, lat_e)])
    for lon_s, lat_s, lon_e, lat_e in zip(
        df['lon_start'], df['lat_start'], df['lon_end'], df['lat_end']
    )
]

# The coordinates now live in the geometry, so the scalar columns are dropped.
df = df.drop(columns=['lat_start', 'lon_start', 'lat_end', 'lon_end'])
df['geometry'] = geometries

# Convert to GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")

# Both layers must share a CRS for the clip to be meaningful.
print(gdf.crs, aoi_gdf.crs)

# Keep only the vectors (or parts of vectors) that fall inside the AOI.
clipped_gdf = gpd.clip(gdf, aoi_gdf)
print(clipped_gdf.shape, gdf.shape)

clipped_gdf.to_file(output_path, driver='GeoJSON')





cannot be safely cast to variable data type
  lat = dataset.variables["lat"][:]
cannot be safely cast to variable data type
  lon = dataset.variables["lon"][:]
cannot be safely cast to variable data type
  wind_speed = dataset.variables["wind_speed"][:]  # m/s
cannot be safely cast to variable data type
  wind_dir = dataset.variables["wind_dir"][:]  # Degrees
  u = float(u_sub_knots[i, j])
  v = float(v_sub_knots[i, j])
  wind_speed_kts = float(speed_sub_knots[i, j])
  wind_direction = float(dir_sub[i, j])


EPSG:4326 EPSG:4326
(22783, 6) (263103, 6)
