### **This script filters the NFIP claims dataset by date of loss (2005–2024) and keeps only claims that were not caused by a 100-year flood (`causedBy100yr == 0`), for the CONUS scale**

In [None]:
# Install geopandas if needed
!pip install geopandas

# Imports
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import matplotlib.pyplot as plt

# --- Step 1: Load Claim CSV Data ---
# The claims file is read as a semicolon-delimited CSV.
file_path = 'Data/claims.csv'
claim_df = pd.read_csv(file_path, delimiter=';')

# --- Step 2: Convert 'dateOfLoss' to datetime ---
claim_df['dateOfLoss'] = pd.to_datetime(claim_df['dateOfLoss'])  # Adjust column name if needed

# --- Step 3: Filter date range and pluvial events (causedBy100yr == 0) ---
# Both bounds are inclusive.
start_date = pd.to_datetime('01/01/2005')
end_date = pd.to_datetime('12/31/2024')

# Handle both numeric and string 'causedBy100yr' by coercing to numbers and
# comparing exactly with 0. A substring test for '0' on string values would
# also keep values such as '1.0', '10' or '100'. Values that cannot be read
# as a number (including missing ones) become NaN and are excluded.
caused_by_100yr = pd.to_numeric(claim_df['causedBy100yr'], errors='coerce')
mask_pluvial = caused_by_100yr == 0

# Series.between is inclusive on both ends by default.
in_date_range = claim_df['dateOfLoss'].between(start_date, end_date)

filtered_df = claim_df[in_date_range & mask_pluvial]

# --- Step 4: Read CONUS state boundaries ---
# Keep every state/territory whose postal code (STUSPS) is not in the
# exclusion list below.
states = gpd.read_file('Data/tl_2024_us_state/tl_2024_us_state.shp')
exclude = ['AK', 'HI', 'PR', 'GU', 'VI', 'MP', 'AS']
conus_states = states[~states['STUSPS'].isin(exclude)]
# NOTE(review): in the steps visible here the claims are never spatially
# clipped to conus_states; confirm that Data/claims.csv is already limited
# to CONUS, otherwise add a spatial filter before saving.

# --- Step 5: Convert filtered claims to GeoDataFrame ---
# Build all points in one vectorized call instead of one Point per row.
# The result is positional, i.e. row i of filtered_df gets point i.
geometry = gpd.points_from_xy(filtered_df['longitude'], filtered_df['latitude'])
# Wrap a copy so that adding the geometry column cannot touch filtered_df.
claim_gdf = gpd.GeoDataFrame(filtered_df.copy(), geometry=geometry, crs='EPSG:4326')

# --- Step 6: Plot ---
# State outlines form the base layer; claim locations are drawn on top of
# them on the same axes.
fig, ax = plt.subplots(figsize=(12, 8))

boundary_style = {'edgecolor': 'black', 'linewidth': 1}
conus_states.boundary.plot(ax=ax, **boundary_style)

claim_style = {'color': 'blue', 'markersize': 2, 'alpha': 0.6, 'label': 'Filtered Claims'}
claim_gdf.plot(ax=ax, **claim_style)

# Title, axis labels, grid and legend are set directly on the axes object.
ax.set_title("Claims in CONUS since 2005")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.grid(True)
ax.legend()

fig.tight_layout()
plt.show()

# --- Step 7: Save filtered data to CSV (without geometry) ---
# The point geometry column is removed first; the row index is not written.
output_path = 'Data/Claim_CONUS_2005.csv'
claims_without_geometry = claim_gdf.drop(columns='geometry')
claims_without_geometry.to_csv(output_path, index=False)
print(f"Filtered data saved to {output_path}")


**The Local Storm Report (LSR) dataset for CONUS was downloaded from https://www.hydroshare.org/resource/6985905f08d24b2297788060373c42a4/**

### **LSR events are filtered to keep only flash flood reports**

In [None]:
import pandas as pd

# Load the LSR CSV file. With on_bad_lines='skip', lines that pandas cannot
# parse are skipped instead of raising an error.
file_path = 'Data/lsr_CONUS_2005_2024.csv'
lsr_df = pd.read_csv(file_path, on_bad_lines='skip')

# Keep only the rows whose 'TYPETEXT' column contains 'FLASH FLOOD'
# (case-insensitive). Rows with a missing TYPETEXT count as non-matching.
is_flash_flood = lsr_df['TYPETEXT'].str.contains('FLASH FLOOD', case=False, na=False)
lsr_pluvial_df = lsr_df[is_flash_flood]

# Write the filtered rows to a new CSV file, without the row index.
output_path = 'Data/lsr_pluvial_CONUS_2005.csv'
lsr_pluvial_df.to_csv(output_path, index=False)

print(f"Filtered data saved to {output_path}")
