In [None]:
import pandas as pd
import numpy as np

# Read the input table into a DataFrame; the file is looked up relative to
# the current working directory.
input_csv_path = 'NWCATotalV.csv'
data = pd.read_csv(input_csv_path)

# Representative height (approximate, in meters) assigned to each vegetation
# cover class. Insertion order is preserved and carried over into `weights`.
vegetation_heights = dict(
    FLOATING_AQ=1.0,    # floating plants, ~1 m
    SUBMERGED_AQ=0.2,   # submerged aquatic vegetation, ~0.2 m
    VSMALL_VEG=0.3,     # very small vegetation, < 0.5 m
    SMALL_VEG=1.5,      # small vegetation, ~0.5 m to 2 m
    MED_VEG=3.5,        # medium vegetation, ~2 m to 5 m
    HMED_VEG=10,        # high-medium vegetation, ~5 m to 15 m
    TALL_VEG=22,        # tall vegetation, ~15 m to 30 m
    VTALL_VEG=35,       # very tall vegetation, > 30 m
)

# Two-mode weighting curve: (centre, spread, mixing weight) for each mode.
mu1, sigma1, weight_mode1 = 1.0, 3, 0.6    # low mode: floating/small vegetation
mu2, sigma2, weight_mode2 = 15.0, 5, 0.4   # high mode: tall vegetation


def _mode_response(height, centre, spread):
    """Return the unnormalised Gaussian kernel exp(-(h - centre)^2 / (2 spread^2))."""
    return np.exp(-((height - centre) ** 2) / (2 * spread ** 2))


# Raw weight of a class = mixture of both mode responses at its height.
# The kernels are not divided by sigma * sqrt(2 * pi), so each one peaks at 1.
weights = {
    veg_class: weight_mode1 * _mode_response(height, mu1, sigma1)
    + weight_mode2 * _mode_response(height, mu2, sigma2)
    for veg_class, height in vegetation_heights.items()
}

# Rescale so the weights over all classes add up to 1.
total_weight = sum(weights.values())
weights = {veg_class: raw / total_weight for veg_class, raw in weights.items()}

# Keep only the weighted vegetation classes that are present as columns in the
# loaded table; classes absent from the file are left out of the index.
# NOTE: `weights` was normalised over every class, so when some classes are
# absent the weights actually applied below sum to less than 1.
vegetation_columns = [col for col in weights if col in data.columns]

# With no matching column the weighted sum below collapses to the scalar 0 and
# every site would silently be exported with WVR = 0, so fail loudly instead.
if not vegetation_columns:
    raise ValueError(
        "None of the expected vegetation columns were found in the dataset: "
        + ", ".join(weights)
    )

# Fill missing values with 0
data[vegetation_columns] = data[vegetation_columns].fillna(0)

# Scale the columns by 1/100 (values are presumably percentages, giving 0 to 1).
# This overwrites the vegetation columns of `data` in place.
data[vegetation_columns] = data[vegetation_columns] / 100

# Calculate WVR as the weighted sum of the scaled vegetation columns, per row
data["WVR"] = sum(data[col] * weights[col] for col in vegetation_columns)

# Group by SITE_ID and average WVR if multiple entries per SITE_ID
wvr_by_site = data.groupby("SITE_ID", as_index=False)["WVR"].mean()

# Save the result to a new CSV file (without the index column)
custom_output_path = 'WVRTotal_BimodalDist_Corrected.csv'
wvr_by_site.to_csv(custom_output_path, index=False)

# Bare expression: shows the output path when run as the last line of a
# notebook cell; has no effect when run as a plain script.
custom_output_path
