In [None]:
import pandas as pd
import numpy as np
import os

# =======================
# CONFIGURATION
# =======================
# Source CSV with the station records, and the folder that collects every
# summary file written further down (created here if it does not exist).
input_file = r"D:\Saipriya\Work_SatishSir\GHMC_Rainfall\GHMC hourly data 2025.csv"
output_folder = "GHMC_rainfall_analysis_outputs/"
os.makedirs(output_folder, exist_ok=True)

# =======================
# READ THE CSV AND NORMALISE IT
# =======================
df = pd.read_csv(input_file)

# Header clean-up, applied in order: trim both ends, turn embedded newlines
# into spaces, then turn every space into an underscore. Two adjacent spaces
# therefore become a double underscore, which is why the rainfall headers
# below are spelled with "__".
df.columns = (
    df.columns
    .str.strip()
    .str.replace('\n', ' ')
    .str.replace(' ', '_')
)

# Give the two rainfall columns short names. rename() skips labels that are
# not present, so a file lacking either header passes through untouched.
df = df.rename(columns={
    'Hourly__Rainfall_(mm)': 'Hourly_Rain',
    'Day_Cumulative__Rainfall_(mm)': 'Day_CumRain',
})

# Parse the day-first timestamp once, then derive the calendar fields that
# the groupings below rely on.
parsed_stamps = pd.to_datetime(df['Date_&_Time'], format='%d-%m-%Y %H:%M')
df['DateTime'] = parsed_stamps
df['Date'] = parsed_stamps.dt.date
df['Hour'] = parsed_stamps.dt.hour
df['Month'] = parsed_stamps.dt.month

# Force the rainfall readings to numbers; anything that cannot be parsed
# (and any blank) ends up as 0.
rain_as_number = pd.to_numeric(df['Hourly_Rain'], errors='coerce')
df['Hourly_Rain'] = rain_as_number.fillna(0)

# Station descriptors carried into every grouped output.
meta_cols = [
    'AWS_ID', 'District', 'Mandal', 'Location',
    'Circle', 'Latitude', 'Longitude',
]

# =======================
# PER-DAY SUMMARY FOR EACH STATION
# =======================
def _lightest_wet_reading(readings):
    """Return the smallest strictly positive value in *readings*, or 0 if none is positive."""
    wet = readings[readings > 0]
    return wet.min() if len(wet) > 0 else 0


def _wet_reading_count(readings):
    """Return how many values in *readings* are strictly positive."""
    return (readings > 0).sum()


def _rain_per_wet_reading(day):
    """Return a day's total divided by its wet-reading count, or 0 when that count is 0."""
    wet_count = day['Hours_Rained']
    if wet_count > 0:
        return day['Daily_Rainfall'] / wet_count
    return 0


# One row per station and calendar date: total, largest reading, smallest
# positive reading, and the number of readings with rain.
daily = (
    df.groupby(meta_cols + ['Date'])
    .agg(
        Daily_Rainfall=pd.NamedAgg(column='Hourly_Rain', aggfunc='sum'),
        Max_Hourly_Rain=pd.NamedAgg(column='Hourly_Rain', aggfunc='max'),
        Min_Hourly_Rain=pd.NamedAgg(column='Hourly_Rain', aggfunc=_lightest_wet_reading),
        Hours_Rained=pd.NamedAgg(column='Hourly_Rain', aggfunc=_wet_reading_count),
    )
    .reset_index()
)

# Daily intensity: rain averaged over the wet readings only (0 on dry days).
daily['Daily_Intensity'] = daily.apply(_rain_per_wet_reading, axis=1)

# 1 when the day recorded any rain, otherwise 0.
daily['RainFlag'] = daily['Daily_Rainfall'].gt(0).astype(int)

# Write the per-day table.
daily_csv_path = os.path.join(output_folder, "daily_rainfall_summary.csv")
daily.to_csv(daily_csv_path, index=False)

# =======================
# HOUR-OF-DAY PATTERN FOR EACH STATION
# =======================
def _mean_of_wet_readings(readings):
    """Return the mean of the strictly positive values in *readings*, or 0 if there are none."""
    wet = readings[readings > 0]
    return wet.mean() if len(wet) > 0 else 0


def _wet_reading_tally(readings):
    """Return how many values in *readings* are strictly positive."""
    return (readings > 0).sum()


# One row per station and hour of day: mean over all readings, mean over the
# wet readings only, and how many readings were wet.
hourly = (
    df.groupby(meta_cols + ['Hour'])
    .agg(
        Mean_Hourly_Rain=pd.NamedAgg(column='Hourly_Rain', aggfunc='mean'),
        Rainy_Hour_Intensity=pd.NamedAgg(column='Hourly_Rain', aggfunc=_mean_of_wet_readings),
        Rainy_Hour_Frequency=pd.NamedAgg(column='Hourly_Rain', aggfunc=_wet_reading_tally),
    )
    .reset_index()
)

hourly_csv_path = os.path.join(output_folder, "hourly_rainfall_summary.csv")
hourly.to_csv(hourly_csv_path, index=False)

# =======================
# EVENT DETECTION (continuous rain periods)
# =======================
# An event is a run of back-to-back wet records (Hourly_Rain > 0) at a single
# station. Records are ordered by station and then by time so that adjacent
# rows are adjacent records of the same station, except at station boundaries.
df_sorted = df.sort_values(['AWS_ID', 'DateTime']).copy()
df_sorted['RainFlag'] = (df_sorted['Hourly_Rain'] > 0).astype(int)

# A wet record opens a new event when the record before it is dry OR when it
# is the first record of its station. A plain diff() over the whole column
# misses both "first record" cases: the very first row has no predecessor
# (so a wet start would stay at EventID 0 and be filtered out below), and a
# station that starts wet right after another station ended wet shows no
# 0 -> 1 step.
previous_flag = df_sorted['RainFlag'].shift(1, fill_value=0)
first_of_station = df_sorted['AWS_ID'] != df_sorted['AWS_ID'].shift(1)
df_sorted['EventStart'] = (
    (df_sorted['RainFlag'] == 1) & ((previous_flag == 0) | first_of_station)
).astype(int)

# The running count of starts gives each event its own positive id;
# multiplying by the flag puts every dry record back to 0.
df_sorted['EventID'] = (df_sorted['EventStart'].cumsum() * df_sorted['RainFlag']).astype(int)

# Aggregate event-level stats.
# Duration_hrs is the number of records in the event.
# NOTE(review): that equals hours only if records are one hour apart; gaps in
# the record are not detected here - confirm the input has no missing hours.
events = df_sorted[df_sorted['EventID'] > 0].groupby(meta_cols + ['EventID']).agg(
    Start=('DateTime', 'min'),
    End=('DateTime', 'max'),
    Duration_hrs=('DateTime', 'size'),
    Total_Rain=('Hourly_Rain', 'sum'),
    Max_Hourly=('Hourly_Rain', 'max')
).reset_index()

# Mean rain per record over the life of the event.
events['Average_Intensity'] = events['Total_Rain'] / events['Duration_hrs']
events.to_csv(os.path.join(output_folder, "rain_events_summary.csv"), index=False)

# =======================
# WET-DAY STATISTICS PER STATION
# =======================
# Only days with a positive total take part, so a station without any such
# day gets no row in this table.
had_rain = daily['Daily_Rainfall'].gt(0)
rainy_days = (
    daily.loc[had_rain]
    .groupby(meta_cols)
    .agg(
        Total_Rainy_Days=pd.NamedAgg(column='Date', aggfunc='count'),
        Mean_Daily_Rain=pd.NamedAgg(column='Daily_Rainfall', aggfunc='mean'),
        Max_Daily_Rain=pd.NamedAgg(column='Daily_Rainfall', aggfunc='max'),
        Mean_Intensity=pd.NamedAgg(column='Daily_Intensity', aggfunc='mean'),
    )
    .reset_index()
)

# Longest wet spell (max consecutive rainy days)
def longest_wet_spell(series):
    """Return the length of the longest unbroken run of 1s in *series*.

    Args:
        series: Iterable of flags, visited in order. An item equal to 1
            extends the current run; any other item ends it.

    Returns:
        The longest run length found, or 0 when no item equals 1
        (including when *series* is empty).
    """
    best_run = 0
    current_run = 0
    for flag in series:
        # Grow the run on a wet flag, restart from zero on anything else.
        current_run = current_run + 1 if flag == 1 else 0
        best_run = max(best_run, current_run)
    return best_run

def _pad_missing_days(flags):
    """Return *flags* laid out on every calendar day between its first and last date.

    Args:
        flags: Series of 0/1 rain flags for one station, indexed by date.

    Returns:
        The same flags reindexed onto a gap-free daily range; dates that had
        no row are filled with 0 so they break a wet spell instead of being
        skipped over.
    """
    every_day = pd.date_range(flags.index.min(), flags.index.max(), freq='D')
    return flags.reindex(every_day, fill_value=0)


# `daily` only has rows for dates that appear in the input, so feeding the
# flags row by row would count two wet days separated by missing dates as
# consecutive. Index the flags by date and fill the calendar first.
dated_daily = daily.set_index(
    pd.DatetimeIndex(pd.to_datetime(daily['Date']), name='CalendarDay')
)
wetspell = (
    dated_daily.groupby(meta_cols)['RainFlag']
    .apply(lambda flags: longest_wet_spell(_pad_missing_days(flags)))
    .reset_index(name='Longest_Wet_Spell_days')
)

# Attach the spell length to the per-station wet-day table and write it out.
rainy_days = rainy_days.merge(wetspell, on=meta_cols, how='left')

rainy_days.to_csv(os.path.join(output_folder, "rainy_days_summary.csv"), index=False)

# =======================
# CLOSING REPORT
# =======================
# Printed in order, one print() call per entry: a completion line, the output
# folder, and a listing of the four CSV files written above.
closing_messages = (
    " Rainfall analysis completed successfully!",
    f"Files generated in: {output_folder}",
    """
Generated files:
1. daily_rainfall_summary.csv   → Daily totals, intensity, hours rained + spatial info
2. hourly_rainfall_summary.csv  → Hourly mean rainfall and intensity pattern + spatial info
3. rain_events_summary.csv      → Continuous rainfall event stats + spatial info
4. rainy_days_summary.csv       → Rainy days count, intensity, wet spell + spatial info
""",
)
for message in closing_messages:
    print(message)
