# 03 - Precipitation Analysis (CHIRPS)

Analyze CHIRPS daily precipitation data for Delhi.

Dataset: `UCSB-CHG/CHIRPS/DAILY`

In [None]:
import ee
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Initialize the Google Earth Engine (GEE) client for this session.
# The project ID is passed explicitly to ee.Initialize via `project=`.
GCP_PROJECT_ID = 'gen-lang-client-0669818939'
ee.Initialize(project=GCP_PROJECT_ID)

In [None]:
# Dataset identifier and study-area definition
CHIRPS_ID = 'UCSB-CHG/CHIRPS/DAILY'

# Corner coordinates handed to ee.Geometry.Rectangle
# (presumably [west, south, east, north] in degrees - confirm against the EE docs).
DELHI_BOUNDS = [
    76.8, 28.4,
    77.4, 28.9,
]
delhi_region = ee.Geometry.Rectangle(DELHI_BOUNDS)

## 1. Fetch Precipitation Time Series

In [None]:
def get_precipitation_timeseries(region, start_date, end_date):
    """Fetch the region-mean daily CHIRPS precipitation as a DataFrame.

    Args:
        region: ee.Geometry over which every daily image is averaged.
        start_date: Start of the period, passed to ``filterDate``.
        end_date: End of the period, passed to ``filterDate``. Earth Engine
            treats this bound as exclusive, so pass the day *after* the
            last day that should be included.

    Returns:
        A DataFrame with the columns ``date`` and ``precipitation_mm``,
        sorted by date. When no image matches the filters the frame is
        empty but still carries both columns.
    """
    collection = (
        ee.ImageCollection(CHIRPS_ID)
        .filterDate(start_date, end_date)
        .filterBounds(region)
    )

    def extract_precip(image):
        # Reduce one daily image to a single region-mean value.
        date = image.date().format('YYYY-MM-dd')
        precip = image.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=region,
            scale=5000  # metres; roughly the ~5 km CHIRPS grid spacing
        ).get('precipitation')
        return ee.Feature(None, {'date': date, 'precipitation': precip})

    # getInfo() pulls the whole feature collection to the client in one call.
    data = collection.map(extract_precip).getInfo()['features']

    # A missing or null reduction result is recorded as 0 mm.
    records = [
        {
            'date': feat['properties']['date'],
            'precipitation_mm': feat['properties'].get('precipitation', 0) or 0,
        }
        for feat in data
    ]

    # Explicit columns keep sort_values() from raising KeyError when the
    # query returned no images at all.
    frame = pd.DataFrame(records, columns=['date', 'precipitation_mm'])
    frame['date'] = pd.to_datetime(frame['date'])
    return frame.sort_values('date')

# Fetch 2023 monsoon data (June-September).
# filterDate() excludes the end date itself, so the range must end on
# 1 October for 30 September to be part of the series.
print('Fetching 2023 monsoon data...')
df_monsoon = get_precipitation_timeseries(delhi_region, '2023-06-01', '2023-10-01')
print(f'Records: {len(df_monsoon)}')
df_monsoon.head()

## 2. Visualize Daily Precipitation

In [None]:
# Daily totals as bars, with the seasonal mean drawn as a dashed reference line
daily_mm = df_monsoon['precipitation_mm']
seasonal_mean = daily_mm.mean()

plt.figure(figsize=(14, 5))
plt.bar(df_monsoon['date'], daily_mm, color='steelblue', alpha=0.7)
plt.axhline(
    y=seasonal_mean,
    color='red',
    linestyle='--',
    label=f"Mean: {seasonal_mean:.1f}mm",
)

plt.title('Delhi Daily Precipitation - Monsoon 2023')
plt.xlabel('Date')
plt.ylabel('Precipitation (mm)')
plt.legend()
plt.xticks(rotation=45)

plt.tight_layout()
plt.savefig('delhi_monsoon_precip_2023.png', dpi=150)
plt.show()

## 3. Compute Rolling Statistics

In [None]:
# Trailing 3-day and 7-day precipitation totals (NaN until the window fills)
daily_mm = df_monsoon['precipitation_mm']
df_monsoon['rolling_3d'] = daily_mm.rolling(window=3).sum()
df_monsoon['rolling_7d'] = daily_mm.rolling(window=7).sum()

# Two stacked panels sharing the date axis
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
rolling_ax, daily_ax = axes

# Upper panel: rolling sums
for column, series_label, line_color in (
    ('rolling_3d', '3-day rolling', 'orange'),
    ('rolling_7d', '7-day rolling', 'red'),
):
    rolling_ax.plot(df_monsoon['date'], df_monsoon[column], label=series_label, color=line_color)
rolling_ax.set_ylabel('Cumulative Precipitation (mm)')
rolling_ax.legend()
rolling_ax.set_title('Rolling Precipitation Sums')

# Days whose total exceeds the heavy-rain threshold
heavy_rain_threshold = 50  # mm
is_heavy = df_monsoon['precipitation_mm'] > heavy_rain_threshold
heavy_days = df_monsoon[is_heavy]

# Lower panel: daily bars with heavy-rain days highlighted
daily_ax.bar(df_monsoon['date'], df_monsoon['precipitation_mm'], color='steelblue', alpha=0.5)
daily_ax.scatter(
    heavy_days['date'],
    heavy_days['precipitation_mm'],
    color='red',
    s=50,
    label=f'Heavy rain (>{heavy_rain_threshold}mm)',
)
daily_ax.set_ylabel('Daily Precipitation (mm)')
daily_ax.set_xlabel('Date')
daily_ax.legend()

plt.tight_layout()
plt.savefig('delhi_precip_analysis.png', dpi=150)
plt.show()

print(f'Heavy rain days: {len(heavy_days)}')
print(f'Max daily rainfall: {df_monsoon["precipitation_mm"].max():.1f}mm')

## 4. Monthly Aggregation

In [None]:
# Per-month sum, mean and maximum of the daily values
month_names = {6: 'June', 7: 'July', 8: 'August', 9: 'September'}

df_monsoon['month'] = df_monsoon['date'].dt.month
monthly = (
    df_monsoon.groupby('month')['precipitation_mm']
    .agg(['sum', 'mean', 'max'])
    .reset_index()
)
monthly['month_name'] = monthly['month'].map(month_names)

fig, ax = plt.subplots(figsize=(10, 5))
bars = ax.bar(monthly['month_name'], monthly['sum'], color='steelblue')
ax.set_ylabel('Total Precipitation (mm)')
ax.set_title('Monthly Precipitation - Delhi Monsoon 2023')

# Write each monthly total just above its bar, centred horizontally
for bar, total in zip(bars, monthly['sum']):
    label_x = bar.get_x() + bar.get_width() / 2
    label_y = bar.get_height() + 5
    ax.text(label_x, label_y, f'{total:.0f}mm', ha='center')

plt.tight_layout()
plt.savefig('delhi_monthly_precip.png', dpi=150)
plt.show()

print(monthly[['month_name', 'sum', 'mean', 'max']])

## 5. Feature Engineering Preview

In [None]:
def compute_rainfall_features(df, reference_idx):
    """Compute trailing rainfall features for one row of a daily series.

    The window is the 7 rows ending at (and including) ``reference_idx``,
    matching a ``rolling(window=7)`` sum at the same position.

    Args:
        df: DataFrame with a ``precipitation_mm`` column, addressed by
            position. Rows are assumed to be consecutive days in
            chronological order - the function does not check this.
        reference_idx: Zero-based position of the reference day.

    Returns:
        A dict with ``rainfall_24h``, ``rainfall_3d``, ``rainfall_7d``,
        ``max_daily_7d`` (floats) and ``wet_days_7d`` (int, rows above 5),
        or ``None`` when ``reference_idx`` is outside the frame or fewer
        than 7 rows of history are available.
    """
    window = 7
    first_idx = reference_idx - (window - 1)

    # Reject positions without a full window, and positions past the end
    # (an over-long slice would silently shrink and misreport the features).
    if first_idx < 0 or reference_idx >= len(df):
        return None

    recent = df['precipitation_mm'].iloc[first_idx:reference_idx + 1]

    return {
        'rainfall_24h': float(recent.iloc[-1]),
        'rainfall_3d': float(recent.tail(3).sum()),
        'rainfall_7d': float(recent.sum()),
        'max_daily_7d': float(recent.max()),
        'wet_days_7d': int((recent > 5).sum()),
    }

# Example for a specific day
example_idx = 30  # ~July 1
example_df = df_monsoon.reset_index(drop=True)

# Only compute features when the position exists in the fetched series;
# compute_rainfall_features may also return None when history is too short.
features = None
if example_idx < len(example_df):
    features = compute_rainfall_features(example_df, example_idx)

if features is None:
    print(f"No rainfall features available for index {example_idx}")
else:
    print(f"Rainfall features for {example_df.iloc[example_idx]['date'].date()}:")
    for k, v in features.items():
        print(f"  {k}: {v:.1f}")

## Summary

- CHIRPS provides daily precipitation at ~5km resolution
- Monsoon season (June-Sept) shows significant variation
- Rolling sums (3d, 7d) are useful features for flood prediction
- Heavy rain days (>50mm) are candidate indicators of flood events; this notebook does not yet test that relationship against flood records