# Calibration of the Sumaré Radar using Rain Gauges

This notebook converts the RGB colors of radar images into rainfall estimates (mm/h), calibrated using co-located rain gauge data.

Authors: Noemi (Undergraduate Research), Vinícius Santos (MSc Student)

Advisor: Prof. Eduardo Bezerra (CEFET/RJ)

---

In [None]:
import os
import cv2
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib

## 1. Initial setup

In [None]:
# Input and output locations used by the rest of the notebook
RADAR_DIR = './radar_sumare/2019/01/30'  # folder scanned for radar PNG frames
PLUV_FILE = './pluviometros.csv'         # rain gauge observations (CSV)
MODEL_OUT = './rgb_to_rain_model.pkl'    # where the fitted model is written

# Approximate geographic bounding box of the Sumaré radar image:
# latitude limits first, then longitude limits.
LAT_MIN = -23.10
LAT_MAX = -22.80
LON_MIN = -43.45
LON_MAX = -43.05

## 2. Utility functions

In [None]:
def latlon_to_pixel(lat, lon, lat_min, lat_max, lon_min, lon_max, nrows, ncols):
    """Map a geographic coordinate to the (row, column) of an image grid.

    The image is assumed to span the bounding box linearly, with row 0 at
    ``lat_max`` (top edge) and column 0 at ``lon_min`` (left edge).

    Args:
        lat, lon: Coordinate of the point to locate.
        lat_min, lat_max: Latitude limits of the image (must differ).
        lon_min, lon_max: Longitude limits of the image (must differ).
        nrows, ncols: Image height and width in pixels.

    Returns:
        Tuple ``(i, j)`` of Python ints. The indices are NOT clamped: a point
        outside the bounding box yields an index below 0 or at/above
        ``nrows``/``ncols``, so callers must bounds-check before indexing.
    """
    row_pos = (lat_max - lat) / (lat_max - lat_min) * nrows
    col_pos = (lon - lon_min) / (lon_max - lon_min) * ncols
    # Floor instead of int(): int() truncates toward zero, which would map a
    # point slightly outside the top/left edge (position in (-1, 0)) onto
    # row/column 0 and make it look like it lies inside the image.
    i = int(np.floor(row_pos))
    j = int(np.floor(col_pos))
    return i, j

def extract_rgb_from_radar(image_path, stations_df):
    """Sample the radar image colour at each station location.

    Args:
        image_path: Path of the radar image file to read.
        stations_df: DataFrame with one row per station and at least the
            columns ``'lat'`` and ``'lon'``.

    Returns:
        A list with one ``(R, G, B)`` tuple per row of ``stations_df``, in row
        order. Stations that fall outside the image get
        ``(nan, nan, nan)``.

    Raises:
        OSError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with a message that names the file.
        raise OSError(f'Could not read radar image: {image_path}')
    # OpenCV loads channels as BGR; reorder so index 0/1/2 is R/G/B.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    nrows, ncols = img.shape[:2]

    rgb_values = []
    for lat, lon in zip(stations_df['lat'], stations_df['lon']):
        i, j = latlon_to_pixel(lat, lon, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, nrows, ncols)
        if 0 <= i < nrows and 0 <= j < ncols:
            red, green, blue = img[i, j, :]
            rgb_values.append((red, green, blue))
        else:
            # Keep one entry per station so the result stays aligned with
            # the rows of stations_df.
            rgb_values.append((np.nan, np.nan, np.nan))
    return rgb_values

## 3. Load rain gauge data

In [None]:
# Read the rain gauge observations; the 'datetime' column is parsed as
# timestamps so it can later be compared with the radar frame times.
pluv = pd.read_csv(
    PLUV_FILE,
    parse_dates=['datetime'],
)
# Preview the first rows (cell output)
pluv.head()

## 4. Extract RGB values at rain gauge locations

In [None]:
# Pair every radar frame with the gauge readings taken at the same timestamp
# and attach the image colour found at each gauge location.
records = []
radar_images = sorted(glob.glob(os.path.join(RADAR_DIR, '*.png')))
for img_path in tqdm(radar_images, desc='Extracting radar colors'):
    # The file name (without extension) encodes the frame time as
    # YYYY_MM_DD_HH_MM. splitext only strips the trailing extension, whereas
    # str.replace would also remove '.png' occurring elsewhere in the name.
    ts_str = os.path.splitext(os.path.basename(img_path))[0]
    dt = pd.to_datetime(ts_str, format='%Y_%m_%d_%H_%M')
    # Only gauge rows with exactly this timestamp are used for this frame
    sub = pluv.loc[pluv['datetime'] == dt].copy()
    if sub.empty:
        continue
    rgbs = extract_rgb_from_radar(img_path, sub)
    sub[['R','G','B']] = pd.DataFrame(rgbs, index=sub.index)
    records.append(sub)

if not records:
    # pd.concat on an empty list fails with a message that says nothing about
    # the actual cause; report the real problem instead.
    raise ValueError(
        f'No radar image in {RADAR_DIR!r} matches a rain gauge timestamp; '
        'cannot build the calibration dataset.'
    )

df = pd.concat(records, ignore_index=True)
# Gauges that fell outside the image carry NaN colours and are discarded
df = df.dropna(subset=['R','G','B'])
print(f'Total samples: {len(df)}')
df.head()

## 5. Fit model RGB → Rainfall (mm/h)

In [None]:
# Predictors are the three colour channels; the target is the 'rain_mm_h' column
feature_cols = ['R','G','B']
X = df[feature_cols].values
y = df['rain_mm_h'].values

model = RandomForestRegressor(n_estimators=200, max_depth=8, random_state=42)
model.fit(X, y)

# NOTE: predictions are made on the same samples the model was fitted on,
# so the scores printed below are in-sample.
y_pred = model.predict(X)
mae, r2 = mean_absolute_error(y, y_pred), r2_score(y, y_pred)
print(f'MAE: {mae:.3f} mm/h, R2: {r2:.3f}')

## 6. Visualization of results

In [None]:
# Observed vs. estimated rainfall, with the 1:1 line as a reference
fig, ax = plt.subplots(figsize=(6,6))
ax.scatter(y, y_pred, alpha=0.4, edgecolor='k')
ax.set_xlabel('Observed rainfall (mm/h)')
ax.set_ylabel('Estimated rainfall from radar (mm/h)')
ax.set_title('Calibration: RGB → Rainfall')
ax.grid(True)

# Dashed red diagonal from the origin up to the largest observed value
y_top = max(y)
ax.plot([0, y_top], [0, y_top], 'r--')
plt.show()

## 7. Save calibrated model

In [None]:
# Persist the fitted model to disk at MODEL_OUT
joblib.dump(value=model, filename=MODEL_OUT)
print(f'Model saved to: {MODEL_OUT}')

## 8. Generate RGB → mean rainfall table

In [None]:
# Average the 'rain_mm_h' values of all samples sharing the same (R, G, B)
mean_rain = df.groupby(['R','G','B'])['rain_mm_h'].mean()

# Turn the grouped result back into flat columns and name the estimate
table = mean_rain.reset_index()
table = table.rename(columns={'rain_mm_h':'rain_est_mmph'})

table.to_csv('rgb_to_rain_table.csv', index=False)
print('RGB → rainfall table saved as rgb_to_rain_table.csv')
table.head()

## Deliverables
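- An empirical RGB→rainfall calibration fitted with real rain gauge data.
- A serialized Random Forest model (`rgb_to_rain_model.pkl`) for converting the colors of new radar images into rainfall estimates (mm/h).
- A table (`rgb_to_rain_table.csv`) giving the mean observed rainfall for each distinct RGB color, usable as a discrete conversion lookup.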