## Rainfall events


Classify rain events. Each event is defined as follows:
1. The event starts when the rain gauge reading is non-zero.
2. The event ends when the rain gauge reading returns to zero.
3. The event has to be at least 30 minutes long.
4. The event has to have at least 2 mm of rain in total (note: the code below currently uses a 5 mm threshold).
5. The event is 'relaxed': zero-rain gaps of up to 15 minutes do not end the event.

- there is a standard for this

In [None]:
def classify_rain_events(rain_series, min_event_duration=30, min_event_rainfall=5,
                         max_zero_gap=15):
    """Group a rain-gauge series into rain events.

    An event starts at a reading greater than zero and carries on until more
    than ``max_zero_gap`` consecutive zero readings follow; shorter dry spells
    are bridged and stay inside the event. ``start`` and ``end`` are the first
    and last wet readings, so dry readings after the last rain never count
    towards the duration.

    Readings that are NaN or negative are ignored: they neither extend an
    event nor count towards a dry spell.

    The computation is vectorised, so it does not iterate row by row.

    Parameters
    ----------
    rain_series : pandas.Series
        Rain amounts indexed by time. The index must be convertible with
        ``pd.to_datetime`` and is assumed to be in chronological order.
    min_event_duration : float, default 30
        Minimum time between the first and last wet reading, in minutes.
    min_event_rainfall : float, default 5
        Minimum sum of the readings of an event.
    max_zero_gap : int, default 15
        Longest run of zero readings that is bridged. It is counted in
        readings, which equals minutes only for one-minute data.

    Returns
    -------
    pandas.DataFrame
        One row per event with the columns ``start``, ``end``, ``duration``
        (minutes), ``total_rain`` and ``intensity`` (``total_rain`` divided
        by ``duration``). The columns are present even when no event
        qualifies.
    """
    columns = ['start', 'end', 'duration', 'total_rain', 'intensity']

    rain_data = rain_series.reset_index()
    rain_data.columns = ['time', 'rain']
    rain_data['time'] = pd.to_datetime(rain_data['time'])

    # Only zero and positive readings take part; renumbering them 0..n-1 lets
    # the index difference between two wet readings count the dry readings
    # that separate them.
    readings = rain_data[rain_data['rain'] >= 0].reset_index(drop=True)
    wet = readings[readings['rain'] > 0]
    if wet.empty:
        return pd.DataFrame(columns=columns)

    # A wet reading opens a new event when the dry spell before it is longer
    # than the tolerated gap; the running count of such openings is the
    # event number of every wet reading.
    dry_before = wet.index.to_series().diff() - 1
    event_id = (dry_before > max_zero_gap).cumsum()

    events = wet.groupby(event_id).agg(
        start=('time', 'first'),
        end=('time', 'last'),
        total_rain=('rain', 'sum'),
    )
    events['duration'] = (events['end'] - events['start']).dt.total_seconds() / 60.0

    qualifies = (
        (events['duration'] >= min_event_duration)
        & (events['total_rain'] >= min_event_rainfall)
    )
    events = events.loc[qualifies, ['start', 'end', 'duration', 'total_rain']]
    events = events.reset_index(drop=True)

    # With a non-positive min_event_duration a single-reading event has
    # duration 0 and therefore an infinite intensity.
    events['intensity'] = events['total_rain'] / events['duration']
    return events


In [None]:

def classify_rain_events(rain_series, min_event_duration=30, min_event_rainfall=5,
                         max_zero_gap=15):
    """Split a rain-gauge series into rain events and keep the significant ones.

    Consecutive wet readings (value greater than zero) belong to the same
    event as long as no more than ``max_zero_gap`` zero readings lie between
    them. An event is reported from its first to its last wet reading, and
    its duration is the time between those two readings.

    NaN and negative readings are skipped entirely: they are not part of any
    event and do not count as dry readings.

    The computation is vectorised, so it does not iterate row by row.

    Parameters
    ----------
    rain_series : pandas.Series
        Rain amounts with a datetime-like index (it is used as is, without
        conversion), assumed to be in chronological order.
    min_event_duration : float, default 30
        Shortest accepted event, in minutes.
    min_event_rainfall : float, default 5
        Smallest accepted sum of the readings of an event.
    max_zero_gap : int, default 15
        Longest run of zero readings tolerated inside an event. It is a
        number of readings, i.e. minutes only for one-minute data.

    Returns
    -------
    pandas.DataFrame
        One row per accepted event with the columns ``start``, ``end``,
        ``duration`` (minutes) and ``total_rain``. The columns exist even if
        no event is accepted.
    """
    columns = ['start', 'end', 'duration', 'total_rain']

    rain_data = rain_series.reset_index()
    rain_data.columns = ['time', 'rain']

    # Number the readings that count (zero or positive) consecutively so that
    # skipped NaN/negative rows cannot distort the length of a dry spell.
    counted = rain_data[rain_data['rain'] >= 0]
    ordinal = pd.Series(range(len(counted)), index=counted.index)

    wet = counted[counted['rain'] > 0]
    if wet.empty:
        return pd.DataFrame(columns=columns)

    # Dry readings separating each wet reading from the previous wet one; a
    # spell longer than the tolerated gap starts a new event.
    dry_between = ordinal.loc[wet.index].diff() - 1
    event_id = (dry_between > max_zero_gap).cumsum()

    per_event = wet.groupby(event_id)
    events = pd.DataFrame({
        'start': per_event['time'].first(),
        'end': per_event['time'].last(),
        'total_rain': per_event['rain'].sum(),
    })
    events['duration'] = (events['end'] - events['start']).dt.total_seconds() / 60.0

    long_enough = events['duration'] >= min_event_duration
    wet_enough = events['total_rain'] >= min_event_rainfall
    return events.loc[long_enough & wet_enough, columns].reset_index(drop=True)


In [None]:
# Pick the rain series stored under key '5425' in `raw`
# (presumably a gauge/station id -- confirm where `raw` is loaded).
rain = raw['5425'] # unit is mm/minute
# Classify the series into rain events with the function defined above;
# the result is kept in `rain_events` for the following cells.
rain_events = classify_rain_events(rain)

Classifying rain events: 100%|██████████| 6117121/6117121 [01:38<00:00, 62368.62it/s]


In [None]:
# combine both 5425 and 5427?

## Rain events timeline

## Histograms

## Scatter plots