In [None]:
import pandas as pd
import numpy as np

In [None]:
# calculate the rolling percentiles for each coordinate
def calculate_rolling_percentiles(file_path, output_file=None):
    """Compute a rolling 10th percentile of ``tas`` per grid point and day of year.

    The CSV at ``file_path`` must provide ``year``, ``month``, ``day``,
    ``latitude``, ``longitude`` and ``tas`` columns.  Rows with
    ``tas >= 288.15`` are discarded first (15 degrees C, assuming ``tas`` is
    in kelvin).  The remaining rows are grouped by
    ``(latitude, longitude, day of year)`` and, inside each group in time
    order, a centred 7-row rolling window yields the 0.1 quantile.  Because
    every row of a group shares the same day of year, the window slides
    across that calendar day's retained observations rather than across
    neighbouring calendar days.

    Args:
        file_path: Path of the input CSV.
        output_file: Optional path; when truthy the result is written there
            as CSV (without the index) and nothing is returned.

    Returns:
        The filtered, sorted DataFrame with the added ``time``,
        ``DayOfYear`` and ``PercentileQ10`` columns when ``output_file`` is
        not given; otherwise ``None``.
    """
    data = pd.read_csv(file_path)

    # Take an explicit copy of the filtered rows: the frame is modified
    # below, and writing into a boolean-indexed selection triggers
    # SettingWithCopyWarning on pandas versions without copy-on-write.
    data = data[data['tas'] < 288.15].copy()

    # Build a proper timestamp from the separate year/month/day columns.
    data['time'] = pd.to_datetime(data[['year', 'month', 'day']])

    # Time order within each coordinate matters: the rolling window below
    # walks the rows of each group in their current order.
    data.sort_values(by=['latitude', 'longitude', 'time'], inplace=True)

    data['DayOfYear'] = data['time'].dt.day_of_year

    grouped = data.groupby(['latitude', 'longitude', 'DayOfYear'])

    # min_periods=1 keeps a value at the edges of each group, where the
    # centred window is only partially filled.
    data['PercentileQ10'] = grouped['tas'].transform(
        lambda x: x.rolling(window=7, center=True, min_periods=1).quantile(0.1)
    )

    if output_file:
        data.to_csv(output_file, index=False)
        return None
    return data

In [None]:
# Identify cold wave events: cold days that fall on consecutive dates
def identify_cold_wave_events(file_path):
    """Flag cold days that belong to a run of at least two consecutive cold days.

    The CSV at ``file_path`` must provide ``latitude``, ``longitude``,
    ``time`` (parsed as dates) and ``IsColdWave`` columns.  For every
    coordinate the rows are walked in time order and split into runs; a run
    ends when the gap to the previous row is more than one day or when the
    ``IsColdWave`` flag flips.  Breaking on the flag as well as on date gaps
    means the result is the same whether the file lists only the cold days
    or every day with a flag.

    Args:
        file_path: Path of the input CSV.

    Returns:
        The sorted DataFrame with an added boolean ``IsColdWaveEvent``
        column, as assembled by ``groupby(...).apply`` with
        ``group_keys=True`` (the group keys are prepended to the index).
    """
    data = pd.read_csv(file_path, parse_dates=['time'])

    # Chronological order per coordinate is required for the diff() below.
    data.sort_values(by=['latitude', 'longitude', 'time'], inplace=True)

    def check_continuous_cold_wave(group):
        # Work on a copy so the frame handed in by groupby is not mutated.
        group = group.copy()

        is_cold = group['IsColdWave'].astype(bool)
        day_gap = group['time'].diff().dt.days

        # A new run starts on the first row, after a date gap, or whenever
        # the cold flag differs from the previous row (the first row's
        # diff is NaN, which also compares unequal to 0).
        gap_break = (day_gap > 1) | day_gap.isna()
        flag_break = is_cold.astype(int).diff().ne(0)
        run_id = (gap_break | flag_break).cumsum()

        # Within a run every row carries the same flag, so the sum is the
        # run length for cold runs and 0 otherwise.
        cold_days_in_run = is_cold.groupby(run_id).transform('sum')

        group['IsColdWaveEvent'] = is_cold & (cold_days_in_run >= 2)
        return group

    # Evaluate each grid point independently.
    data = data.groupby(['latitude', 'longitude'], group_keys=True).apply(check_continuous_cold_wave)

    return data

In [None]:
# identify future cold wave

def mark_and_check_cold_wave_days(group):
    """Mark cold days and those belonging to a run of >= 2 consecutive cold days.

    A row is a cold day when ``tas <= Threshold``.  Rows are ordered by
    ``time`` and split into runs; a run ends when the cold flag flips or
    when the gap to the previous row is not exactly one day.  Cold runs of
    two or more rows are marked valid.  Splitting on date gaps means a
    missing day ends the current run instead of invalidating every cold day
    around it.

    Args:
        group: DataFrame with ``time``, ``tas`` and ``Threshold`` columns.
            It is not modified.

    Returns:
        A new DataFrame sorted by ``time`` with a fresh 0..n-1 index,
        ``time`` converted to datetime, and the columns ``cold_wave_valid``
        (bool) and ``IsColdWave`` (0/1 int) added.
    """
    # assign() returns a new frame, so the caller's data stays untouched.
    group = (
        group.assign(time=pd.to_datetime(group['time']))
        .sort_values('time')
        .reset_index(drop=True)
    )

    is_cold = group['tas'] <= group['Threshold']

    # A run boundary is any row whose flag differs from the previous row or
    # that does not directly follow the previous row's date.  The first
    # row always starts a run (shift/diff give NaN there).
    flag_changed = is_cold.ne(is_cold.shift())
    day_gap = group['time'].diff().dt.days
    run_id = (flag_changed | day_gap.ne(1)).cumsum()
    run_length = run_id.groupby(run_id).transform('size')

    # Assign cold_wave_valid before IsColdWave to keep the column order
    # callers have been receiving.
    group['cold_wave_valid'] = is_cold & run_length.ge(2)
    group['IsColdWave'] = is_cold.astype(int)

    return group