In [None]:
### Imports

import pandas as pd
import numpy as np
from google.colab import drive

# Mount Google Drive at /content/drive (Colab only); adjust or remove this call
# when running the notebook outside Google Colab.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
### Create DataFrame

# Replace the placeholder with the path to your spreadsheet.
# The cells below read a 'date' column (compared against pandas timestamps)
# and a 'DailyMeanTemperature' column from this frame.
df = pd.read_excel('path_to_your_file')

In [None]:
# Percentage of missing values in one temperature column.
# Repeat this for every column that holds temperature data (here it was done
# three times: daily mean, maximum and minimum temperature).
# The World Meteorological Organization recommends no more than 20% missing data,
# but the pattern of the gaps matters too: if data are missing for long
# consecutive periods, consider another gap-filling method, because the one used
# further down is fragile in that scenario.
missing_mask = df['column you want to calculate missing data'].isna()
pct_missing = 100 * missing_mask.mean()
print(round(pct_missing, 2))

In [None]:
# Reference period used to derive the T95 threshold
startT95 = pd.to_datetime('your-start-date')
endT95 = pd.to_datetime('your-end-date')
# Keep only the rows whose date falls inside the reference period (both ends included)
in_reference_period = df['date'].between(startT95, endT95)
dfT95 = df[in_reference_period]
# T95 is the 95th percentile of the daily mean temperature over that period
T95 = dfT95['DailyMeanTemperature'].quantile(0.95)

24.2

In [None]:
# Build a gap-free daily series and derive the Excess Heat Factor (EHF) columns.
# NOTE: this cell replaces the 'date' column with a complete daily calendar that
# ends up in a column named 'index' (later cells rely on that name), so run it
# once, after T95 has been computed from the original 'date' column.
startDate = df['date'].min()
endDate = df['date'].max()
complete_index = pd.date_range(start=startDate, end=endDate)
# Reindexing on the full calendar orders the rows and inserts the missing days
df = df.set_index('date').reindex(complete_index).reset_index()
# Fill gaps with the previous valid value, then back-fill a gap at the very start.
# .ffill()/.bfill() replace the deprecated fillna(method=...) spelling.
df['DailyMeanTemperature'] = df['DailyMeanTemperature'].ffill().bfill()

# Three-day period (TDP): mean of day i, i+1 and i+2.
# Rolling over the reversed series yields a forward-looking window, so every row
# (including the first two) averages the current day with the days after it; the
# last two rows use the 2 and 1 days that are still available.
dailyMean = df['DailyMeanTemperature']
df['TDP'] = dailyMean[::-1].rolling(window=3, min_periods=1).mean()[::-1]
# 30-day period: trailing mean over the current day and up to 29 days before it.
# NOTE(review): the published EHF definition (Nairn & Fawcett) appears to average
# the 30 days *preceding* the three-day period, i.e. without day i -- confirm
# whether dailyMean.shift(1) should be used here.
df['30DP'] = dailyMean.rolling(window=30, min_periods=1).mean()
# Keep in mind you should not use the first 30 days of your dataframe, because
# their 30-day mean is computed from fewer than 30 values

# Acclimatisation index: how much hotter the TDP is than the recent past
df['EHIaccl'] = df['TDP'] - df['30DP']
# Significance index: how much hotter the TDP is than the T95 threshold
df['EHIsigg'] = df['TDP'] - T95
# EHF = EHIsigg * max(1, EHIaccl); np.where keeps EHIsigg when EHIaccl is not > 1
df['EHF'] = np.where(df['EHIaccl'] > 1, df['EHIsigg'] * df['EHIaccl'], df['EHIsigg'])

# Heat-wave days: a positive EHF on day i flags days i, i+1 and i+2 (the three
# days of its TDP). Equivalently, a day is flagged when its own EHF or the EHF
# of one of the two previous days is positive.
positiveEHF = df['EHF'] > 0
df['isHW'] = (
    positiveEHF
    | positiveEHF.shift(1, fill_value=False)
    | positiveEHF.shift(2, fill_value=False)
)

In [None]:
# Calculate EHF85: the 85th percentile of the positive EHF values only
positiveEHFs = df.loc[df['EHF'].gt(0)]
EHF85 = positiveEHFs['EHF'].quantile(0.85)
# Three times EHF85 is the lower bound of the 'Extreme' class used below
EHF85_3x = 3 * EHF85

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Classify every day by heat-wave intensity from its EHF value
dailyEHF = df['EHF']
conditions = [
    dailyEHF <= 0,
    (dailyEHF > 0) & (dailyEHF < EHF85),
    (dailyEHF >= EHF85) & (dailyEHF < EHF85_3x),
    dailyEHF >= EHF85_3x,
]
intensities = ['Not HW', 'Low-Intensity', 'Severe', 'Extreme']
# An explicit string default is required: np.select's implicit default is the
# integer 0, which recent NumPy versions refuse to combine with string choices
# (TypeError). Rows that match no condition (EHF is NaN) are labelled 'Not HW'.
df['HW_Intensity'] = np.select(conditions, intensities, default=intensities[0])

# Calculate the daily thermal range (maximum minus minimum temperature)
df['thermalRange'] = df['maximum temperature column'] - df['minimum temperature column']

# Create a 'year' column to facilitate the per-year calculations below
df['year'] = df['index'].dt.year

In [None]:
# Number of heat-wave days per year: isHW is boolean, so summing it counts the True days
HWDays = df['isHW'].groupby(df['year']).sum()
print(HWDays)

In [None]:
# Define a function to count the number of heat waves per year
def count_HW_periods(df, min_duration=3):
    """Count the heat waves that start in each year.

    A heat wave is a run of consecutive rows whose ``isHW`` flag is True and
    that spans at least ``min_duration`` rows. Each wave is counted once, in
    the year of its first day, so an event that crosses 31 December /
    1 January is neither split in two nor discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a boolean ``isHW`` column and a ``year`` column; rows are
        assumed to be in chronological order. The frame is not modified.
    min_duration : int, default 3
        Minimum run length (in rows) for a run to count as a heat wave.

    Returns
    -------
    pandas.DataFrame
        Columns ``year`` and ``num_HW_periods``; years in which no heat wave
        starts are absent.
    """
    is_hw = df['isHW']
    # A new run id starts every time the flag changes value
    run_id = (is_hw != is_hw.shift()).cumsum()
    # Group the heat-wave rows by run only (not by year) so runs stay whole
    years_by_run = df.loc[is_hw, 'year'].groupby(run_id[is_hw])
    HW_periods = pd.DataFrame({
        'year': years_by_run.first(),   # year in which the run starts
        'count': years_by_run.size(),   # run length in days
    })
    HW_periods = HW_periods[HW_periods['count'] >= min_duration]
    result = HW_periods.groupby('year').size().reset_index(name='num_HW_periods')
    return result

# Count the heat waves per year over the whole dataframe and show the table
result = count_HW_periods(df)
print(result)

In [None]:
# Define a function to calculate the mean heat wave duration per year
def calculate_average_duration(df, min_duration=3):
    """Compute the mean heat-wave duration (in days) per year.

    A heat wave is a run of consecutive rows whose ``isHW`` flag is True and
    that spans at least ``min_duration`` rows. Each wave contributes its full
    length to the year of its first day, so an event that crosses
    31 December / 1 January is not cut into two shorter pieces.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a boolean ``isHW`` column and a ``year`` column; rows are
        assumed to be in chronological order. The frame is not modified.
    min_duration : int, default 3
        Minimum run length (in rows) for a run to count as a heat wave.

    Returns
    -------
    pandas.DataFrame
        Columns ``year`` and ``average_duration``; years in which no heat wave
        starts are absent.
    """
    is_hw = df['isHW']
    # Identify the runs: a new id starts every time the flag changes value
    run_id = (is_hw != is_hw.shift()).cumsum()

    # Group the heat-wave rows by run only (not by year) so runs stay whole
    years_by_run = df.loc[is_hw, 'year'].groupby(run_id[is_hw])
    HW_periods = pd.DataFrame({
        'year': years_by_run.first(),   # year in which the run starts
        'count': years_by_run.size(),   # run length in days
    })

    # Keep only the runs that are long enough to be heat waves
    HW_periods = HW_periods[HW_periods['count'] >= min_duration]

    # Average the run lengths within each start year
    average_duration = HW_periods.groupby('year')['count'].mean().reset_index(name='average_duration')

    return average_duration

# Compute the average heat wave duration per year over the whole dataframe and print the table
average_duration = calculate_average_duration(df)
print(average_duration)

In [None]:
# Mean thermal range (MTR): for every heat wave, the mean of the daily thermal
# range over the days of that heat wave; 0 on days that are not in a heat wave.

# Consecutive rows with the same isHW value share one run id
hw_run_id = (df['isHW'] != df['isHW'].shift()).cumsum()

# Mean thermal range of the run each row belongs to (NaN values are skipped)
run_mean_range = df['thermalRange'].groupby(hw_run_id).transform('mean')

# Keep the run mean on heat-wave days only. Using 0.0 makes the column float
# from the start, avoiding the deprecated int -> float upcast that happens when
# float means are written into a column initialised with the integer 0.
df['MTR'] = run_mean_range.where(df['isHW'], 0.0)

In [1]:
# Create a function to calculate the number of heat waves per month across the whole dataframe
# This is intended to find out in which months heat waves most commonly happen
def count_heatWaves_by_month(df, min_duration=3):
    """Count heat waves by the calendar month in which they start.

    A heat wave is a run of consecutive rows whose ``isHW`` flag is True and
    that spans at least ``min_duration`` rows (the same rule used by the
    per-year counts). Counts are pooled over all years.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a boolean ``isHW`` column and a datetime column named
        ``index``; rows are assumed to be in chronological order. The frame is
        not modified.
    min_duration : int, default 3
        Minimum run length (in rows) for a run to count as a heat wave; this
        drops a run that is cut short by the end of the record.

    Returns
    -------
    pandas.DataFrame
        Columns ``month`` (1-12) and ``num_periods``; months in which no heat
        wave starts are absent.
    """
    is_hw = df['isHW']
    # Identify the runs: a new id starts every time the flag changes value
    run_id = (is_hw != is_hw.shift()).cumsum()

    # First date and length of every heat-wave run
    dates_by_run = df.loc[is_hw, 'index'].groupby(run_id[is_hw])
    run_start = dates_by_run.first()
    run_length = dates_by_run.size()

    # Month of the first day of each run that is long enough
    start_month = run_start[run_length >= min_duration].dt.month.rename('month')

    # Count the number of heat waves per start month across the years
    HW_by_month = start_month.groupby(start_month).size().reset_index(name='num_periods')

    return HW_by_month

# Run the function on the whole dataframe and print the heat wave count per start month
HW_by_month = count_heatWaves_by_month(df)
print(HW_by_month)

NameError: name 'df' is not defined