In [None]:
# Root folder on the mounted Google Drive; input and output paths below are joined onto it.
working_dir = "/content/drive/My Drive/Work/2025.04 ANU Bushfire"
# Gate for the BARRA2 download cell (the loop only runs when this is True).
download_barra2_data = False  # around 30 min
# Gate for the cells that fetch and open the sample BARRA2 NetCDF files.
explore_barra2_data = False

In [None]:
# Connect to Google Drive

# Colab-only import: mounts the user's Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Loading in-situ and remote data


In [None]:
# Load in-situ_topography.csv as the main df

import pandas as pd
import os

df = pd.read_csv(os.path.join(working_dir, "output/csv/in-situ_topography.csv"))
df['Datetime'] = pd.to_datetime(df['Datetime'])

# Series.min()/.max() are vectorised and skip NaT; the builtin min()/max()
# iterate over boxed Timestamps in Python and give unreliable results if a
# NaT is present.
first_datetime = df['Datetime'].min()
# One extra day so that the last observation day is fully covered by the range.
last_datetime = df['Datetime'].max() + pd.Timedelta(days=1)
print("First date: ", first_datetime.strftime("%Y%m%d"), ", last date: ", last_datetime.strftime("%Y%m%d"))

df.head()

First date:  20181221 , last date:  20201107


Unnamed: 0.1,Unnamed: 0,SiteID,X,Y,Datetime,Temperature,RH,VPD,slope,aspect,relief
0,0,79,150.2953,-35.48254,2019-01-20 04:00:00,17.941,100.5,-0.010282,7.76997,187.29736,27.341133
1,1,79,150.2953,-35.48254,2019-01-20 05:00:00,17.753,100.867,-0.017619,7.76997,187.29736,27.341133
2,2,79,150.2953,-35.48254,2019-01-20 06:00:00,17.878,101.233,-0.025254,7.76997,187.29736,27.341133
3,3,79,150.2953,-35.48254,2019-01-20 07:00:00,18.066,100.377,-0.007814,7.76997,187.29736,27.341133
4,4,79,150.2953,-35.48254,2019-01-20 08:00:00,18.379,100.867,-0.018326,7.76997,187.29736,27.341133


## Downloading BARRA2 data

In [None]:
# Find barra2 grid cell coordinates
import xarray as xr

# A single monthly 'tas' file is opened here only to read its lat/lon coordinate values.
url = "https://thredds.nci.org.au/thredds/fileServer/ob53/output/reanalysis/AUST-04/BOM/ERA5/historical/hres/BARRA-C2/v1/1hr/tas/latest/tas_AUST-04_ERA5_historical_hres_BOM_BARRA-C2_v1_1hr_201812-201812.nc"
ds = xr.open_dataset(url)

# Coordinate values as NumPy arrays; used later to snap each site to its nearest grid cell.
lats = ds['lat'].values
lons = ds['lon'].values
print("lats: ", lats)
print("lons: ", lons)

lats:  [-45.69 -45.65 -45.61 ...  -5.09  -5.05  -5.01]
lons:  [108.02 108.06 108.1  ... 159.82 159.86 159.9 ]


In [None]:
# Make a list of locations of all sites and reduce it to barra2 grid cell locations

import numpy as np

def find_nearest_grid_point(x, y, lon_grid, lat_grid):
    """Return the grid coordinates closest to the point (x, y).

    Longitude and latitude are matched independently: for each axis the grid
    value with the smallest absolute difference is selected (the first one
    wins on ties).

    Args:
        x: Longitude of the point.
        y: Latitude of the point.
        lon_grid: Array of grid longitudes.
        lat_grid: Array of grid latitudes.

    Returns:
        Tuple ``(nearest_lon, nearest_lat)`` taken from the two grids.
    """
    lon_index = np.argmin(np.abs(lon_grid - x))
    lat_index = np.argmin(np.abs(lat_grid - y))
    return lon_grid[lon_index], lat_grid[lat_index]

# Snap every observation to its nearest grid cell and store the cell's
# coordinates in two new columns.
# NOTE(review): this runs row by row and builds a pd.Series per row, so it is
# slow on large frames; the resulting values are later embedded in file names,
# so any rewrite must keep their exact string form.
df[['barra2_X', 'barra2_Y']] = df.apply(
    lambda row: pd.Series(find_nearest_grid_point(row['X'], row['Y'], lons, lats)),
    axis=1
)
# Unique (lon, lat) pairs; building them through a set means the list order is not fixed.
barra2_cell_locations_list = list(set((x, y) for x, y in df[['barra2_X', 'barra2_Y']].values))
print("barra2_cell_locations_list length: ", len(barra2_cell_locations_list))
print("barra2_cell_locations_list: ", [(str(x), str(y)) for x, y in barra2_cell_locations_list])

barra2_cell_locations_list length:  22
barra2_cell_locations_list:  [('150.1', '-35.53'), ('150.14', '-35.57'), ('150.18', '-35.73'), ('150.34', '-35.49'), ('150.3', '-35.45'), ('150.14', '-35.77'), ('150.1', '-35.57'), ('150.26', '-35.57'), ('150.3', '-35.49'), ('150.26', '-35.61'), ('150.02', '-36.13'), ('150.06', '-36.13'), ('150.1', '-35.81'), ('150.18', '-35.77'), ('150.06', '-36.17'), ('150.14', '-35.73'), ('150.22', '-35.65'), ('150.18', '-35.65'), ('150.38', '-35.45'), ('150.22', '-35.69'), ('150.34', '-35.45'), ('150.26', '-35.53')]


In [None]:
 # Use Python loop and curl to download all barra2 data

import urllib
import calendar
from datetime import datetime, timedelta

if download_barra2_data:
    barra2_data_dir = os.path.join(working_dir, "Data/barra2")
    os.makedirs(barra2_data_dir, exist_ok=True)

    for i, (x, y) in enumerate(barra2_cell_locations_list):
        print(f"Downloading data for barra2 cell coordinate {i + 1} of {len(barra2_cell_locations_list)}")
        current_dt = first_datetime.replace(day=1)

        while current_dt <= last_datetime:
            year = current_dt.year
            month = current_dt.month
            yyyymm = f"{year}{month:02d}"

            # Determine time_start and time_end for this month
            month_start = current_dt
            last_day = calendar.monthrange(year, month)[1]
            month_end = datetime(year, month, last_day)

            raw_start = month_start.strftime('%Y-%m-%dT00:00:00Z')
            raw_end = month_end.strftime('%Y-%m-%dT23:00:00Z')
            time_start = urllib.parse.quote(raw_start, safe='')
            time_end = urllib.parse.quote(raw_end, safe='')

            # Download temperature and relative humidity data
            for var in ['tas', 'hurs']:
                url = f"https://thredds.nci.org.au/thredds/ncss/grid/ob53/output/reanalysis/AUST-04/BOM/ERA5/historical/hres/BARRA-C2/v1/1hr/{var}/latest/{var}_AUST-04_ERA5_historical_hres_BOM_BARRA-C2_v1_1hr_{yyyymm}-{yyyymm}.nc?var={var}&latitude={y}&longitude={x}&time_start={time_start}&time_end={time_end}&timeStride=&vertCoord=&accept=csv"
                output_file_path = os.path.join(barra2_data_dir, f"barra2_data_{var}_{x}_{y}_{yyyymm}.csv")
                print(f"Downloading {output_file_path} from {url}")
                !curl -s -L "{url}" -o "{output_file_path}" -C -

            # Go to next month
            if month == 12:
                current_dt = datetime(year + 1, 1, 1)
            else:
                current_dt = datetime(year, month + 1, 1)

In [None]:
# List the downloaded BARRA2 files whose name contains 'tas'
# (looked up under os.path.join(working_dir, "Data/barra2")).

import os

# working_dir is taken from the configuration cell; re-assigning it here would
# silently override whatever the user configured there.
data_dir = os.path.join(working_dir, "Data/barra2")

# os.listdir() returns entries in arbitrary order; sort for a stable listing.
for filename in sorted(os.listdir(data_dir)):
    if "tas" in filename:
        print(os.path.join(data_dir, filename))


/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201812.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201901.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201902.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201903.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201904.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201905.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201906.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201907.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.1_-35.53_201908.csv
/content/drive/My Drive/Work/2025.04 ANU Bushfire/Data/barra2/barra2_data_tas_150.

In [None]:
# BARRA2 data exploration

# Also defines barra2_data_dir for later cells when the download cell was skipped.
barra2_data_dir = os.path.join(working_dir, "Data/barra2")
# Reads whichever entry os.listdir() returns first; that ordering is arbitrary,
# so the file inspected here can differ between runs.
barra2_df = pd.read_csv(os.path.join(barra2_data_dir, os.listdir(barra2_data_dir)[0]))
barra2_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 744 entries, 0 to 743
Data columns (total 5 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   time                            744 non-null    object 
 1   station                         744 non-null    object 
 2   latitude[unit="degrees_north"]  744 non-null    float64
 3   longitude[unit="degrees_east"]  744 non-null    float64
 4   tas[unit="K"]                   744 non-null    float64
dtypes: float64(3), object(2)
memory usage: 29.2+ KB


In [None]:
# Preview the first rows of the sample BARRA2 CSV loaded above.
barra2_df.head()

Unnamed: 0,time,station,"latitude[unit=""degrees_north""]","longitude[unit=""degrees_east""]","tas[unit=""K""]"
0,2018-12-01T00:00:00Z,GridPointRequestedAt[35.530S_150.100E],-35.53,150.1,296.9375
1,2018-12-01T01:00:00Z,GridPointRequestedAt[35.530S_150.100E],-35.53,150.1,299.84375
2,2018-12-01T02:00:00Z,GridPointRequestedAt[35.530S_150.100E],-35.53,150.1,301.421875
3,2018-12-01T03:00:00Z,GridPointRequestedAt[35.530S_150.100E],-35.53,150.1,299.953125
4,2018-12-01T04:00:00Z,GridPointRequestedAt[35.530S_150.100E],-35.53,150.1,297.5625


In [None]:
# BARRA2 data exploration

import xarray as xr

if explore_barra2_data:
    tas_url = "https://thredds.nci.org.au/thredds/fileServer/ob53/output/reanalysis/AUST-04/BOM/ERA5/historical/hres/BARRA-C2/v1/1hr/tas/latest/tas_AUST-04_ERA5_historical_hres_BOM_BARRA-C2_v1_1hr_201812-201812.nc"
    hurs_url = "https://thredds.nci.org.au/thredds/fileServer/ob53/output/reanalysis/AUST-04/BOM/ERA5/historical/hres/BARRA-C2/v1/1hr/hurs/latest/hurs_AUST-04_ERA5_historical_hres_BOM_BARRA-C2_v1_1hr_201812-201812.nc"
    !curl -L {tas_url} -o "barra2_tas.nc"
    !curl -L {hurs_url} -o "barra2_hurs.nc"

    tas_ds = xr.open_dataset("barra2_tas.nc")
    hurs_ds = xr.open_dataset("barra2_hurs.nc")
    tas_ds

In [None]:
if explore_barra2_data:
    # Only a top-level trailing expression is auto-rendered by the notebook; a
    # bare `hurs_ds` nested in this `if` shows nothing, so display it explicitly.
    display(hurs_ds)

# Combining in-situ and remote data into a single dataframe

In [None]:
# Keep only in-situ observations taken exactly on the hour (HH:00:00) and generate UTC_Datetime

from pytz import timezone

# Single combined mask; .copy() gives an independent frame so the column
# assignments below do not trigger SettingWithCopyWarning on a filtered slice.
on_the_hour = (df['Datetime'].dt.minute == 0) & (df['Datetime'].dt.second == 0)
df = df[on_the_hour].copy()

aus_tz = timezone('Australia/Sydney')
# aest_time = aus_tz.localize(pd.Timestamp('2019-04-07 02:00:00'), is_dst=False)  # Did not work
# Wall-clock times that are ambiguous or non-existent around daylight-saving
# changes become NaT and are dropped on the next line.
df['Datetime'] = df['Datetime'].dt.tz_localize(aus_tz, ambiguous='NaT', nonexistent='NaT')
df = df[df['Datetime'].notna()].copy()
df['UTC_Datetime'] = df['Datetime'].dt.tz_convert('UTC')

df.head()

# # (NOT NEEDED) Generate mean_hourly_df
# df['Hourly_Datetime'] = df['Datetime'].dt.floor('H')
# df = (
#     df.groupby(['SiteID', 'Hourly_Datetime'])['Temperature']
#     .apply(lambda x: ', '.join(sorted(x.dt.strftime('%Y-%m-%d').unique())))  # TODO
#     .reset_index()
# )
# df

Unnamed: 0.1,Unnamed: 0,SiteID,X,Y,Datetime,Temperature,RH,VPD,slope,aspect,relief,barra2_X,barra2_Y,UTC_Datetime
0,0,79,150.2953,-35.48254,2019-01-20 04:00:00+11:00,17.941,100.5,-0.010282,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 17:00:00+00:00
1,1,79,150.2953,-35.48254,2019-01-20 05:00:00+11:00,17.753,100.867,-0.017619,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 18:00:00+00:00
2,2,79,150.2953,-35.48254,2019-01-20 06:00:00+11:00,17.878,101.233,-0.025254,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 19:00:00+00:00
3,3,79,150.2953,-35.48254,2019-01-20 07:00:00+11:00,18.066,100.377,-0.007814,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 20:00:00+00:00
4,4,79,150.2953,-35.48254,2019-01-20 08:00:00+11:00,18.379,100.867,-0.018326,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 21:00:00+00:00


In [None]:
# For each row, look up the matching value in the per-cell, per-month barra2 CSV files

from functools import lru_cache

from scipy.constants import convert_temperature


@lru_cache(maxsize=None)
def _load_barra2_csv(file_path):
    """Read one BARRA2 CSV file, caching the parsed frame per path.

    Many rows share the same (cell, variable, month) file, so caching avoids
    re-parsing the same CSV for every row. If files are re-downloaded in the
    same session, call ``_load_barra2_csv.cache_clear()`` first.
    """
    return pd.read_csv(file_path)


def get_barra2_value(row, var):
    """Return the BARRA2 value of ``var`` for the hour and grid cell of ``row``.

    Args:
        row: Mapping/Series providing 'UTC_Datetime' (timezone-aware UTC
            timestamp), 'barra2_X' and 'barra2_Y' (grid cell coordinates as
            used in the downloaded file names).
        var: BARRA2 variable name used in the file name and as the prefix of
            the value column, e.g. 'tas' or 'hurs'.

    Returns:
        The value rounded to 3 decimals; 'tas' is converted from Kelvin to
        Celsius first.

    Raises:
        IndexError: if the file has no row for the requested hour or no
            column starting with ``var``.
        FileNotFoundError: if the expected CSV file does not exist.
    """
    # The 'time' column of the BARRA2 files is in UTC ("...Z"), so both the
    # monthly file and the row inside it must be selected with the UTC
    # timestamp. Formatting the local 'Datetime' with a literal 'Z' would pick
    # the value from 10-11 hours away from the observation.
    utc_time = row['UTC_Datetime']
    yyyymm = utc_time.strftime('%Y%m')
    file_path = os.path.join(barra2_data_dir, f"barra2_data_{var}_{row['barra2_X']}_{row['barra2_Y']}_{yyyymm}.csv")
    df_barra2 = _load_barra2_csv(file_path)

    target_time = utc_time.strftime('%Y-%m-%dT%H:%M:%SZ')
    # Value columns are named like 'tas[unit="K"]'; match on the prefix so an
    # unrelated column that merely contains the variable name is never picked.
    value_columns = df_barra2.columns[df_barra2.columns.str.startswith(var)]
    if len(value_columns) == 0:
        raise IndexError(f"No column starting with '{var}' in {file_path}")
    matches = df_barra2.loc[df_barra2['time'] == target_time, value_columns[0]].values
    if len(matches) == 0:
        raise IndexError(f"No row with time {target_time} in {file_path}")
    barra2_value = matches[0]
    if var == 'tas':
        barra2_value = convert_temperature(barra2_value, 'Kelvin', 'Celsius')

    return round(barra2_value, 3)

# Fill one new column per BARRA2 variable: temperature first, then relative humidity.
for target_column, barra2_var in (('barra2_Temperature', 'tas'), ('barra2_RH', 'hurs')):
    df[target_column] = df.apply(get_barra2_value, axis=1, args=(barra2_var,))
df.head()

Unnamed: 0.1,Unnamed: 0,SiteID,X,Y,Datetime,Temperature,RH,VPD,slope,aspect,relief,barra2_X,barra2_Y,UTC_Datetime,barra2_Temperature,barra2_RH
0,0,79,150.2953,-35.48254,2019-01-20 04:00:00+11:00,17.941,100.5,-0.010282,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 17:00:00+00:00,25.084,61.084
1,1,79,150.2953,-35.48254,2019-01-20 05:00:00+11:00,17.753,100.867,-0.017619,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 18:00:00+00:00,24.553,62.619
2,2,79,150.2953,-35.48254,2019-01-20 06:00:00+11:00,17.878,101.233,-0.025254,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 19:00:00+00:00,23.553,67.526
3,3,79,150.2953,-35.48254,2019-01-20 07:00:00+11:00,18.066,100.377,-0.007814,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 20:00:00+00:00,22.163,74.565
4,4,79,150.2953,-35.48254,2019-01-20 08:00:00+11:00,18.379,100.867,-0.018326,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 21:00:00+00:00,21.584,77.59


In [None]:
# Show every row that has at least one missing value in any column.
df.loc[df.isna().any(axis='columns')]

Unnamed: 0.1,Unnamed: 0,SiteID,X,Y,Datetime,Temperature,RH,VPD,slope,aspect,relief,barra2_X,barra2_Y,UTC_Datetime,barra2_Temperature,barra2_RH


# Calculating remote VPD from remote temperature and remote relative humidity

In [None]:
# Write a function for deriving VPD from temp and RH

import math
import numpy as np

def calculate_vpd(temp, rh):
    """Derive vapour pressure deficit from temperature and relative humidity.

    Saturation vapour pressure is computed as
    ``0.6108 * exp(17.27 * temp / (237.3 + temp))``; the actual vapour pressure
    is that value scaled by ``rh / 100`` and the deficit is their difference.
    NOTE(review): the constants match the Tetens/FAO-56 form (temp in degrees
    Celsius, result in kPa) -- confirm against the intended reference.

    Args:
        temp: Scalar temperature.
        rh: Scalar relative humidity in percent.

    Returns:
        The deficit as a float, or NaN when either input is missing.
    """
    if not (pd.isna(temp) or pd.isna(rh)):
        saturation_vp = 0.6108 * math.exp(17.27 * temp / (237.3 + temp))
        actual_vp = saturation_vp * rh / 100
        return saturation_vp - actual_vp
    return np.nan

# Iterate over the two input columns directly: this yields the same values as
# a row-wise df.apply(axis=1) without constructing a Series object per row.
df['barra2_VPD'] = [
    calculate_vpd(temp, rh)
    for temp, rh in zip(df['barra2_Temperature'], df['barra2_RH'])
]
df.head()

Unnamed: 0.1,Unnamed: 0,SiteID,X,Y,Datetime,Temperature,RH,VPD,slope,aspect,relief,barra2_X,barra2_Y,UTC_Datetime,barra2_Temperature,barra2_RH,barra2_VPD
0,0,79,150.2953,-35.48254,2019-01-20 04:00:00+11:00,17.941,100.5,-0.010282,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 17:00:00+00:00,25.084,61.084,1.238954
1,1,79,150.2953,-35.48254,2019-01-20 05:00:00+11:00,17.753,100.867,-0.017619,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 18:00:00+00:00,24.553,62.619,1.152982
2,2,79,150.2953,-35.48254,2019-01-20 06:00:00+11:00,17.878,101.233,-0.025254,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 19:00:00+00:00,23.553,67.526,0.943302
3,3,79,150.2953,-35.48254,2019-01-20 07:00:00+11:00,18.066,100.377,-0.007814,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 20:00:00+00:00,22.163,74.565,0.679194
4,4,79,150.2953,-35.48254,2019-01-20 08:00:00+11:00,18.379,100.867,-0.018326,7.76997,187.29736,27.341133,150.3,-35.49,2019-01-19 21:00:00+00:00,21.584,77.59,0.577647


# Save the resulting dataframe

In [None]:
# Write the combined in-situ + BARRA2 table to output/csv/barra2.csv under working_dir;
# index=False leaves the DataFrame index out of the file.
df.to_csv(os.path.join(working_dir, "output/csv/barra2.csv"), index=False)