In [None]:
# Mount Google Drive in the Colab runtime; later cells read project files from under this path

from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Explore in-situ sites

In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd

# Read the site table (rows tagged 'ACT Park' / 'Forestry Corp' in `Source`) from the CSV output folder
act_forest_sites_file_name = 'sites_latlon_dates_act_park_forest_corp.csv'
working_dir = "/content/drive/My Drive/Work/2025.04 ANU Bushfire"
act_forest_sites_csv_path = os.path.join(
    working_dir, "output", "csv", act_forest_sites_file_name
)
df = pd.read_csv(act_forest_sites_csv_path)
df

In [None]:
# Map the in-situ sites, coloured by data source (ACT Park vs Forestry Corp)

# Split the data into ACT Park and Forestry Corp
act_park = df[df['Source'] == 'ACT Park']
forestry_corp = df[df['Source'] == 'Forestry Corp']

# Create the figure with an explicit Axes instead of relying on pyplot's implicit current figure
fig, ax = plt.subplots(figsize=(12, 8))

# ACT Park points in red, Forestry Corp points in blue
ax.scatter(act_park['Stn_long'], act_park['Stn_lat'], color='red', label='ACT Park', s=50)
ax.scatter(
    forestry_corp['Stn_long'], forestry_corp['Stn_lat'], color='blue', label='Forestry Corp', s=50
)

# Add labels, title and legend
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Geographical Distribution of ACT Park and Forestry Corp Sites')
ax.legend()

# Dashed, semi-transparent grid; tighten the layout before rendering
ax.grid(True, linestyle='--', alpha=0.7)
fig.tight_layout()

# Show the plot
plt.show()

In [None]:
# Largest Stn_long value among the ACT Park rows
act_park.loc[:, 'Stn_long'].max()

# veg_cover investigation

## Reading data

In [None]:
# Read site data

import os

import pandas as pd

file_name = "in-situ_topography_phd.csv"
working_dir = "/content/drive/My Drive/Work/2025.04 ANU Bushfire"

# Build the CSV path first, then load it; `df` is rebound to this table from here on
in_situ_topography_csv_path = os.path.join(working_dir, "output", "csv", file_name)
df = pd.read_csv(in_situ_topography_csv_path)
df

In [None]:
# Load veg cover NetCDF

import xarray as xr

# Open the vegetation-cover file stored under <working_dir>/Data/Vegetation_cover
veg_cover_data_path = os.path.join(working_dir, "Data", "Vegetation_cover", "veg_cover_phd.nc")
veg_ds = xr.open_dataset(veg_cover_data_path)

# Reminder printed with the cell output: the CRS attribute stored in the file is wrong
crs_note = (
    "The actual crs of the veg_ds is EPSG:32754. The attribute was incorrectly saved when downloading the data."
)
print(crs_note)
veg_ds

## Investigation

In [None]:
# Investigate NaNs in veg_cover: which SiteIDs have at least one missing value?

missing_veg_cover = df['veg_cover'].isna()
df.loc[missing_veg_cover, 'SiteID'].unique()  # 14 sites

In [None]:
# Investigate whether all veg_cover in df for those sites are NaNs - Yes

# Only a cell's final expression is rendered, so gather the per-site unique values in one
# mapping; as two bare expressions, the first site's result was computed and then discarded.
sites_to_check = (92, 284)
{
    site_id: df.loc[df['SiteID'] == site_id, 'veg_cover'].unique()
    for site_id in sites_to_check
}

In [None]:
# Investigate whether all veg_cover in veg_ds for those sites are NaNs - Yes
# (spot check on site 156; change the SiteID below to inspect another site)

from pyproj import Transformer

# Filter the site's rows once and take its (unique) X / Y coordinates
site_156_rows = df[df['SiteID'] == 156]
X = site_156_rows['X'].unique()[0]
Y = site_156_rows['Y'].unique()[0]

# Reproject from EPSG:4326 to EPSG:32754 so the point can be matched against veg_ds's x / y
# coordinates; always_xy=True makes the transformer read its inputs in (x, y) order.
transformer = Transformer.from_crs("EPSG:4326", "EPSG:32754", always_xy=True)
dea_x, dea_y = transformer.transform(X, Y)

# veg_cover values at the grid cell nearest to the site
tmp = veg_ds.sel(x=dea_x, y=dea_y, method='nearest')
tmp['veg_cover'].values

In [None]:
# Investigate the number of non-NaN values for each site

# Load site df
site_file_name = "site_data_summary_phd.csv"
site_csv_path = os.path.join(working_dir, "output", "csv", site_file_name)
site_df = pd.read_csv(site_csv_path)

# Make dea_x, dea_y columns: reproject every site's X / Y in one vectorised call
transformer = Transformer.from_crs("EPSG:4326", "EPSG:32754", always_xy=True)
projected_x, projected_y = transformer.transform(site_df['X'].values, site_df['Y'].values)
site_df['dea_x'] = projected_x
site_df['dea_y'] = projected_y

# Use dea_x, dea_y columns to get veg_cover list and count non-NaNs


def count(row):
    """Count the non-NaN ``veg_cover`` values at the grid cell nearest to a site.

    Args:
        row: Mapping-like record (here a ``site_df`` row) providing the
            ``'dea_x'`` and ``'dea_y'`` coordinates used to query ``veg_ds``.

    Returns:
        int: Number of non-NaN ``veg_cover`` values in the nearest-neighbour
        selection from ``veg_ds``.
    """
    point_series = veg_ds['veg_cover'].sel(x=row['dea_x'], y=row['dea_y'], method='nearest')
    # DataArray.count() tallies the non-NaN entries directly, so no numpy import
    # (previously executed inside this function on every row) or manual mask is needed.
    return int(point_series.count())


# Run `count` once per site row and store the result as a new column
non_nan_counts = site_df.apply(count, axis=1)
site_df['non-NaN veg_cover count'] = non_nan_counts
site_df

In [None]:
# Rows of site_df with fewer than 100 non-NaN veg_cover values
sparse_veg_cover = site_df['non-NaN veg_cover count'] < 100
site_df[sparse_veg_cover]

In [None]:
# Investigate site 251 - only 5 pv+npv left after cloud filtering

# Projected coordinates of the first row matching SiteID 251
site_251_coords = site_df.loc[site_df['SiteID'] == 251, ['dea_x', 'dea_y']]
x, y = site_251_coords.values[0]

# veg_cover values at the nearest grid cell
point_series = veg_ds['veg_cover'].sel(x=x, y=y, method='nearest')
point_series.values

In [None]:
# Investigate pv of a single site before cloud filtering.
# The site inspected is whatever `site_id` is set to below (currently 249).
# Candidate sites (presumably the 14 sites with NaN veg_cover found earlier - confirm):
# 92, 149, 74, 284, 167, 176, 249, 271, 155, 156, 195, 170, 184, 274

import xarray as xr

site_id = 249  # swap for any candidate listed above

veg_cover_with_cloud_data_path = os.path.join(
    working_dir, "Data", "Vegetation_cover", "DEA_Fractional_cover_PV_phd_tight_time_coverage.nc"
)
veg_cloud_ds = xr.open_dataset(veg_cover_with_cloud_data_path)

# Projected coordinates of the first site_df row matching `site_id`
x, y = site_df.loc[site_df['SiteID'] == site_id, ['dea_x', 'dea_y']].values[0]

# pv values at the grid cell nearest to the site
point_series = veg_cloud_ds.sel(x=x, y=y, method='nearest')['pv']
point_series.values