# Import

In [2]:
import numpy as np
import tensorflow as tf
import netCDF4 as nc
import os
import rasterio
import matplotlib.pyplot as plt
import rioxarray

### Helper functions

In [26]:
def latitude_converted(lat: float):
    """
    Map a latitude in degrees onto the row axis of a 0.25 degree grid.

    Row 0 corresponds to 90 degrees (north) and row 720 to -90 degrees (south).

    :param lat: The latitude, in degrees, to express in the 720 format.
    :return: The grid position; an int for int input, a float for float input.
    """
    cells_per_degree = 4
    degrees_from_north = 90 - lat
    return degrees_from_north * cells_per_degree

def longitude_converted(lon: float):
    """
    Map a longitude in degrees onto the column axis of a 0.25 degree grid.

    Column 0 corresponds to -180 degrees (west) and column 1440 to 180 degrees (east).

    :param lon: The longitude, in degrees, to express in the 1440 format.
    :return: The grid position; an int for int input, a float for float input.
    """
    # Negative offset from the western edge; the negative factor flips the sign back.
    negative_offset = -180 - lon
    return negative_offset * -4

Function that loads a netCDF file.

In [None]:
def load_netcdf_file(path: str):
    """
    Open a netCDF file in read-only mode.

    NOTE: The caller owns the returned dataset; don't forget to call ``close()`` on it
    once you are done with the file.

    :param path: The absolute path to the netCDF file.
    :return: The opened netCDF dataset.
    """
    # Open the file named by the ``path`` argument, not any module-level variable,
    # so the function works for every caller and in any cell execution order.
    nc_file = nc.Dataset(path, 'r')
    return nc_file

# Path to a single monthly netCDF file (August 2003, judging by the file name) used as a sample input.
file = "E:\\Project\\Urban computing\\data\\2003\\20030801-ESACCI-L4_FIRE-BA-MODIS-fv5.1.nc"
# Open the sample file; `data` stays open until data.close() is called.
data = load_netcdf_file(file)

Create the South Korea matrix from the "burned_area" variable in the netCDF file.

In [2]:
def create_ba_matrix_one_month(nc_file):
    """
    Create the Burned Area matrix for South Korea.

    NOTE: Do not forget to call nc_file.close() after finishing with the file.

    The bounding box is read from the module-level variables ``south_korea_lat_min``,
    ``south_korea_lat_max``, ``south_korea_lon_min`` and ``south_korea_lon_max``, which
    must be defined before this function is called.

    :param nc_file: The netCDF Dataset of the monthly data.
    :return: The ``burned_area`` values restricted to the bounding box, indexed as
        ``[:, latitude, longitude]`` (the first axis is presumably time - confirm against the file).
    """
    ba_matrix = np.array(nc_file.variables['burned_area'])

    # Convert lat and long to the 720 and 1440 degree format respectively.
    # Slice bounds must be integers, while the helpers return floats for
    # non-integer degree values, so round to the nearest grid cell explicitly.
    lat_start = int(round(latitude_converted(south_korea_lat_max)))
    lat_stop = int(round(latitude_converted(south_korea_lat_min)))
    lon_start = int(round(longitude_converted(south_korea_lon_min)))
    lon_stop = int(round(longitude_converted(south_korea_lon_max)))

    # Create the south korea BA matrix. The larger latitude maps to the smaller
    # row index, so the maximum latitude is the start of the row slice.
    south_korea_matrix = ba_matrix[:, lat_start:lat_stop, lon_start:lon_stop]
    return south_korea_matrix


# south_korea = create_ba_matrix_one_month(data)
# south_korea.shape

### Combine data
Create a single dataset that contains the South Korea BA matrix for every month in the years 2001-2020.

In [15]:
def combine_data(root_folder):
    """
    Combine data from multiple NetCDF files.

    Yearly subfolders and the files inside them are visited in sorted (lexicographic)
    order, so files whose names start with a date are combined chronologically.
    Entries of the root folder that are not directories, and files that do not end
    in ``.nc``, are skipped.

    :param root_folder: The root folder containing yearly subfolders.
    :return: Combined dataset for South Korea, or None when no NetCDF file was found.
    """
    monthly_matrices = []

    # os.listdir returns names in arbitrary order; sort to get a stable time axis.
    for year_folder in sorted(os.listdir(root_folder)):
        year_path = os.path.join(root_folder, year_folder)

        # Ignore stray files that sit next to the yearly folders.
        if not os.path.isdir(year_path):
            continue

        for month_file in sorted(os.listdir(year_path)):
            # Only NetCDF files can be opened below; skip anything else.
            if not month_file.lower().endswith('.nc'):
                continue

            file_path = os.path.join(year_path, month_file)

            # Open the NetCDF file; the context manager closes it again.
            with nc.Dataset(file_path, 'r') as nc_file:
                monthly_matrices.append(create_ba_matrix_one_month(nc_file))

    if not monthly_matrices:
        return None

    # Combine the data along the time dimension (assuming the first dimension is time).
    # A single concatenation avoids re-copying the growing array for every file.
    return np.concatenate(monthly_matrices, axis=0)

# Root folder that holds the yearly subfolders with the monthly netCDF files
data_root_folder = 'E:\\Project\\Urban computing\\data'

# Aggregate the South Korea burned area matrices of all files under the root folder
combined_data = combine_data(data_root_folder)

# Persist the aggregated array so it can be reloaded without reading the netCDF files again
np.save("combined_data.npy", combined_data)

In [None]:
# Reload the array written by np.save above
loaded_data = np.load('combined_data.npy')
# Inspect the dimensions of the reloaded array
loaded_data.shape
# Show the second entry along the first axis (the last expression is what the notebook cell displays)
loaded_data[1]

# Load GeoTIFF

In [3]:
# Bounding box used to cut South Korea out of the global data (values presumably in degrees)
south_korea_lat_min, south_korea_lat_max = 32, 39
south_korea_lon_min, south_korea_lon_max = 125, 130

In [4]:
def pixel_long_to_idx(long: float):
    """
    Convert a longitude to a fractional pixel index along the longitude axis.

    The index is the distance from the western edge (52.999301) divided by the
    pixel size (0.0022457331); presumably both describe the pixel GeoTIFF tile - confirm.

    :param long: The longitude to convert; must lie within [52.999301, 179.99999999952965].
    :return: The (non-rounded) pixel index as a float.
    :raises ValueError: If the longitude lies outside the supported range.
    """
    if long < 52.999301 or long > 179.99999999952965:
        # Raise instead of returning the exception class, which callers would
        # otherwise silently treat as a valid index.
        raise ValueError(
            f"longitude {long} is outside the supported range [52.999301, 179.99999999952965]"
        )
    return (long - 52.999301) / 0.0022457331

def pixel_lat_to_idx(latitude: float):
    """
    Convert a latitude to a fractional pixel index along the latitude axis.

    The index is the distance from the origin (1.4210854715202004e-14) divided by the
    pixel size (0.0022457331); presumably both describe the pixel GeoTIFF tile - confirm.

    :param latitude: The latitude to convert; must lie within [0, 83.00004990518016].
    :return: The (non-rounded) pixel index as a float.
    :raises ValueError: If the latitude lies outside the supported range.
    """
    if latitude < 0 or latitude > 83.00004990518016:
        # Raise instead of returning the exception class, which callers would
        # otherwise silently treat as a valid index.
        raise ValueError(
            f"latitude {latitude} is outside the supported range [0, 83.00004990518016]"
        )
    return (latitude - 1.4210854715202004e-14) / 0.0022457331

# Clip a raster file to a longitude/latitude bounding box
def clip_tiff(file_path, lon_min, lon_max, lat_min, lat_max):
    """
    Open a raster file with rioxarray and crop it to the given bounding box.

    :param file_path: Path of the raster (TIFF) file to open.
    :param lon_min: Lower x bound, passed to ``clip_box`` as ``minx``.
    :param lon_max: Upper x bound, passed to ``clip_box`` as ``maxx``.
    :param lat_min: Lower y bound, passed to ``clip_box`` as ``miny``.
    :param lat_max: Upper y bound, passed to ``clip_box`` as ``maxy``.
    :return: The result of ``rio.clip_box`` on the opened raster.
    """
    bounds = {"minx": lon_min, "miny": lat_min, "maxx": lon_max, "maxy": lat_max}
    return rioxarray.open_rasterio(file_path).rio.clip_box(**bounds)


In [5]:
# Base directory containing the folders
base_dir = "E:\\Project\\Urban computing\\data\\Pixel Data"

# Dataset to store the clipped data as (year, month, clipped raster) tuples
dataset = []

# Loop through each year directory.
# os.listdir returns names in arbitrary order; sorting keeps the order of `dataset`
# (and therefore of anything stacked from it) deterministic.
for year in sorted(os.listdir(base_dir)):
    year_dir = os.path.join(base_dir, year)

    # Loop through each subdirectory in the year directory
    for root, dirs, files in os.walk(year_dir):
        # Sorting `dirs` in place makes os.walk descend in sorted order.
        # NOTE: the order is lexicographic, so it is chronological only if the
        # folder names are zero-padded or otherwise sort by date - confirm.
        dirs.sort()
        for file in sorted(files):
            if file.endswith("-CL.tif"):
                file_path = os.path.join(root, file)

                # Extract the 'month' from the directory name if necessary
                month = os.path.basename(root)  # or any other logic to determine the month from the folder name

                # Clip the data and add to the dataset
                clipped_data = clip_tiff(file_path, south_korea_lon_min, south_korea_lon_max, south_korea_lat_min, south_korea_lat_max)
                dataset.append((year, month, clipped_data))


In [24]:
# Keep only the clipped raster (third tuple element) of every dataset entry
data_arrays = [clipped for _, _, clipped in dataset]
# print(dataset[150][2])

# Build one array with a new leading axis (0th axis for time)
combined_numpy_array = np.stack(data_arrays, axis=0)

# Print the values at index 0 of entry 10 of the stacked array
print(combined_numpy_array[10, 0])
# np.save(arr=combined_numpy_array, file="south_korea.npy")


[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
